
    uki=                       d Z ddlmZ ddlmZmZmZ ddlZddlZddl	m
Z
mZ ddlZddlmZ ddlmZ ddlmc mZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm Z  ddlm!Z! ddlmZ" ddl#m$Z$ ddl%m&Z& ddl%m'Z' ddl%m(Z( ddl%m)Z* ddl+mZ, ddl+m-Z- ddl+m.Z. ddl/m0Z1 ddl/mZ2 ddl3m4Z4m5Z5m6Z6m7Z7m8Z8 ddlm9Z: e4e;cZ;Z<e5e=cZ=Z>e,j~                  Z?e,j                  Z@e,j                  ZAe,j                  ZBe,j                  ZCe,j                  ZD ej                  d      ZFd eF_G        d! ZHeFj                  eH       	 	 dHd"ZJeFj                  eJ       d# ZLeLeF_M        dId$ZN	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dJd%ZOeOeF_P        	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dJd&ZQeQe&j                  eF<   	 	 	 	 	 	 	 	 	 	 	 	 dKd'ZS	 	 	 	 	 	 	 	 	 	 dLd(ZT	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dMd)ZUddd*	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dNd+ZVeVe'j                  eF<   	 	 	 	 	 	 	 	 dOd,ZX	 	 	 	 	 	 dPd-ZY	 	 	 	 	 	 	 	 	 	 	 	 dQd.ZZeZej                  eF<   	 dR	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dSd0Z\ ej                  d1 ej                  d2d/      d34      Z_dTd5Z`	 	 	 	 	 	 dUd6Za e(j                  eFea       	 	 	 	 	 	 dVd7Zcecej                  eF<   d8 Zeeeej                  eF<   dWd9Zg e1j                  eF      	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dJd:       Zidd;eAeAd;i d/d/dddddd<	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dXd=Zj	 	 	 	 dYd>Zk e
e d?@      di d/d/ddddddA		 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dZdB       Zl	 	 	 	 	 	 d[dCZm	 ddDlnmoZp 	 ddElrmsZt y# eq$ r dZpY w xY w# eq$ r2  ej                   ej                  dFej                  f      G      ZtY yw xY w)\z-Module for calling pallas functions from JAX.    )annotations)CallableMappingSequenceN)partialreduce)Any)api)ad_util)api_util)checkify)config)core)effects)hijax)linear_util)state)api_boundary)	tree_util)typing)
FrozenDict)ad)batching)mlir)partial_eval)hlo_interpreter)
primitives)	discharge)types)safe_mapsafe_zip
split_listtuple_insertunzip2)numpypallas_callTc                     t        t        j                  d      fd       }t        j                  d      5   ||  cd d d        S # 1 sw Y   y xY w)NTinlinec                 .    t        j                  | i S N)pallas_call_pbind)argsparamss    V/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/jax/_src/pallas/pallas_call.py_jit_runz#_pallas_call_impl.<locals>._jit_runM   s     t.v..    F)r   r
   jitr   disable_jit)r.   r/   r1   s    ` r0   _pallas_call_implr5   K   sL    
3774 / !/ %  T?  s   A		Ac           
        t        |t        j                        rt        j                         }nGt	        |j                  dd       dd      rt        j                  t              }nt        j                  }t        d |D              }t        |||j                  g      \  }	}	}t        |      }
t        |      D ch c]!  \  }}t        |t        j                         r|# }}}|t#        |
      z
  x}rt%        d|       |
j'                         D ci c]  \  }}||
 }}}t        |       D cg c]n  \  }}t        |t(        j*                        rAt        j,                  |j.                  |j0                  |j2                  |j4                        n||v r|||      n|p } }}| |fS c c}}w c c}}w c c}}w )Ncompiler_paramshas_side_effectsFc              3  P   K   | ]  }t        |t        j                           y wr+   
isinstancer   AbstractRef.0as     r0   	<genexpr>z-_pallas_call_abstract_eval.<locals>.<genexpr>l   s     AaAu001A   $&z2input pinned buffers without input_output_aliases:)sharding)r;   mosaic_tpu_interpretInterpretParamsget_interpret_effectsgetattrgetjax_coreGenericEffectr,   
no_effectssumr"   num_dynamic_grid_boundsdict	enumeratestate_typesAbstractLinValset
ValueErroritemspallas_coreShapedArrayWithMemorySpaceShapedArrayshapedtype	weak_typerB   )	out_avals	interpretbackendinput_output_aliasesgrid_mappingavalsr/   effsnum_refs_inout_aliasesir?   	lin_avalsmissingin_idxout_idxoutin_aliasess                     r0   _pallas_call_abstract_evalrj   X   s    	/??@  557Dvzz+T24FN!!-0DD A5AA(58\-Q-Q"RS+!Q+,-&u- =TQQ : :;  =) =S///g/
Iy" # #:G:M:M:OPvw7F?P-P
 )2)(<	> %'1 Q F FG ##AGGQWWakk-.ZZ9 18=0Hu]7+,	 >) > 
D=
 Q>s   >&F0F66A3F<c                    ~| j                   S r+   )is_high)jaxprrb   r/   s      r0   _pallas_call_is_highrn      s    	r2   c                    i }d}t        |       D ]R  \  }}g }t        t        |j                                     D ]  }|j	                  |       |dz  } t        |      ||<   T |S )Nr      )rN   rangelenlo_tyappendtuple)r_   indicescounterrd   in_avallocal_counterrb   s          r0   _get_index_mappingrz      sv    ''e$ &jaM3w}}'( 7#lg }%GAJ& 
.r2   c                   t        d |D              rt        d      t        d |D              rt        d      t        j                  | d      }|j	                         5  t        j                  |      }d d d        j                  rJ |j                  }|j                  D ]/  }|j                  }|j                  j                  s&t        d       |D cg c]  }t        j                  |       }}t        ||      D cg c]:  \  }}|j                  r|j                  |      n|j!                  |      D ]  }| < }}}}|D cg c](  }|j                  r|j#                         n|gD ]  }| * }}}|j%                         }|j&                  D cg c]  }|j(                   }}||j*                     }||j,                     }||j.                     }t1        |      t1        |      z   t1        |      z   t1        |      k(  sJ t1        |j                        t1        |j2                        z   |j4                  z   t1        |j&                        k(  sNJ t1        |j                        t1        |j2                        |j4                  t1        |j&                        f       t7        |      } t7        |      }!g }"|D ]>  \  }#}$|#| v sJ |$|!v sJ t        | |#   |!|$         D ]  \  }%}&|"j9                  |%|&f        @ t;        j<                  ||||||	|
|||t?        |"      t?        |      |d}'t        j@                  ||'      S # 1 sw Y   xY wc c}w c c}}}w c c}}w c c}w )Nc              3  Z   K   | ]#  }t        j                  |      j                   % y wr+   )rH   get_avalhas_qddr>   xs     r0   r@   z(_pallas_call_to_lojax.<locals>.<genexpr>   s!     7!		1		%	%7s   )+z+pallas_call does not support QDD for inputsc              3  4   K   | ]  }|j                     y wr+   )r~   r>   avals     r0   r@   z(_pallas_call_to_lojax.<locals>.<genexpr>   s     ,$,s   z,pallas_call does not support QDD for outputs z0pallas_call does not support hijax for index_map)rm   r^   meshcost_estimater\   metadatar7   debugr[   r]   rZ   name)!anyNotImplementedErrorrH   ClosedJaxpr	trace_envpelower_jaxprconstsrm   block_mappingsindex_map_jaxprrl   r}   zipr~   
read_loval	lower_valrs   to_lojaxinvarsr   slice_index_opsslice_block_opsslice_scratch_opsrr   scratch_avalsnum_index_operandsrz   rt   r,   r-   ru   raise_lo_outs)(rm   r]   r^   r   r   r[   r7   r   rZ   r\   r   r   hi_argsclosed_jaxprclosed_lo_jaxprlo_jaxprblock_mappingr   r?   r_   r   r   lo_vallo_argslo_avallo_out_avalslo_grid_mappingvin_avalsscalar_prefetch_avalsoperand_avalsr   input_index_mappingoutput_index_mappingnew_input_output_aliasesrd   oi_loo_lolo_outss(                                           r0   _pallas_call_to_lojaxr      s    	7w77
K
LL,),,
L
MM%%eR0, 3nn\2O3###	#""(#22 m#33O$$
<  *1
1A8Q
1%
1#&ug#6 5 5a26,,DOOA.!%!25  5V 5' 5
 
&*lldjjl  , 
 !))+/&oo.aff.(."?#B#BC?::;-?<<=-	"	#c-&8	83< 

8}
  
 
_++	,s##0 
((
),/,@
A  
/
(
()	/
'
'(((	(//	C 
A +51+I6" 4da####$$$$-a02Fq2IJ 4
d%%tTl344 "!% !9:l#' 
		)W	--C3 3 25 /s$   !MM	?M#-M*M0Mc                  |j                   }|j                  rt        d      |j                  rt        |rt        d      |t        d      |D cg c]  }t	        |t
        j                          }}|D cg c]   }t        |      t
        j                  us|" }}|dg|j                  z  z   }t        j                  |d      }t        j                  ||g       \  }}|j                  |j                  c}\   t        |j                   t#        |       |j                  t#        |      g      \  }}}}g ||||}g }|j$                  D ]e  }t	        |t$        j&                        r8|j)                  |j+                  |j                   |j,                                 }|j/                  |       g |j)                  ||      }|r$t1        d|j2                   d	       t1        |       t        |j4                  t#        |       g      \  }}g ||||}|j)                  ||j6                  d
z  |j                  d
z        } |	6t9        d
|	j:                  z  d
|	j<                  z  d
|	j>                  z        }!nd }!tA        jB                  g | ||| |||d||!g |
|
|||d}"t        |"t#        |"      d
z  g      \  }#}$|#|$fS c c}w c c}w )Nz.interpret with dynamic grid bounds unsupportedz JVP with aliasing not supported.z,pallas_call with a mesh does not support JVPTr   )input_index)r   r   z&
The jaxpr for the jvp of pallas_call :   r   
num_inputsnum_outputsflopsbytes_accessedtranscendentals)rm   r^   r   r[   r   r]   r7   r   rZ   r\   r   r   )"
debug_inforL   r   r   r;   r   Zerotyper   rH   r   r   	jvp_jaxprrm   r   r"   r   rr   r   JaxprInputEffectreplaceindexr   rt   printfunc_src_infor   r   CostEstimater   r   r   r,   r-   )%primalstangentsrm   r]   r^   r   r   r[   r7   r   rZ   r\   r   r   r   tnonzero_tangentsnonzero_tangents_with_outputsr   
jvp_jaxpr_rb   r   primal_refsprimal_out_refstangent_refstangent_out_refsr   r`   effin_bmsout_bmsjvp_bmsjvp_grid_mappingjvp_cost_estimateout_flatout_primalsout_tangentss%                                        r0   _pallas_call_jvp_ruler      s   " *))
N
OO$$

@
AA	
L
MM?GH!*Q55HH!AAT!WGLL%@aA(A"2dVl>V>V5V"V%%eR0,,,|-JBO-*a""J$5$5-)R BLW|'?'?XOB>+.> N[M<M/M<LM&	$ c#w//0KKll9#3#3COO#DE  c 	KK vt<)
	3J4L4L3MQ
OP	)|::S\NK/&'2f2v22'2'!))((1,**Q. * 
 $-%%%=777M999   #%%()(i((  )CMQ4F3GH+|	l	""y IAs   "K< KKc                F   fd}t         j                  gj                  j                  }t	        j
                  t        j                  |j                  j                  j                  j                               t        j                  |            \  }}| j                         5  t        j                  ||      \  }	}
}d d d         |       }j                   }t"        j$                  u r|}j&                  }nht)        |t         j*                        }j&                  j,                  }t)        ||      }t/        j0                  |j&                  j2                        }t/        j4                  	      }j7                  ||||      S # 1 sw Y   xY w)Nc                `   |}t        j                  j                  j                  j                  j                  g| }t        j                  j                  |      }t        |t              s|f}t        |      }t        j                  ur|j                  |        t        |      S r+   )rH   
eval_jaxprr   rm   r   r   tree_unflattenindex_map_out_treer;   ru   listr   
not_mappedinsert)new_idxr.   drop_last_argsrv   unflat_indicesr   dims        r0   _block_map_functionz1_batch_block_mapping.<locals>._block_map_functionP  s    N!!%%++%%,, 
G
 --(('3Nne,&(n.)N
(%%%C)  r2   r   )block_shape
array_avalr   r   )rT   index_map_grid_avalr   r   r   flatten_fun_nokwargslu	wrap_initrm   r   with_unknown_namesr   tree_structurer   r   trace_to_jaxpr_dynamicr   r   r   r   r#   squeezedrW   rH   rV   rX   r   r   )r^   	axis_sizer   r   r   r   	idx_avalsblock_mapping_flat_fnout_tree_thunkblock_mapping_jaxprrb   r   new_index_map_out_treerW   new_block_shapenew_array_avalarray_shaperm   s      ``             r0   _batch_block_mappingr   I  s~   !  ..X1N1N1W1WX)*2*G*Gll&+;;AALL__acy)++'  %'%>%>&"F *+

#
#%HO"--N"5#{/C/CDO**00K{C;K))]--33N 

2F
;%			?*8/42H 
 
J J) s   -FF c               *   t        |       }t        |      }|D ]c  \  }}||   }d||<   |t        j                  u rt        j                  ||   |dd      ||<   A|dk7  sGt	        j
                  | |   |d      ||<   e t        |      t        |      fS )a%  Broadcast input/output operands.

  When we have input/output aliasing, since the output will be mapped, we need
  to make sure to broadcast the input across that dimension if it is not
  mapped. If the input is mapped, but on a different axis, we transpose the input
  to match the output.
  r   N)r   r   r   	broadcastjnpmoveaxisru   )	r.   dimsr]   r   args_dims_r   rb   r   s	            r0   _broadcast_input_output_aliasesr    s      t*%
t*%, Cnk1

CE+
h!!!#--

iD2eK	<<[(93BeKC 
uuU|	##r2   c               .   	
 st        d      t               D ch c]&  \  }}|t        j                  ur|j                  |   ( c}}\  }t         |      \   j                  D cg c]L  }t        j                  t        |j                  j                  d|      |j                  j                        N }}d	 	
fd}t        j                  d|||d      }|dt        |      z  fS c c}}w c c}w )
a  Batch the pallas_call by calling it in loop over the batch size.

  This function provides a fallback implementation of batching a pallas_call
  for the cases in which adding a batch dimension to the pallas grid is not
  supported. This is currently the case when the batched dimension corresponds
  to a dynamic axis or a scalar prefetch argument.

  This implementation builds a HLO loop that dynamic_slices the inputs according
  to the current iteration index and dynamic_updates an (initially empty) output
  allocation.
  'vmapping pallas_call with no arguments.r]   r   r   )rX   c                   g }t              D ]f  \  }}|t        j                  u r|j                  |       *|j                  t	        j
                  t        j                  || d|      |             h t        j                  |
	d}t        |      D ]$  \  }}t        j                  ||   || d      ||<   & |S )Nrp   )operandstart_index
slice_sizeaxisr  rm   r^   r   r]   r   r[   r7   r   rZ   r\   r   r   r   )r   r   r   rt   r  squeezelaxdynamic_slice_in_dimr,   r-   rN   dynamic_update_index_in_dim)batch_indexr   
batch_argsargr   	batch_outrd   batch_out_arrayr.   r\   r7   r   r   r  r^   r]   r[   rm   r   r   r   rZ   s           r0   bodyz'_batch_with_explicit_loop.<locals>.body  s    JdO 
S 
##	##KK(( + 	 
	

" ""	!1'#I (	2 ?00
(

	eAh Lr2   F)unrollr   )r  jax_typing.Arrayr   list[jax_typing.Array]returnr  )r   r   r   r   rW   r  block_mappings_outputr  emptyr#   r   rX   r  	fori_looprr   )r.   r  rm   r^   r   r]   r   r[   r7   r   rZ   r\   r   r   r  r   r   bminitial_stater  results   ``````````````       r0   _batch_with_explicit_loopr&    s   8 

G
HH $o
#s	H''	' 
iin,9 /

/	*$ 22  
iiR]]00!Y?mm))+- , , ,\ ==It]5I&	F#	##Ks   +D<AD)r   r   c               8	   |t        d      	 	 	 	 	 	 dd}t        t        | |            D ch c])  \  }\  }}|t        j                  ur|j
                  |   + c}}}\  }|dk(  r_t        || |      } t        j                  | ||||||||	|
|||d}|D cg c]  }t        j                  |d       c}dt        |      z  fS t        | |j                  g      \  }} t        ||j                  g      \  }}t        d t        ||      D              rt        |||      }n2t!        d |D              rt#        || z   ||z   ||||||||	|
|||	      S 	 ~|j$                  rt        | |j$                  g      \  }} t        ||j$                  g      \  }}t        d
 t        ||      D              r9t        |||      }t        j                  gt        |      z  }g || } g ||}nt#        || z   ||z   ||||||||	|
|||	      S |st        d      |j&                  }|j(                  D cg c]  }|j*                   }}t-        | |||      \  } }t/        |      dg|j0                  z  z   }|j$                  }|j2                  }||t        |      |z
   }t        t5        t6        ||      |||d  |      } |j8                  j;                  |j<                        \  }!}"|"rJ t>        j@                  f|!z   }#tC        jD                  |#i f      \  }$}%|jG                  |g|jH                  tK        |       tK        |$      |%|dtK        d |jL                  D              z         }&|	FtO        |tP              r6tS        |	jT                  |z  |	jV                  |z  |	jX                  |z        }'nd }'t        d |
D              sJ g }(|
D ]v  })|)jZ                  j]                  t_        |)jZ                  j`                  dd             }*t_        |)j
                  d|      }+|(jc                  |)j]                  |+|*             x tK        |(      }(t        j                  g || ||&||||||'|(|||d}|dt        |      z  fS c c}}}w c c}w c c}w )Nz1pallas_call with a mesh does not support batchingc                X    |t         j                  u r| S t        j                  | |      S )Nr  )r   r   r  r  )r   bdims     r0   _maybe_squeeze_out_bdimz;_pallas_call_batching_rule.<locals>._maybe_squeeze_out_bdim  s(     x"""h;;qt$$r2   rp   r  r   r  c              3  n   K   | ]-  \  }}|t         j                  u xs |j                  |   d k(   / ywrp   Nr   r   rW   r>   r  r)  s      r0   r@   z-_pallas_call_batching_rule.<locals>.<genexpr>A  s<      	
#t h!!!9SYYt_%99	   35c              3  @   K   | ]  }|t         j                  u  y wr+   )r   r   )r>   r)  s     r0   r@   z-_pallas_call_batching_rule.<locals>.<genexpr>H  s     
It4x***
Is   )r.   r  rm   r^   r   r]   r   r[   r7   r   rZ   r\   r   r   c              3  n   K   | ]-  \  }}|t         j                  u xs |j                  |   d k(   / ywr,  r-  r.  s      r0   r@   z-_pallas_call_batching_rule.<locals>.<genexpr>e  s<      C 	###;syy!';;r/  r  r	  c              3  &   K   | ]	  }|d z     ywr,  r   r=   s     r0   r@   z-_pallas_call_batching_rule.<locals>.<genexpr>  s     I!AIs   )gridr   index_map_avalsindex_map_treer   vmapped_dimsr   c              3  P   K   | ]  }t        |t        j                           y wr+   )r;   rH   rV   r   s     r0   r@   z-_pallas_call_batching_rule.<locals>.<genexpr>  s     JZh223JrA   )spec)rW   rB   )r   r  r)  int | batching.NotMappedr  r  )2r   rN   r   r   r   rW   mapr,   r-   r  expand_dimsrr   r"   rL   allr    r   r&  r   r   r   r   r  r   r   num_scratch_operandsr   r   r5  	unflattenr4  rT   r   r   tree_flattenr   r3  ru   r6  r;   intr   r   r   r   rB   updater#   r8  rt   ),r.   r  rm   r^   r   r]   r   r[   r7   r   rZ   r\   r   r   r*  rd   r   dr   outdynamic_grid_argsdynamic_grid_dimsscalar_argsscalar_bdimsbdimsr   r   r_   all_dimsr   r=  avals_to_batchbatched_block_mappingsindex_map_tree_argsindex_map_tree_kwargsbatched_index_map_argsbatched_index_map_avalsbatched_index_map_treebatched_grid_mappingbatched_cost_estimatebatched_out_avalsr   rB   rW   s,                                               r0   _pallas_call_batching_rulerT    s   " 

; %	%!9%% -6c$o,F 1 1yq&1aH/// 
 1*)!^&d3D


	!1'#C ,//aCOOAq!/C@@ '
\112T '
\112T 	 	,.?@	  !!24E 
I7H
II %%%!1'# " 	$$"4,*I*I)JKK$TL,K,K+LML%
  [,7  4k<Pk))*S-==l"{"T"d$|$e$d 'T!e##3)% " 

G
HH... <<
(a166
(%
( /
D';y*$ $Z1# 8 888(#66%::
 +SZ:N-NP.



 !"#	 0</J/J/T/T""0$,,""	"';;=@SS4=4J4Jr"5$11 &--*))*1234++%I|/H/HIII .  :i#=(!!I-$33i?%55	A !	J	J	JJ	J Jd}}##dmm6H6H!T)R#SHQ	2ET[[ux[HIJ -. 			 '/%)!	#  
dSXo	m1( 0Z )s   .RR6Rc                2   t        j                  |      \  }}t        t        j                  |      }g || j
                  }t        j                  |j                  d      5  t        j                  | ||g| \  }}}	d d d        	fS # 1 sw Y   xY w)Nr   )r   r?  r:  rH   r}   r   rT   tracing_grid_envr3  r   jaxpr_to_checkify_jaxpr)

body_jaxprenabled_errorserrorr^   err_valserr_treeflat_err_and_in_valschecked_jaxprout_treeerror_effectss
             r0   !checkify_pallas_kernel_body_jaxprra    s     !--e4(H""H-(:8:j&9&9:##L$5$5r: E-5-M-MNH.E/C.E*M8]E 
-	//E Es   #BBc                8   t         j                  |vr| S t        |j                  g      \  }}t	        j
                  j                  ||      t        |j                  j                  g      \  }t        |      t        fdj                  D              t        j                  d      ft              z  rt        t        j                         ndj"                  D cg c]  }t        d |j$                  D                c}j"                  D cg c]!  }t'        j(                  |j$                        # c}fdfdfd}t+        j,                  t        j                  d      f      \  }	}
t/        j0                  t3        j4                  |t/        j6                  d|d	i       
      |
      \  }}t'        j8                  j                  d      5  t;        t<        j>                  |	      }tA        jB                  |tE        |            \  }}}t=        jF                  ||      }d d d        t        jH                  t         jJ                  | |	      \  }}|S c c}w c c}w # 1 sw Y   >xY w)Nc              3  \   K   | ]#  }|t         j                  ur|n
t               % y wr+   )rT   dynamic_grid_dimnext)r>   r?   dynamic_grid_args_iters     r0   r@   z0pallas_call_checkify_oob_grid.<locals>.<genexpr>  s6        K000a&'(s   ),r   rp   c              3  P   K   | ]  }t        |t        j                           y wr+   )r;   rT   Squeezed)r>   bs     r0   r@   z0pallas_call_checkify_oob_grid.<locals>.<genexpr>  s     HAJq+../HrA   c                    | ^}}|k  S r+   r   )carryrd   rb   num_iterationss      r0   condz+pallas_call_checkify_oob_grid.<locals>.cond  s    EA~r2   c           	        | \  }}}	j                   	j                  |      }n't        	fdt        t        |            D              }t	        j
                  |      5  	j                  D cg c]  }|d n |j                  |g  }}d d d        t        t        j                  g 
      }|dz   t        j                  |      |fS c c}w # 1 sw Y   KxY w)Nc              3  r   K   | ].  \  }\  }}|j                   vrt        j                  ||       0 y wr+   )r6  rT   GridAxis)r>   r   idxri  r^   s       r0   r@   z>pallas_call_checkify_oob_grid.<locals>.body.<locals>.<genexpr>  s<      c8C111 

sA
&s   47rp   )local_grid_envru   rN   r   rT   grid_envr   compute_start_indices_interpretr:  r   _dynamic_slice_get_next_indices)rk  rd   loop_idxblocksrr  r#  start_indicesblock_shapesr3  r^   
input_argsis_indexing_dimoutput_argsscalarss          r0   r  z+pallas_call_checkify_oob_grid.<locals>.body  s   Ax"".#228TBn (Xt)<= n
 
		n	- 1 !//1 *$"D""D"DX"XPW"X
X1m 11 //#*#{#_6FE?44T8DfMM11 1s   #C2CCCC%c                    t        j                  t        j                  d      D cg c]  }t        j                  |       c}f      S c c}w )Nr   )r  
while_loopr  int32zeros)rb   rW   rz  r  rm  grid_start_indicess     r0   fz(pallas_call_checkify_oob_grid.<locals>.f,  sC    >>dSYYq\#5Vb7cU		%8H7cd 7cs   Azcheckify oob_grid_accessr  r   r   )&r   OOBErrorr"   rL   r   _initialize_output_valsr   r   r   iterru   r3  r  r  rr   r   multiplyr   r   rT   _get_block_shaper   r?  r   r   r   r   r   rV  r:  rH   r}   r   r   r   r   checkify_jaxprindex_checks)rZ  rY  r.   r^   r]   rD  rb   r#  r  	flat_argsjaxpr_in_treewrapped_loopavals_intraced_loopr   	out_errorrz  r  rm  rf  r3  r  r{  r|  rl  r}  r~  s      `            @@@@@@@@@@@r0   pallas_call_checkify_oob_gridr    sa   
 n,L&
\112T  778Z8Z$(*>@+%
\,,$$&':q   12	     
$
 		!T2	CLL$/N N ++
 HHH/ ++
 ""2>>2,N N& '33SYYq\OD)]11ll1&112L23T2?@ 	/,
 ##L$5$5r: <8$$i0H66d8n&KF&&{F;K	<
 ((8((%<,)Q	eN< <s   :#J.&J?AJJc          
       56789 t        | ||||      } t        ||j                  |j                  g      \  }	}
}t	        |
      9t	        |      7|j
                  8t        j                  |      }t        ||| |      \  }}}| j                  |      } t        j                  |       \  }}t        t        j                  |      }|j                  D cg c]  }|j                    }}t	        |      6t#        |      }g ||}t        j                  |      }t%        j&                  |j(                  d      5  t+        j,                  |||g| \  5}}d d d        56789fd}d }t        ||      }t        ||      }|D cg c]0  }t/        j0                  |t$        j2                  j4                        2 }}t        |978g      \  }}}}g ||||||} t        j                  |       \  }!}"t7        j8                  d|| i       }#t7        j:                  t=        j>                  ||#      |"      \  }$}%t%        j&                  |j(                  d      5  t        j@                  |$|!      \  }&}}d d d        t%        jB                  d d       gt	        |      z  }'tE        t        jF                  |'      d         \  }(}t#        d |(D              })t        tI        t$        jJ                  |jL                  |jN                  |j(                  |jP                  d	      |'|)|      }*t        |jR                  7g      \  }+},|jU                  g |*|+|*|,|jV                  t	        |*      z   |j
                  t	        |*      z   
      }-t#        6fd|D              }t#        9fdtY        6      D              |z   }.g |
||}/g ||}0t[        j\                  g |	|/&||-|.|0d|}1t        |16g      \  }2}3|2D cg c]  }|d   	 }2}t        j^                  |2      \  }4}|4|3fS c c}w # 1 sw Y   xY wc c}w # 1 sw Y   xY wc c}w )Nr   c                    t        | g      \  }}}}}}|D cg c]  }|d   	 }}g |||||}	t        j                  j                        t        |	      k(  sJ t	        j
                  j                  j                  g|	 }
t        |
g      \  }}t        |||      D ]  \  }}}||d<   ||d<    g S c c}w )Nr   r   )r"   rr   rm   r   rH   r   r   r   )r.   r~  in_error_refsinputsout_error_refsoutputsscratcherr_refinput_error_vals
jaxpr_argsresult_flatoutput_errorsrb   in_refout_refrZ  r^  num_err_valsnum_kernel_inputsnum_kernel_outputsnum_scalarss                   r0   checked_kernel_fnz4pallas_call_checkify_rule.<locals>.checked_kernel_fnt  s   		l	L*<	>
?WmV^Wg 6CC'CC L#KgKKK7KJ}""))*c*o===%%]11@4>@K!+~>M1"%~}#6 fTlgdm I Ds   Cc                T   t        | t        j                        r;| j                  }t        j                  d| j                  z   || j
                        S t        | t        j                        r#t        j                  | d| j                  z         S t        j                  | gg      S )N)rp   rp   )rX   rY   )r;   rH   rV   rX   rW   rY   
jax_typingArrayr  reshapearray)r  rX   s     r0   _ensure_2d_error_shapez9pallas_call_checkify_rule.<locals>._ensure_2d_error_shape  s    #x++,iie!!&399"4E,/MM; ;	C))	*[[fsyy011YYwr2   checkify_pallasr   r   c              3  L   K   | ]  }d t        j                  |         yw)zerrors[Nr   keystrr>   ps     r0   r@   z,pallas_call_checkify_rule.<locals>.<genexpr>  s#     MA')"2"21"5!67M   "$T)r4  r5  r3  r6  r   r   c              3  8   K   | ]  \  }}|z   |z   f  y wr+   r   )r>   rd   r   r  s      r0   r@   z,pallas_call_checkify_rule.<locals>.<genexpr>  s)      K+1Aqq~q~&Ks   c              3  ,   K   | ]  }|z   |f  y wr+   r   )r>   rd   r  s     r0   r@   z,pallas_call_checkify_rule.<locals>.<genexpr>  s      *7q}a*7s   )rm   r[   r^   r]   rZ   r  )0r  r"   rL   r   rr   r   r   close_jaxprra  _add_placeholder_effectsr   r?  r:  rH   r}   r   r   ru   rT   rV  r3  r   rW  r   r<   MemorySpaceERRORr   r   r   r   r   r   	BlockSpecr$   tree_flatten_with_pathr   $_convert_block_spec_to_block_mappingr4  r5  r6  r   r   r   rq   r,   r-   r   ):rZ  rY  rm   r[   r]   r^   rZ   r.   kwargsdynamic_grid_boundsr~  r   _jaxprrb   r`  r[  err_in_treeshaped_err_avalsr   input_avalsshaped_input_avalscheckify_in_avalsclosed_kernel_jaxprerror_out_treer  r  err_valerror_memref_avalshaped_scalar_avals
input_avaloutput_avalscratch_avalretrace_in_avalsjaxpr_flat_avalsr  r   wrapped_kernel_with_errr   final_jaxprerror_block_specserror_pathserror_originserror_block_mappingsinput_block_mappingsoutput_block_mappingsgrid_mapping_with_errorinput_output_aliases_with_errornew_vals_innew_out_avalsr%  errorsresults	new_errorr^  r  r  r  r  s:                                                        @@@@@r0   pallas_call_checkify_ruler  ?  s    (~(,l(<>% (2
\11,,.($w G+$i#// &,>NE<9&!]

(
(
7%#007(K**H5 "'.A.+.X,[),( ,*,u-##L$5$5r: N'/'G'G^[(N;L(N$M>1N :  /1AB'2( >NO29 (({&&,,. O O?I;(9;MN@P<z;G* G-> G G(G+6G9EG$-$:$:;K$L!M""#46G.4*,4,I,Ill$<m-M)> ##L$5$5r: 311!13KA3 #,,T489C@P<QQ)::;LMaPQ.+qMMM-

:
:&66%44  #00  1;!!$5#719--(00E+ E.B E+E.CE((3/C+DD**S1E-FF	 1   K5IK K$) *7"'"5*7 %79M%N! -',H,t,+1$1y1- 2 [ 
(8 & v~6/&')/0gGDM0&0)).&A,)Q	G	i /N N`O3 3` 1s*   P3;P85QQ
Q8Q
QFc           
     f   t        j                  t        j                  | |      |      \  }}t	        j
                  ||      }|j                         5  t        j                  d      5  t        j                  d      5  t        j                  ||      \  }	}
}d d d        r|D cg c]3  }t        t        j                  |      x}t        j                         s|5 }}|rJt        j"                         dj%                  fd|D              }t'        d|j(                   d| d      d d d        d d d         |       }|s3|t+        j,                  d       k7  rt'        d|j(                   d|       	t/              fS # 1 sw Y   xY wc c}w # 1 sw Y   kxY w# 1 sw Y   oxY w)	Nr   Fz, c              3  J   K   | ]  }t        j                  |        y wr+   )rH   pp_aval)r>   r   ctxs     r0   r@   z)_trace_kernel_to_jaxpr.<locals>.<genexpr>  s#      $
,0HT3'$
s    #z'The kernel function in the pallas_call z captures constants [z"]. You should pass them as inputs.z* should return None. It returns a PyTree: )r   r   r   r   r   wrap_with_transformsr   r   
_check_vmamutable_array_checksr   r   r;   rH   r}   r   r<   JaxprPpContextjoinrR   r   r   r   ru   )funr   r^   kernel_avalskernel_in_treekernel_in_transformsindexerwrapped_kernel_funr   rm   rb   r   cr   consts_avalspp_consts_avalskernel_out_treer  s                    @r0   _trace_kernel_to_jaxprr    s    (0'D'Dll3:.(@$n!66.  
!2!25!9 
		$	$U	+ ,22
l,eQ,  H$5$5a$88D%:K:KL l 
 
%%')) $
4@$
 
 (() * !!CE
 	

 
* #$/	_	(@(@(FF

1*2J2J1K L44C3D	FG G 
f	3, ,
 
 
 
sI   F',FF
F+8F#AF1F'
FFF$	 F''F0jax_pallas_use_mosaic_gpuJAX_PALLAS_USE_MOSAIC_GPUz[If True, lower Pallas kernels to the experimental Mosaic GPU dialect, instead of Triton IR.)defaulthelpc                     t        d|  d      S )Nz&Cannot lower pallas_call on platform: z. To use Pallas on GPU, install jaxlib GPU 0.4.24 or newer. To use Pallas on TPU, install jaxlib TPU and libtpu. See https://docs.jax.dev/en/latest/installation.html.rR   )platforms    r0   _unsupported_lowering_errorr    s     	.xj 9; ;
 r2   c          
        |d   j                   rt        d      |rqt        |t        j                        rt        t        j                  fd|i|}nt        t        j                  fdi|} t        j                  |d      | g| S 	 	 dd}	 	 dfd}	 	 dfd	}t        j                  | d
t        ||||      d t        j                  g|d|i|S )Nrm   z*Cannot lower a pallas_call with constants.interpret_paramsr\   T)multiple_resultsc                    t        d      )Nz0Only interpret mode is supported on CPU backend.r  )r  in_nodesr/   s      r0   cpu_loweringz+_pallas_call_lowering.<locals>.cpu_lowering1  s     G
HHr2   c                |    rdk7  rt        d      t        t        d      t        j                  | g|i |S )N
mosaic_tpuz%Only mosaic backend supported for TPUtpu)rR   mosaic_tpu_backendr  pallas_call_tpu_lowering_rule)r  r  r/   r\   s      r0   tpu_loweringz+_pallas_call_lowering.<locals>.tpu_lowering6  sR     7l*>??!'..;;  r2   c                    	 xdk(  r ddl m} n;xdk(  r ddlm} n.t        j                  rddl m} nddlm} n	 t        d        |j                  | g|i |S # t        $ r}t        d      d }~ww xY w)N
mosaic_gpur   pallas_call_registrationtritonzUnsupported backend: gpu)	jax._src.pallas.mosaic_gpur
  jax._src.pallas.triton_PALLAS_USE_MOSAIC_GPUvaluerR   ImportErrorr  pallas_call_lowering)r  r  r/   r
  er\   s        r0   gpu_loweringz+_pallas_call_lowering.<locals>.gpu_loweringA  s    /
I
E#))KG27)<=
= 9#88    /'../s   A	A" "	A;+A66A;r&   )cpur  cudarocmr[   )r  mlir.LoweringRuleContextr  z'mlir.ir.Value | Sequence[mlir.ir.Value])	constvarsrR   r;   rC   rD   r   interpret_pallas_callr   pallas_call_hlo_interpretr   	lower_funlower_per_platformrM   r   rJ   )	r  r[   r\   r  r/   implr   r  r  s	     `      r0   _pallas_call_loweringr    s    G_
A
BB)1AAB)?? &/d _>> $d 74>>$6sFXFFIEI
	E	E. 
	 	 m!%,*6+7+7"9 "&!(!3!3	
+ #+	
+ ,5	
+ $*	
+ 	+r2   c                     ~ ~y)Nr&   r   )primr/   s     r0   _pallas_custom_str_eqn_compactr"  g  s     F	r2   c                   |D cg c]  }|j                    }}|j                         5  t        j                  |d|i|cd d d        S c c}w # 1 sw Y   y xY w)Nr^   )r   r   r,   abstract_eval)ctx_factoryr^   in_atomsr/   r   r   s         r0   _pallas_call_typecheck_ruler'  q  se    &'aff'(' &&	 ,06  ( s   AAAc                R   | xt         j                  d x\    t        j                  j                  r`| j
                  t        d      t        j                  | j                  | j                  t        j                         | j
                        S t        j                  | j                  | j                  t        j                               S  xt        j                  d x\    | j                         S  t        j                  d x\   | S  	 t!        |       t        j"                  v r!t        j"                  t!        |          |       S t%        | d      rt%        | d      st        dt!        |              t        j                  | j                  | j                        S )	Nr   zWhen `check_vma=True` on `jax.shard_map`, `vma` on `jax.ShapeDtypeStruct` must not be `None`. Please specify how the output should be varying across mesh axes using the `vma` argument of `jax.ShapeDtypeStruct` or set `check_vma=False` on `jax.shard_map`.)rW   rX   rB   vma)rW   rX   rB   rW   rX   zInvalid out_shape type: )rW   rX   )rH   ShapeDtypeStructr   r  r  r)  rR   rV   rW   rX   get_cur_mesh_shardingrT   	MemoryRefget_array_avalr   HiTyper   _out_shape_to_aval_mappinghasattr)	out_shapes    r0   _convert_out_shape_to_avalr2  y  s\   	$	"	"	$				 	 == "# # ##//3359==J 	J !!	y+3+I+I+KM M 
% 
!			 %%'' 
!	 
	
	iKBB	B55d9oF
 	
 i)gi.I3DO3DEFF!!	yOOr2   c                 ./ ~t        d j                  D              sJ t        j                        /t        | /g      \  }}t        d |D              sJ |D cg c]:  }t	        j
                  |j                  t        j                  j                        < }}t        j                  t        j                  j                        g/z  }t        ||      D cg c]O  \  }}|j                  d|j                  |j                  |j                  |j                  |j                   |      Q }}}t        |j"                  |j$                  g      \  }}g ||||}|j'                  ||j$                  /z   |j(                  /z         .t+        /      D cg c]  }||j,                  z   |f }}|D ]  \  }}|j/                  |/z   |/z   f         |D cg c]  }|j                   }}g ||
}t        |/|j0                  |j,                  g      \  }}}} ./fd}!t        j2                  D "cg c]  }"|"j4                   c}"|j,                  |j$                  |j(                  g      \  }#}$}%}&t7        j8                  t;        j<                  |!j>                  jA                               g |#||$||%|&      \  }'}(})tC        jD                  g |)|||| |'tG        |      .|||||	||||d	}*t        |*/g      \  }+},|+d gt        |      z  z   }-|-|,fS c c}w c c}}w c c}w c c}w c c}"w )
Nc              3  d   K   | ](  }t        |j                  t        j                         * y wr+   )r;   r   r   r<   r>   r   s     r0   r@   z4_pallas_call_state_discharge_rule.<locals>.<genexpr>  s!     LqZ 1 12Ls   .0c              3  P   K   | ]  }t        |t        j                           y wr+   r:   )r>   ref_avals     r0   r@   z4_pallas_call_state_discharge_rule.<locals>.<genexpr>  s     OZ%"3"34OrA   )memory_space )originr   r4  r5  r3  r6  r   r   c                     t        | j                  j                  j                  g      \  }}}}t        |	g      \  }}t        |	g      \  }}~t	        j
                  |g||||  g S r+   )r"   r   r   r   rH   r   )
r.   
index_argsin_argsout_args	rest_argsref_in_argsref_out_argsrm   new_grid_mappingra   s
          r0   _rewritten_bodyz:_pallas_call_state_discharge_rule.<locals>._rewritten_body  s    /9224D4O4O++-0.,J9 &gz:K'8*=L({'*14<?H Ir2   r   )rm   r]   r^   r   r   r[   r7   r   rZ   r\   r   r   )$r<  r  rr   r"   r   r<   
inner_avalrT   r  ANYr  r   to_block_mappingr4  r5  r3  r6  r   r   r   r   rq   r   rt   rL   r   r   r   r   r   r   r   r   r,   r-   ru   )0r  	avals_outrm   r]   r^   r   r   r[   r7   r   rZ   r\   r   r   r.   	ref_avalsrest_in_avalsr7  ref_block_specs
block_specref_block_mappingsin_block_mappingsout_block_mappingsnew_block_mappingsrd   r   r   ref_out_avalsr  ref_argsr  index_operandsr?  rC  r   r4  jaxpr_in_avalsjaxpr_out_avalsjaxpr_rest_avals	new_jaxprrb   r   r   refs_outrestupdated_vals_inrB  ra   s0     `                                           @@r0   !_pallas_call_state_discharge_rulerZ    s   $ 	LEOOL	LL	L!('8*=)]	OYO	OO	O
  	  


{66::)  )@)@)D)DE/ #&i"A (J !!((&66%44  #00 "   +5!!L$;$;#<+''  	 "))'((83**X5 * 7
 9>h34q<***A.  # Bda##Q\1x<$@AB7@A88&&A-A.M.I.-=G



.
.

)
)>:(  <<
(a166
(--%%&& E/>?4D 22ll?#..AACE  	
  )Q    	
   !9:#%!#(& h
3.(DvM(:::/	$	Q2
 B0 )s    ?L0"AL5#L;$M Mr   )	grid_specr3  in_specs	out_specsscratch_shapesr]   r   r[   r   r7   r   r\   r   c               R   |t        j                  ||||      }nL|rt        d|       |t        urt        d|       |t        urt        d|       |rt        d|       ~~~|&t	        |t         j
                        r|j                  }t        | |||||	|
||||      S )a  Entry point for creating a Pallas kernel.

  In contrast to :func:`jax.experimental.pallas.kernel`, this entry point
  assumes that the kernel will be executed over a ``grid``.

  See `Pallas Quickstart <https://docs.jax.dev/en/latest/pallas/quickstart.html>`_.

  Args:
    kernel: the kernel function, that receives a Ref for each input and output.
      The shape of the Refs are given by the ``block_shape`` in the
      corresponding ``in_specs`` and ``out_specs``.
    out_shape: a PyTree of :class:`jax.ShapeDtypeStruct` describing the shape
      and dtypes of the outputs.
    grid_spec: An alternative way to specify ``grid``, ``in_specs``,
      ``out_specs`` and ``scratch_shapes``. If given, those other parameters
      must not be also given.
    grid: the iteration space, as a tuple of integers. The kernel is executed
      as many times as ``prod(grid)``.
      See details at :ref:`pallas_grid`.
    in_specs: a PyTree of :class:`jax.experimental.pallas.BlockSpec` with
      a structure matching that of the positional arguments.
      The default value for ``in_specs`` specifies the whole array for all
      inputs, e.g., as ``pl.BlockSpec(x.shape, lambda *indices: (0,) * x.ndim)``.
      See details at :ref:`pallas_blockspec`.
    out_specs: a PyTree of :class:`jax.experimental.pallas.BlockSpec` with
      a structure matching that of the outputs.
      The default value for ``out_specs`` specifies the whole array,
      e.g., as ``pl.BlockSpec(x.shape, lambda *indices: (0,) * x.ndim)``.
      See details at :ref:`pallas_blockspec`.
    scratch_shapes: a PyTree of backend-specific temporary objects required
      by the kernel, such as temporary buffers, synchronization primitives,
      etc.
    input_output_aliases: a dictionary mapping the index of some inputs to
      the index of the output that aliases them. These indices are in the
      flattened inputs and outputs (ignoring None values).
    debug: if True, Pallas prints various intermediate forms of the kernel
      as it is being processed.
    interpret: runs the ``pallas_call`` as a ``jax.jit`` of a scan over the
      grid whose body is the kernel lowered as a JAX function. This does not
      require a TPU or a GPU, and is the only way to run Pallas kernels on CPU.
      This is useful for debugging.
    name: if present, specifies the name to use for this kernel call in
      debugging and error messages. To this name we append the file and line
      where the kernel function is defined, .e.g: `{name} for kernel function
      {kernel_name} at {file}:{line}`. If missing, then we use `{kernel_name} at
      {file}:{line}`.
    compiler_params: Optional compiler parameters. The value should either be a
      backend-specific dataclass
      (:class:`jax.experimental.pallas.tpu.CompilerParams`,
      :class:`jax.experimental.pallas.triton.CompilerParams`,
      :class:`jax.experimental.pallas.mosaic_gpu.CompilerParams`) or a dict
      mapping backend name to the corresponding platform-specific dataclass.
    backend: Optional string literal one of  ``"mosaic_tpu"``, ``"triton"`` or
      ``"mosaic_gpu"`` determining the backend to be used. None means let Pallas
      decide.
    metadata: Optional dictionary of information about the kernel that will be
      serialized as JSON in the HLO. Can be used for debugging and analysis.

  Returns:
    A function that can be called on a number of positional array arguments to
    invoke the Pallas kernel.
  z=If `grid_spec` is specified, then `grid` must be `()`. It is zLIf `grid_spec` is specified, then `in_specs` must be `no_block_spec`. It is zMIf `grid_spec` is specified, then `out_specs` must be `no_block_spec`. It is zGIf `grid_spec` is specified, then `scratch_shapes` must be `()`. It is )	r[  r]   r   r[   r   r7   r   r\   r   )rT   GridSpecrR   no_block_specr;   CompilerParamsBACKEND_pallas_call)kernelr1  r[  r3  r\  r]  r^  r]   r   r[   r   r7   r   r\   r   s                  r0   r&   r&     s   h $$T8YOI 6#$ $ }$''/j23 3 %''0k34 4 *+-. . Hi_O[5O5OP%%G	/%!
 r2   c                   | t        i       S t        | t              r| j                  | i} t        | t              sJ | j                         D ]b  \  }}|dvrt        d|       t        |t              st        d| d|       |j                  |k7  sIt        d|j                   d|        t        | t               st        |       } | S )N)r  r  r  z$Unknown backend in compiler_params: z'Unexpected compiler_params for backend z: z)Inconsistent backend in compiler_params: z != )r   r;   rb  rc  r   rS   rR   )r7   r\   r/   s      r0   _normalize_compiler_paramsrg    s     b>0&..@O	OW	--	-(..0 ogv<<=gYGHHfn-3G9Bvh
G  ~~ 5fnn5E FY  
OZ	0 1O	r2   z#jax.experimental.pallas.pallas_call)repro_api_name)	r   r]   r   r[   r   r7   r   r\   r   c       
           	
 t         j                  j                  xs t              t	        j
                  j                               j                  k7  r=t        dt	        j
                  j                                dj                   d      
t        d      j                  
t        j                        \  t        |t              rt	        |      }t        j                  |      \  }t!        |      \  t#        t$        j&                  d      
	 fd       }|S )NzMesh shape z does not match grid shape .z6If `mesh` is specified, then `backend` must be `None`.Tr(   c                 *   t        j                  |       \  }}t        |      \  }}t        d |D              }t        d  D              }t        d |D              }t        d (D              }t	        j
                  !||||)|      \  }	}
t        j                  |	      \  }}t        d |D              }t        j                  j                  rt        d |D              }t        d |D              }t        j                  d$|	i       }'$|j                  t        j                  '            }t        $||
|||      \  }}"j!                         D ]  \  }}|t#        t%        |            vr!t'        d	| d
| d| dt%        |       d	      |t#        t%        |            vr!t'        d	| d
| d| dt%        |       d	      ||   }||   }|j(                  |j(                  k7  s|j*                  |j*                  k7  st'        d	| d
| dt        j,                  ||          d| dt        j,                  (|          d| d       t/        ||
j0                  g      \  }}'t3        j4                  '      nt7        j8                         }|5  t;        j<                  g |||||#|
%t        "j!                               &t?        &      nd 'd}d d d        t        j@                  )      }|S # 1 sw Y   !xY w)Nc              3  F   K   | ]  }t        j                  |        y wr+   )rH   r}   r=   s     r0   r@   z0_pallas_call.<locals>.wrapped.<locals>.<genexpr>  s     B1(++A.Bs   !c              3  2   K   | ]  }t        |        y wr+   )r2  r5  s     r0   r@   z0_pallas_call.<locals>.wrapped.<locals>.<genexpr>  s      5  6a8 5s   c              3  L   K   | ]  }d t        j                  |         yw)r.   Nr  r  s     r0   r@   z0_pallas_call.<locals>.wrapped.<locals>.<genexpr>  s#     Fi..q123Fr  c              3  L   K   | ]  }d t        j                  |         yw)r  Nr  r  s     r0   r@   z0_pallas_call.<locals>.wrapped.<locals>.<genexpr>  s#     KA')"2"21"5!67Kr  c              3  l   K   | ],  }t        |t        j                        r|j                  n| . y wr+   )r;   rO   TransformedRefrefr   s     r0   r@   z0_pallas_call.<locals>.wrapped.<locals>.<genexpr>  s0       A{99:A   24c              3  N   K   | ]  }|j                  t                       y wr+   )
update_vma	frozensetr=   s     r0   r@   z0_pallas_call.<locals>.wrapped.<locals>.<genexpr>  s#       <$% !"Y[ 9  <s   #%c              3  l   K   | ],  }t        |t        j                        r|j                  nd  . yw)r   N)r;   rO   rq  
transformsr   s     r0   r@   z0_pallas_call.<locals>.wrapped.<locals>.<genexpr>  s0      " #1k&@&@ArI"rs  zpallas_call kernelz+input_output_aliases contains the mapping 'r   z' with input index z outside the range [0, )z' with output index z' referring to inputz with abstract value z and to outputz! with a different abstract value rj  )rZ   rm   r   r[   r^   r   r]   r7   r   r\   r   r   )!r   r  r$   ru   rT   get_grid_mappingr?  r   r  r  r   r   replace_func_namer   sanitize_namer  rS   rq   rr   rR   rW   rX   r  r"   r   r
   named_scope
contextlibnullcontextr,   r-   r   r   )*r.   flat_args_with_pathsin_treein_pathsr  flat_in_avalsflat_out_avals
in_originsout_originskernel_argsr^   flat_kernel_argsr  flat_kernel_avalskernel_arg_transforms
kernel_dbgrm   r   i_idxo_idxrx   out_avalr<  r?  r  r   rC  r\   r7   r   r   r  flat_out_shapesr[  r]   r[   re  r   r   r   	out_pathsr_  s*                              r0   wrappedz_pallas_call.<locals>.wrapped  s   $-$D$DT$J!' !56HiB	BBM 5$35 5N FXFFJKKKK + < <	!K (1'='=k'J$n !    <): < < " "!"  $$%96&127J//0B0B40HIj*
L*;-/ME6 -224 7u	eC./	/9%% I ' "}%&a)* 	* 
eC/0	09%% I!7 #~&'q*+ 	+ e$g&h	(..	(GMMX^^,K9%% I!!*!1!1(5/!B C D%Y '%,,Yu-=>? @**21	67 	77* 'y<3R3R2STJ	!%!1z7M7M7O  
 ##   	
 ##$%9%?%?%AB)%+3+?:h'T!h& 
"
"8X
6CJ) s   AL		L)r   )pallas_tpu_interpret_mode_context_managerr  rg  ru   rW   valuesr3  rR   r\   rT   unzip_dynamic_grid_boundsr;   r   r   r  r$   r   r
   r3   )re  r1  r[  r   r]   r   r[   r   r7   r   r\   r   flat_out_shapes_with_pathsr  r  r  r  r_  s   ` ``````````  @@@@r0   rd  rd    s,   & 66<<I	 .?/	TZZ !Y^^3djj//123 4>>"!%  OPPllG#.#H#H#S )  	4 i I)2)I)I))T&h%&@A)_
3774 W W W !Wp 
.r2   c                   |dt        j                  |        S t        |       dk(  ry| ^}}t        |t         j                        rm|j
                  t        |      k  rU||j
                     dt        j                  |        S ||j
                     t        j                  t        |            z   S dt        j                  t        |              S )z1Converts `args[k]<rest>` into `arg_k_name<rest>`.r.   r   )r   r  rr   r;   SequenceKeyrq  ru   )in_path	arg_namesarg_idx	rest_paths       r0   in_path_to_input_originr  1  s     )""7+,--\Q'I../GKK#C 5 % I$$W-.//W[[!I$4$4U95E$FFF)""5>2344r2   r	  )r  _NoInstances)rD   )rZ   "tuple[jax_core.AbstractValue, ...])r  zdict[int, tuple[int, ...]])rm   jax_core.Jaxprr]   tuple[tuple[int, int], ...]r^   GridMappingr   pallas_core.Mesh | Noner   boolr[   r	   r7   r	   r   CostEstimate | NonerZ   r  r\   Backend | Noner   FrozenDict[str, str] | Noner   
str | None)r^   r  r   r@  r   zjax_core.ShapedArrayr   r9  r   BlockMappingr  r  )
r.   Sequence[jax_typing.Array]r  "Sequence[int | batching.NotMapped]r]   r  r   r@  r  zItuple[tuple[jax_typing.Array, ...], tuple[int | batching.NotMapped, ...]])r.   r  r  r  rm   r  r^   r  r   r  r]   r  r   r  r[   r	   r7   r	   r   r  rZ   r  r\   r  r   r  r   r  )rm   r  r^   r  r   r  r]   r  r   r  r[   r	   r7   r	   r   r  rZ   r  r\   r  r   r  r   r  )rX  zjax_core.ClosedJaxprrZ  checkify.Errorr^   r  r  zKtuple[jax_core.ClosedJaxpr, tree_util.PyTreeDef, set[checkify.ErrorEffect]])rZ  r  r.   jax_core.Valuer^   r  r  r  )rZ  r  r.   r  rm   r  r[   r	   r]   r  r^   r  rZ   r  )F)r  r   r   zjax_core.DebugInfor^   r  r  z&tuple[pallas_core.AbstractMemRef, ...]r  ztree_util.PyTreeDefr  z-tuple[tuple[pallas_core.Transform, ...], ...]r  r  r  z3tuple[jax_core.Jaxpr, tuple[jax_typing.Array, ...]])r  strr  	Exception)r  r  r[   r	   r\   r  )r!  zjax_core.Primitiver/   zdict[Any, Any]r  r  )r1  r	   r  zjax_core.AbstractValue) re  Callable[..., None]r1  r	   r[  zpallas_core.GridSpec | Noner3  zpallas_core.TupleGridr\  pallas_core.BlockSpecTreer]  r  r^  zpallas_core.ScratchShapeTreer]   Mapping[int, int]r   r  r[   r	   r   r  r7   PMapping[Backend, pallas_core.CompilerParams] | pallas_core.CompilerParams | Noner   r  r\   r  r   dict[str, str] | Noner  zCallable[..., Any])r7   r  r  z,Mapping[Backend, pallas_core.CompilerParams])re  r  r1  r	   r[  zpallas_core.GridSpecr   r  r]   r  r   r  r[   r	   r   r  r7   z8Mapping[Backend, CompilerParams] | CompilerParams | Noner   r  r\   r  r   r  )r  ztree_util.KeyPathr  ztuple[str, ...] | Noner  zpallas_core.OriginStr)x__doc__
__future__r   collections.abcr   r   r   r~  enum	functoolsr   r   r   r   r	   jax._srcr
   jax._src.lax_srcr  r   r   r   r   r   rH   r   r   r   r   r   jax._src.traceback_utilr   r   r  jax._src.frozen_dictr   jax._src.interpretersr   r   r   r   r   jax._src.pallasrT   r   r   jax._src.stater   state_dischargerO   jax._src.utilr    r!   r"   r#   r$   r%   r  r:  
unsafe_mapr   
unsafe_zipr  r  ra  r   Backendrb  HiPrimitiver,   r  r5   def_implrj   def_effectful_abstract_evalrn   rl   rz   r   r   r   primitive_jvpsr   r  r&  rT  primitive_batchersra  r  r  error_checksr  
bool_statebool_envr  r  r  register_loweringr"  custom_str_eqn_compact_rulesr'  custom_typechecksr2  register_discharge_rulerZ  r&   rg  rd  r  jax._src.pallas.mosaicr
  r  r   jax._src.pallas.mosaic.interpretr  rC   SimpleNamespace	new_classEnumr   r2   r0   <module>r     s   4 " 7 7   %          %   &  0  ) + $ * & 4 / + & 7 /  " CZCZ''%%))''


++ "!!-0!%    ( )%1%P  ) )*D E - 	U.U. 6U. 	U.
 "U. U. U. U. 'U. 2U. U. *U. U.l / V# 	V#
 6V# V# "V# V# V# V# 'V# 2V# V# *V# V#r $9  -  4J4J4J 4J 
"	4J
  4J 4Jn$
$$ -$
 6$ $ O$>d$
$d$
,d$ 	d$
 d$ "d$ 6d$ d$ d$ d$ 'd$ 2d$ d$ *d$ d$j -1S 	S
 S "S 6S S S S 'S 2S S *S Sl .H  M *0$0 0 	0#N	0O(6O 1<O <J	Ob\%3\ &4\ *-	\
 5P\ -8\ *L\z (A  m $ (	("( ( 9	(
 (( H( ( 9(V +**FOO7?	*	 D+	!D+ D+ 	D+N   }&; <
&4 # 	 % %m 4 -H  = )P< )((7~ 	~
 6~ ~ "~ ~ ~ ~ '~ 2~ ~ *~ ~ 8~J .2"$*7+835.0
 	)-"&*)www +	w
  w (w )w 1w ,w w w w	w$ '%w& 'w( $)w* +wte12 	&KL %).0 	)-"&*AAA $	A
 "A ,A A A A 	AA 'A A $A MAH55+A552S\	  
  ...%eoontyylCs$   N) "N6 )N32N364O-,O-