
    uki}                   M   d Z ddlmZ ddlmZmZ ddlmZ ddlZddlZddl	Z	ddl
mZmZ ddlZddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddl m!Z! ddl m"Z# ddl$m%Z% ddl&m'Z( ddl&m	Z) ddl&m*Z+ ddl,m-Z. ddl/mZ0 ddl/m1Z1 ddl/m2Z3 ddl4m5Z5 ddl4m1Z6 dd l7m8Z8 dd!l7m9Z9 ddl:m;Z< ddl;Z= ed"      Z>ej~                  e@cZ@ZAej                  eCcZCZDe5j                  ZEe0j                  ZFe0j                  ZGe0j                  ZHej                   G d# d$             ZJej                   G d% d&             ZKej                   G d' d(             ZLej                   G d) d*             ZM G d+ d,eN      ZO	 	 	 	 d;d-ZPd<d.ZQd=d/ZR	 	 	 	 	 	 	 	 	 	 	 	 d>d0ZSi ZTd?d1ZUd@d2ZVdAd3ZWdBd4ZX	 	 	 	 	 	 	 	 dCd5ZY	 	 	 	 	 	 	 	 dDd6ZZ	 	 	 	 	 	 dEd7Z[dFd8Z\ eUe1j                        dGd9       Z^ eUe1j                        dGd:       Z`de.j                  j                  e.j                  j                  f	 	 	 	 	 	 	 	 	 	 	 	 	 dHd;Ze eUe1j                        	 	 	 	 dId<       Zg eUe1j                        dGd=       ZidGd>Zj eUej                        	 	 	 	 	 	 dJd?       Zl eUej                        dGd@       Zn ej                  dAB       G dC dD             Zo ej                  dAB       G dE dF             Zp	 	 	 	 	 	 dKdGZq eqdH eoe<j                  gdIe<j                         eoe<j                  gdJe<j                         eoe<j                  gdKe<j                         eoe<j                  gdLe<j                         epe<j                  ge)j                         epe<j                  ge)j                        g eoe<j                  gdMe<j                         eoe<j                  gdNe<j                         epe<j                  ge)j                         epe<j                  ge)j                        gO      Zz eqdP eoe<j                  gdQe<j                         eoe<j                  gdRe<j                         epe<j                  ge)j                        g eoe<j                  gdSe<j                         eoe<j                  gdTe<j                         epe<j                  ge)j                        gO      Z| eqdU eoe<j                  gdVe<j                         eoe<j                  gdWe<j                         epe<j                  ge)j                        g eoe<j                  gdXe<j                         eoe<j                  gdYe<j                         epe<j                  ge)j                        gO      Z~ eqdZ eoe<j                  gd[e<j                         eoe<j                  gd\e<j                         epe<j                  ge)j                        g eoe<j                  gd]e<j                         eoe<j                  gd^e<j                         epe<j                  ge)j                        gO      Z eqd_ eoe<j                  gd`e<j                         eoe<j                  gdae<j                         epe<j                  ge)j                        g eoe<j                  gdbe<j                         eoe<j                  gdce<j                         epe<j                  ge)j                        gO      Z eqdd eoe<j                  gdee<j                         eoe<j                  gdfe<j                         epe<j                  ge)j                        g eoe<j                  gdge<j                         eoe<j                  gdhe<j                         epe<j                  ge)j                        gO      Z eqdi eoe<j                  gdje<j                         eoe<j                  gdke<j                         epe<j                  ge)j
                        g eoe<j                  gdle<j                         eoe<j                  gdme<j                         epe<j                  ge)j
                        gO      Z eqdn eoe<j                  gdoe<j                         eoe<j                  gdpe<j                         epe<j                  ge)j                        g eoe<j                  gdqe<j                         eoe<j                  gdre<j                         epe<j                  ge)j                        gO      Z eqds eoe<j                  gdte<j                         eoe<j                  gdue<j                         epe<j                  ge)j                        g eoe<j                  gdve<j                         eoe<j                  gdwe<j                         epe<j                  ge)j                        gO      Z eqdx eoe<j                  e<j                  gdye<j                         eoe<j                  e<j                  gdze<j                         epe<j                  e<j                  ge)j                         eoe<j                  e<j                  gd{e<j                         eoe<j                  e<j                  gd|e<j                         epe<j                  e<j                  ge)j                        g eoe<j                  e<j                  gd}e<j                         eoe<j                  e<j                  gd~e<j                         epe<j                  e<j                  ge)j                         eoe<j                  e<j                  gde<j                         eoe<j                  e<j                  gde<j                         epe<j                  e<j                  ge)j                        gO      Z eqd eoe<j                  gde<j                         eoe<j                  gde<j                         epe<j                  ge)j                        g eoe<j                  gde<j                         eoe<j                  gde<j                         epe<j                  ge)j                        gO      Z eqd eoe<j                  gde<j                         eoe<j                  gde<j                         epe<j                  ge)j                         g eoe<j                  gde<j                         eoe<j                  gde<j                         epe<j                  ge)j                         gO      Z eqd eoe<j                  gde<j                         eoe<j                  gde<j                         epe<j                  ge)j$                        g eoe<j                  gde<j                         eoe<j                  gde<j                         epe<j                  ge)j$                        gO      Z eqd eoe<j                  gde<j                         eoe<j                  gde<j                         epe<j                  ge)j(                        g eoe<j                  gde<j                         eoe<j                  gde<j                         epe<j                  ge)j(                        gO      Z eqd eoe<j                  gde<j                         eoe<j                  gde<j                         epe<j                  ge)j,                        g eoe<j                  gde<j                         eoe<j                  gde<j                         epe<j                  ge)j,                        gO      Z eqd eoe<j                  gde<j                         eoe<j                  gde<j                         epe<j                  ge)j0                        g eoe<j                  gde<j                         eoe<j                  gde<j                         epe<j                  ge)j0                        gO      Z eqd eoe<j                  gde<j                         eoe<j                  gde<j                         epe<j                  ge)j4                        g eoe<j                  gde<j                         eoe<j                  gde<j                         epe<j                  ge)j4                        gO      Z eqd eoe<j                  gde<j                         eoe<j                  gde<j                         epe<j                  ge)j8                        g eoe<j                  gde<j                         eoe<j                  gde<j                         epe<j                  ge)j8                        gO      Z eqd eoe<j                  e<j                  gde<j                         eoe<j                  e<j                  gde<j                         epe<j                  e<j                  ge)j<                        g eoe<j                  e<j                  gde<j                         eoe<j                  e<j                  gde<j                         epe<j                  e<j                  ge)j<                        gO      Z eqd eoe<j                  gde<j                         eoe<j                  gde<j                         epe<j                  ge)j@                        g eoe<j                  gde<j                         eoe<j                  gde<j                         epe<j                  ge)j@                        gO      Z eqd eoe<j                  gde<j                         eoe<j                  gde<j                         epe<j                  ge)jD                        g eoe<j                  gde<j                         eoe<j                  gde<j                         epe<j                  ge)jD                        gO      Z eqd eoe<j                  gde<j                         eoe<j                  gde<j                         epe<j                  ge)jH                        g eoe<j                  gde<j                         eoe<j                  gde<j                         epe<j                  ge)jH                        gO      Z eqd eoe<j                  gde<j                         eoe<j                  gde<j                         epe<j                  ge)jL                        g eoe<j                  gde<j                         eoe<j                  gde<j                         epe<j                  ge)jL                        gO      Z eqd eoe<j                  gde<j                         eoe<j                  gde<j                         epe<j                  ge)jP                        g eoe<j                  gde<j                         eoe<j                  gde<j                         epe<j                  ge)jP                        gO      Z eqd eoe<j                  gde<j                         eoe<j                  gde<j                         epe<j                  ge)jT                        g eoe<j                  gde<j                         eoe<j                  gde<j                         epe<j                  ge)jT                        gO      Z eqd eoe<j                  gde<j                         eoe<j                  gde<j                         epe<j                  ge)jX                        g eoe<j                  gde<j                         eoe<j                  gde<j                         epe<j                  ge)jX                        gO      Z eqd eoe<j                  gde<j                         eoe<j                  gde<j                         epe<j                  ge)j\                        g eoe<j                  gde<j                         eoe<j                  gde<j                         epe<j                  ge)j\                        gO      Z eqd eoe<j                  e<j                  gde<j                         eoe<j                  e<j                  gde<j                        g eoe<j                  e<j                  gde<j                         eoe<j                  e<j                  gde<j                        gO      ZeTjc                  i ejd                  ezejf                  dۄ ejh                  e|ejj                  e~ejl                  eejn                  eejp                  eejr                  eejt                  eejv                  eejx                  d܄ ejz                  eej|                  eej~                  eej                  eej                  eej                  eej                  eej                  eej                  eej                  eej                  eej                  eej                  eej                  eej                  eej                  eej                  eej                  eej                  ei       dLd݄ZАdMdބZѐdNd߄ZҐdNdZӐdOdZԐdOdZՐdOdZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 dPdZ ej                  ee(j                  j                  e(j                  j                  e(j                  j                        Z ej                  ee(j                  j                  e(j                  j                  e(j                  j                        Z ej                  ee(j                  j                  e(j                  j                  e(j                  j                        Z ej                  ee(j                  j                  e(j                  j                  e(j                  j                        Z ej                  ee(j                  j                  e(j                  j                  e(j                  j                        Z ej                  ee(j                  j                  e(j                  j                  e(j                  j                        ZdLdZej                  eej                  eej                  eej                  e(j                  ej                  e(j                  ej                  e(j                  ej                  e(j                  ej                  e(j                  ej                  e(j                   ej                  ei
Zej                         D ]  \  ZZefdGdZeeTe<    ej                  eej                  eej                  eej                  eej                  eej                  eej                  eiZej                         D ]  \  ZZefdGdZeeTe<     eUej                         	 	 	 	 	 	 dQd       ZdRdZ eUe1j&                        dSd       Z eUe1j*                        dSd       Z eUe6j.                        dTd       Z eUej2                        dUd       Zej6                  d ej8                  d ej:                  d iZej                         D ]  \  ZZ e[ed      eTe<     eUej>                        dGd       Z  eUejB                        dGd       Z" eUejF                        dGd       Z$  eUejJ                         e[e3jL                  d               eUejN                         e[e3jP                  d              eUejR                        dGd       Z*dVdZ+dWdZ,dXdZ-dYdZ.dLdZ/dYdZ0dLdZ1dZdZ2d[d Z3d\dZ4d]dZ5	 	 	 	 	 	 	 	 d]dZ6	 	 	 	 	 	 	 	 d]dZ7	 	 	 	 	 	 	 	 d^dZ8dd	 	 	 	 	 d_dZ9 eUejt                        	 	 dGd       Z; eUejx                        dGd	       Z= eUej|                        	 	 dGd
       Z? eUej                        dGd       ZA eUej                        	 	 dGd       ZCd`dZDdadZE eUej                        dGd       ZG eUej                        dGd       ZI	 	 	 	 	 	 dbdZJ	 	 	 	 	 	 	 	 dcdZK eUe6j                        dGd       ZMe.j                  D  ci c]  }  eO|       |  c} ZPe.j                  D ci c]  } eO|      | c}ZR	 	 dddddd	 	 	 	 	 	 	 	 	 	 	 	 	 dedZSdfdZT	 	 	 	 	 	 dgdZU eUe1j                        	 	 dGd       ZW eUe6j                        dGd       ZY	 dhddd	 	 	 	 	 	 	 	 	 	 	 didZZ eUe1j                        	 	 dGd       Z[ eUe6j                        dGd       Z] eUej                        dGd       Z_ej                  j                  ej                  j                  fZcd Zdd  Ze eUej                        	 	 dGd!       ZgdGd"ZhdGd#Zi ej                  eie<j                        eTej                  <    ej                  eie<j                        eTej                  <    ej                  eie<j                        eTej                  <   	 	 dGd$Zpd% Zq ej                  epeq      eTej                  <   d& Zs ej                  epes      eTej                  <    eUej                        dGd'       Zv eUej                        d(        Zx eUej                         eUej                        	 	 dGd)              Z{ eUej                        dGd*       Z}d+ eTej                  <    eUej                        djd,       Zdkd-Zd.dd/	 	 	 	 	 	 	 	 	 dld0Z eUej                        	 	 dGd1       Z	 	 dGd2Z eUej                        	 	 dGd3       Z eUej                        	 	 dGd4       Zdmd5Zdnd6Zdod7Zdod8Zdpd9Z eUej                        	 	 	 	 	 	 dqd:       Zyc c} w c c}w (r  z0Module for lowering JAX primitives to Triton IR.    )annotations)CallableSequence)HashableN)AnyTypeVar)lax)	tree_util)ad_checkpoint)ad_util)api_util)config)core)custom_derivatives)	debugging)literals)linear_util)pjit)source_info_util)state)util)mlir)partial_eval)ir)arith)math)scf)dialect)
primitives)utils)indexing)foreach)
split_list_Tc                  f    e Zd ZU ded<   ded<   ded<    ej
                  d      Zd	ed
<   ded<   y)ModuleContextstrnameGridMappinggrid_mappingSequence[ir.Value]program_idsF)reprzmlir.TracebackCachestraceback_cachesplatformN)__name__
__module____qualname____annotations__dataclassesfieldr.        Z/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/jax/_src/pallas/triton/lowering.pyr&   r&   K   s1    )!!+<;+<+<%+H(H-r7   r&   c                  6    e Zd ZU ded<   ded<   ded<   ded<   y	)
	BlockInfojax_core.ShapedArrayfull_shape_dtypeSequence[Any]start_indicesSequence[int]start_indices_alignment&tuple[int | pallas_core.Squeezed, ...]block_shapeN)r0   r1   r2   r3   r6   r7   r8   r:   r:   T   s    ((((55r7   r:   c                  N    e Zd ZU ded<   ded<   ded<   ded<   ej
                  Zy)	LoweringRuleContextr&   contextSequence[jax_core.ShapedArray]avals_in	avals_outzSequence[BlockInfo | None]block_infosN)r0   r1   r2   r3   r4   replacer6   r7   r8   rD   rD   \   s%    **++))'r7   rD   c                  &    e Zd ZU dZded<   ded<   y)LoweringResultzKeeps python objects alive.z	ir.Modulemoduletuple[int, ...]gridN)r0   r1   r2   __doc__r3   r6   r7   r8   rL   rL   f   s    #
r7   rL   c                      e Zd Zy)LoweringErrorN)r0   r1   r2   r6   r7   r8   rR   rR   n   s    r7   rR   c                ~   t        | |j                  j                  d g| }t        d |D              }t	        j
                  |j                  |      }|j                  t        d      t        d |j                  D              rt        d      d t        fdt        ||j                        D              S )Nc              3  x   K   | ]2  }t        |t        j                  d t        j                               4 yw)r6   N)_ensure_ir_valuejax_coreShapedArrayjnpint32).0is     r8   	<genexpr>z"_eval_index_map.<locals>.<genexpr>x   s0      
 q(..r399=>s   8:z2Pipeline mode is not supported in Triton lowering.c              3  r   K   | ]/  }t        |t        j                        xr |j                  d k7   1 yw))r   r   N)
isinstancepallas_coreElementpaddingrZ   bs     r8   r\   z"_eval_index_map.<locals>.<genexpr>   s6      	
 K''(@QYY&-@@	s   57zDUnblocked indexing with padding is not supported in Triton lowering.c                $   |xxt         j                  d x\     | S  xt         j                  d x\     | S   t         j                  d x,\   t	        | t        |j                  | j                              S  	 t        dt        |             )Nr6   zUnsupported block dim type: )	r_   Squeezedr`   Blocked_mul_ir_constant
block_sizetype
ValueError)r[   rc   s     r8   _get_start_indexz)_eval_index_map.<locals>._get_start_index   s    
9!;!9 "$9K$7$7$99 %:9 A|ALL!&&9:: !7QyABBr7   c              3  6   K   | ]  \  }} ||        y wNr6   )rZ   r[   rc   rl   s      r8   r\   z"_eval_index_map.<locals>.<genexpr>   s"      !%Aq!s   )lower_jaxpr_to_triton_irindex_map_jaxprjaxprtupler
   tree_unflattenindex_map_out_treepipeline_modeNotImplementedErroranyrB   zip)ctxidxblock_mappingblock_indicesrl   s       @r8   _eval_index_mapr}   r   s     +	=((..7:-   - **&&7-  ,
<  	 	((	  N C 
 	-223 
 r7   c                H    ddt        fd| j                  D              S )Nc                    | xxt         j                  d x\     y xt         j                  d x\     y  t         j                  d x\   | j                  S  y )Nr6      )r_   re   r`   rf   ri   )rc   s    r8   _get_bdim_alignmentz1_get_index_alignment.<locals>._get_bdim_alignment   sX    
9!;!9 "$9K$7$7$99 %:9 || !r7   c              3  .   K   | ]  } |        y wrn   r6   )rZ   rc   r   s     r8   r\   z'_get_index_alignment.<locals>.<genexpr>   s     I!"1%I   )rc   zpallas_core.BlockDim)rr   rB   )r{   r   s    @r8   _get_index_alignmentr      s!     
I}/H/HI	IIr7   c                t   t         j                  j                  | j                        sB|s| S t	        j
                  t         j                  j                  || j                        |       S t        j                  | j                        j                  g |k(  r| S j                  t        |      k7  st        fdt        |      D              st        dj                   dg |       t	        j                  t         j                  j                  |j                  j                        |       S )Nc              3  J   K   | ]  \  }}j                   |   |d fv   ywr   Nshape)rZ   r[   dima_types      r8   r\   z_bcast_to.<locals>.<genexpr>   s+      ,(.3QC8#,s    #zCannot broadcast from  to )r   RankedTensorTyper^   rj   
tt_dialectsplatgetr   ranklenall	enumeraterk   	broadcastelement_typeencoding)ar   r   s     @r8   	_bcast_tor      s    				'	'	/hB//33E166BAFF  (F||xxh{{c%j  ,2;E2B, ) /~T(E(LMM
v':':FOOLa r7   c                   t        | t        j                  t        j                  t        t
        t        j                  f      r9|j                  }|j                  r|j                  }t        | t        |            } t        |t        j                  t        j                  t        t
        t        j                  f      r9|j                  }|j                  r|j                  }t        |t        |            }|j                  |j                  k7  rt        | |j                        } |j                  |j                  k7  rt        ||j                        }| |fS rn   )r^   npndarraynumberintfloatr   TypedNdArraydtype	weak_typerh   _dtype_to_ir_typer   r   )xyx_avaly_avalout_avalx_dtypey_dtypes          r8   _bcastr      s     "**biieX-B-B	C llGgQ)'23A"**biieX-B-B	C llGgQ)'23A\\X^^#!X^^$A\\X^^#!X^^$A	
A+r7   c                      fd}|S )Nc                    | t         <   | S rn   )triton_lowering_rules)fn	primitives    r8   wrapperz"register_lowering.<locals>.wrapper   s    '))$Ir7   r6   )r   r   s   ` r8   register_loweringr      s     
.r7   c                R   g }g }t        | j                        D ]6  \  }}|| j                  vs|j                  |       |j                  |       8 t	        | j                        D ]3  }| j                  |   }|j                  |       |j                  |       5 t        |d d       }d}|dz   t        |      k  r||dz      |kD  r|dz  }n|t        |      k  r||   |kD  r|dz  }|d | }||d  }	t        |      dk(  r>d gt        |	      z  }
t        t        |	            D ]  }t        ||	      |
||   <    |	|
fS t        j                  |      g|	}|d   dk  sJ d       d gt        | j                        z  }t        d|      }t        |      D ]4  \  }}||   }t        |      }t        ||d	      ||<   t        ||d	      }6 t        t        |	            D ]  }|||z      }t        |dz   |      ||<    t        |      t        | j                        k(  sJ ||fS )
Ni  r      r   iz7Cannot fix pallas kernel launch grid within CUDA limitsFsigned)r   rO   vmapped_dimsappendreversedr   range_program_idr   prod_i32_constant_mod	_floordiv)r*   launch_gridlaunch_grid_to_pallas_gridr[   sr   num_collapsecuda_yz_limitcollapse_dimsprog_id_dimsprog_idsnew_gridout_indicesgrid0out_idxs                  r8   _process_grid_to_3d_gridr      s   +! ))* +da))) ''*+ l//0 +c#Aq%%c*+
 ["%&,- Q[)),"#m3ALs;''L!M1ALm|,-\]+,1vL))H3|$% M0;A|0Lh)!,-M !!ii&66(	!y	  D
CD	  \..//+
a
"%& .da(+GaAq7KeQu-E	. \"# 8a()9:G&q1uh7K8 
[	S!2!23	33	3	;	r7   c                     t        j                         } | j                  t         j                         t	        j
                  |        | j                          | S rn   )r   JaxIrContextappend_dialect_registryupstream_dialectsr   register_dialectload_all_available_dialects)ry   s    r8   _new_ir_contextr     sC    #d445c"!!#	*r7   c                r    t        j                  d | D              }||dz
  z  dk(  }|st        d|        y )Nc              3  X   K   | ]"  }t        |t        j                        rd n| $ ywr   r^   r_   re   )rZ   ds     r8   r\   z%_check_tensor_size.<locals>.<genexpr>#  s,      # #1k&:&:;1B #s   (*r   r   zThe Pallas Triton lowering currently requires that all operations have array arguments and results whose size is a power of 2. Encountered an array of shape )r   r   rk   )r   size
power_of_2s      r8   _check_tensor_sizer   "  sV    	 #!# 
#$q!a'*	
	 	  
r7   c                   | j                   }|j                  rt        d      |j                  rt        d      | j                  |j
                     rt        d      t               5  t        j                  j                         5  t        j                  j                         }|j                  j                  }t        j                  |j                         }t        j"                  j%                  |      |d<   | j                  D cg c]?  }t&        j(                  j%                  t+        |j,                  j.                        d      A }}t1        | j2                        dk(  sJ t        j4                  j%                  |g       }	t'        j6                  |t        j8                  j%                  |	      dt        j:                  j%                  t=        t        j>                  j%                  d      	            t        j@                  jC                  |jD                        
      }
t        jF                  j%                  t        j:                  j%                  dt        jH                  d      i      gt1        |      z        |
_%         |
jD                  jL                  jN                  |	jP                    |
jD                  jL                  \  }t        j@                  |      5  tS        |      \  }}tU        |      D cg c]  \  }}||jV                  vr| }}}tY        t        j                  |j                         ||t        jZ                         |      }|j                  rt        d      |j\                  D cg c]H  }t_        |j`                  tc        |||      te        |      tg        d |jh                  D                    J }}tk        || |g|jl                   \   t'        jn                  g        d d d        tq        |      cd d d        cd d d        S c c}w c c}}w c c}w # 1 sw Y   7xY w# 1 sw Y   nxY wd d d        y # 1 sw Y   y xY w)Nz7dynamic grid bounds not supported in the Triton backendz5scalar prefetch not implemented in the Triton backendz4scratch memory not implemented in the Triton backendsym_namer   r   publicF)noinline)sym_visibility	res_attrsiptt.divisibility    z1Scalar prefetch not supported in Triton lowering.c              3     K   | ]C  }t        |t        j                        rt        j                  nt        j                  |       E y wrn   )r^   r_   re   squeezed_get_block_dim_sizerb   s     r8   r\   z/lower_jaxpr_to_triton_module.<locals>.<genexpr>l  s@      8 -7q+:N:N,OK(($88;< 8s   A	A)9
debug_infonum_dynamic_grid_boundsrv   num_index_operandsinvarsslice_scratch_opsr   r   LocationunknownModulecreate	operation
attributesr   sanitize_name	func_name
StringAttrr   r   PointerTyper   avalr   r   outvarsFunctionTypeFuncOpTypeAttrDictAttrdictBoolAttrInsertionPointat_block_beginbody	ArrayAttri32_attr	arg_attrsblocksr   inputsr   r   r   r&   TracebackCachesblock_mappingsr:   
array_avalr}   r   rr   rB   ro   	argumentsreturn_rL   )rq   r*   r/   r   rM   attrsmodule_namevarparam_typesfn_typer   entryr   r,   r[   pidlocal_program_idsry   r{   rI   s                       r8   lower_jaxpr_to_triton_moduler  .  s   
 *))
A  $$
?  \\,001
>   4,"++--/ 4,YYF''E$$Z%9%9:K))+6E* << 	""#4SXX^^#DaHK  u}}"""oo!!+r2G			
 ++//$0F"GH++FKK8
B <<##	+T]]2->?	@A
k
	BL BGGNN7>>*ggnnGU			5	! 6|Dh "+.al///  
 


Z11
2
)4+?+?+A8c 
	(	(!?
 	
  ,::
  &&c;>"=1 8*668 8	
k 
 $CNeooNb78 &(+i4, 4, 4,
*
 14, 4, 4, 4, 4,sq   )QBQAP+FQ+P;P0"A&P;AP6
0P;Q	Q+Q0P;;Q QQ	QQ%c                V   i i dfd}dfd}dfd}|&t        |j                  |      D ]  \  }}|	||<    t        ||j                  |       |j                  D ]  }	t	        ||	j                        }
|	j
                  t        vr#t        d|	j
                  j                   d      t        |	j
                     }|	j                  D cg c]  }|j                   }}|	j                  D cg c]  }|j                   }}t	        ||	j                        }t        j                  | |	j
                  |	j                  j                  |	j                  j                        }t!        | |||      }	 t#        j$                  |	j                  j                        5  |5   ||g|
i |	j&                  }d d d        d d d        |	j
                  j2                  rt        ||	j                          ||	j                  d           t	        ||j                        S c c}w c c}w # 1 sw Y   yxY w# 1 sw Y   }xY w# t(        $ r  t*        $ rH}t,        j.                  j0                  s t	        d |
      }t)        d|	 d| d	| d
| d| 
      |d }~ww xY w)Nc                Z    t        | t        j                        r| j                  S |    S rn   )r^   rV   Literalval)atomenvs    r8   read_envz*lower_jaxpr_to_triton_ir.<locals>.read_env  s%    !$(8(89488Hs4yHr7   c                \    t        | t        j                        ry j                  |       S rn   )r^   rV   r  r   )r  block_info_envs    r8   read_block_info_envz5lower_jaxpr_to_triton_ir.<locals>.read_block_info_env  s'    $(()d##r7   c                    || <   y rn   r6   )r  r  r  s     r8   	write_envz+lower_jaxpr_to_triton_ir.<locals>.write_env  s    CHr7   z0Unimplemented primitive in Pallas GPU lowering: z?. Please file an issue on https://github.com/jax-ml/jax/issues.c                    t        | dd       S )Nrj   )getattrts    r8   <lambda>z*lower_jaxpr_to_triton_ir.<locals>.<lambda>  s    '!VT": r7   z Exception while lowering eqn:
  z
With context:
  z
With inval types=z
In jaxpr:
z
msg=r   )r  zjax_core.Atom)r  zjax_core.Var)rx   r   r"   eqnsmapr   r   rv   r(   r   r   r   source_info_to_locationsource_info
name_stack	tracebackrD   r   user_contextparamsrR   	Exceptionr   jax_pallas_verbose_errorsvaluemultiple_results)ry   rq   rI   argsr  r  r   invar
block_infoeqninvalsrulevrG   rH   eqn_block_infoslocrule_ctxoutvalseinval_typesr  r  s                        @@r8   ro   ro   w  s    	#.I$
  {; +z		 *u+ 
)U\\4(ZZ  )c3::&F
}}11
<]]  !JJK K !/D #

+1+H+!$-A-I--szz:O

&
&S]]COO66!!#C #3)_MH(()B)BC 8S 8x7&7CJJ78 8 }}%%ig.A(A )D 
Xu}}	%%5 ,-8 8 8 8  --33:FCk-cU 3Z*;-}UG L 		sU   H,7H1;)I$I'H6>II6H?;II	IJ( AJ##J(c               .     r n fdd fd}|S )Nc                      | i |fS rn   r6   )r2  kwfuns     r8   r%  zlower_fun.<locals>.<lambda>  s    d9Ib9I8K r7   c           
        t        j                  |t        j                  d	||            }t	        j
                  || j                        \  }}}t        j                  ||      }t        | g|d|i}
r|S |d   S )Nzpallas triton lower_funr   
call_jaxprr   )
lu	wrap_initr   r   petrace_to_jaxpr_dynamicrG   rV   ClosedJaxpr_closed_call_lowering_rule)ry   r2  r-  wrapped_funrq   _constsoutr   rB  r1  s           r8   	f_loweredzlower_fun.<locals>.f_lowered  s    ,,
F&&'@#'+V56K 00cllKE1f  /E
$S
B4
BE
BC"3.A.r7   ry   rD   r6   )rB  r1  rP  r   s   `` @r8   	lower_funrR    s     s$K"/ 
r7   c                    | t        d      vrt        d|        ||    dk(  rt        d      S t        j                  |       S )N   !axis must be in [0, 3), but got: r   r   )r   rk   r   r   get_program_id)axisr   s     r8   r   r     sJ    	q
8?
@@!		"	"4	((r7   c               4    | j                   j                  |   S rn   )rE   r,   ry   rW  s     r8   _program_id_lowering_rulerZ    s    		 	 	&&r7   c               b    |t        d      vrt        d|       t        j                  |      S )NrT  rU  )r   rk   r   get_num_programsrY  s     r8   _num_programs_lowering_ruler]    s2    	q
8?
@@		$	$T	**r7   c           	        t         j                  j                  |j                        r~t        j                  |j                        }t	        j
                  |j                        }t         j                  j                  |j                  |j                  |j                        }n)t	        j
                  |j                        j                  }t	        j                  || |||||      S )N)masksemscope)r   r   r^   rj   r   r   r   r   r   pointee_typer   
atomic_rmw)	opptrr  r_  semantic
sync_scopeptr_typer   result_types	            r8   _atomic_rmwrj    s     ##CHH-""388,H))(*?*?@L%%))1183D3DK ((2??K			2sCd

 r7   c                  | j                   ^}}|J |j                  |      \  }}}}	|j                  | j                        ^ }}
}t        |      }|rt	        |d   t
        j                        sQt        j                  || j                  d   j                        }|j                  t        j                  |             t        |      dk7  rt        d      |d   }t        |||      }t        ||
      }|	t        |	|      }	|t         j"                  j$                  k(  rt&        j(                  j$                  }n|t         j"                  j*                  k(  r\t	        |j,                  t.        j0                        rt&        j(                  j*                  }nBt&        j(                  j2                  }n&|t         j"                  j4                  k(  rt&        j(                  j4                  }n|t         j"                  j6                  k(  rt&        j(                  j6                  }n|t         j"                  j8                  k(  rt&        j(                  j8                  }n~|t         j"                  j:                  k(  rt&        j(                  j:                  }nF|t         j"                  j<                  k(  rt&        j(                  j<                  }nt        d|       t?        ||||	      S )Nr   r   z!Only single indexer is supported.zunsupported atomic operation: )r_  ) rI   	unflattenrG   listr^   r!   	NDIndexerr   get_transforms_shaper   r   make_trivial_indexerr   rv   _compute_pointers_from_indicesrU   r   AtomicOpTypeXCHGr   RMWOpADDrj   r   IntegerTypeFADDMINMAXANDORXORrj  )ry   	args_treeatomic_type	args_flatr4  rM  re  indexersr  r_  
value_aval	mask_aval	ref_shaperz   rd  s                  r8   _atomic_lowering_ruler    s>    ??.*q			&00;#xd'11#,,?1j)(^(	HRL(2D2DE**8S\\!_5J5JKIOOI229=>]a
A
BB#&sJ<#j)#	D),DJ++000					Bj--111#((BNN+b  bj--111					Bj--111					Bj--111					Bj--000					Bj--111					B
 >{mL
MM	Rc	--r7   c           	        | j                   \  }}}t        j                  j                  |j                        r~t        j                  |j                        }t        j                  |j                        }t        j                  j                  |j                  |j                  |j                        }	n)t        j                  |j                        j                  }	t        j                  |	|t        ||      t        ||      t
        j                  j                  t
        j                   j"                        S )N)r`  ra  )rG   r   r   r^   rj   r   r   r   r   r   rb  r   
atomic_casrU   MemSemanticACQUIRE_RELEASEMemSyncScopeGPU)
ry   re  cmpr  rM  cmp_avalval_avalrh  r   ri  s
             r8   _atomic_cas_lowering_ruler  !  s    ,,!Xx##CHH-""388,H))(*?*?@L%%))1183D3DK ((2??K				sH%sH%

 
 
0
0##''
 r7   c                   t        j                  |      }|\  }|j                  d   j                  }t	        j
                  d|      t	        j
                  d|      g}t        j                  ||f      }t        j                  t        j                  | t        j                  d| ||fi             |      \  }	}
t        j                  |	|      \  }}} |
       }~|rt        d      |D cg c]  }t        |j                          }}t#        j$                  ||      }|dz  } |j&                  d   j(                  j*                  | }t,        j.                  j1                  |      5  t3        |j4                  |d g|j6                   }t#        j8                  |       d d d        |j;                          t=        |j>                        S c c}w # 1 sw Y   3xY w)Nr   r6   r   zpallas triton associative_scanrD  z.Associative scan with constants not supported.r   ) r
   tree_leavesrG   r   rV   rW   tree_structurer   flatten_fun_nokwargsrF  rG  r   rH  rI  rv   _element_typerj   r   ScanOpregionsr  r   r   r   r   ro   rE   r
  scan_returnverifyrn  result)r  ry   r2  axes	flat_argsrW  r   in_avalsin_treeflat_funout_tree_thunkcombine_jaxprrM  rN  out_treeargelement_typesscan_opr  r  resultss                        r8   _associative_scan_loweringr  6  s   ##D))'4
,,q/

%2U+2U+( $$dD\2'%::ll
(()I)-d|RAB (N  66-F (
N
OO6?@s=*@-@i.'!+
*'//!

#
#
*
*K
8%	''. $&]D+0??G 7#	$
 
..	gnn	 A$ $s   1G/8GG"c               \    |rt        d      t        t        j                  | ||f      d   S )Nz Reverse cumsum is not supported.r   )rv   r  rX   add)ry   r   rW  reverses       r8   _cumsum_lowering_ruler  Z  s/     
@
AA	#CGGS!dW	=a	@@r7   c           	         | j                   \  }t        j                  |t        |j                  |j
                  j	                  d                   S Nr   )rG   arith_dialectxori_fullrj   r   )ry   r   r   s      r8   _not_lowering_ruler  c  s=    \\(6			AuQVVfll.?.?.B-BC	DDr7   T)frozenc                  <    e Zd ZU ded<   ded<   ded<   d	dZd
dZy)_ExternSequence[jax.typing.DTypeLike]	arg_typesr'   symbolri  c                    t        |      t        | j                        k7  ryt        d t        || j                        D              S )NFc              3     K   | ]o  \  }}|j                   t        j                   |      k(  xsD |j                  xr6 |j                   j                  t        j                   |      j                  k(   q y wrn   )r   rX   r   kind)rZ   r   arg_types      r8   r\   z"_Extern.matches.<locals>.<genexpr>r  sd       D( 	

cii)) 	LNNJtzz#))H2E2J2JJ	Ls   A5A7)r   r  r   rx   selfavalss     r8   matchesz_Extern.matcheso  sA    
5zS((  "%8  r7   c                   |j                   \  }g }t        |j                  || j                        D ]  \  }}}t	        t        ||      |j                        }|j                  rL|j                  t        j                  |      k7  r*t        ||j                  t        j                  |            }|j                  |        t        t        j                  | j                              }	|j                  r*t        j                  j!                  |j                  |	      }	t#        j$                  |	|dd| j&                  d      S )N T)libnamelibpathr  pure)rH   rx   rG   r  r   rU   r   r   r   rX   _castr   r   ri  r   r   r   r   extern_elementwiser  )
r  ry   r2  r   
bcast_argsr   r  r  	bcast_argri  s
             r8   lowerz_Extern.lowerx  s    JXJ"3<<t~~F #c8,S$7Hi	DJJ#))H*==)TZZ81DE		"	# $CIId.>.>$?@K~~''++HNNKHk(({{ r7   Nr  rF   returnboolry   rD   r2  r+   r0   r1   r2   r3   r  r  r6   r7   r8   r  r  i  s    ++
+r7   r  c                  2    e Zd ZU ded<   ded<   ddZd	dZy)
	_Fallbackr  arg_classesCallable[..., ir.Value]rd  c                    t        |      t        | j                        k7  ryt        d t        || j                        D              S )NFc              3  b   K   | ]'  \  }}t        j                  |j                  |       ) y wrn   )rX   
issubdtyper   )rZ   r   	arg_classs      r8   r\   z$_Fallback.matches.<locals>.<genexpr>  s+      D) 	tzz9-s   -/)r   r  r   rx   r  s     r8   r  z_Fallback.matches  sC    
5zS))** "5$*:*:;  r7   c           	         |j                   \  }g }t        |j                  |      D ]4  \  }}|j                  t	        t        ||      |j                               6  | j                  | S rn   )rH   rx   rG   r   r   rU   r   rd  )r  ry   r2  r   r  r   r  s          r8   r  z_Fallback.lower  sc    JXJt, P	c	"23"=x~~NOP477D>r7   Nr  r  r  r6   r7   r8   r  r    s    --r7   r  c                $     	 	 	 	 	 	 d fd}|S )Nc                      j                   j                     }t         fd|D        d       }|-t        d  j                  D              }t        d d|        |j                   g| S )Nc              3  Z   K   | ]"  }|j                  j                        s| $ y wrn   )r  rG   )rZ   r=  ry   s     r8   r\   z6_make_dispatch_table.<locals>.inner.<locals>.<genexpr>  s      :A!))CLL"9a:s    ++c              3  4   K   | ]  }|j                     y wrn   r  )rZ   r   s     r8   r\   z6_make_dispatch_table.<locals>.inner.<locals>.<genexpr>  s     BTdjjBs   zunsupported types for z: )rE   r/   nextrr   rG   rv   r  )ry   r2  rM  tableharg_aval_dtypesr(   tabless   `     r8   innerz#_make_dispatch_table.<locals>.inner  sz     3;;''(E::DAAyBS\\BBo"4&?*;
<  1773r7   )ry   rD   r2  ir.Valuer  r  r6   )r(   r  r  s   `` r8   _make_dispatch_tabler    s'    

'/

 
,r7   abs__nv_abs
__nv_llabs
__nv_fabsf	__nv_fabs__ocml_fabs_f32__ocml_fabs_f64)cudarocmceil
__nv_ceilf	__nv_ceil__ocml_ceil_f32__ocml_ceil_f64floor__nv_floorf
__nv_floor__ocml_floor_f32__ocml_floor_f64exp	__nv_expf__nv_exp__ocml_exp_f32__ocml_exp_f64exp2
__nv_exp2f	__nv_exp2__ocml_exp2_f32__ocml_exp2_f64expm1__nv_expm1f
__nv_expm1__ocml_expm1_f32__ocml_expm1_f64log	__nv_logf__nv_log__ocml_log_f32__ocml_log_f64log1p__nv_log1pf
__nv_log1p__ocml_log1p_f32__ocml_log1p_f64sqrt
__nv_sqrtf	__nv_sqrt__ocml_sqrt_f32__ocml_sqrt_f64pow
__nv_powif	__nv_powi	__nv_powf__nv_pow__ocml_pown_f32__ocml_pown_f64__ocml_pow_f32__ocml_pow_f64cbrt
__nv_cbrtf	__nv_cbrt__ocml_cbrt_f32__ocml_cbrt_f64rsqrt__nv_rsqrtf
__nv_rsqrt__ocml_rsqrt_f32__ocml_rsqrt_f64sin	__nv_sinf__nv_sin__ocml_sin_f32__ocml_sin_f64cos	__nv_cosf__nv_cos__ocml_cos_f32__ocml_cos_f64tan	__nv_tanf__nv_tan__ocml_tan_f32__ocml_tan_f64asin
__nv_asinf	__nv_asin__ocml_asin_f32__ocml_asin_f64acos
__nv_acosf	__nv_acos__ocml_acos_f32__ocml_acos_f64atan
__nv_atanf	__nv_atan__ocml_atan_f32__ocml_atan_f64atan2__nv_atan2f
__nv_atan2__ocml_atan2_f32__ocml_atan2_f64sinh
__nv_sinhf	__nv_sinh__ocml_sinh_f32__ocml_sinh_f64cosh
__nv_coshf	__nv_cosh__ocml_cosh_f32__ocml_cosh_f64tanh
__nv_tanhf	__nv_tanh__ocml_tanh_f32__ocml_tanh_f64asinh__nv_asinhf
__nv_asinh__ocml_asinh_f32__ocml_asinh_f64acosh__nv_acoshf
__nv_acosh__ocml_acosh_f32__ocml_acosh_f64atanh__nv_atanhf
__nv_atanh__ocml_atanh_f32__ocml_atanh_f64population_count	__nv_popc__nv_popcll__ockl_popcount_u32__ockl_popcount_u64clz__nv_clz
__nv_clzll__ockl_clz_u32__ockl_clz_u64	nextafter__nv_nextafterf__nv_nextafter__ocml_nextafter_f32__ocml_nextafter_f64c                    t        |      S rn   )_minusry   r   s     r8   r%  r%  X  s
    fQi r7   c                    t        ||      S rn   )rg   rz  s     r8   r%  r%  a  s    a r7   c                    t         j                  j                  t        | j                              rt        d| j                         t        t        |       |       S )Nzunsupported type: )r   r   r^   r  rj   rv   _sub_zeros_liker   s    r8   ry  ry  x  sG    &&}QVV'<=
 2166(;
<<	k!na	  r7   c                2   t        | j                        }t        |j                        }t        j                  j	                  |      rBt        j                  j	                  |      rJ t        j
                  | j                  | |      S t        j                  j	                  |      r!t        j
                  |j                  ||       S | j                  |j                  k(  s/J t        | j                        t        |j                        f       t	        |t        j                        rt        j                  | |      S t	        |t        j                        rt        j                  | |      S t        d| j                   d|j                         Nzunsupported dtypes:  and )r  rj   r   r   r^   addptrr'   r   rw  r  addi	FloatTypeaddfrv   r   r   x_element_typey_element_types       r8   _addr  ~  s%    (. (.&&~6%%00@@@QVVQ**&&~6QVVQ**	
166	5CKQVV55	/a##-a##2166(%xHIIr7   c                   t        | j                        }t        |j                        }t        j                  j	                  |      r*t        j
                  | j                  | t        |            S t        j                  j	                  |      s| j                  |j                  k(  s/J t        | j                        t        |j                        f       t	        |t        j                        rt        j                  | |      S t	        |t        j                        rt        j                  | |      S t        d|j                         )Nzunsupported dtype: )r  rj   r   r   r^   r  ry  r'   r   rw  r  subir  subfrv   r  s       r8   r}  r}    s     (. (.&&~6QVVQq	22!!,,^<66QVV7c!&&k3qvv;77."..11%%	NBLL	11%%1!&&:;;r7   c                   | j                   |j                   k(  s/J t        | j                         t        |j                         f       t        | j                         }t        |t        j
                        rt        j                  | |      S t        |t        j                        rt        j                  | |      S t        d| j                    d|j                          Nunsupported types: r  )rj   r'   r  r^   r   rw  r  mulir  mulfrv   )r   r   r  s      r8   rg   rg     s    	
166	5CKQVV55	 (./a##.",,/a##1!&&qvvhGHHr7   c                  | j                   |j                   k(  s/J t        | j                         t        |j                         f       t        | j                         }t        |t        j
                  t        j                  f      rt        j                  | |      S t        |t        j                        s%t        d| j                    d|j                          |rt        j                  | |      S t        j                  | |      S r  )rj   r'   r  r^   r   F32TypeF64Typer  divfrw  rv   divsidivuir   r   r   r  s       r8   r   r     s    	
166	5CKQVV55	 (.RZZ 89a##	NBNN	3
 3AFF85I
JJq!$$q!$$r7   c               .   | j                   |j                   k(  s/J t        | j                         t        |j                         f       t        | j                         }t        |t        j
                        r:t        j                  j                         }t        | ||      } t        |||      }t        |t        j                  t        j                  f      rt        j                  | |      S t        d| j                    d|j                          )Nr   r  r  )rj   r'   r  r^   r   rw  r  r   _int_float_castr  r  r  rv   r  s       r8   _truedivr    s    	
166	5CKQVV55	 (./ZZ^^%N>&9A>&9ARZZ 89a##1!&&qvvhGHHr7   c                  | j                   |j                   k(  s/J t        | j                         t        |j                         f       t        | j                         }t        |t        j
                        rt        j                  | |      S t        |t        j                        s%t        d| j                    d|j                          |rt        j                  | |      S t        j                  | |      S r  )rj   r'   r  r^   r   r  r  remfrw  rv   remsiremuir  s       r8   r   r     s    	
166	5CKQVV55	 (.-a##	NBNN	3
 3AFF85I
JJq!$$q!$$r7   c                  | j                   |j                   k(  s/J t        | j                         t        |j                         f       t        | j                         }t        |t        j
                        r!t        j                  |r|| |      S || |      S t        |t        j                        rt        j                  || |      S t        d| j                    d|j                          r  )rj   r'   r  r^   r   rw  r  cmpir  cmpfrv   )r   r   si_predui_predf_predr   r  s          r8   _cmpr    s     
166	5CKQVV55	 (./gaCCWaCC.",,/fa++
 3AFF85I
JJr7   )r  r  r  c                `    t        j                  t         j                  j                  | |       S rn   )r  r  CmpFPredicateUNOr  s    r8   _is_nanr    s#    			M77;;Q	BBr7   c                `    t        ||g| j                  | j                   \  }} |||      S rn   )r   rG   rH   )ry   r   r   r   s       r8   signless_ruler    s/    !Q666DAqa8Or7   c                    | j                   \  }}t        ||g| j                   | j                   \  }} |||t        j                  |j
                  t        j                              S Nr   )rG   r   rH   rX   r  r   signedinteger)ry   r   r   r   r   rM  s         r8   signed_ruler  -  sU    IFA!Q666DAqa3>>&,,8I8IJKKr7   c                  ~~|rt        d      |rt        d      t        j                  ||	|      \  }	}
|
rt        d      t	        j
                  d| dd|	t        j                  j                  | j                  D cg c]0  }t        j                  |j                  t        j                        2 c}             yc c}w )Nz/Ordered debug_print is not supported on Pallas.zFpl.debug_print() does not support placeholders when lowering to TritonzAOnly positional arguments are supported by debug_print on Pallas. F)hexr2  	is_signedr6   )rv   rk   r   merge_callback_argsr   print_r   DenseI32ArrayAttrr   rG   rX   r  r   r  )ry   fmtorderedpartitionedr  static_argsnp_printoptionshas_placeholderslogging_recordr2  kwargsr   s               r8   debug_print_lowering_ruler  5  s     ?
O
PP
P  ..wkJ,$
K  	#aj
$$((DGLL*<@#..S%6%6
7* 		 
	*s   5Cc                   t         j                  j                  |       s|| j                  j                  |<   y t        j                  |       }|d|j
                   z  }|j                  }|j                  j                  d   |k(  }|sy |j                  j                  x}r+t        |t        j                        s||j                  |<   y y y )N_argr   )r   BlockArgumentr^   ownerr   
arg_numberregionr  r   r   r   )r8  r(   attrr  r  is_entryrd  s          r8   	_set_attrr  Z  s    				$	$Q	'#AGGt

#D 
!!$
))%\\  #u,(	
KK!!!b!:b*:K:K+LBMM$ ,M!r7   c           
        | j                   \  }t        dt        |j                              t        |      k(  sJ t	        |dt
        j                  j                  t        j                  |t        j                                     |S )Nr   r   r  )rG   maxr   r   r  r   DenseIntElementsAttrr   r   asarrayrY   ry   r   valuesr   s       r8   _multiple_of_ruler  i  sg    \\(6	QFLL!	"c&k	11	1!!"**V288"DE
 
(r7   c           
        | j                   \  }t        |j                        t        |      k(  sJ t        |dt        j
                  j                  t        j                  |t        j                                     |S )Nztt.contiguityr  )
rG   r   r   r  r   r  r   r   r  rY   r  s       r8   _max_contiguous_ruler  u  s`    \\(6	V\\	c&k	))	)!!"**V288"DE
 
(r7   c                J    | j                   \  }t        t        ||      |      S rn   )rG   r   rU   )ry   r   r   r   s       r8   _broadcast_to_ruler    s"    ll)6	#Av.	66r7   c                  |dk(  rt        |      S |dk  }|r| }d }|dkD  r8t        |d      \  }}|r||nt        ||      }|dkD  rt        ||      }|dkD  r8|J | j                  \  }| j                  \  }t        ||j                  |j                        }|rEt        j                  |j                  t        j                        }t        t        |      ||      S |S )Nr   r   r   )
_ones_likedivmodrg   rG   rH   r  r   rX   r  r  r  )	ry   r   r   is_reciprocalaccmodr   r   r   s	            r8   _integer_pow_ruler    s    !Va=a%-	
A#	AAq\FAs
A$sA,c1u
q!*a 	
A 
\\(6}}*8c6<<0#^^HNNC,=,=>FZ_c&99Jr7   c                V    t        j                  t        j                  | |      |      S rn   )rX   minimummaximum)minr   r  s      r8   r%  r%    s    S[[S!1Dc%J r7   c                :    ddt        j                  |        z   z  S Nr   )rX   r  )r   accuracys     r8   r%  r%    s    Q!_(= r7   c                ~    t        j                  t        j                  |        t        j                  |              S rn   )rX   logical_andisnanisinfr  s    r8   r%  r%    s%    s		!}syy|mL r7   F)r1  c                N   | j                   \  }}t        ||g| j                   | j                   \  }}t        j                  |j
                  t        j                        rt        j                  ||      S t        j                  |j
                  t        j                        s%t        d|j
                   d|j
                         t        j                  |j
                  t        j                        rt        j                  ||      S t        j                  ||      S r  )rG   r   rH   rX   r  r   floatingr  minnumfintegerrv   r  minsiminuiry   r   r   r   r   s        r8   _min_lowering_ruler         <<.&&	1	4s||	4cmm	4$!Q^^FLL#,,/  A&&	ckk	2

v||nE&,,@  	^^FLL#"3"34q!$$q!$$r7   c                N   | j                   \  }}t        ||g| j                   | j                   \  }}t        j                  |j
                  t        j                        rt        j                  ||      S t        j                  |j
                  t        j                        s%t        d|j
                   d|j
                         t        j                  |j
                  t        j                        rt        j                  ||      S t        j                  ||      S r  )rG   r   rH   rX   r  r   r  r  maxnumfr  rv   r  maxsimaxuir  s        r8   _max_lowering_ruler    r  r7   c                   | j                   \  }}t        ||g| j                   | j                   \  }}t        j                  |j
                  t        j                        xs. t        j                  |j
                  t        j                        }t        j                  |j
                  t        j                        s.t        j                  |j
                  t        j                        rt        |||      S t        |||      S r  )rG   r   rH   rX   r  r   r  r   r  r  r   )ry   r   r   r   r   r   s         r8   _div_lowering_ruler    s    <<.&&	1	4s||	4cmm	4$!Q>>&,,(9(9: cnnllC%%?& 	^^FLL"++.#..llBKK3 Aq((	1a	''r7   c                   t        d||         }t        |t        j                  |      }t	        t        |            D ]  }||k7  s	t        ||      } t        ||      S r  )_make_ranger  rX   rY   r   r   _expand_dimsr   )ry   r   r   	dimensionshardingiotar[   s          r8   _iota_lowering_ruler    sa     
Qi(	)$	tSYY	&$U #aI~$"d# 
4	r7   c                    t         j                  j                  |       rt        j                  |       j                  S | S rn   )r   r   r^   r   r#  s    r8   r  r    s2    ##A&q!...Hr7   c                   || k  rt        d| d|        t        | |      dk\  rt        d      t        j                  t        j
                  j                  || z
  gt        j                  j                  d            | |      S )Nz)end must be greater than start, but got: z <=         zstart and end must fit in int32r   )	rk   r  r   
make_ranger   r   r   rw  get_signless)startends     r8   r  r    s    E\

3C5UGD  	_
6
77			sU{mR^^-H-H-LM	
 r7   c                z   t        |       }t        |t        j                        r t	        j
                  |t        |            }n@t        |t        j                        r t	        j
                  |t        |            }nt        t        j                  j                  |       rt        j                  | |      S |S rn   )r  r^   r   rw  r  constantr   r  r   rv   r   r   r   )r$  r8  r   r  s       r8   r  r  	  s    q!,bnn-##L#a&9F,-##L%(;F
##A&Av&&Mr7   c                    t        | d      S r  r  r#  s    r8   _zerosr
        	q!r7   c                .    t        | j                  d      S r  r  rj   r  s    r8   r~  r~        	qvvq	r7   c                    t        | d      S r  r	  r#  s    r8   _onesr     r  r7   c                .    t        | j                  d      S r  r  r  s    r8   r  r  $  r  r7   c                    t         j                  j                  | j                        rt	        d      |s| S t        j                  t         j                  j                  || j                        |       S )Nzcannot splat a tensor)r   r   r^   rj   	TypeErrorr   r   r   )r   r   s     r8   _splatr  (  sX    ##AFF+
+
,,	H			"--11%@!	DDr7   c                    t         j                  j                  | j                        sPt	        t        j                  | j                        j
                        }|j                  |d       t        | |      S t        j                  | |      S r  )
r   r   r^   rj   rn  r   insertr  r   expand_dims)r   rW  r   s      r8   r  r  0  sg    				'	'	/$$QVV,223E	LLq!U			4	((r7   c                   t        j                  t        | j                              }t        j                  t        |            }|j                  dk(  s|j                  dk(  rC|j                  dkD  rt
        j                  j                  nd }t        j                  || |      S |j                  |j                  kD  rt        j                  ||       S |j                  |j                  k  rt        j                  ||       S t        )N   )rounding)r   r  r  rj   widthr   RoundingModeRTNEfp_to_fpr  truncfextfrv   )srcdst_typesrc_element_typedst_element_typer  s        r8   _float_float_castr%  8  s    \\-"9:\\-"9:q $4$:$:a$?(8(>(>(B
$$  xx@@.444#.. 0 6 66h,,
r7   c                   t        j                  t        | j                              }t        j                  t        |            }||k7  sJ |j                  dk(  rt        | t        |       |      S |j                  |j                  k(  rt        j                  ||       S |j                  |j                  kD  rt        j                  ||       S |r%|j                  dk7  rt        j                  ||       S t        j                  ||       S )Nr   r   )r   rw  r  rj   r  
_not_equalr~  r  bitcasttrunciextsiextuir!  r"  r   r#  r$  s        r8   _int_int_castr-  H  s    ^^M#(($;<^^M($;<	-	--	-q c;s+F;;/555  3// 0 6 66#.."((A-x--x--r7   c                  t        | j                        }t        |t        j                  t        j
                  t        j                  t        j                  f      st        d|  d|       t        j                  t        |            }|j                  dk(  rt        | t        |       |      S |r)d|j                  dz
  z  dz
  }d|j                  dz
  z   }nd|j                  z  dz
  }d}t        j                  | t        | j                  |            } t        j                   | t        | j                  |            } |rt        j"                  ||       S t        j$                  ||       S )Ncannot cast  tp r   r   r   r   )r  rj   r^   r   BF16TypeF16Typer  r  rv   rw  r  r'  r~  r  minimumfr  maximumffptosifptoui)r!  r"  r   r#  r$  maxintminints          r8   _float_int_castr9  Y  s<    #388,	$r{{BJJ

BJJ&W	X
SEhZ@
AA^^M($;<q c;s+F;;
 #))!+,q0f$**1,--f"(((1,ff

 
 eCHHf&=
>C

 
 eCHHf&=
>C!!(C00!!(C00r7   c                  t        j                  t        | j                              }t        |      }t	        |t         j
                  t         j                  t         j                  t         j                  f      st        d|  d|       |j                  dk(  s|st        j                  ||       S t        j                  ||       S )Nr/  r0  r   )r   rw  r  rj   r^   r1  r2  r  r  rv   r  r  uitofpsitofpr,  s        r8   r  r  t  s     ^^M#(($;<"8,	bjj"**bjjI
 SEhZ@
AAq #..#..r7   c           	         t        | t        |      t        j                  |t        j                        t        j                  |t        j                              S )Nr   
dst_signed)_ir_castr   rX   r  r  )r!  src_typer"  s      r8   r  r    sD    
 
	!^^Hc&7&78#*;*;<	
 r7   )r?  c                  t         j                  j                  | j                        rst         j                  j                  |      sTt        j                  | j                        }t         j                  j	                  |j
                  ||j                        }| j                  |k(  r| S t        | j                        }t        |      }t        |t         j                        st        |t         j                        rt        d      t        |t         j                  t         j                  f      rQt        |t         j                        s7t        t        | t         j                  j	                         d      |d|      S t        |t         j                        r&t        |t         j                        rt        | |      S t        |t         j                         r(t        |t         j                         rt#        | ||      S t        |t         j                        r(t        |t         j                         rt%        | ||      S t        |t         j                         r(t        |t         j                        rt'        | ||      S t(        j*                  j                  |      rt        |t         j                         r|j,                  dk(  rt)        j.                  ||       S |j,                  dk(  rPt        | t         j                   j1                  d      |      }t3        |      }t        t5        |||      ||      S t        |t         j                         r5t(        j*                  j                  |      rt)        j6                  ||       S t(        j*                  j                  |      r5t(        j*                  j                  |      rt)        j8                  ||       S t        d|  d|       )	Nz&cannot cast from or to float8_e4m3fnuzFr   r>  @   r   r/  r   )r   r   r^   rj   r   r   r   r  Float8E4M3FNUZTyperv   r2  r1  r  r@  r  r%  rw  r-  r9  r  r   r   r  
ptr_to_intr  r~  r'  
int_to_ptrr(  )	r!  r"  r   r?  rA  r#  r$  r   zeros	            r8   r@  r@    s   ##	hh**84""388,H""&&H
 	XXJ"388,"8, ""7"78J--= F
GG 2::r{{";<Z

F bjjnn&u5: 
  ",,/J5 S(++ "..1j7 hv66 ",,/J5 3<< "..1j7 388&&'78Z> #""8S11			1	$
333B7
Ga^djD8(6RR))*:;  3//&&))*:;h,,l3%tH:>??r7   c                   | j                   \  }t        ||      }||j                  k(  r|S t        ||j                  |      S rn   )rG   rU   r   r  )ry   r   	new_dtyper   r  r   s         r8   #_convert_element_type_lowering_rulerJ    s?     \\(6q&!!&,,H	q&,,		**r7   c                    | j                   \  }}}| j                  \  }t        |||||      \  }}t        |||||      \  }}t        j                  |||      S rn   )rG   rH   r   r  select)ry   predr   r   	pred_avala_avalb_avalr   s           r8   select_n_lowering_rulerQ    s^    !ll)VV}}*84Ivx8'$4Ivx8'$			dAq	))r7   c               .   ~t        |g| j                   }t        j                  j	                  |j
                        st        ||      S t        t        |            D cg c]	  }||vs| }}|D ]  }t        ||      } t        ||      S c c}w rn   )
rU   rG   r   r   r^   rj   r   r   r   r  )ry   r   broadcast_dimensionsr   r  r[   r  r   s           r8   _broadcast_in_dim_lowering_rulerT    s     q(3<<(!				'	'	/Q!#e*-Oq:N1NO+O cQA	1e	 Ps   #	B-Bc               $    ~t        | |d d d       S )N)	new_sizes
dimensionsr  )_reshape_lowering_rule)ry   r   rW  s      r8   _squeeze_lowering_rulerY    s    	Q$4RV	WWr7   c          
     >   ~|t        d      S t        |g| j                   }| j                  \  }| j                  \  }|j                  dk(  r9t        t        j                  | |t        t        |j                                    S t        ||j                        S )Nz`dimensions` is not supported.r   r  )rk   rU   rG   rH   ndim_reduce_loweringrX   r  rr   r   _reshaper   )ry   r   rV  rW  r  rO  r   s          r8   rX  rX    s     677q(3<<(!\\(6}}*8]]aCGGS!%fkk8J2KLL	!X^^	$$r7   c                h   t         j                  j                  | j                        s t	        d |D              sJ t        | |      S t        j                  | j                        }t        j                  t         j                  j                  ||j                  |j                        | d      S )Nc              3  &   K   | ]	  }|d k(    ywr   r6   )rZ   dim_sizes     r8   r\   z_reshape.<locals>.<genexpr>  s     3x1}3s   F)allow_reorder)r   r   r^   rj   r   r  r   reshaper   r   r   )r   r   tys      r8   r^  r^    s    				'	'	/3U3333!U	166""			eR__bkkB
 r7   c                    | j                   }|j                  d       t        j                  j	                  || j
                  | j                        S )Nr   )r   r   r   r   r   r   r   )old_typer   s     r8   get_join_typerg    s=    
..%,,q/				 	 (=(=x?P?P	QQr7   c                  t        |      dk7  rt        d      | j                  \  }}|\  }}||j                  dz
  k7  rt        d      |j                  d   dk7  s|j                  d   dk7  rt        d      t        ||j                  d d       }t        ||j                  d d       }t        t        j                  |j                              }	t        j                  |	||      S )Nr   z)Only 2-argument concatenate is supported.r   z7Only concatenate along the last dimension is supported.rl  z1Only arguments with shape [..., 1] are supported.)r   rv   rG   r\  r   r^  rg  r   r   rj   r   join)
ry   r  r2  r   r   r   r   lhsrhsret_types
             r8   _concatenate_lowering_rulerm  "  s    Y!^
I
JJ<<.&&	$!Q&++a-
A  \\"fll2.!3
;  	FLL"%&#FLL"%&#2..sxx89(	3	,,r7   c                  	 t              }|t        j                  |      k7  rt        d      t	        fdD              rt        d      fd}|f}t        |      |k  r%t        t        ||      d      }t        |      |k  r%|S )Nz$Only power-of-2 num parts supported.c              3  .   K   | ]  }|d    k7    yw)r   Nr6   )rZ   r   sizess     r8   r\   z'_split_lowering_rule.<locals>.<genexpr>=  s     ,dq	,r   z&Only equal-sized splits are supported.c                Z   t        j                  | j                        j                  }t	        | |d  d|   dz  gz   |dz   d  z         } t        fdt        t        |      dz         D              fz   }t        t        j                  t        j                  | |                  S )Nr   r   c              3  .   K   | ]  }|k7  s	|  y wrn   r6   )rZ   r   rW  s     r8   r\   z=_split_lowering_rule.<locals>.split_into_2.<locals>.<genexpr>C  s     FaAIF   
)r   r   rj   r   r^  rr   r   r   r   splittrans)r   r   permutationrW  s      r8   split_into_2z*_split_lowering_rule.<locals>.split_into_2@  s    '--EE%4LAuT{a'7#885;LLMAF5Ua#8FF$PK!!*"2"21k"BCDDr7   r6   )r   pallas_utilsnext_power_of_2rv   rw   sumr'  )ry   r   rp  rW  	num_partsrw  x_partss     ``   r8   _split_lowering_ruler}  6  s    %j),..y99
D
EE,e,,
F
GGE D'Gy #lG,b1G 	Gy 	.r7   c                	   | j                   j                  }t        d | j                  D              }t	        j
                  |      }|j                         }|j                  }t        |       |j                  }|t        |      d  }d}	t        |      |z   t        |      k(  sJ t        | j                        t        |      k(  sJ t        j                  | j                   j                        }
t        j                  |      |
j                   z  }t"        j$                  j'                  |dkD  rdnd      }|r*t)        t"        j*                  j-                  ||            }nt/        d|      }t1        |      }t3        || j                  | j                        D ]M  \  }}}|xt4        j6                  d x\    t/        d|      }n t8        d x\   t;        |      }n t=        t>              r t@        jB                  jE                  ||      }t=        |t@        jB                        rJ|jF                  s|jH                  dk7  r|jJ                  }|jF                  st/        ||      }tM        ||d	      }tM        tO        d|jP                        |d	      }|jH                  dk7  r*tS        |tU        |jV                  |jH                              }tY        t[        ||jP                  g      |      }n;tO        |jJ                  |jJ                  |jP                  z         }tM        ||d	      }|	dz  }	||	d  D ]7  }t#        j*                  |jV                        j\                  }t_        ||      }9 n|}t=        |t"        j`                        st/        ||      }tM        ||d	      }t"        j*                  j=                  |jV                        r<|D ]7  }t#        j*                  |jV                        j\                  }t_        ||      }9 t"        j*                  j=                  |jV                        rQt#        j*                  |jV                        j\                  }tc        t        |      |z
        D ]  }t_        |d      } t[        ||      }|$tM        ||d	      }tY        |t[        ||            }tS        |tU        |jV                  |            }tY        ||      }P |S )
Nc              3  P   K   | ]  }t        |t        j                           y wrn   r   rb   s     r8   r\   z0_compute_offsets_from_indices.<locals>.<genexpr>P  s%      ; %Q(<(<= ;s   $&r   r  rC  r   r6   r   Fr   )2r<   r   rz  rB   rx  strides_from_shapeget_indexer_shapeint_indexer_shaper   indicesr   r>   rX   r   r   r   itemsizer   rw  r  r
  r   r   rh   iterrx   r_   re   r   r  r^   slicer   Slice
from_sliceis_dynamic_startstrider  r@  r  r   rg   r  rj   r  r   r   r  Valuer   )r4  
nd_indexer
full_shapenum_squeezed_dimsstridesindexer_shaper  r  other_shapeother_shape_idxarray_dtype	full_sizeoffset_eltypeoffsetsindexer_iter
dim_stridedim_block_sizestart_offsetindexr  r  dim_offsetsrM  r   s                           r8   _compute_offsets_from_indicesr  L  s0    **00* ;#-#9#9; ;++J7'..0- 22]#'c"3467+/	W)	)S_	<<	<	Z%%	&#j/	99	9		*55;;<+ii
#k&:&::)..--I4Eb2N-R((,,]MJKG1m,Gg,25z%%z'?'?3 6).j., !;!Q. "5\"  %))%@e%))*			ELLA$5%%um4%e<Auzz2M%P<<1dE$))U\\:;$9UUZZL94@5;;ejj(@At]5Ao?+, 6!"";#3#3499";56
 kRXX.";>[-Fk				'	'(8(8	9 	8A$$[%5%56;;$$[$7+	8 
%%k&6&67  !1!1277dS'$./ 3!";23K7KlM%Hlim&LMk{E+*:*:J$GHK7K(Gm6)p 
.r7   c                b    t        ||      }t        t        | |j                               |      S rn   )r  r  r   r  )root_ptrr4  r  r  s       r8   rr  rr    s-     **jA'	i*">">"@A7	KKr7   c                   t        j                  ||      }t        j                  j	                  |j
                        st        |      dk(  sJ |S t        j                  ||d d f      \  }}t        | g||d d ddS )Nr   F)r~  eviction_policycache_modifieris_volatile)	r
   rs   r   r   r^   rj   r   tree_flatten_masked_load_lowering_rule)ry   re  treerz   r  r  r~  s          r8   _get_lowering_ruler    s    %%dC0(				*	*388	4x=AJ"//hd0KL)Y	#	

 
 r7   )r  r  r  c                  |t         j                  j                  }n"|dk(  s|dk(  r
t        |   }nt	        d|       |t         j
                  j                  }n
	 t        |   }t         j                  j                  | j                        rSt        j                  | j                        }t        j                  j                  |j                        rt        d      t!        | j                        }t         j                  j                  |      st	        d|       t        j                  |      }||t	        d      t        j                  j                  | j                        sl|4t        j                  j                  |j                        rt	        d      |4t        j                  j                  |j                        rt	        d	      |j                  }t        |t        j"                        xr |j$                  d
k(  }|rUt        j"                  j'                  d      }t)        | t         j                  j+                  ||j,                        d      } |t)        ||d      }t        j.                  | |||||      }	|s|	S t)        |	t        j"                  j'                  d
      d      S # t        $ r t	        d|       d w xY w)N.caz.cgunsupported cache modifier: unsupported eviction policy: -loading from a block pointer is not supportedunsupported pointer type: z"other requires mask to be providedz1other cannot be a block if pointer is not a block0mask cannot be a block if pointer is not a blockr   r  Fr   )r_  othercacheevictr  )r   CacheModifierNONE_STR_TO_CACHE_MODIFIERrk   EvictionPolicyNORMAL_STR_TO_EVICTION_POLICYKeyErrorr   r^   rj   r   r   rb  rv   r  rw  r  r  r@  r   address_spaceload)
re  r_  r  r  r  r  rh  rb  is_int1r  s
             r8   _loadr    s    --22N.E"9+N;N
3N3CD
EE //66O/@o &&sxx0%%chh/H	%%h&;&;< OPP388$(				*	*8	4
1(<
==##H-(
4<
9
::				'	'	1R00;;EJJGJKKB//::499EIJJ&&,|R^^4P9K9Kq9P'>>..q1L
""<1G1GHC UL7E??	&   FBNN77:5IW  )/):
;s   	K Kc                   | j                   j                  t        j                  t        j                  fv xr | j
                  xr | j
                  d   dz  dk(  xr t        |j                  d   x}t        j                        xro t        |j                  t              xrS t        |j                  t              xr7 |j                  dz  dk(  xr# |j                  dz  dk(  xr |j                  dk(  S )z>Returns True if the block is contiguous in the last dimension.rl  r   r   r   )r<   r   rX   int4uint4r@   r^   r  r!   r  r  r   r   r  )r4  r  slcs      r8   _is_contiguous_int4r    s     !!''CHHcii+@@ 

,
,--b1A5: J..r22SHNN
C SYY
$	
 SXXs
# 99q=A 88a<1 ::?
r7   c                r   |j                   d   }t        j                  |j                  dz  |j                  dz        }g |j                   dd |}g |j
                  dd |j
                  d   dz  }t        j                  |||      }| j                  j
                  }g |dd |d   dz  }| j                  d   }	t        |	t        |	j                  d      d      }
g | j                  dd |
}t        j                  | t        j                  |t        j                         |      } | |fS )zBReturns a new block info and indexer that reads `int4` as `uint8`.rl  r   N)r  r   Fr   )r<   r>   )r  r!   r  r  r   r   r4   rJ   r<   r>   r   r  rj   jaxShapeDtypeStructrX   uint8)r4  r  last_idxnew_last_idxnew_indices	new_shaperz   r  new_full_shape	start_idxnew_start_idxnew_start_indicess               r8   _reinterpret_int4_as_uint8r    s=    #(! 3X]]a5GH,8*$$Sb)8<8+A
  "%Az'7'7';q'@A)J9M#**00*:Z_:jn&9:.&&r*)IuY^^Q'?N-E
00"5E}E""++NCIIF%*
 
Sr7   c                  | j                   ^}}|J |j                  |      \  }}	}
}|j                  | j                        ^ }}}t        |	      dkD  rt	        d      t        |	      }	|	sCt        j                  |	| j                  d   j                        }t        j                  |      }n|	d   }t        j                  j                  |j                        st        | j                        dk(  sJ |S |j                  j                   t"        j$                  t"        j&                  fv }t)        ||      }|rt+        ||      \  }}t-        ||      }|}|r$|s"t/        |t1        |j                  d      d      }|j3                         }t5        t7        ||      |      }|
t7        t9        |
|      |      }
|t7        t9        ||      |      }t;        ||
||||      }|s|S |rt=        j>                  |t1        |j                  d            }tA        tC        jD                  |j                              }t        jF                  |||      }tC        jD                  |j                        j                  }tI        |g |d d	 |d	   |d
   z        }ntK        |tB        jL                  jO                  d      d      }tQ        |t1        |j                  d      d      }tS        |t1        |j                  d            }tK        ||j                  d      }t=        j>                  ||      }tK        |tB        jL                  jO                  d      d      S )Nr   %No support for multiple indexers yet.r   r   Fr   )r_  r  r  r  r     r   rl  r   )*rI   rm  rG   r   rv   rn  r   rp  r   ro  rq  r   r   r^   rj   r<   r   rX   r  r  r  r  r  r   r  r  r  r   rU   r  r  shruirg  r   r   ri  r^  r@  rw  r  r   rg   )ry   r~  r  r  r  r  r4  rM  re  r  r_  r  r  
other_avalr  rz   is_int4is_contiguous_int4r  ptr_offsetsr   r  
msb_values	join_typein_msbshifts                             r8   r  r  *  s    ??.*q			(229=#xu'11#,,?1i]Q
E
FF(^(	**8S\\!_5J5JKI

(
(
3C
1+C				*	*388	4s||!!!J''--#((CII1FF'*:s;
 1SAOJ)*c:'+'GU7<<%;EJK



!%YsE"K0#	%dI6>D
&uj95AE	#%& 
M$$VU6;;-BCJb11&++>?I__Y
;F,22EfBcr
BE"Ib	,ABCFw ; ;B ?NG'5q1%@Fv{{A./EUFKK6E  /F	&"..55a8	GGr7   c               .   t        j                  ||      }t        j                  j	                  |j
                        st        |      dk(  sJ |S t        |      dkD  rt        d      t        j                  |||d f      \  }}t        | g||d dS )Nr   r   r  )r~  r  )
r
   rs   r   r   r^   rj   r   rv   r  _masked_swap_lowering_rule)ry   re  r0  r  rz   r  r  r~  s           r8   _swap_lowering_ruler  t  s    %%dC0(				*	*388	4x=AJ]Q
E
FF"//ht0LM)Y	#	

!*D
 r7   )r  r  c               ,   |t         j                  j                  }n|dk7  r
t        |   }nt	        d|       |t         j
                  j                  }n
	 t        |   }t         j                  j                  | j                        rSt        j                  | j                        }t        j                  j                  |j                        rt        d      t!        | j                        }t         j                  j                  |      st	        d|       t        j                  |      }t        j                  j                  | j                        sjt        j                  j                  |j                        rt	        d      |4t        j                  j                  |j                        rt	        d      |j                  }t        |t        j"                        rd|j$                  dk(  rUt        j"                  j'                  d	      }t)        | t         j                  j+                  ||j,                        d
      } t)        ||d
      }t        j.                  | ||||      S # t        $ r t	        d|       d w xY w)Nr  r  r  r  r  z1value cannot be a block if pointer is not a blockr  r   r  Fr   )r_  r  r  )r   r  r  r  rk   r  r  r  r  r   r^   rj   r   r   rb  rv   r  rw  r  r  r@  r   r  store)re  r0  r_  r  r  rh  rb  s          r8   _storer    s%    --22N+N;N
3N3CD
EE //66O/@o &&sxx0%%chh/H	%%h&;&;< OPP388$(				*	*8	4
1(<
==##H-(				'	'	1	%%ejj1JKKB//::499EIJJ&&,bnn-,2D2D2I>>..q1L
""<1G1GHC 5,u
5%				5t>
 =  )/):
;s   	I: :Jc               R   | j                   ^}}|J |j                  |      \  }}}}	|j                  | j                        ^ }}
}t        |      dkD  rt	        d      |sCt        j                  || j                  d   j                        }t        j                  |      }n|d   }t        |||      }d }|t        ||
      }|	@t        t        |	|      |j                               }	|t        ||j                               }t        ||	|      }t        |||	|       |S )Nr   r  r   )r_  r  )r_  r  )rI   rm  rG   r   rv   r   rp  r   ro  rq  rr  rU   r   r  r  r  )ry   r~  r  r  r4  rM  re  r  r0  r_  r  r  r  rz   r  	old_values                   r8   r  r    s*    ??.*q			(229=#x'11#,,?1j)]Q
E
FF	**8S\\!_5J5JKI

(
(
3C
1+C&sJ<#
%
UJ/E	%dI68M8M8OPDs4467eCd%0)e$@	r7   c                  | j                   ^}}|J t        j                  ||      }t        j                  j                  |j                        st        |      dk(  sJ |S t        |      dkD  rt        d      |d   }t        |||      }t        j                  j                  }	t        t        |j                        t        j                        rt        j                  j                  }	t!        |	||       g S )Nr   r   r  )rI   r
   rs   r   r   r^   rj   r   rv   rr  ru  rx  r  r   rw  rv  rj  )
ry   re  r0  r  rz   r4  rM  r  indexerrd  s
             r8   _addupdate_lowering_ruler    s    ??.*q			%%dC0(				*	*388	4x=AJ]Q
E
FFQK'&sJ@#"ejj)2>>:					Bb#u	)r7   c               .    t        j                  ||      S rn   )r   ru  )ry   r   rv  s      r8   _transpose_loweringr    s    			![	))r7   c                L    t        | t        t        j                        d      S NFr   )r@  r   rX   bfloat16r  s    r8   _as_bf16r    s    	!&s||4U	CCr7   c                L    t        | t        t        j                        d      S r  )r@  r   rX   float32r  s    r8   _as_f32r    s    	!&s{{3E	BBr7   c               >   ~~|\  \  \  }\  }}	|	dk(  sJ |dk(  rt        j                  |d      }|dk(  rt        j                  |d      }| j                  \  }
}| j                  \  }||t        j
                  j                  k(  r4t        j                  j                  t        j                  j                  f}t        |t        j
                        r|xt        j
                  j                  k(  r t         j                  j                  }n[xt        j
                  j                  k(  r t         j                  j                  }n!xt        j
                  j                  k(  r t         j                  j                  }nxt        j
                  j                   k(  rnxt        j
                  j"                  k(  rnxt        j
                  j$                  k(  rnzxt        j
                  j&                  k(  rn\xt        j
                  j(                  k(  rn>xt        j
                  j*                  k(  rn xt        j
                  j,                  k(  rn n d }n	 t/        d| d      t1        ||
j2                  |j4                  d         }t1        ||j2                  |j6                  d         }|j8                  }nt        |t:              r|\  }}|t<        v s|t<        v rt         j                  j                  }n:|
j2                  t>        j@                  k(  rt         j                  j                  }nd }|j2                  }|t>        jB                  k7  r3|t>        jD                  k7  r t>        j@                  }nt/        d| d      tG        jH                  |jJ                        }tG        jH                  |jJ                        }tM        |jN                        tM        |jN                        cxk7  rdk7  r(n n%tQ        d	|jN                   d
|jN                         tS        |jN                   dk  rtQ        d      |jT                  |jT                  k7  r%tQ        d|jT                   d
|jT                         |jN                  \  }}|jN                  \  }}tW        tF        jH                  jY                  ||gt[        |                  }|t        j
                  j(                  t        j
                  j*                  t        j
                  j,                  fv rt]        |      }t]        |      }t_        |ta        |            }t_        |ta        |            }t]        |      }t]        |      }t]        t_        |ta        |                  }t]        t_        |ta        |                  }|t        j
                  j,                  k(  rEt        jb                  |||      }t        jb                  |||      }t        jb                  |||      }|t        j
                  j*                  t        j
                  j,                  fv rEt        jb                  |||      }t        jb                  |||      }t        jb                  |||      }t        jb                  |||      }t        jb                  |||      }te        jf                  ti        |      tk        |      |      }||}}t        jb                  ||||      }t1        |||j2                        S )N)r6   r6   r   )r   r   r   zUnsupported dot algorithm: .zUnsupported dot precision: r   za and b must be 2D, but got: r     z"all dimensions of b must be >= 16 z2a and b must have the same element type, but got: )input_precision)6r   ru  rG   rH   r	   DotAlgorithmPresetDEFAULT	Precisionr^   TF32_TF32_F32InputPrecisionTF32TF32_TF32_F32_X3TF32x3F32_F32_F32IEEEF16_F16_F16F16_F16_F32BF16_BF16_BF16BF16_BF16_F32BF16_BF16_F32_X3BF16_BF16_F32_X6BF16_BF16_F32_X9rv   r  r   supported_lhs_typessupported_rhs_typesaccumulation_typerr   _TF32_PRECISIONSrX   r  rY   float16r   r   rj   r   r   rk   r  r   r
  r   r   r  r}  r  dotr  rL  r  r~  )ry   r   rc   dimension_numbersout_sharding	precisionpreferred_element_typea_contract_dimb_contract_dim
batch_dimsrO  rP  r   r  	acc_dtypea_precisionb_precisionr   b_typemrM  nr  a_bf16b_bf16a_err0b_err0a_err0_bf16b_err0_bf16a_err1_bf16b_err1_bf16s                                  r8   _dot_general_loweringr    s    l7H4(N'~*	x		qF#AqF#A<<.&&}}*89(>(>(F(FF&&(=(=>I	3112
/3!!//$338823!!22$33::-3!!--$3388
,#
 
 
,
,.C""..1C""110C""003C""333C""333C""334 !$?	{!"LMMay<<Q?@Aay<<Q?@A++I)U#(K&&+9I*I"1166o		$"1166ooICII)s{{":++i
 ;I;aH
IIqvv&&qvv&&#fll+0q0
 eFLL>; < <&,,"
9
::F///
	 f&9&9%:	< 
 
$!Q	$!Qr""&&1v/@/KLM#	--	--	-- 
 a[Fa[F!WV_%F!WV_%F6"K6"K4(<=>K4(<=>KC**;;;NN;S9cNN;S9cNN;S9c////  NN;4cNN6;4cNN;S9c
..fc
2C
..c
2C 

ws|[-=s
CC6qAq!S/B#	sIx~~	..r7   c                   t        j                  |      }|\  }|j                  D cg c]"  }t        j                  d|j
                        $ }}t        j                  ||f      }t        j                  t        j                  | t        j                  d| ||fi             |      \  }	}
t        j                  |	g ||      \  }}} |
       }~|rt        d      |D cg c]  }t        |j                          }}t#        j$                  ||      }|dz  } |j&                  d   j(                  j*                  | }t,        j.                  j1                  |      5  t3        |j4                  |d g|j6                   }t#        j8                  |       d d d        |j;                          t=        |j>                        S c c}w c c}w # 1 sw Y   8xY w)Nr6   zpallas triton reductionrD  z(Reductions with constants not supported.r   r   ) r
   r  rG   rV   rW   r   r  r   r  rF  rG  r   rH  rI  rv   r  rj   r   ReduceOpr  r  r   r   r   r   ro   rE   r
  reduce_returnr  rn  r  )r  ry   r   r  r  rW  r   mapped_avalsr  r  r  r  rM  rN  r  r  r  	reduce_opr  r  r  s                        r8   _reduction_loweringr%  n	  s   ##A&)'4CF<<P4(&&r4::6P,P$$aV,'%::ll
(()B)-1vr;< (N  66.,..-F (
H
II6?@s=*@-@!!)T2)!+
,)

A

%
%
,
,k
:%	''. &&]D+0??G W%	&
 	i	5 Q  A& &s   'G%G#8GGc                  t        |t              sJ |s|S t        |      dkD  rt        |      t        fd|j                  D              }t        | |j                  |      |f      }t        t        |            }|j                  |      }t        fd|D              }t        |      dkD  rt        | |||      d   S )Nr   c              3     K   | ]6  }|j                  |j                  d  |j                  dz   d  z          8 y w)Nr   r   )updater   )rZ   r8  rW  s     r8   r\   z#_reduce_lowering.<locals>.<genexpr>	  sB      - hhQWWUd^aggdQhi6H%HhI -s   <?)rH   r[  rG   c              3  .   K   | ]  }|k7  s	|  y wrn   r6   )rZ   axrW  s     r8   r\   z#_reduce_lowering.<locals>.<genexpr>	  s     1bDj1rs  r   )	r^   rr   r   r  rG   r]  rJ   ry  r%  )r  ry   r   r  r  	dst_avalsrW  s         @r8   r]  r]  	  s    	D%	  	 	HD	At9D -"||- -IckkIk.	AA 	vayA
++y+
)C1d11D 	D	A 
T3	5a	88r7   c               B   |t         j                  k7  rt        d      t        |      dk7  rt        d      |\  }|j                  \  }t        d|j                  |         }t        |j                        dkD  rKt        t        |j                              D ]  }||k7  s	t        ||      } t        ||j                        }|j                  ||j                  t        j                  t         j                              g      }t        | |||f|      \  }	}
|
S )Nz`index_type` must be i32.r   z8`pallas` reduce operations only support one reduce axis.r   r  r)  r[  )rX   rY   rk   r   rG   r  r   r   r  r   rJ   r(  r   r%  )r  ry   r   r  index_dtyperW  rO  r  r[   rM  r  s              r8   _argreduce_loweringr/  	  s     CII
0
11Y!^
O
PP&4\\(6
ad+
,%3v||$% '	
dUA&' eV\\*Effmm#))CII:Nm&OPQ#"4q%jtD*!W	.r7   c           	         | \  }}|\  }}||kD  }||k  }t        j                  ||      }t        j                  ||t        j                  |||            }	t        j                  ||      }
|
|	fS rn   )rX   r  wherer  leftrightvalue1index1value2index2gtlt	index_min	index_ret	value_rets              r8   _reduce_argmax_combiner>  	  p    .&&.&&""kk&&))iiFCIIb&)$DE)kk&&))	I	r7   c           	         | \  }}|\  }}||kD  }||k  }t        j                  ||      }t        j                  ||t        j                  |||            }	t        j                  ||      }
|
|	fS rn   )rX   r  r1  r2  s              r8   _reduce_argmin_combinerA  	  r?  r7   c               ~    |j                   rt        t        | j                  |j                  | j
                  g| S rn   )rN  rv   ro   rE   rq   rI   ry   rq   r2  rM  s       r8   _pjit_lowering_rulerD  	  s7    
\\
	!	kk5;;
26
 r7   c                   |S rn   r6   )ry   r   dst_shardingconcrete_meshs       r8   _reshard_lowering_rulerH  	  s    	
(r7   c                   |j                   |j                  }}|rt        t        | j                  || j
                  g| S rn   )rq   rN  rv   ro   rE   rI   )ry   rE  r2  rM  rq   rN  s         r8   rK  rK  	  s>    
 ""J$5$5%
	!#++ucoo	M	MMr7   c               F    t        | j                  || j                  g| S rn   )ro   rE   rI   rC  s       r8   _remat_lowering_rulerK  	  s    	!#++ucoo	M	MMr7   c                    |S rn   r6   )rM  r   s     r8   r%  r%   
  s    a r7   c                   | j                   j                  j                  }||v rt        | |j	                  |            S t        d| d      )N)rW  z
Axis name z not found in grid.)rE   r*   
grid_namesrZ  r  LookupError)ry   	axis_namerN  s      r8   _axis_index_rulerQ  
  sL    {{''22**$Sz/?/?	/JKKj+>?@@r7   c                z    t        |       dk(  ryt        |       dkD  ry| \  }t        |t        j                        S )Nr   Tr   F)r   r^   r   
ReadEffect)ref_effectseffs     r8   _is_read_onlyrV  
  s<    &3	C))	**r7   r   )step
bound_typec               `   |dk7  rt         ||j                  dk(  rt        |      }nt        |      }t	        j
                  ||||      }	t        j                  j                  |	j                        5  |	j                  }
t        |      D cg c]!  \  }}|	j                  j                  |dz      # }}}|r	g ||
|}ng ||}t        | j                  || j                  g| }t	        j                   |       d d d        t#        |	j$                        S c c}}w # 1 sw Y   $xY w)Nr   r   )rv   r  r   _i64_constantscf_dialectForOpr   r   r   r  induction_variabler   r
  ro   rE   rI   yield_rn  results_)ry   rq   lower_boundupper_boundrN  has_loop_indexrW  rX  r2  for_op
loop_indexr[   rM  for_body_args
jaxpr_argsall_outs                   r8   _lower_jaxpr_to_for_looprh  
  s"    
QY
:++r1DD[+tTB&	''4 
 **J>GoNdaV[[**1q51NMN8V8Z8-8j,V,m,j&UCOO.8G w
  
foo	 O
  
 s   5D$&D6AD$D$$D-c               P   ~t        |	      |z
  |z
  }
|
rt        |rt        |dk7  rt        ~~
~~|j                  |j                  }}|rt        ~t	        j
                  |||      \  }}t        t        |	| j                        }	t        j                  |	|g      \  }}	|r1|	^}}	t        |t        ||j                              }|j                  }n5t        d      }t        |      }t        j                   j#                  d      }t%        | ||||g|	|d|d}|r|g|S |S )Nr   r   r   rb  rW  rX  )r   rv   rq   rN  rx  pattern_match_scan_to_fori_loopr'  rU   rG   r   r#   r  rh   rj   r   r   rw  r  rh  )ry   rq   linearlengthr  unroll
num_consts	num_carry_split_transposer2  num_extensivejaxpr_constsrb  rN  r`  ra  rX  for_outs                     r8   _scan_lowering_ruleru  7
  sF    d)j(94---''q[++mVWU\\%,, 225*iP % 
tS\\	2$
|4,&$K${L9I9I$JKK!!J"K'K,,R0J$	5+{FD59D#!
D'  "'""	.r7   c               r   |ry t        |j                  j                  |g      \  }}|D cg c]  }|j                   }	}t	        |	      dk  ry |	d d \  }
}|
j
                  dk7  s,|
j                  t        j                  t        j                  fvry |j
                  dk7  s,|j                  t        j                  t        j                  fvry |d d \  }}|j                  j                  d   }|j                  j                  t        j                  k(  sJ t	        |j                  j                        dk7  ry |j                  j                  d   }|j                  t        j                  k7  ry |j                  |gk7  ry |j                  ||gk7  ry t        |j                  j                  |g      \  }}|d d \  }}|j                  j                  d d \  }}||ury t!        |j                  j                        D ]  \  }}|j                  t        j"                  u s#|j                  d   |u s5t%        |j                  d   t&        j(                        s]|j                  d   j*                  dk(  sz|j                  d   |k(  s|} n y |j                  }g |j                  d | |j                  |   |j                  |dz   d  }t-        |j                  dd        }|j/                  |j                  d | |j                  |dz   d  z   |||j0                  j3                               }t        |||g      \  }}}|d d |dd  c\  }}}t        | j4                  |g      \  }}| j/                  g |d |dd        } t7        | ||||g|dd|j8                  d}||g|S c c}w )	Nr   r6   r   r   )r&  r   r   r   )rI   Trj  )r#   rq   r   r   r   r   r   rX   rY   int64r   bool_r&  r   r	   lt_pr   add_pr^   rV   r  r  rr   rJ   r   with_unknown_namesrI   rh  rj   )ry   cond_nconsts
cond_jaxprbody_nconsts
body_jaxprr2  rM  cond_invarsr8  cond_in_avalsa1a2v1v2outvarr5  body_invarsvo1vo2r[   	eqn_indexrq   
new_invarsnew_outvarsbody_constscarrylbubconst_block_infosargs_block_infosrt  s                                  r8   _maybe_pattern_match_fori_loopr  g
  s    j..55~F.![#./a166/-/!!&"bXX^rxx		399'==XX^rxx		399'==r?&"b##A&&			cii	''	'				1$a #]]chh[[VHZZB8j..55~F.![r?&"b%%bq)(#ss]***//0 	fa
}}		!	A"	cjjmX%5%56ZZ]!#{{1~$i	 


%2m|, 2\*2lQ./02* emmAB'(+
--::jy!EJJy1}~$>>!!446	  8%
 %TL,+GH![%!9eABi.(2rD(23??4@>)C%% !8"3 !8T !8"212"6!8 	9#$	
 
 
' b	7	G 0s   N4c               z   t        t        || j                        }t        | g|||||d}||S t	        j
                  |||g      \  }}}	t	        j
                  | j                  ||g      \  }
}}|D cg c]  }|j                   }}|D cg c]  }|j                   }}|	D cg c]  }|j                   }}g |||}t        j                  ||      } |j                  j                  j                  | }t	        j
                  |j                  ||g      \  }}}g ||}t        j                  j!                  |      5  t#        | j$                  |j&                  g |
|g| \  }t        j(                  ||j                         d d d         |j*                  j                  j                  | }t	        j
                  |j                  ||g      \  }}}g |||}t	        j
                  |||g      \  }}}t        j                  j!                  |      5  t#        | j$                  |j&                  g ||g|| }g |||} | rt        j,                  |        d d d        t/        |j0                        }!|!||z   d  S c c}w c c}w c c}w # 1 sw Y   xY w# 1 sw Y   BxY w)N)r|  r~  r}  r  )r'  rU   rG   r  r   r#   rI   rj   r[  WhileOpbeforer  r   r
  r   r   r   ro   rE   rq   	conditionafterr^  rn  r_  )"ry   r|  r}  r~  r  r2  r  cond_constsr  r  cond_const_block_infosbody_const_block_infoscarry_block_infosr   cond_const_typesbody_const_typescarry_types	all_typeswhile_opbefore_blockcond_consts_rM  carry_	cond_argscondafter_blockbody_consts_all_argscond_const_argsbody_const_args
carry_argsloop_outall_handlesrg  s"                                     r8   _while_lowering_ruler  
  s    
tS\\	2$ *# A A<7CPZ5?A& M$(OO
\<(%!+{E oocool'CD D02C '22aff22&12aff22!&'A'+'B B#3BkB)  D1(.''..	:, OO\",6 '&v&)	''5 8%5
 5#45 
	FT $ 6 678 -%%,,i8+'+\"($,f 5|4l4V4(15|,2./?J 
''4 
&'5
 5#45 
	
 
H BOAoAAK%
& ""#'	,.	//W 32'8 8$
& 
&s,   8JJ*JA	J$(AJ1$J.1J:c               r   | j                   }d }| j                  D cg c]
  } ||       }}t        |t        d|j                        d      }t        j                  ||d      }	t        j                  j                  |	j                        5  t        | j                  |d   j                  |dd  g| }
t        j                  |
       d d d        t        j                  j                  |	j                        5  t!        |      dkD  r2t#        | t%        |t        d|j                              g|d	|dd  i}n(t        | j                  |d   j                  |dd  g| }t        j                  |       d d d        t'        |	j(                        S c c}w # 1 sw Y   xY w# 1 sw Y   /xY w)
Nc                    t        | j                        }| j                  s|S t        j                  j                  | j                  |      S rn   )r   r   r   r   r   r   )r   r   s     r8   to_typez$_cond_lowering_rule.<locals>.to_type  s:    $X^^4L>>""8>><@@r7   r   Fr   T)hasElser   r   branches)rI   rH   _equalrh   rj   r[  IfOpr   r   r   
then_blockro   rE   rq   r^  
else_blockr   _cond_lowering_ruler}  rn  r_  )ry   r  r  r2  rI   r  rO  	out_typesuse_branch0if_opouts0outs1s               r8   r  r  
  s    +A (+}}5ws|5)5ul1ejj9%H+


;	4
@%	''(8(89 $AB 
	E
 u 
''(8(89 
8}q!

ul1ejj1
2  AB<	e '
++
1+


ab/ 	e
 u" 
enn	; 6
  s   F>F!A>F-!F*-F6c                
   t        | t        j                        r| S t        | t        j                  t        j
                  t        t        t        j                  f      rt        | t        |j                              S t        rn   )r^   r   r  r   r   r   r   r   r   r   rh   r   r   rv   )r   r   s     r8   rU   rU   .  sZ    288H"))RZZeX-B-B	C ,TZZ899r7   c                Z   t        | t        j                  t        j                  t        t
        t        j                  f      rct        |t        j                        rt	        |       } n't        |t        j                        sJ t        |       } t        j                  ||       S t        rn   )r^   r   r   r   r   r   r   r   r   rw  r  r  r  rv   )r8  r$  s     r8   rh   rh   8  sw    "))RZZeX-B-B	C !R^^$
a&a2<<(((
(a!!!Q''r7   c                h    t        j                  t        j                  j	                  d      |       S )Nr   r  r  r   rw  r  r8  s    r8   r   r   E  $    			 ; ;B ?	CCr7   c                h    t        j                  t        j                  j	                  d      |       S )NrC  r  r  s    r8   rZ  rZ  I  r  r7   c                    t        j                  |       } t        j                  | t        j                        r,t
        j                  j                  | j                  dz        S t        j                  |       S )Nr  )rX   r   r  r   r  r   rw  r  r  r   dtype_to_ir_typer  s    r8   r   r   M  sQ    
))E
%^^E2::&>>&&u~~'9::			u	%%r7   c               p   t        |j                        }t        t        |            }t        |t        j
                  t        j                  f      sJ t        |t        j
                  t        j                  f      sJ |j                  |j                  k7  rt        d| d| d      t        j                  j                  |j                        rJt	        j                  |j                        j                  }t        j                  j                  ||      }n|}t        j                  ||      S )Nr/  r   z because of different widths)r  rj   r   r^   r   rw  r  r  rv   r   r   r   r   r(  )ry   operandrI  src_elem_typedst_elem_typer   ri  s          r8   #_bitcast_convert_type_lowering_ruler  U  s    
  -- 1) <=-	MBNNBLL#A	BB	B	MBNNBLL#A	BB	BM///

witI;.JK  ##GLL1-33E%%))%?KK			K	11r7   )ry   r&   r{   BlockMapping)r{   r  r  rN   )r   r  r   rN   r  r  )r   r  r   r  r   r;   r   r;   r   r;   r  r  )r   zjax_core.Primitiver  zCallable[[_T], _T])r*   r)   )r  z
ir.Context)r   rA   )rq   jax_core.Jaxprr*   r)   r/   r'   r  rL   )ry   r&   rq   r  rI   z!Sequence[BlockInfo | None] | Noner  r=   )rB  Callable[..., Any]r1  r  r  r  )rW  r   r   r?   r  r  rQ  )rd  ztt_dialect.RMWOpre  r  r  r  r_  ir.Value | Nonerf  ztt_dialect.MemSemanticrg  ztt_dialect.MemSyncScoper  r  )ry   rD   r  zprimitives.AtomicOpType)ry   rD   rW  r   r  r  )r(   r'   r  zSequence[_Extern | _Fallback]r  r  )r   r  r  r  )r   r  r   r  )r   r  r   r  r  r  )r   r  r   r  r   r  r  r  )r   r  r   r  r  arith_dialect.CmpIPredicater  r  r  zarith_dialect.CmpFPredicater   r  r  r  )ry   rD   r2  r  r  r'   )r8  r  r(   r'   r  zir.Attributer  None)ry   rD   r  r?   )ry   rD   r   r?   )ry   rD   r   r   )r$  ir.Typer  r  )r  r   r  r   r  r  )r$  r  r8  objectr  r  )r$  r  r  r  )r   zir.valuer   r?   r  r  )r   r  rW  r   r  r  )r!  r  r"  r  r  r  )r!  r  r"  r  r   r  r  r  )r!  r  rA  jax.typing.DTypeLiker"  r  r  r  )
r!  r  r"  r  r   r  r?  r  r  r  )r   r  r   r?   r  r  )rf  zir.RankedTensorType)r4  r:   r  ro  r  r  )r  r  r4  r:   r  ro  r  r  )NN)re  r  r_  r  r  r  r  
str | Noner  r  r  r  r  r  )r4  r:   r  ro  r  r  )r4  r:   r  ro  r  ztuple[BlockInfo, NDIndexer]rn   )re  r  r0  r  r_  r  r  r  r  r  r  r  )ry   rD   rP  r   )r  r  )
ry   rD   rq   r  rb  r  rW  r   rX  zir.IntegerType | None)r   r  r   r;   r  r  )r8  r  r$  r  r  r  )r8  r   r  r  )r   r  r  r  )ry   rD   r  r  r  r  (  rP   
__future__r   collections.abcr   r   r   r4   	functoolsr   typingr   r   r  r	   r
   jax._srcr   r   r   r   r   rV   r   r   r   r   rF  r   r   r   r   jax._src.interpretersr   r   rH  jax._src.lib.mlirr   jax._src.lib.mlir.dialectsr   r  math_dialectr   r[  jax._src.lib.tritonr   r   jax._src.pallasr_   r   r    rx  jax._src.stater!   spjax._src.utilr"   r#   	jax.numpynumpyrX   r   r$   safe_mapr'  
unsafe_mapsafe_ziprx   
unsafe_zipro  r)   r  rf   	dataclassr&   r:   rD   rL   r.  rR   r}   r   r   r   r   r   r   r   r   r  ro   rR  r   program_id_prZ  num_programs_pr]  r  r  r  r  rj  atomic_rmw_pr  atomic_cas_pr  r  cumsum_pr  not_pr  r  r  r  rY   rw  r  float64r  absir  absfabs_dispatch_tabler  ceil_dispatch_tabler  floor_dispatch_tabler  exp_dispatch_tabler  exp2_dispatch_tabler  expm1_dispatch_tabler  log_dispatch_tabler  log1p_dispatch_tabler  sqrt_dispatch_tablefpowipowfpow_dispatch_tabler  cbrt_dispatch_tabler#  rsqrt_dispatch_tabler(  sin_dispatch_tabler-  cos_dispatch_tabler2  tan_dispatch_tabler7  asin_dispatch_tabler<  acos_dispatch_tablerA  atan_dispatch_tablerF  atan2_dispatch_tablerK  sinh_dispatch_tablerP  cosh_dispatch_tablerU  tanh_dispatch_tablerZ  asinh_dispatch_tabler_  acosh_dispatch_tablerd  atanh_dispatch_tablectpoppopulation_count_dispatch_tablectlzclz_dispatch_tablenextafter_dispatch_tabler(  abs_pneg_pceil_pfloor_pexp_pexp2_pexpm1_plog_plog1p_psqrt_psquare_ppow_pcbrt_prsqrt_psin_pcos_ptan_pasin_pacos_patan_patan2_psinh_pcosh_ptanh_pasinh_pacosh_patanh_ppopulation_count_pclz_pnextafter_pry  r  r}  rg   r   r  r   r  partialCmpIPredicateeqr  OEQr  neUNEr'  sltultOLT
_less_thansleuleOLE_less_equalsgtugtOGT_greater_thansgeugeOGE_greater_equalr  rz  sub_pmul_pand_pandior_porixor_pr  shift_left_pshlishift_right_arithmetic_pshrsishift_right_logical_pr  	add_any_p_JAX_TO_TRITON_BINARYitemsprimr   r  rem_peq_pne_pgt_pge_pry  le_p_JAX_TO_TRITON_SIGNED_BINARYr  debug_print_pr  r  multiple_of_pr  max_contiguous_pr  broadcast_to_pr  integer_pow_pr  clamp_p
logistic_pis_finite_p_JAX_FN_MAPPINGmin_pr  max_pr  div_pr  sign_psign_lowering_helper	erf_inv_perf_inv_lowering_helperiota_pr  r  r  r  r
  r~  r  r  r  r  r%  r-  r9  r  r  r@  convert_element_type_prJ  
select_n_prQ  broadcast_in_dim_prT  	squeeze_prY  	reshape_prX  r^  rg  concatenate_prm  split_pr}  r  rr  get_pr  r  r'   r  r  r  r  r  r  load_pr  swap_pr  r  r  addupdate_pr  transpose_pr  r  HIGHr  r  r  r  dot_general_pr  r%  r]  r  reduce_max_pr  reduce_min_pr  reduce_sum_pr/  r>  argmax_prA  argmin_pjit_prD  	reshard_prH  closed_call_pcustom_jvp_call_prK  remat_prK  stop_gradient_paxis_index_prQ  rV  rh  scan_pru  r  while_pr  cond_pr  rU   rh   r   rZ  r   bitcast_convert_type_pr  )r=  cs   00r8   <module>r     sE%   7 " . $     
   "    % '   &  %   & 4   = ; 9 5 / & 1 # + ! $   T]--Z--Z	%%''


    6 6 6         I "	",8"JJ$ ! !	
 # 8  :z	F,F,F, F, 	F,R=&	=&=& 3=&
 =&@	26,) :**+' ,' :,,-+ .+ !'1'='='M'M*4*A*A*E*E	 
 	
 % ( * :**+(.	(. )	(. ,(.V :**+ ,(!H 3<< A	A*-A8<A !A 399E E
 d#! ! $!H d#  $(
6& *	Z3\3995|S[[9{CKK83;;-!2!233<<.,"3"34
 	0#++>0#++>3;;-!2!233<<.,"3"34	
 $ +
|S[[9{CKK83<<.,"3"34
 	0#++>0#++>3<<.,"3"34
  ,}ckk:|S[[93<<.,"4"45
 	13;;?13;;?3<<.,"4"45
  *	{CKK8z3;;73<<.,"2"23
 	/=/=3<<.,"2"23
  +
|S[[9{CKK83<<.,"3"34
 	0#++>0#++>3<<.,"3"34
  ,}ckk:|S[[93<<.,"4"45
 	13;;?13;;?3<<.,"4"45
  *	{CKK8z3;;73<<.,"2"23
 	/=/=3<<.,"2"23
  ,}ckk:|S[[93<<.,"4"45
 	13;;?13;;?3<<.,"4"45
  +
|S[[9{CKK83<<.,"3"34
 	0#++>0#++>3<<.,"3"34
  *	cii(,Dcii(+s{{C\\3;;'	
 	ckk*KEckk*JD\\3<<(	

 	cii(*;S[[Icii(*;S[[I\\3;;'	
 	ckk*,<ckkJckk*,<ckkJ\\3<<(	

! @ +
|S[[9{CKK83<<.,"3"34
 	0#++>0#++>3<<.,"3"34
  ,}ckk:|S[[93<<.,"4"45
 	13;;?13;;?3<<.,"4"45
  *	{CKK8z3;;73<<.,"2"23
 	/=/=3<<.,"2"23
  *	{CKK8z3;;73<<.,"2"23
 	/=/=3<<.,"2"23
  *	{CKK8z3;;73<<.,"2"23
 	/=/=3<<.,"2"23
  +
|S[[9{CKK83<<.,"3"34
 	0#++>0#++>3<<.,"3"34
  +
|S[[9{CKK83<<.,"3"34
 	0#++>0#++>3<<.,"3"34
  +
|S[[9{CKK83<<.,"3"34
 	0#++>0#++>3<<.,"3"34
  ,ckk*M3;;Gckk*L#++F3<<.0B0BC
 	ckk*,>Lckk*,>L3<<.0B0BC
  +
|S[[9{CKK83<<.,"3"34
 	0#++>0#++>3<<.,"3"34
  +
|S[[9{CKK83<<.,"3"34
 	0#++>0#++>3<<.,"3"34
  +
|S[[9{CKK83<<.,"3"34
 	0#++>0#++>3<<.,"3"34
  ,}ckk:|S[[93<<.,"4"45
 	13;;?13;;?3<<.,"4"45
  ,}ckk:|S[[93<<.,"4"45
 	13;;?13;;?3<<.,"4"45
  ,}ckk:|S[[93<<.,"4"45
 	13;;?13;;?3<<.,"4"45
  #7[#))4]CII63;;-!3!34
 	2CII>2CII>3;;-!3!34
#  *	Z3\39953;;-!2!23
 	-syy9-syy93;;-!2!23
  0ckk*,=s{{Kckk*,<ckkJ

 	[[#++&(>	
 	[[#++&(>	
	
      II!II' JJ# KK%	
 II! JJ# KK% II! KK% JJ# LL+ II! JJ# KK% II!  II!!" II!#$ JJ#JJ#JJ#KK%JJ#JJ#JJ#KK%KK%KK%;II!OO-= D!J$<I
%	I
%KKK )K )	K
 (K K K& 
		''**''**&&**	
 Y''**''**&&**	
 Y''++''++&&**	
  i''++''++&&**	 "	!!''++''++&&**	 #""''++''++&&**	C
 IItIItIItII}!!HHmII}!!m((  -"5"5}22t  &++- .HD"79  !.. IItHHfHHjHHmHHnHHjHHk   -224 ,HD"57 L
 !,, 9**+!	!! 
! ,!H :++, - :../ 0 2$$%7 &7
 3$$% &8 KKJNN=OOL  %%' FHD" )"u EF 399% %" 399% %" 399
( 
(  #** l//%H
 ! #--  l22UK
 3::   E) ."1	1$115116/	/$/15//
	
"
 #
 	
 /4C@C@'+C@8@C@L 3--.+	+ /+ 3>>"* #* 3))*
	
 +
 3==!X "X
 3==!%	% "% 
R 3$$%- &-& 3;;  *RR'0RRjLL$-L;DLL 288   /9.G.GH3q619H -7-E-EF#a&!)F 
 !!C
 "&"&C	C
C C
 C  C C CL '0 . :$$%FH	FH &FHR 299
 
  !3
 "&"&3	33 3
 3  3 3l :$$%	 &8 2>>" #$ 3??#* $* MM&&(=(=> DC 3$$%r/	r/ &r/j @9( +<)*;*;ckk+ c&& ' +<)*;*;ckk+ c&& ' +<)*;*;cgg+ c&& '
"* '8i&7&7/' cll #
 '8i&7&7/' cll #
 4::  4>>" # 8))*%778N	N 9 +N =(()N *N 2@ g-- . 3##$A %A+$ (,	   &D 3::,	, ,^N	Nb 3;;C0	C0  C0L 3::+	+ +\
DD& 3--.2	2'/22 /2{ IFs   KB[L B[