
    uki1                    

   d Z ddlmZ ddlZddlZddlZddlmZ ddlZddl	m
Z ddl	mZ ddl	mZ ddl	mZ dd	l	mZ dd
l	mZ ddl	mZ ddl	mZ ddlmZ ddlm
Z ddlmZ ddlm
Z ddlmZ ddlm Z  ddlmZ! ddl"m#Z# ddl$m%Z% ddl&m'Z( e jR                  Z)ejT                  e+cZ+Z,ejZ                  e.cZ.Z/e0ejb                  z  Z2e3e2df   e4e5e3e5df   z  e2f   z  Z6ejn                  ejp                  z  Z9 ejt                  d      Z;d_dZ<e;jz                  d        Z>e;j~                  d`d       Z@dadZA ej                  e;eA        ejt                  d      ZCdbdZDeCjz                  d        ZEdadZF ej                  eCeF        ejt                  d      ZGddd	 	 	 	 	 	 	 	 	 	 	 dcdZHeGjz                  d         ZI	 	 dad!ZJ ej                  eGeJ       ej                   G d" d#             ZLd$ ZMd% ZNddd&ZOddd'ZPej(                  j                   ej                  d()       G d* d+ejp                                      ZRd, ZS ejt                  d-      ZTd(eT_U        d. ZVeVeT_W        d/ ZXeXeT_Y        eTj                  d0        Z[	 	 	 	 ded1Z\e\ej                  eT<   d2 Z^  ej                  eT      e^        ejt                  d3      Z`d(e`_U        eVe`_W        d4 Zaeae`_Y        e`j                  d5        Zb	 	 	 	 ded6Zcecej                  e`<   d7 Zd  ej                  e`      ed       d8 Zedfd9Zfdd:d;	 	 	 	 	 dgd<Zgej                  j                  f	 	 	 	 	 dhd=Zjej                  j                  f	 	 	 did>Zk ejt                  d?      Zleljz                  d@        ZmdA Zn ejt                  dB      Zod(eo_U        eojz                  dC        ZpdjdDZq ejt                  dE      Zrerjz                  dF        ZsdG Zt ejt                  dH      Zud(eu_U        eujz                  dI        ZvdJ Zw ejt                  dK      Zxexjz                  dL        ZydM Zz ejt                  dN      Z{dO Z|e{jz                  dP        Z}dQ Z~ ejt                  dR      ZdS Zejz                  dT        Z ejt                  dU      ZdV Zejz                  dW        Z	 	 	 	 	 	 dkdXZddYdldZZddYdmd[Z ejt                  d\      Zd(e_U        dnd]Zej                  dod^       Zy)pz<Module for Pallas:TPU-specific JAX primitives and functions.    )annotationsN)Any)core)dtypes)pretty_printer)prng)random)state)	tree_util)util)mlir)
primitives)	discharge)indexing)	Transform)	DTypeLike.repeatc                r    t        j                  || j                        }t        j	                  | ||      S )Nrepeatsaxis)r   canonicalize_axisndimrepeat_pbind)xr   r   s      \/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/jax/_src/pallas/mosaic/primitives.pyr   r   6   s-    			aff	-$	q'	55    c               
   |dk  s|t        | j                        k\  r%t        d| dt        | j                         d      t        | j                        }||xx   |z  cc<   t	        j
                  || j                        S )Nr   zaxis: z is out of range [0, ))lenshape
ValueErrorlistjax_coreShapedArraydtype)r   r   r   r"   s       r   _repeat_abstract_evalr(   :   sm    	AXQWW%
vdV#8QWWaH
II
qww-%++			eQWW	--r   c                   t        | j                        D cg c]  }||k(  r|nd }}t        j                  | |      S c c}w )N   )ranger   jnptile)r   r   r   irepss        r   repeat_implr0   C   s>    /4QVV}	=!Q$Y'A
%	=$	=	!T	 
>s   A c               r     t        j                  t        j                  t        ||      d      | |      S )Nr   Fmultiple_results)r   	lower_fun	functoolspartialr0   )ctxr   r   r   s       r   _repeat_lowering_ruler8   I   s8    
W4@
 
 r   bitcastc                H   t        j                  |      }t        | j                        dk  rt	        d      t        j
                  | j                        }t        j
                  |      }| j                  d   |z  |z  rt	        d      t        j                  | |      S )N   zNot implemented: bitcast 1DzJNot implemented: the 2nd minor dim can not be perfectly packed or unpacked)ty)	r   !check_and_canonicalize_user_dtyper!   r"   r#   itemsize_bitsr'   	bitcast_pr   )r   r=   src_bitwidthdst_bitwidths       r   r9   r9   S   s    //3"\A
2
33%%agg.,%%b),WWR[<,.
	  
b	!!r   c                   t        | j                        }t        j                  | j                        }t        j                  |      }|d   |z  |z  |d<   t        j                  ||      S )Nr<   )r$   r"   r   r?   r'   r%   r&   )r   r=   r"   rA   rB   s        r   _bitcast_abstract_evalrD   a   s]    
qww-%%%agg.,%%b),Bi,&,6%)			eR	((r   c               J    fd} t        j                  |d      | |      S )Nc                L   t        j                  | j                        }t        j                        }||k  rg| j                  ^ }}}||z  } | j                  g |||z  || } t        j                  | dd      } t        j                  j                  |       S ||kD  r_t        j                  j                  |       }|j                  ^ }}}} t        j                  |dd      j                  g |||z  | S t        j                  j                  |       S )Nr<   )
r   r?   r'   r"   reshaper,   swapaxesjaxlaxbitcast_convert_type)	r   rA   rB   leadingmnpackingyr=   s	           r   _bitcastz(_bitcast_lowering_rule.<locals>._bitcastk   s   ''0L''+Ll"wwnw1,g
!))
7W
7a7l
7G
7Q
7a
,,q"b
!aWW))!R00l"
''
&
&q"
-a !w1g,S\\!R$,,FgFq7{FAFF77''2..r   Fr2   r   r4   )r7   r   r=   rR   s     ` r   _bitcast_lowering_rulerT   j   s$    / 
:5	9#q	AAr   roll)stridestride_axisc                  t        |t              r|dk  rt        d      |dk  s|t        | j                        k\  rt        d      |d u |d u k7  rt        d      |J|H|dk  rt        d      |dk  s|t        | j                        k\  rt        d      ||k(  rt        d      t
        j                  | ||||      S )	Nr   zshift must be non-negative.zaxis is out of range.z5stride and stride_axis must be both specified or not.zstride must be non-negative.zstride_axis is out of rangez,expected axis and stride_axis are different.)r   rV   rW   )
isinstanceintr#   r!   r"   roll_pr   )r   shiftr   rV   rW   s        r   rU   rU      s     s	
2
33	AXQWW%
,
--n+-.
L
MMK3z566Q+QWW5455{EFF	T&k 
 
 r   c                    ~| S N )r   r\   _s      r   _roll_abstract_evalra      s
    	
(r   c               T    fd} t        j                  |d      | ||      S )Nc           
     "   t        j                  | |      S t        t        j                  | | j                                 D cg c]"  \  }}t        j                  |||z  z         $ }}}t        j
                  |      S c c}}w r^   )r,   rU   	enumeratesplitr"   concatenate)r   r\   r.   xsoutputsr   rV   rW   s        r   _rollz"_roll_lowering_rule.<locals>._roll   s    ~XXa%% syyAGGK,@+NOAr 	UQZ'.G  ??7K00	s   'BFr2   rS   )r7   r   r\   r   rV   rW   ri   s      ``` r   _roll_lowering_rulerj      s&    1 
7	6sAu	EEr   c                     e Zd ZU ded<   ded<   ded<   ded<   ded<   ded	<   d
ed<   ded<   ded<   ej
                  j                  Zded<    ej                  dddd      Z
ded<   d Zd Zed        Zd d!dZd"ddd#dZd Zd Zd Zy)$AsyncCopyDescriptorr   src_refztuple[Transform, ...]src_transformsdst_refdst_transformsint | jax.Arraydst_semdst_sem_transformszint | jax.Array | Nonesrc_semztuple[Transform, ...] | Nonesrc_sem_transforms%MultiDimDeviceId | IntDeviceId | None	device_idprimitives.DeviceIdTypedevice_id_typeF)defaultinitcomparehashbool_usedc                T    | j                   d u | j                  d u z  rt        d      y )Nz<Either both or neither `src_sem` and `device_id` can be set.)rt   rw   r#   selfs    r   __post_init__z!AsyncCopyDescriptor.__post_init__   s3    4!78 % & & 9r   c                H    | j                   st        j                  d       y y )NzOAsyncCopyDescriptor was not used. Did you mean to call `start` or `wait` on it?)r   loggingerrorr   s    r   __del__zAsyncCopyDescriptor.__del__   s    ::mm; r   c                    | j                   d uS r^   rt   r   s    r   	is_remotezAsyncCopyDescriptor.is_remote   s    <<t##r   c                   |rmt        | j                  | j                  | j                  | j                  | j
                  | j                  | j                  | j                  | j                  	      S t        | j                  | j                  | j                  | j                  | j                  | j                  | j
                  | j                  | j                  	      S r^   )
_dma_flattenro   rp   rm   rn   rt   ru   rr   rs   rw   )r   swap_src_and_dsts     r   _get_args_and_treez&AsyncCopyDescriptor._get_args_and_tree   s    
,,



,,



,,

!
!
,,

!
!
..
 
 
,,



,,



,,

!
!
,,

!
!
..
 
r   )addc               |    d| _         | j                         \  }}t        j                  ||| j                  ||d y )NT)treery   priorityr   )r   r   dma_start_pr   ry   )r   r   r   	flat_argsr   s        r   startzAsyncCopyDescriptor.start   s>    DJ--/OIt	**r   c                \    | j                   r| j                          | j                          y r^   )r   	wait_send	wait_recvr   s    r   waitzAsyncCopyDescriptor.wait   s    ~~
nnNNr   c                x    d| _         | j                         \  }}t        j                  ||| j                  d y )NTr   ry   )r   r   
dma_wait_pr   ry   r   r   r   s      r   r   zAsyncCopyDescriptor.wait_recv   s4    DJ--/OItOO	d.A.Ar   c                    d| _         | j                  st        d      | j                  d      \  }}t	        j
                  ||| j                  d y )NTz#Cannot `wait_send` on a local copy.)r   r   )r   r   r#   r   r   r   ry   r   s      r   r   zAsyncCopyDescriptor.wait_send  sN    DJ>><== --t-DOItOO	d.A.Ar   N)F)r   r~   )r   )r   rZ   r   r~   )__name__
__module____qualname____annotations__r   DeviceIdTypeMESHry   dataclassesfieldr   r   r   propertyr   r   r   r   r   r   r_   r   r   rl   rl      s    ,'',''++!!2222,6,C,C,H,H.)H!!!%U% &
 $ $4	E 	

r   rl   c	                b    t        j                  | t        |      |t        |      |||||f	      S r^   )r   tree_flatten_maybe_wrap_transformed_refs)	rm   rn   ro   rp   rr   rs   rt   ru   rw   s	            r   r   r     sA     
		">2">2
! 

 
r   c           	     |    t        j                  | |      \	  }}}}}}}}	}
|t        |      |t        |      ||||	|
f	S r^   )r   tree_unflatten_maybe_unwrap_transformed_refs)r   r   rm   rn   ro   rp   rr   rs   rt   ru   rw   s              r   _dma_unflattenr   +  se     tY/
 $^4$^4

 
r   c                D    t         j                  j                  d |       S )Nc                v    t        | t        j                        rt        t        j                  |             S | S r^   )rY   r
   TransformedRefr   TransformedRefTreewrapobjs    r   <lambda>z._maybe_wrap_transformed_refs.<locals>.<lambda>F  s2    	C--	. //A/F/Fs/KL  r   rJ   r   map
transformss    r   r   r   D  s!    	 	
 r   c                J    t         j                  j                  d | d       S )Nc                X    t        | t              rt        | j                               S | S r^   )rY   r   r   unwrapr   s    r   r   z0_maybe_unwrap_transformed_refs.<locals>.<lambda>O  s'    	C+	, 1>  r   c                "    t        | t              S r^   )rY   r   r   s    r   r   z0_maybe_unwrap_transformed_refs.<locals>.<lambda>S  s    *S*<= r   )is_leafr   r   s    r   r   r   M  s)    	 = 
 
 r   T)frozenc                  *    e Zd ZdZedd       ZddZy)r   zA PyTree wrapper for a ``TransformedRef``.

  The wrapper is necessary to support the case when a ``TransformedRef`` is
  indexed with other ``TransformedRef``s.
  c                <     | |j                   |j                        S r^   )refr   )clsr   s     r   r   zTransformedRefTree.wrap`  s    sww''r   c                V    t        j                  | j                  | j                        S r^   )r
   r   r   r   r   s    r   r   zTransformedRefTree.unwrapd  s    $//::r   N)r   state.TransformedRefreturnr   )r   r   )r   r   r   __doc__classmethodr   r   r_   r   r   r   r   W  s      ( (;r   r   c                   t        t        j                  |             }t        t        j                  |            }t        t        j                  |            }d|z   dz   |z   }	t        j                  d      t        j
                  |dz         t        j
                  |	      h}
|5d|z   dz   |z   dz   |z   }|
j                  t        j
                  |             |q|t        j                  j                  u rUt        |t              rE|D ]@  }t        |t              s|f}|D ]&  }|
j                  t        j                  |             ( B |
S )Nr*   r   )r!   r   tree_leavesr
   
ReadEffectWriteEffectr   r   r   r   rY   dicttupler%   NamedAxisEffect)src_transforms_avalsdst_transforms_avalsdst_sem_transforms_avalssrc_sem_avaldevice_id_avalry   n_src_transformsn_dst_transformsn_dst_sem_transformsdst_sem_indexeffssrc_sem_indexkk_s                 r   _get_dma_effectsr   h  sP    ../CDE../CDEY223KLM&&*-==-q(1,-&
$
 	q #33a7:NN  	HHU}-.00555*;  1!!U#d! 	1B
((8++B/
0	11
 
+r   	dma_startc                 &    t        d | D              S )Nc              3  4   K   | ]  }|j                     y wr^   )is_high).0avals     r   	<genexpr>z_dma_is_high.<locals>.<genexpr>  s     ,dT\\,s   )any)avalsparamss     r   _dma_is_highr     s    	,e,	,,r   c           
        t        j                  | |      \	  }}}}}	}
}}}t        j                  |      }t        j                  |      }|j                  r|j                  st        d      t        j                  |	      }|j                  rt        d      |*t        j                  |      j                  rt        d      t        j                  ||      }t        j                  ||      }|t        j                  ||      }t        j                  |	|
      }	|j                  j                  ||||	||||       g S )Nz'dma_start not implemented in LoJAX yet.)rw   r   ry   r   )
r   r   r%   get_avalr   NotImplementedErrorr
   r   
inner_avalr   )r   ry   r   r   argsrm   rn   ro   rp   rr   rs   rt   ru   rw   src_ref_avaldst_ref_avaldst_sem_avalsrc_transformed_refdst_transformed_refs                      r   _dma_start_to_lojaxr     sF    tT*
""7+,""7+,


<#7#7
G
HH""7+,
G
HH!)) IJJ,,WnE,,WnE""7,>?G  *<='###
 $ 	 
)r   c           	        |dk  rt        d|       t        | |      \	  }}}}}	}
}}}t        d |||	fD              st        d      |	j                  }|
r|
d   j	                         }|rt        d|       |Vt        |t        j                        st        d      |j                  }|r|d   j	                         }|rt        d|       g t        |||
|||      fS )Nr   z)DMA start priority must be non-negative: c              3  P   K   | ]  }t        |t        j                           y wr^   )rY   r
   AbstractRef)r   r   s     r   r   z+_dma_start_abstract_eval.<locals>.<genexpr>  s!      1!Z5,,- 1s   $&z8DMA source/destination/semaphore arguments must be Refs.rG   z,Cannot signal on a non-()-shaped semaphore: z#DMA source semaphore must be a Ref.)	r#   r   allr"   get_indexer_shaperY   r
   r   r   )r   ry   r   r   r   r   r   r   r   r   r   r   src_sem_transforms_avalsr   dst_sem_shapesrc_sem_shapes                   r   _dma_start_abstract_evalr     s?   \
@
K
LL T4 
	 1L,80 1 
1
BD D$$-,R0BBDM

6}oF  lE$5$56
/1 1 &&M.r2DDFm8
H  
 
 r   c                :   | j                   }| j                  d   }| j                  d   }| j                  d   }t        ||      \	  }}}	}
}}}}}~|s|rt        j                  | ||      S t        j                  t        j                  d| |rdnd d      t        j                  d      t        j                  |||      t        j                  d	      t        j                  ||	|
      t        j                  d      t        j                  |||      g      S )
Nr   r   r   zdma_start(pz, add r     z -> )
invarsr   r   r%   _pp_eqnppconcattextsppp_ref_transforms)eqncontextsettingsr  r   r   r   rm   rn   ro   rp   rr   rs   rt   ru   rw   s                   r   _dma_start_pp_eqnr    s    ::&	F	$ZZ
#(

5# T6"
	C(33	ggH:g"%=Q?@ggcl7G^<ggfo7G^<ggcl7G-?@ 
 r   c          	     	  *+,-./012 ~|rt        d      t        ||      \	  }}	}
,}+}1}t        ||      \	  }}}}*}0}}~t        ||       ^}}}}}}}|d u}d }|r|d   }|s|J 1J t        t        j                  |            }t        t        j                  |            }t        t        j                  |            }t        t        j                  |            }t        j                  |d   |	      22.|rt        j                         }|j                  D cg c]  }||	 }}t        |t              r}|t        j                  j                  urt        d|d      g }|D ]@  } |j!                  |j#                  | t$        j&                  j)                  |                    B t+        |      }|t        j                  j,                  k(  r>t        |      dkD  rt        d      |d   /t$        j&                  j)                  /      }!n|t        j                  j                  k(  rd}"t        |t$        j.                        r|j0                  }"nt3        |d      rt        |      }"|"t        |j                        k7  r%t        d	|" d
t        |j                         d      |"dkD  st        |      dkD  rt        d      |d   /t$        j&                  j)                  /      }!nt        d|       t$        j&                  j5                  |/      |!k(  }#t7        j8                  |#d      -t$        j&                  j5                  2/      }$t$        j&                  j;                  |$-dd      2t        j<                  /fd,      }%t        j<                  -fd|%      ,|
f,2fd	}&|f*+2fd	}'|f.01fd	}(d})|)d|z  z  })|)|r |&       nd fz  })|)d|z  z  })|)|r |'       nd fz  })|)d|z  z  })|r|)|r |(       nd fz  })|)d|z  z  })|)dz  })t        |)      t        |      k(  s J t        |)      |)f dt        |              |s!t?        j@                  |
d  |&|
d                |s!t?        j@                  |d  |'|d                |r#|s!t?        j@                  |d  |(|d                |)g fS c c}w )Nz3DMA partial discharge add=True not yet implemented.r   .zN`device_id_type` must be MESH if `device_id` is a dict, got: device_id_type = .r*   zaSharding with more than one named axis not implemented in dma_start_p for LOGICAL device_id_type.__len__zdevice_id (z) and mesh (z) must have same length.zFMeshes with more than 1 named dimension not implemented in dma_start_pzUnknown device_id_type: )r   Fr   keepdimsc                D    t         j                  j                  |       S r^   )rJ   rK   
all_gather)r   
shard_axiss    r   r   z2dma_start_partial_discharge_rule.<locals>.<lambda>~  s    #''$$Q
3 r   c                J    t         j                  j                  | dd      S )Nr   Fr  )rJ   rK   dynamic_index_in_dim)r   indexs    r   r   z2dma_start_partial_discharge_rule.<locals>.<lambda>  s$    #''..u1u / 
 r   c                <    t        j                  |       \  }}|S r^   )state_dischargetransform_swap_array)ro   r`   retrp   updatess      r   do_discharge_dstz:dma_start_partial_discharge_rule.<locals>.do_discharge_dst  s%    11FAs Jr   c                   t        j                  j                  t        j                        }t        j
                  |t        j                        }t        j                  |       }t        j                  | |d   |z         \  }}|S Nr'   .r,   minimumsizepl_coreSEMAPHORE_MAX_VALUEarraySEMAPHORE_INTERPRET_DTYPEr   _transform_semaphorer  r  )rr   	recv_sizedst_sem_valuer`   r  r   rs   r  s        r   do_discharge_dst_semz>dma_start_partial_discharge_rule.<locals>.do_discharge_dst_sem  sz    GLL'*E*EFI		)7+L+LMI33#\M 11#]3%7)%CFAs Jr   c                   t        j                  j                  t        j                        }t        j
                  |t        j                        }t        j                  |       }t        j                  | |d   |z         \  }}|S r  r   )rt   	send_sizesrc_sem_valuer`   r  	local_srcr   ru   s        r   do_discharge_src_semz>dma_start_partial_discharge_rule.<locals>.do_discharge_src_sem  sz    INNG,G,GHI		)7+L+LMI33#\M 11#]3%7)%CFAs Jr   r^    != )ro   )rr   r   )!r   r   r!   r   r   r  transform_arrayr%   get_axis_env
axis_sizesrY   r   r   r   r   r#   appendgetrJ   rK   
axis_indexr   LOGICALArrayr"  hasattrr  r,   argmaxr  tree_mapr  ref_set)3should_dischargein_avals	out_avalsr   ry   r   r   r   rm   rn   ro   rr   rt   rw   r`   r   r   r   r   dst_dischargedst_sem_dischargemaybe_src_sem_discharger   src_sem_dischargenum_src_sem_transformsnum_dst_sem_transformsnum_src_transform_valsnum_dst_transform_valsaxis_envnamenonempty_axesdevice_id_listr   my_axisdevice_id_lenwho_copy_to_meglobal_updatesglobal_dst_transformsr  r*  r/  new_valsr   rs   rp   r  r.  r  r   ru   r  s3                                             @@@@@@@@@r    dma_start_partial_discharge_rulerR    s3   
 
=? ? T4 
 T8$
 T+,t#)/2	??%%%y445MNOy445MNOy445IJKy445IJK++GCL.I') $$&H&.&9&9NdT=MTNMN)T"	z66;;	;'#'q*
 	
 n M$immD#''2D2DT2JKLM'i00888	]	a	! #4 5 	5 !#j"":.g	:2277	7m	Isyy	)!9i(I	#h112	2-S9L9L5M4N O% %& 	& 
	c-014! #? @ 	@ #j"":.g1.1ABCCWW''	:>'IN JJ~A.EWW''<Ngg**A + 7G
 &..3^ ''	
 		N  '  $+ 	 $+ 	 (
g...(
]!>>(
g...(
):#%FF(
g...(+<%'$HHH000HH
h-
h- J ]H45T#h-IJ 
 
JJw.ws|DE	JJw273<HI(JJw273<HI	2[ Os   'S/Sdma_waitc           	        t        j                  | |      \	  }}}}}}}	}
}t        j                  |      }t        j                  |      }|j                  r|j                  st        d      t        j                  |      }|j                  rt        d      |	*t        j                  |	      j                  rt        d      t        j                  ||      }t        j                  ||      }|	t        j                  |	|
      }	t        j                  ||      }|j                  j                  |||	|||       g S )Nz&dma_wait not implemented in LoJAX yet.)rw   ry   )
r   r   r%   r   r   r   r
   r   r   rS  )r   ry   r   rm   rn   ro   rp   rr   rs   rt   ru   rw   r   r   r   r   r   s                    r   _dma_wait_to_lojaxrU    s@    tT*
""7+,""7+,


<#7#7
F
GG""7+,
F
GG!)) HII,,WnE,,WnE""7,>?G  *<='""# #  
)r   c           	     R    t        | |      \	  }}}}}}}	}
}g t        ||||	||      fS r^   )r   r   )r   ry   r   r   r   r   r   r   r   r   r   r   s               r   _dma_wait_abstract_evalrW    sU     T4 
	 
 r   c                `   ~| j                   }| j                  d   }t        ||      \	  }}}}}}	}}}t        j                  t        j
                  d      t        j
                  d      t        j                  |||      t        j
                  d      t        j                  |||	      g      S )Nr   rS  r   )r  r   r   r  r  r  r  r  )
r  r	  r
  r  r   r`   r   r   semsem_transformss
             r   _dma_wait_pp_eqnr[    s     ::&	F	$ T6"
			ggjggcl7C4ggcl7C8 
 r   c          	        ~~t        ||      \	  }}}}}	}
}}}t        ||      \	  }}}}}}}}}t        ||       }|d   sdt        |      z  g fS t        t        j                  |            }t        t        j                  |            }t	        j
                  |d   |      }t        j                  |j                  t        j                        }t        j                  |t        j                        }t        j                  |	|
|      }t	        j                  |	|
||z
        \  }}d}|dt        t        j                  |            z  z  }|dz  }|d|z  z  }||fz  }|d|z  z  }|dt        t        j                  |            z  z  }|dt        t        j                  |            z  z  }|dt        t        j                  |            z  z  }|g fS )N   r^   .r  )r   r!   r   r   r  r1  r,   r!  r"  r#  r$  r%  r&  r   r'  r  )r=  r>  r?  r   ry   r   r`   ro   dst_ref_transformsrr   rs   src_ref_transforms_avalsdst_ref_transforms_avalsr   r   r   r   r   should_discharge_unflattenednum_sem_transformsnum_transformsr  	copy_size	sem_valuenew_semrQ  s                             r   dma_wait_partial_discharge_rulerg    s    T4  J!Q#W.@!Q T8$
 "06F!G	%a	(S]"B&&9001IJKy,,-EFG.++GCL:LM'kk',,(C(CD)ii	)J)JK)--g7I<X)33!9y#8*!W (
gI112JKLLL(
g(
g&&(
wj(
g***(
gI11,?@@@(
gI112JKLLL(
gI11.ABBB(	2r   c                n    t        | t        j                        r| j                  | j                  fS | dfS Nr_   )rY   r
   r   r   r   r   s    r   _get_ref_and_transformsrk  M  s.    U))*77CNN""	b.r   c                    t        |       \  } }t        |      \  }}t        |      \  }}t        | |||||dddt        j                  j                  
      S )zCreates a description of an asynchronous copy operation.

  Args:
    src_ref: The source Reference.
    dst_ref: The destination Reference.
    sem: The semaphore used to track completion of the copy.

  Returns:
    An AsyncCopyDescriptor.
  N)rk  rl   r   r   r   )rm   ro   rY  rn   rp   rZ  s         r   make_async_copyrm  S  sh     4G<'>3G<'>/4#~		


""
 r   Fr   r   c               F    t        | ||      }|j                  ||       |S )z-Issues a DMA copying from src_ref to dst_ref.rn  )rm  r   )rm   ro   rY  r   r   copy_descriptors         r   
async_copyrq  o  s+     $GWc:/s3	r   c                   t        |       \  } }t        |      \  }}t        |      \  }}t        |      \  }}	|t        j                  j                  k(  rt	        |t
        t        z        rJ d       t        | |||||	||||
      S )a  Creates a description of a remote copy operation.

  Copies data from src_ref on the current device to dst_ref on the device
  specified by device_id. Both semaphores should be waited on using the
  descriptor on both source and target devices.

  Note that device_id can also refer to the current device.

  Args:
    src_ref: The source Reference.
    dst_ref: The destination Reference.
    send_sem: The semaphore on the source device.
    recv_sem: The semaphore on the destination device.
    device_id: The device id of the destination device. It could be a tuple, or
      a dictionary specifying the communication axis and destination index.
    device_id_type: The type of the device id.

  Returns:
    An AsyncCopyDescriptor.
  zELOGICAL device_id_type does not support device_id as a tuple or dict.)ry   )rk  r   r   r7  rY   r   r   rl   )
rm   ro   send_semrecv_semrw   ry   rn   send_sem_transformsrp   recv_sem_transformss
             r   make_async_remote_copyrw  x  s    8 4G<'>"9("C(3G<'>"9("C(z..66654< ONO  
#
 r   c                F    t        | |||||      }|j                          |S )z4Issues a remote DMA copying from src_ref to dst_ref.)rw  r   )rm   ro   rs  rt  rw   ry   rp  s          r   async_remote_copyry    s.     +7GXx+4nF/	r   get_barrier_semaphorec                     t        j                  t        j                  dt	        j
                               t        j                  j                        S ri  )	r
   r   r%   r&   r#  BarrierSemaphoretpu_coreMemorySpace	SEMAPHOREr_   r   r   $_get_barrier_semaphore_abstract_evalr    s=    			2w779:$$
 r   c                 *    t         j                         S )a  Returns a barrier semaphore.

  This function returns a barrier semaphore based on the collective_id of the
  current pallas kernel.

  It's very important that the semaphore is wait-ed back down to 0, or else the
  semaphores will become corrupted.

  It's also very important that the collective_id is different for each pallas
  kernel with communication. E.g. if you have two pallas kernels, one that syncs
  across the X axis of the device mesh and the second that syncs across the Y
  axis, they must have different collective_ids.
  However it is legal for two kernels that perform the same synchronization
  pattern (e.g. only communicating with neighbours on the same mesh axis)
  to share a collective_id. However, if in doubt, prefer not sharing
  collective_ids, as doing so incorrectly can lead to silent data corruption or
  crashes.
  Note that reusing the same collective_id doesn't guarantee that the same
  semaphore is provided by XLA.
  )get_barrier_semaphore_pr   r_   r   r   rz  rz    s    * 
!	%	%	''r   	prng_seedc                     g S r^   r_   )r`   s    r   _prng_seed_abstract_evalr    s    	)r   c                 (    t        j                  |   y)zSets the seed for PRNG.

  Args:
    seeds: One or more integer seeds for setting the PRNG seed. If
      more than one seed is passed in, the seed material will be
      mixed before setting the internal PRNG state.
  N)prng_seed_pr   )seedss    r   r  r    s     Er   prng_random_bitsc                T    t        j                  | t        j                  d            S )Nint32)r%   r&   r,   r'   r"   s    r   _prng_random_bits_abstract_evalr    s    			eSYYw%7	88r   c                .    t         j                  |       S )Nr  )prng_random_bits_pr   r  s    r   r  r    s    		 	 u	 	--r   
prng_splitc                    | j                   j                  j                  }t        |      dk7  s|d   dk7  rt	        d|       t        j                  dt        j                   d            g|d   z  S )Nr;   r   r*   Key shape must be (1, N), got r_   uint32)r'   _impl	key_shaper!   r#   r%   r&   r,   )seedr  s     r   _split_key_scalar_abstract_evalr    si    jj(()^qIaLA-
5i[A
BB


r399X#6
7	89Q<	GGr   c                ,    t         j                  |       S )z.Splits a PRNG key into it's scalar components.)split_key_pr   )r  s    r   unwrap_pallas_seedr    s    			$	r   	prng_joinc                `   t        | j                        dk7  s| j                  d   dk7  rt        d| j                         t        |      | j                  d   k7  r(t        dt        |       d| j                  d    d      t        j                  dt        j                  |       	      S )
Nr;   r   r*   r  z*Number of seeds must match key shape, got r0  r  r_   r  )r!   r  r#   r%   r&   jax_prngKeyTyimplr  s     r   _join_key_scalar_abstract_evalr    s    A!2a!7
5dnn5EF
GGZ4>>!$$

4SZL
t~~a !	$  
		bt(<	==r   c                V    t        j                  |       } t        j                  |d| iS )z$Joins scalar into a single PRNG key.r  )
jax_randomresolve_prng_impl
join_key_pr   r  s     r   wrap_pallas_seedr    s&    		%	%d	+$	%	+d	++r   stochastic_roundc               2    t         j                  | ||      S )N)target_dtype)stochastic_round_pr   r   random_bitsr  s      r   r  r  (  s    		 	 Kl	 	KKr   c               2   |j                   | j                   k7  r%t        d|j                    d| j                          |j                  t        j                  d      k7  rt        d|j                         t	        j
                  | j                   |      S )NzUThe shape of `random_bits` must match the shape of `x` for stochastic_round, but got  and r  zHThe dtype of `random_bits` must be uint32 for stochastic_round, but got )r"   r#   r'   r,   r%   r&   r  s      r   _stochastic_round_abstract_evalr  ,  s    !''!
	%%0%6%6$7uQWWI	G  #))H--
	$$%	'  
		agg|	44r   c                    t        j                  |       }t        j                  |      }||z  dk7  rt        d| d|       ||z  S )Nr   z=Unpacked bitwidth must be a multiple of packed bitwidth, got r  )r   r?   r#   )unpacked_dtypepacked_dtypeunpacked_bitwidthpacked_bitwidths       r   _get_elementwise_packing_factorr  :  s_    **>:((6/(A-
G
U?"3	5  
o	--r   pack_elementwisec               ,    t        j                  | d|iS )Nr  )pack_elementwise_pr   )rg   r  s     r   r  r  G  s    		 	 "	@<	@@r   c                |   |st        d      |d   t        fd|D              st        d      t        fd|D              st        d      t        j                  |       }t	        |      |k7  rt        d| dt	        |             t        j                  j                  t        j                        S )	NzAt least one source is requiredr   c              3  P   K   | ]  }|j                   j                   k(    y wr^   r  r   r   firsts     r   r   z2_pack_elementwise_abstract_eval.<locals>.<genexpr>P       0QWW#0   #&z$All sources must have the same shapec              3  P   K   | ]  }|j                   j                   k(    y wr^   r  r  s     r   r   z2_pack_elementwise_abstract_eval.<locals>.<genexpr>R  r  r  z$All sources must have the same dtypez5The number of sources must match the packing factor (z), got )
r#   r   r  r'   r!   r%   r&   r"   r,   r  )r  rg   packing_factorr  s      @r   _pack_elementwise_abstract_evalr  K  s    	
6
77
Q%%	0R0	0
;
<<	0R0	0
;
<<25;;M.W
	73r7)	-  
		ekk3::	66r   unpack_elementwisec               4    t         j                  | |||      S )N)r  r  r  )unpack_elementwise_pr   )r   r  r  r  s       r   r  r  `  s#    		"	"u< 
# 
 r   c                   | j                   t        j                  k7  rt        d| j                          t	        ||      }|dk  s||k\  rt        d| d|       t        j                  | j                  |      S )NzSource must be uint32, got r   zIndex z% is out of bounds for packing factor )r'   r,   r  r#   r  r%   r&   r"   )r   r  r  r  r  s        r   !_unpack_elementwise_abstract_evalr  f  s{    WW


2177)<
==2><P.
QY%>)

<^<LMO O			agg~	66r   c                4   |t         j                  j                  u r| S |t        j                  j                  t        j                  j
                  t        j                  j                  hvrt        d      t         j                  j                  | |      S )ac  Constrains the memory space of an array.

  This primitive does not change the value of ``x``, but it constrains the
  memory space where it should be allocated. This is useful to force
  Pallas to allocate an array in a specific memory space.

  As of now, this only operates on the inputs pallas_calls, as in you can
  apply this to the arguments of a pallas_call and it will constrain them, but
  other operations will not respect this constraint.

  Args:
    x: The array to constrain.
    memory_space: The memory space to constrain to.

  Returns:
    The array ``x`` with the memory space constraint.
  z>with_memory_space_constraint only supports HBM, VMEM and SMEM.)memory_space)
r#  r~  ANYr}  HBMVMEMSMEMr   with_memory_space_constraint_pr   )r   r  s     r   with_memory_space_constraintr  q  s    ( W((,,,H 
 H  
	/	/	4	4l 
5 
$ $r   maskc               2    t        j                  | d|      S )a  Loads an array from the given ref.

  If ``mask`` is not specified, this function has the same semantics as
  ``ref[idx]`` in JAX.

  Args:
    ref: The ref to load from.
    mask: An optional boolean mask specifying which indices to load.

  Returns:
    The loaded array.
  Nr  )r   load)r   r  s     r   r  r    s     
d	..r   c               4    t        j                  | d||      S )a  Stores a value to the given ref.

  If ``mask`` is not specified, this function has the same semantics as
  ``ref[idx] = val`` in JAX.

  Args:
    ref: The ref to store to.
    val: The value to store.
    mask: An optional boolean mask specifying which indices to store.
  Nr  )r   store)r   valr  s      r   r  r    s     
		#tSt	44r   add_dependencyc                    t         j                  j                  |       }|D  cg c]*  } t        | t        j
                        r| j                  n| , }} |D ]  } t        j                  |         yc c} w )z3Adds a fake read-write dependency to the given ref.N)	rJ   r   leavesrY   r
   r   r   touch_pr   )r   
ref_leavess     r   touchr    si    xxs#*%' &c5+?+?@cI '* ' cLL's   /A2c                Z    g t        j                  d      t        j                  d      hfS )Nr   )r
   r   r   rj  s    r   _touch_abstract_evalr    s(    	eq!5#4#4Q#78	88r   )r   	jax.Arrayr   rZ   r   rZ   r   r  )r   r  r   rZ   r   rZ   )r7   zmlir.LoweringRuleContext)r   r  r=   r   r   r  )r   r  r\   zjax.Array | intr   rZ   rV   
int | NonerW   r  r   r  )r   r   r   r   )r  zjax_core.JaxprEqnr	  zjax_core.JaxprPpContextr
  zjax_core.JaxprPpSettings)r   rl   )r   rZ   r   r~   r   rl   )rw   rv   ry   rx   r   rl   )ry   rx   r   rl   )r  rq   r   None)r   r  r  r   r   r  )r   Refr  jax.Array | Noner   r  )r   r  r  r  r  r  r   r  )r   z jax.Array | state.TransformedRefr   r  )r   r  )r   
__future__r   r   r5   r   typingr   rJ   jax._srcr   r%   r   r   r  r   r  r	   r  r
   r   r   jax._src.interpretersr   jax._src.pallasr#  r   jax._src.pallas.mosaicr}  jax._src.stater   r  r   r  jax._src.state.typesr   jax._src.typingr   	jax.numpynumpyr,   Slicesafe_mapr   
unsafe_mapsafe_zipzip
unsafe_ziprZ   r8  IntDeviceIdr   r   strMultiDimDeviceIdr   r   r  	Primitiver   r   def_abstract_evalr(   def_implr0   r8   register_loweringr@   r9   rD   rT   r[   rU   ra   rj   	dataclassrl   r   r   r   r   register_dataclassr   r   r   r3   r   r   r   to_lojaxdef_effectful_abstract_evalr   r  pp_eqn_rulesrR  register_partial_discharge_ruler   rU  rW  r[  rg  rk  rm  rq  r   r   rw  ry  r  r  rz  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r_   r   r   <module>r     sv   C "     
 %  ) % )    & + & 3 7 # + * % --Z--ZCIIoc)*T#c3h2G2T-UU %...8h'6 
. . 
 

   x!6 7Hy)	" ) )B&   y"8 9			F	# " 
   4  
F	!F   v2 3 [ [ [|02 !!d#;-- ; $ ";D !h  -# - # &N + ((+ )+Z6 8@ &7  k "od = / / / <=] ^  X
+
"
 !
 #H )
 '' (*572 %5  j !-\ < / / /
 ;<[ \: /0U(+6: /9.E.E.J.J0
 50 ,0 0r /9.E.E.J.J ,  -(,,-DE ** +(2 !h  -#    (X''  %%9 &9. !h  .#  H H 
  X,
 > >, (X''(:; L %%5 &5. (X''(:; A %%7 &7" *x))*>?  ''7 (7$$ #$$D 04 /  AE 5 (

-
.  	$$9 %9r   