
    uki                       d dl mZ d dlZd dlZd dlmZmZ d dlZd dlZd dl	Z	d dl
Z
d dlmZmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ  d dl!m"Z" d dl#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1 d dl2m3Z3m4Z4m5Z5 d dl6m7Z7 d dl8m9Z9m:Z:m;Z; d dl<Z=e j|                  j                   Z?e@eAdf   ZBe j                  ZCe@eDdf   ZEe@eCdf   ZFej                  ZHdNdZI ej                  dd      dOd       ZKej                   G d d             ZMe j                  j                         ZPd ZQ e:e j                         G d d ej                               ZRd!eR_S         ej                  dd      	 	 	 	 dPd"       ZT e:e j                         G d# d$ej                               ZUd!eU_S        d% ZV e:e j                         G d& d'ej                               ZWeZX	 dQd(ZY G d) d*e      ZZ ej                  d+,       G d- d.             Z[ ej                  d+,       G d/ d0             Z\ ej                  d+,       G d1 d2             Z]d3 Z^d4 Z_d5 Z`d6 Za	 	 	 	 	 	 dRd7ZbdSd8Zc G d9 d:ed      Ze ej                  dd      	 	 	 	 	 	 	 	 dTd;       Zf	 	 	 	 	 	 dUd<Zg	 	 	 	 	 	 	 	 dVd=ZhdWd>ZidXd?Zjd@ ZkdYdAZldB ZmdZdCZnd[dDZo ej                         	 d\	 	 	 	 	 	 	 d]dE       Zp	 	 	 	 	 	 d^dFZq ej                         	 d_	 	 	 	 	 d`dG       Zr	 d\ddH	 	 	 	 	 dadIZs G dJ dK      ZtdbdLZuej                  dcdM       Zwy)d    )annotationsN)MappingSequence)Any
NamedTuplecast)config)core)meshsharding)sharding_specs)	tree_util)util)source_info_util)
xla_bridge)
mesh_utils)deprecations)
xla_client)sdy)SdyArraySdyDimUnspecifiedValueAUTOflatten_specNamedSharding_check_unique_resourcesUNSPECIFIEDArrayMappingArrayMappingOrAutoOrUnspecifiedget_array_mappingarray_mapping_to_axis_resources"named_sharding_to_xla_hlo_sharding"modify_sdy_sharding_wrt_axis_types)are_hlo_shardings_equalget_num_ways_dim_shardedis_hlo_sharding_replicated)PartitionSpec)safe_zipuse_cpp_classuse_cpp_method.c                `    t        d | D              sJ t        t        d | D                    S )Nc              3  Z   K   | ]#  }t        |t              s|j                  d u  % y wN)
isinstanceslicestep.0vs     R/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/jax/_src/sharding_impls.py	<genexpr>zhashed_index.<locals>.<genexpr>A   s!     ?*Q*>QVVt^?s   ++c              3  p   K   | ].  }t        |t              r|j                  |j                  fn| 0 y wr.   )r/   r0   startstopr2   s     r5   r6   zhashed_index.<locals>.<genexpr>B   s+     PAu)=QWWaff%1DPs   46)allhashtuplexs    r5   hashed_indexr?   >   s.     
?Q?	??	?	ePaPP	QQ    i   F)max_sizetrace_context_in_keyc                   	 | j                   }t        j                         }i } ||      j                         D ]'  \  }}t        |      }||   }||xx   dz  cc<   |||<   ) |S # t        $ r t        d|  d      d w xY w)Nz,Cannot calculate replica ids from sharding: zf. Please create a device to index mapping for your sharding from which replica ids will be calculated.   )devices_indices_mapAttributeError
ValueErrorcollectionsCounteritemsr?   )	r   global_shapedevice_indices_map_fnindex_to_replicaoutdeviceindexh_index
replica_ids	            r5   device_replica_id_maprS   E   s    -$88 &1%8%8%:
#,\:@@B mfe5!G!'*JW"CK	
 
* 
 -

6xj A" 	"# )---s   A& &B c                       e Zd ZU ded<   ddZy)SdyArrayListzSequence[SdyArray]	shardingsc                    t         j                  j                  | j                  D cg c]  }|j	                          c}      S c c}w r.   )r   TensorShardingPerValueAttrgetrV   build)selfr   s     r5   rZ   zSdyArrayList.build]   s7    ))--*...9h	9; ;9s   AN)returnzsdy.TensorShardingPerValueAttr)__name__
__module____qualname____annotations__rZ    r@   r5   rU   rU   Y   s    ;r@   rU   c                    t        | |      S Nmemory_kind)SingleDeviceSharding)rO   re   s     r5    _unpickle_single_device_shardingrg   e   s    	f+	>>r@   c                      e Zd ZU dZded<   ded<    e       dddd       Zd	 Zd
 Zd Z	d Z
edd       Zedd       Zedd       ZddZddZedd       ZddZd dZed!d       Zed!d       Zd"dZy)#rf   zA :class:`Sharding` that places its data on a single device.

  Args:
    device: A single :py:class:`Device`.

  Examples:

    >>> single_device_sharding = jax.sharding.SingleDeviceSharding(
    ...     jax.devices()[0])
  Device_device
str | None_memory_kindNrd   c                    || _         || _        y r.   )rj   rl   )r[   rO   re   s      r5   __init__zSingleDeviceSharding.__init__y   s    DL#Dr@   c                >    t         | j                  | j                  ffS r.   )rg   rj   rl   r[   s    r5   
__reduce__zSingleDeviceSharding.__reduce__~   s    ,t||T=N=N.OPPr@   c                `    | j                   dnd| j                    }d| j                  | dS )N , memory_kind=zSingleDeviceSharding(device=))rl   rj   r[   mems     r5   __repr__zSingleDeviceSharding.__repr__   s:    !!)"@Q@Q?R/SC)$,,)9#a@@r@   c                ~    t        | d      s&t        | j                  | j                  f      | _        | j                  S N_hash)hasattrr;   rj   re   r{   rp   s    r5   __hash__zSingleDeviceSharding.__hash__   s1    4!t'7'789dj::r@   c                    t        |t              sy| |u ry| j                  |j                  k(  xr | j                  |j                  k(  S NFT)r/   rf   rj   re   r[   others     r5   __eq__zSingleDeviceSharding.__eq__   sH    e12u}LLEMM) 2 1 113r@   c                ,    t        | j                        S r.   len
device_setrp   s    r5   num_devicesz SingleDeviceSharding.num_devices       tr@   c                    | j                   hS r.   rj   rp   s    r5   r   zSingleDeviceSharding.device_set   s    LL>r@   c                    | j                   S r.   rl   rp   s    r5   re   z SingleDeviceSharding.memory_kind       r@   c                0    t        | j                  |      S rc   )rf   rj   r[   kinds     r5   with_memory_kindz%SingleDeviceSharding.with_memory_kind   s    $??r@   c                J    | j                   t        d       ft        |      z  iS r.   )rj   r0   r   r[   rK   s     r5   rE   z(SingleDeviceSharding.devices_indices_map   s!    LL5;.3|+<<==r@   c                    | j                   fS r.   r   rp   s    r5   _device_assignmentz'SingleDeviceSharding._device_assignment   s    LL?r@   c                    t         S r.   replicated_hlo_shardingr[   num_dimensionss     r5   _to_xla_hlo_shardingz)SingleDeviceSharding._to_xla_hlo_sharding   s    ""r@   c                l    t        |      D cg c]  }t        g d       }}t        d |      S c c}w )NF)axesis_open)
mesh_shapedim_shardings)ranger   r   )r[   r   _sdy_dim_shardings       r5   _to_sdy_shardingz%SingleDeviceSharding._to_sdy_sharding   s?    !&~!68 B6 8 8t3CDD8s   1c                     y)NTra   rp   s    r5   is_fully_replicatedz(SingleDeviceSharding.is_fully_replicated   s    r@   c                    t        j                  | j                  j                        | j                  j                  k(  S r.   )xbprocess_indexrj   clientrp   s    r5   is_fully_addressablez)SingleDeviceSharding.is_fully_addressable   s,    DLL//0DLL4N4NNNr@   c                     y r.   ra   r[   
aval_shapes     r5   check_compatible_avalz*SingleDeviceSharding.check_compatible_aval       
r@   )rO   ri   re   rk   r\   intr\   zset[Device]r\   rk   )r   strr\   rf   rK   Shaper\   zMapping[Device, Index]r\   XLADeviceAssignmentr   r   r\   xc.HloShardingr   r   r\   r   r\   boolr   r   r\   None)r]   r^   r_   __doc__r`   r+   rn   rq   rx   r}   r   propertyr   r   re   r   rE   r   r   r   r   r   r   ra   r@   r5   rf   rf   i   s    	 /BF $ $QA
3        @>  #E
   O Or@   rf   zjax.shardingc                    | j                  |       t        j                  || j                        }t	        t        | j                  j                  |            S r.   )shard_shaper   spec_to_indicessharding_specdictr)   devicesflat)r[   rK   indicess      r5   !pmap_sharding_devices_indices_mapr      sF     < **<9K9KL'	ht||(('2	33r@   c                     e Zd ZU ded<   ded<   ded<    e       	 	 dd       Zd Zd	 Zd
 Zd Z	d Z
	 	 	 	 ddZe	 	 d	 	 	 dd       Zed d       Zej"                  d!d       Zd"dZej"                  d#d       Zed$d       Zd%dZd&dZd'dZej"                  d(d       Zej"                  d(d       Zd)dZd*dZy)+PmapShardingz
np.ndarrayr   sharding_specs.ShardingSpecr   xc.DeviceList_internal_device_listc                F    t        j                  |      | _        || _        y r.   )npasarrayr   r   )r[   r   r   s      r5   rn   zPmapSharding.__init__   s     ::g&DL&Dr@   c                H    t        |       | j                  | j                  ffS r.   )typer   r   rp   s    r5   rq   zPmapSharding.__reduce__   s     Jt'9'9:;;r@   c                    t        |t              sy| |u ry| j                  |j                  k(  xrH | j                  j                  |j                  j                  k(  xr | j
                  |j
                  k(  S r   )r/   r   r   r   shaper   r   s     r5   r   zPmapSharding.__eq__   so    e\*u}%"5"55 FLL%--"5"55F&&%*E*EEGr@   c                ~    t        | d      s&t        | j                  | j                  f      | _        | j                  S rz   )r|   r;   r   r   r{   rp   s    r5   r}   zPmapSharding.__hash__   s3    4!33T5G5GHIdj::r@   c           	        | j                   j                  D cg c]  }|j                   }}d| j                   d|d| j                   j                  d   j                  j                          d| j                   j                   d	S c c}w )NPmapSharding(sharding_spec=z, device_ids=z, device_platform=r   z, device_shape=ru   )r   r   idr   platformupperr   )r[   d
device_idss      r5   __str__zPmapSharding.__str__   s     $ 1 121!$$2J2)$*<*<)= >m #||003<<BBDE F LL../q2 3 3s   B
c                <    d| j                    d| j                   dS )Nr   z
, devices=ru   )r   r   rp   s    r5   rx   zPmapSharding.__repr__   s*    )$*<*<)= >||nA' (r@   c                    | |k(  S r.   ra   )r[   r   ndims      r5   is_equivalent_tozPmapSharding.is_equivalent_to   s    5=r@   Nc           	        |J|t        d      t        |      } | t        j                  |      t	        j
                  |||d            S t	        j                  t        |      |      }d}|j                  D ]~  }t        |t        j                        r|J |j                  }.t        |t        j                        sI|J t        |j                        dk(  r|j                  d   }ut        d       |+t        j                  t        j                          d|       }nt        j                  |      } | ||      S )a  Creates a :class:`PmapSharding` which matches the default placement
    used by :func:`jax.pmap`.

    Args:
      shape: The shape of the input array.
      sharded_dim: Dimension the input array is sharded on. Defaults to 0.
      devices: Optional sequence of devices to use. If omitted, the implicit
        device order used by pmap is used, which is the order of
        :func:`jax.local_devices`.
    Nz*One of sharded_dim or devices must be set.rD   r   z3Multiple chunks in Chunked dimension not supported.)rG   r   r   arrayr   pmap_sharding_speccreate_pmap_sharding_specr<   r   r/   	UnstackedsizeChunkedchunksNotImplementedErrorr   local_devices)	clsr   sharded_dimr   nrepr   num_ways_shardedspmap_devicess	            r5   defaultzPmapSharding.default   s@    	EFF\d'"

+
+D$t
DF F
 #<<ek#M ## 
E	A~//	0'''66a//0'''qxx=AXXa[
#CE E
E !#"*:*:*<=N>N*O!PlXXg&l|]++r@   c                ,    t        | j                        S r.   r   rp   s    r5   r   zPmapSharding.num_devices  r   r@   c                @    t        | j                  j                        S r.   )setr   r   rp   s    r5   r   zPmapSharding.device_set#  s    t||  !!r@   c                    t        | |      S r.   )r   r   s     r5   rE   z PmapSharding.devices_indices_map'  s    ,T<@@r@   c                @    t        | j                  j                        S r.   )r<   r   r   rp   s    r5   r   zPmapSharding._device_assignment*  s    ""##r@   c                >    	 | j                   j                  S #  Y y xY wr.   )r   default_memory_kindrp   s    r5   re   zPmapSharding.memory_kind.  s#    '';;;s    c                    t        d      )Nzpmap does not support memories.r   r   s     r5   r   zPmapSharding.with_memory_kind5  s    
?
@@r@   c                    t        d      )Nzpmap doesn't use OpSharding.r   r   s     r5   r   z!PmapSharding._to_xla_hlo_sharding8  s    
<
==r@   c                    t        d      )Nzpmap doesn't use SdyArray.r   r   s     r5   r   zPmapSharding._to_sdy_sharding;  s    
:
;;r@   c                    | j                   j                  D ].  }t        |t        j                  t        j
                  f      s. y yr   )r   r   r/   r   r   r   )r[   r   s     r5   r   z PmapSharding.is_fully_replicated>  s@    (( 	A00.2H2HI	J r@   c                .    | j                   j                  S r.   r   r   rp   s    r5   r   z!PmapSharding.is_fully_addressableE      %%:::r@   c                     y r.   ra   r   s     r5   r   z"PmapSharding.check_compatible_avalI  r   r@   c                (   d }d }t        | j                  j                        D ]  \  }}t        |t        j
                        r&|}|j                  }t        j                  ||      } not        |t        j                        sa|}t        |j                        dk(  sJ |j                         |j                  d   }t        j                  ||d      } n ||S ||   |k7  r-t        d| d||    d| dt        | j                               S )NrD   r   zkThe sharded dimension must be equal to the number of devices passed to PmapSharding. Got sharded dimension z with value z
 in shape z and the number of devices=)	enumerater   r   r/   r   r   r   r   tuple_deleter   r   r   tuple_updaterG   r   )r[   rK   r   sharded_dim_sizeir   sharded_shapes          r5   r   zPmapSharding.shard_shapeL  s0   K$,,556 1	A~//	066)),Da//0188}!+188+!88A;)),QG K $44CCN- P$[12*\N K##&t'>'>#?"@BC C
 r@   )r   zSequence[Device] | np.ndarrayr   r   )r[   r   r   r   r   r   r\   r   )r   N)r   r   r   z
int | Noner   Sequence[xc.Device] | Noner\   r   r   r   r   r   r   )r   r   r   r   r   r   )rK   r   r\   r   )r]   r^   r_   r`   r+   rn   rq   r   r}   r   rx   r   classmethodr   r   r   	functoolscached_propertyr   rE   r   re   r   r   r   r   r   r   r   ra   r@   r5   r   r      s;   ,,&&'9' '<G
3( %)
 ;<48),1),=I), ),V     " "A $ $  A><   ; ;r@   r   c                    t        | ||      S rc   )GSPMDSharding)r   op_shardingre   s      r5   _unpickle_gspmd_shardingr  h  s    	w	EEr@   c                  z   e Zd ZU ded<   ded<   ded<   ded<    e       dd		 	 	 dd
       Zd Zej                  d        Z	d Z
d Zd ZddZedd       Zej                  dd       Zedd       Zd dZed!d       Zd"dZd#dZej                  d$d       Zej                  d$d       Zedd	d%d       Zy)&r  r   _devicesr   _hlo_shardingrk   rl   r   Nrd   c                  t        |t        j                        r|nt        j                  t        |            | _        t        |t        j
                        rt        j                  j                  |      n|| _        || _	        y r.   )
r/   xc
DeviceListr<   r  
OpShardingHloSharding
from_protor  rl   )r[   r   r  re   s       r5   rn   zGSPMDSharding.__init__r  sd     !+7BMM BW]]5>2 	M (R]]C ..33K@% 	 $Dr@   c                p    t         | j                  | j                  j                         | j                  ffS r.   )r  r  r  to_protorl   rp   s    r5   rq   zGSPMDSharding.__reduce__}  s4    $]]D..7794;L;LMO Or@   c                b    | j                   rt        t              S t        | j                        S r.   )r   r;   r   r  rp   s    r5   _hlo_sharding_hashz GSPMDSharding._hlo_sharding_hash  s(    )**""##r@   c                    t        |t              sy| |u ryt        | j                  |j                        xr4 | j                  |j                  k(  xr | j
                  |j
                  k(  S r   )r/   r  r%   r  re   r   r   s     r5   r   zGSPMDSharding.__eq__  si    e]+u}#D$6$68K8KL J  E$5$55J**e.I.IIKr@   c                    t        | d      s1t        | j                  | j                  | j                  f      | _        | j
                  S rz   )r|   r;   r   r!  re   r{   rp   s    r5   r}   zGSPMDSharding.__hash__  s@    4!33T5L5L((* +dj::r@   c                `    | j                   dnd| j                    }d| j                  | dS )Nrs   rt   zGSPMDSharding(ru   )rl   r  rv   s     r5   rx   zGSPMDSharding.__repr__  s<    !!)"@Q@Q?R/SCD..1#a88r@   c           
         t        | j                        \  }}t        |      t        |      k  r&t        d|  dt        |       dt        |             y )Nz	Sharding z+ is only valid for values of rank at least z%, but was applied to a value of rank )r&   r  r   rG   )r[   r   num_ways_dim_shardedr   s       r5   r   z#GSPMDSharding.check_compatible_aval  sd    6t7I7IJ!
:122dVF%&''L_    3r@   c                ,    t        | j                        S r.   )r   r   rp   s    r5   r   zGSPMDSharding.num_devices  s    t))**r@   c                ,    t        | j                        S r.   )r   r  rp   s    r5   r   zGSPMDSharding.device_set  s    t}}r@   c                    | j                   S r.   r   rp   s    r5   re   zGSPMDSharding.memory_kind  r   r@   c                F    t        | j                  | j                  |      S rc   )r  r  r  r   s     r5   r   zGSPMDSharding.with_memory_kind  s    (:(:MMr@   c                ,    t        | j                        S r.   )r<   r  rp   s    r5   r   z GSPMDSharding._device_assignment  s    r@   c                    | j                   S r.   )r  r   s     r5   r   z"GSPMDSharding._to_xla_hlo_sharding  s    r@   c                   | j                   j                         rt        d| j                    d      | j                   j                         r9t	        j
                  dd      }t        |t                     j                  |      S | j                   j                         r| j                   j                         st        d| j                    d      t        | j                   j                               }t        d t        t        |            D              }t	        j
                  ||      }t        | |      j                  |      S t        d| j                    d      )NzCannot convert GSPMDSharding z into SdyArray.ra   c              3  &   K   | ]	  }d |   yw)_axis_Nra   r3   r  s     r5   r6   z1GSPMDSharding._to_sdy_sharding.<locals>.<genexpr>  s     F!6!F   )r  tuple_elements	TypeErroris_replicatedmesh_libAbstractMeshr   r(   r   is_tiledis_tile_assignment_iotar<   get_axis_sizesr   r   !_gspmd_to_named_sharding_via_mesh)r[   r   
empty_mesh
axis_sizes
axis_namesr   s         r5   r   zGSPMDSharding._to_sdy_sharding  sD   ((*)$*<*<)=_
MO O				)	)	+((R0j:}7HH
 				$	$	&779+D,>,>+?OQ 	Q++::<=jFuS_/EFFj"":z:d.tT:KK
  )$*<*<)=_
MO Or@   c                ,    t        | j                        S r.   )r'   r  rp   s    r5   r   z!GSPMDSharding.is_fully_replicated  s    %d&8&899r@   c                .    | j                   j                  S r.   r  rp   s    r5   r   z"GSPMDSharding.is_fully_addressable  r  r@   c                     | |t         |      S rc   r   )r   device_assignmentre   s      r5   get_replicatedzGSPMDSharding.get_replicated  s     "9&( (r@   )r   z Sequence[Device] | xc.DeviceListr  xc.OpSharding | xc.HloShardingre   rk   r   r   r   r   )r   r   r\   r  r   r   r   r   )re   rk   )r]   r^   r_   r`   r+   rn   rq   r  r  r!  r   r}   rx   r   r   r   r   re   r   r   r   r   r   r   r  rB  ra   r@   r5   r  r  k  s+   && -1$:$)$ $O $ $
K9  + +    N    O* : : ; ; JN ( (r@   r  c                &   t        j                  | d       \  }}| d}g }|D ]T  }t        |t        t        f      s||j                  |       .t        |t        j                        rt        |t              rt        d| d| d      t        |t              r(|j                  j                  rt        d| d| d      |s=t        |t              r-t        j                  |j                  v rt        d| d	|       |j                  |       t        |t              st!        | d
|       |s#t        j                  |v rt        d| d	|       t#        ||       |j                  |       W t        j$                  ||      S )Nc                
    | d u S r.   ra   r=   s    r5   <lambda>z(prepare_axis_resources.<locals>.<lambda>  s
    T	 r@   )is_leafz leaf specificationszOne of z got sharding z which is not allowed.z got an empty NamedSharding: z2Unconstrained dims are not allowed when passed to z: z= are expected to be PartitionSpec instances or None, but got )r   tree_flattenr/   r   r   append	jshardingShardingr   rG   r   r   emptyr(   UNCONSTRAINEDspecr3  r   tree_unflatten)axis_resourcesarg_nameallow_unconstrained_dimsentriestreedefwhatnew_entriesentrys           r5   prepare_axis_resourcesrX    s   ++13'7:)	*$+  e%*D12em	E9--	.	E<	(74&ug >$ $ % 	%	E=	)ejj.>.>74&(EeW M1 1 2 	2&:e]+K

%
%
3@
 Kw 	 }-4& !DDI7L M 	M%-*E*E*N@
 Kw 	 eX.3 6 
	!	!';	77r@   c                  0    e Zd ZU dZded<   ded<   ded<   y)	AxisEnvz5Represents a pmap mesh (only along the replica axes).r   nrepsztuple[Any, ...]namesztuple[int, ...]sizesNr]   r^   r_   r   r`   ra   r@   r5   rZ  rZ    s    =	*		r@   rZ  T)frozenc                  \    e Zd ZU dZded<    e       Zded<   ed        Zed        Z	d
dZ
y	)SPMDAxisContextzA hardware axis context for parallel computations that use the GSPMD partitioner.

  This includes the mesh that will later by used to execute this computation,
  as well as a set of mesh axes that are currently lowered in the MANUAL
  sharding mode.
  mesh_lib.Meshr   frozenset[MeshAxisName]manual_axesc                    | j                   S r.   )unsafe_axis_envrp   s    r5   axis_envzSPMDAxisContext.axis_env  s     r@   c                    t        | j                  j                  | j                  j                  t	        | j                  j
                  j                                     S )Nr[  r\  r]  )rZ  r   r   r=  r<   r   valuesrp   s    r5   rf  zSPMDAxisContext.unsafe_axis_env  s@    iinnii""DIIOO**,-/ /r@   c                H    t        | j                  | j                  |z        S r.   )ra  r   rd  )r[   r   s     r5   extend_manualzSPMDAxisContext.extend_manual$  s    499d&6&6&=>>r@   N)r   rc  r\   ra  )r]   r^   r_   r   r`   	frozensetrd  r   rg  rf  rl  ra   r@   r5   ra  ra    sG     	)2+&4   
 / /?r@   ra  c                      e Zd ZU dZded<   y)ReplicaAxisContextzA hardware axis context for parallel computations that are partitioned by JAX.

  Unlike in the SPMDAxisContext, this means that JAX might need to emit calls to
  explicit collectives.
  rZ  rg  Nr^  ra   r@   r5   ro  ro  (  s    
 r@   ro  c                  N    e Zd ZU dZded<   dZded<   dZded<   d	 Zed
        Z	y)ShardingContextzA hardware axis context for parallel computations that use the sharding
  interface.

  This context also uses the GSPMD partitioner.
  r   r   Nztuple[xc.Device, ...] | NonerA  zmesh_lib.AbstractMesh | Noneabstract_meshc                    | j                   At        | j                   t              sJ | j                  t	        | j                         k(  sJ y y r.   )rA  r/   r<   r   r   rp   s    r5   __post_init__zShardingContext.__post_init__=  sH    )..666T%;%;!<<<< *r@   c                    t        ddd      S )NrD   ra   ri  )rZ  rp   s    r5   rg  zShardingContext.axis_envC  s    "B//r@   )
r]   r^   r_   r   r`   rA  rr  rt  r   rg  ra   r@   r5   rq  rq  2  s?    
 481804--4= 0 0r@   rq  c                p    t        j                  | ddd         ddd   t        j                  |       z  S )z5Returns an array of strides for major-to-minor sizes.N)r   cumprodr   )r]  s    r5   strides_for_sizesry  M  s0    	E$B$K	 2	&"**U*;	;;r@   c                   | j                         D ci c]  \  }}|dk7  s|| } }}t        j                  | j                         t        j                        }t        |      }t        |t        |            }t        |||       D ci c]  \  }}}||f| }}}}|D 	cg c]  }	||	   	 c}	S c c}}w c c}}}w c c}	w )ai  Recovers the ordering of axis names based on a device assignment.

  The device assignments that this function can convert into axis orders
  are of the form::

    np.arange(np.prod(named_sizes.values())).transpose(...).flatten()

  for some transposition ``...``. This is satisfied by all OpSharding assignments
  generated from partition specs.

  Arguments:
    named_sizes: A dictionary mapping axis names to their sizes.
    assignment: A permutation of integers between 0 and the product of all
      named sizes.

  Returns:
    A major-to-minor list of axis names that corresponds to the given assignment.
  rD   dtype)	rJ   r   fromiterrj  int64ry  explode_superdimsunflatten_superdimszip)
named_sizes
assignmentnamer   r]  stridesdimsstridedim_to_namer   s
             r5   unflatten_arrayr  Q  s    & /:.?.?.AO
dTQYtO+O
++k((*"((
;%e$'	5"5j"A	B$@CE7T_@`aa*<$$%a+a"&	'Q+a.	'' P b	's   B5B5B;&Cc                \   d }t        j                  | t         j                        } ||d   dk(         g }|j                  dkD  rd|d   }t	        t        |            D ]  }||   ||z  k7  s n dz  }|}|j                  ||f       |dkD  sJ |dd|   }|j                  dkD  rd|S )a  Unflatten a list of dimension sizes and their strides that generates assignment.

  If this function succeeds for a given ``assignment``, then the following property
  should be satisfied::

    dims_with_strides = unflatten_superdims(assignment)
    base_array = np.arange(map(fst, sorted(dims_with_strides, key=snd, reverse=True)))
    assignment == base_array.transpose(argsort(dims_with_strides, key=snd, reverse=True)).flatten()

  That is, the returned dimensions list all sizes of the base array (with strides
  indicating their initial order). The order of dimensions in the list corresponds
  to the permutation that applied to the base array generates the assignment.
  c                    | ry t        d      )NzKFailed to convert OpSharding into a ShardingSpec. Please open a bug report!r   )conds    r5   checkz"unflatten_superdims.<locals>.checky  s    V
 : ; ;r@   r{  r   rD   N)r   r   r~  r   r   r   rI  )r  r  flat_assignmentr  r  r  r   s          r5   r  r  k  s    ; JJz:/a 	$q QF3'( 		q6z	)5
 1faDKKv!8O8%ff-O 	q  
+r@   c                |   t        | t        |             D ci c]  \  }}||
 }}}t        t        |            }g }|D ]r  \  }}||   }g }||kD  r9|dkD  sJ ||z  dk(  sJ |j	                  ||f       ||z  }||z  }||   }||kD  r9||k(  sJ |j	                  ||f       |t        |      z  }t |S c c}}w )a  Explode superdims to fit a known shape.

  The unflattening process might mistakenly generate too few too large dimensions.
  For example, ``unflatten_superdims(np.arange(n))`` always returns ``[(n, 1)]``.
  This function takes a list of such contiguous super-dimensions and splits them
  into smaller dimensions such that::

    set(map(fst, explode_superdims(sizes, dims))) == set(sizes)
  rD   r   )r  ry  listreversedrI  )r]  r  r   r  strides_to_sizes
final_dimstarget_sizenew_dimss           r5   r  r    s     8;5BSTYBZ7[\|tVfdl\\	htn	$* %ldF"6*KH

1__K1$$$oo{F+,
{df$V,k 
 ;OOT6N#(8$$J% 
! ]s   B8c                   t        | t        j                        rt        j                  j	                  |       } | j                         r4g }| j                         D ]  }|j                  t        ||              |S | j                         rt               gS | j                         r|j                  dk(  rt               gS | j                         r+|j                  }t        |j                  | j                               }t!        |      }| j#                         }g }|D ]o  }	g }
|	dkD  rLt%        |      }||   }|	|z  dk7  rt'        d|d|d|	d|d	      |	|z  }	|
j)                  |       |	dkD  rL|j)                  t+        |
             q t-        | j/                               dkD  rt1        d      | j3                         r|d d	 }|r#|d	   d
k(  r|j5                          |r	|d	   d
k(  rt        | gS t7        d      )NrD   r   zshape=z! is incompatible with mesh_shape=z: dim_size=z is not divisible by axis_size=.z5Unhandled HloSharding type. Please open a bug report!rw  ra   z4Unhandled OpSharding type. Please open a bug report!)r/   r  r  r  r  r2  extendparse_flatten_op_shardingr4  r(   
is_maximalr   r7  r   r  tile_assignment_devicesitertile_assignment_dimensionsnextrG   rI  r<   r   subgroup_typesr   replicate_on_last_tile_dimpopAssertionError)hlo_shardingr   rN   r   r   mesh_axis_order	mesh_axisr   
partitionsdim_sizedim_partitionsaxis	axis_sizes                r5   r  r    s    bmm,>>,,\:L  "!C((* 5	jj*1d345J!!#O TYY!^OJ%

L88:O _%I335EJ /nqLIt$	i1$:zm <;A?  	Yd# qL n-./ <&&()A-
A  ..0cr?j
B2-nn B2-:&''
O
PPr@   c                N    | j                   J | j                  | j                  fS r.   )r1   r8   r9   )r   s    r5   _slice_as_tupler    s#    	

''166	r@   c                      e Zd ZdZy)NonUniformShardingErrorz5Raised when sharding is not uniform across processes.N)r]   r^   r_   r   ra   r@   r5   r  r    s    =r@   r  c                   | j                   s| j                  ry| j                  | j                  f|z        }|j	                         D ci c]  \  }}|||    }}}t        j                  t              }t               }|j	                         D ]L  \  }	}|j                  |j                  f}
||	j                     j                  |
       |j                  |
       N t        t        | j                              j                  }t        ||         }t!        |      t#        fd|j%                         D              rt'        d| d|d      t)        |j%                         D ch c]  }t        |       c}      }t+        d |D              t!        |      k7  rt'        d| d|      |j-                  |      t!        |      fS c c}}w c c}w )a7	  Get current process index and number of unique processes for given dimension.

  This function facilitates mapping of process-level data to individual
  devices. Each process can use its index to obtain the data corresponding
  to that index. If process level data is sharded on multiple dimensions
  this function can be used to build the cross product of indices in
  each sharded axis. Processes that need to load the same data will have
  the same index. For shardings whose per-process data is not distributed
  on a grid, the number of distinct shards will be such that it is possible to
  build the target shape while maintaining a "cube" shape of local-process data.

  For example, in case of 4 hosts with sharding distributed like so:

  1234
  2143

  For dim 0 (rows): all processes need to access all rows, so we return (0, 1)
  For dim 1 (cols):
     process 1 and 2 returns index 0 out of 2 (need cols 0 and 1),
     process 3 and 4 returns index 1 out of 2 (need cols 2 and 3).

  On the other hand, for a sharding like:

  1212
  3434

  Dim 0 (rows): process 1 and 2 returns (0, 2), process 3 and 4 returns (1, 2)
  Dim 1 (cols): process 1 and 3 returns (0, 2), process 2 and 4 returns (1, 2)

  Note: This function requires sharding to be process uniform in dimension
  `dim`:
   each process has the same number of addressable indices in that
  dimension and all index sets across processes are either disjoint or the same.

  For sharding to be process uniform the addressable shards doesn't need to
  form contiguous subtensor, or even a sparse grid  and  in case of
  interleaved high-dimensional tensor it is possible for sharding to be
  process uniform only in some dimensions but not others.

  For example:
    1111 and 12 and 1212 and 1212
    2222     21     2121     1212

  are all sharding uniform, in both dimensions. However

    1122
    2121
    1121
    1222

  is uniform in dimension 0 (both hosts access all rows), but
  is not uniform in dimension 1 (host 1 accesses columns: 0, 1, and 3),
  while host 2 accesses (0, 1, 2, 3).

  Returns:
    A tuple of (index, num_distinct_shards) for the given dimension.
    It is guaranteed that `index` will cover 0 to `num_distinct_shards - 1`,
    across all processes.

  Raises:
    NonUniformShardingError: if the sharding is not process uniform in dimension
    `dim`.
  )r   rD   c              3  :   K   | ]  }t        |      k7    y wr.   r   )r3   r>   slices_per_processs     r5   r6   z.get_process_index_and_count.<locals>.<genexpr>F  s     I!Q%	%Is   ztensor_sharding=z is non-uniform on dim=z3 as some processes have different number of slices.c              3  2   K   | ]  }t        |        y wr.   r  )r3   hs     r5   r6   z.get_process_index_and_count.<locals>.<genexpr>P  s     *AQ*s   )r   r   rE   r   rJ   rH   defaultdictr   r8   r9   r   addr  r  addressable_devicesrm  r   anyrj  r  r  sumrP   )tensor_shardingdimndims
device_mapkr4   global_sliceprocess_to_slice
all_slicesr   keycurrent_pidaddressable_slicesr>   unique_processesr  s                  @r5   get_process_index_and_countr    s   H **))
 22""$u,.* )3(8(8(:;1!QsV);,; !,,S1u*   " da77AFF
CQ__%))#.NN3 T/==>?MM+ !1+!>? -.I/?/F/F/HII
!?
4v 6& 	&  1A1H1H1JKA9Q<KL
 	*)**c*o=
!?
4v6  
 
 !3
4c:J6K	LLE <2 Ls   G.Gc                    dgt        |      z  }t        |      D ](  \  }}	 t        | |t        |            \  }}||z  ||<   * t	        |      S # t        $ r d||<   Y Fw xY w)aV  Computes the global shape given the per process if possible.

  The returned shape will have the size of the global tensor in that dimension
  or None, if it is not computable. The latter can happen when sharding
  is not uniform along that dimension, e.g. different hosts require
  different shapes, or if different processes have partial data overlap.

  If at most one dimension is sharded the shape is always computable.
  Generally, global shape is computable for most practical meshes (including
  topology aware such as meshes returned by mesh_utils.create_device_mesh)

  Some examples: Suppose mesh is {'a': 2, 'b': 2, 'c': 2} with 2 devices
  per host, 4 hosts total. For different specs we get:
  - P():
      global_shape = local_shape

  - P(('a', 'b', 'c'), None):
      global_shape =  (4 * local_shape[0], local_shape[1])
      Note: per device shape is (local_shape[0] / 2, local_shape[1])

  - P(('a', 'b'), None)
      global_shape =  (4 * local_shape[0], local_shape[1])
      # NB: the same global shape as above, since sharding along 'c' dimension
      # happens to be within process, and thus doesn't affect the global shape.
      # The underlying difference will be in the per *device* shape, which
      # would be  (local_shape[0], local_shape[1]) in this case.

  - P(None, ('a', 'c'))
      global_shape = (local_shape[0], 2 * local_shape[1])
      # Per device shape is (local_shape[0], local_shape[1] / 2)
  - P(('a', 'c'), 'b'):
      global_shape = (2 * local_shape[0], 2 * local_shape[1])
      # Per device shape is (local_shape[0] / 2, local_shape[1])
  - If devices in the Mesh are randomly permuted: For any partition spec
  which shards more than 1 axis:  e.g. P('a', ('b', 'c')):
      global_shape = (None, None)

  Args:
    local_shape: global shape of the tensor.

  Returns:
    global_shape with Nones in non-uniform dimensions.
  N)r  )r   r  r  r  r<   )r   local_shaperK   r  	local_dimr   shard_counts          r5   local_to_global_shaper  W  s    \ &*FS-=$=,, la2
AS-/na!K/l1o	 
|		 # l1os   "AA"!A"c           	        | j                  |      }t        t        t        j                  t
        f   |      }t        |j                         D ch c]  }t        ||          c}      }| j                  |      |   }||z  S c c}w )a  Returns the number of indices for given dimension this host has access to.

  Each host can have multiple number of devices that are spanning
  possibly discontiguous slices of data. This function computes the
  total number of unique indices for dimension `dim` that any of its
  addressable devices hold.

  In most cases the addressable indices form a sparse grid (and in some
  cases a subcube), and thus each host will hold the same of number of
  indices for each dimension.  However, it is possible to design a mesh that
  addressable shards form a complicated pattern. In that case, the returned
  value is the number of indices that are addressable by at least one device.

  For example, suppose the sharding looks like this: (number indicates
  the host index)

    1221
    1221
    0000

  Then on host 1 and 2, both dim 0 (rows), and  dim=1 (cols) will have size 2,
  while on host 0, dim 0  will have size 1, and dim 1 will have size 4.

  Args:
    tensor_sharding: Sharding of the tensor.
    dim: dimension along which to compute the number of addressable indices.
    global_shape: global shape of the tensor.

  Returns:
    The number of indices for dimension  `dim` that this host holds.
  )
addressable_devices_indices_mapr   r   rJ  ri   Indexr   rj  r  r   )r  r  rK   addressablesaddressablenum_unique_slices
shard_sizes          r5   num_addressable_indicesr    s    D !@@N,gi..56E,;G;N;N;P,7ok#&'  **<8=*	'	''	s   Bc                $   t        j                  | j                        }|j                         j	                         }t        |      \  }}|dk(  rg n|g}|dg|j                  z  z   |z   }||_        t        j                  j                  |      S NrD   )r
   physical_element_avalr|  r  cloner&   r   r  r  r  r  )avalr  elt_avalnew_op_shardingr  num_replicassuffixtads           r5   physical_hlo_shardingr    s    ''

3( ))+113/5lC*l"2&aS8==((61#/2/,		"	"?	33r@   c                F    | j                   dk(  xr t        | t               S r  )r   r/   r   r   s    r5   is_single_device_shardingr    s$     
			"	M:h+M'MMr@   c                   t        |      r|S t        |t              rt        j                  | j
                        }t        j                         g|j                  z  }t        j                  g |j                  j                  ||j                  j                        }t        |j                  |      S t        |t              rVt        j                  | j
                        }d g|j                  z  }|j                  t!        g |j"                  |       S |j%                  | j                        }t'        |j(                  t+        | |            S N)r   mesh_mapping)r   r   )rN  )r  r/   r   r
   r  r|  r   
NoShardingr   ShardingSpecr   r   r  r   r   updater(   rN  r   r  r   r  )r  r   r  trailing_shardingphys_sharding_spectrailing_spechloss          r5   make_key_array_phys_shardingr    s"   x(O(L)))$**5H'2245E'44G8))22G5FG++88:  0 0&8: :(M*))$**5HFX]]*M?? Mx}} M} M?NN((3D&&(=dD(IK Kr@   c                    t        | |      S r.   )r  )r  r   s     r5   physical_shardingr    s    	%dH	55r@   c                   t        j                  |      }|j                  t        |       |j                  z         }t        |      \  }}|dk(  rg n|g}|j                         j                         }|d |j                    |z   }	|	|_        t        |j                  t        j                  j                  |            S r  )r
   r  r   r   r   r&   r  r  r  r  r   r  r  r  )
logical_shaper|  phys_shardingr  phys_hlo_shardingr  r  r  logical_op_shardingr  s
             r5   get_logical_gspmd_shardingr    s    ''.(#88	-8==(*56GH*l"2&)224::<?X]]N#f,#360	}::~~001DE
G Gr@   c                r   t        | t              ry t        | t              r| j                  j                  ry t        j                  ||      }| j                  t        |            }t        |      \  }}t        |      t        |      z
  }t        d || d  D              st        d|  d| d|       y )Nc              3  &   K   | ]	  }|d k(    yw)rD   Nra   r0  s     r5   r6   z1check_replicated_trailing_dims.<locals>.<genexpr>  s     =Q!V=r1  zIThe trailing dims of extended dtypes should be replicated. Got sharding: z, partitions: z, num_trailing_dims: )r/   r   r   r   _any_axis_manualr
   physical_shaper   r   r&   r:   r  )r   r  r|  
phys_shapehlo_sr  r   num_trailing_dimss           r5   check_replicated_trailing_dimsr    s    ,'
-(X]]-K-K
""=%8*

'
'J
8%*51-*a*oM(::	=Z):(:(;<=	=
	Z~j\ :/0	23 3 
>r@   c                   t        || |       t        |      r|S t        |t              ryt	        j
                  |      }t        j                  |j                  j                  d |j                    |j                  j                        }t        |j                  |      S t        |t              rt	        j
                  |      }t	        j                  | |      }t        |j                         t        |      k  r5g |j                   d gt        |      t        |j                         z
  z  }n|j                   }|j#                  |d |j                          S t%        | ||      S r  )r  r  r/   r   r
   r  r   r  r   r   r   r  r   r   r  r   rN  r  r  )r  r|  r  r  logical_sharding_specr  	phys_specs          r5   logical_shardingr    sH    !uE}--.))%0H*77,,55o~F"00==?  5 5&;= =-/))%0H$$]E:J
=Z0JM&& JFc*oM4F4F0GGHJi  $$iY%?@@%mUMJJr@   c                    t        | ||      S rc   )r   )r   pspecre   s      r5   cached_named_shardingr    s     
tU	<<r@   c                b    t        | j                  |      d   }t        ||| j                        S )Nr   )r  r  r  re   )out_sr   rN  s      r5   r:  r:  #  s0     
#5#6#6	=a	@$	tT5+<+<	==r@   c                   | y t        | t              r| j                  j                  ry t        | t        t        f      st        d| d|  dt        |              t        j                         }t        | t              r'|j                  rt        d|        t        ||       } n| j                  j                  j                  r|j                  rd}|ru|j                  si| j                  j                  |k7  rPt        d| d| j                  j                   d| d	t        j                  t        j                                      t        | j                  t        j                        r*t        | j                  j                  | j                         } t#        | j                         D ]  }|| j                  j$                  |   t        j&                  j(                  t        j&                  j*                  hv sTt        d
| d| j                    d| d| j                  j$                  |    d	t        j                  t        j                                
       | S )Nz`out_sharding` argument of zD only supports instances of `NamedSharding` or `PartitionSpec`. Got z
 of type: zUsing PartitionSpec when you are not under a mesh context is not allowed. Please pass a NamedSharding instance or enter into a mesh context via `jax.set_mesh`. Got FzContext mesh z# should match the mesh of sharding z passed to z . This error occurs at source:  zPartitionSpec passed to zO cannot contain axis names that are of type Auto or Manual. Got PartitionSpec: z with axis name: )r/   r   r   rL  r(   r3  r   r5  get_abstract_meshrG   rr  are_all_axes_autor   	summarizecurrentMeshrN  r   _name_to_typeAxisTypeAutoManual)r   api_namecheck_mesh_consistencycur_meshr   s        r5   canonicalize_shardingr  *  sD    -(X]]-@-@	H}m<	=

%hZ 044<: >N	 
 '')(-(~~..6Z9: : Xx0H 	##55""$x~~##x/( $mm))*+hZ @(()9)A)A)CDEGH H (--/x}}::HMMJh& 
Hay}}""1% 1 1 8 8*: :$XJ /mm_-aS 1mm))!,- .(()9)A)A)CDE	GH H
H 
/r@   )r   c                  |t        j                         }t        j                  |       }|t	        d|        ~ t        j                  |      }|t        |      kD  rt	        dt        |       d|       |t        |      k  r|d| }|d   j                  t        j                  t        j                  fv rd}nd}t        j                  |||      }|j                  d   }|j                  d	k(  rGt        |d
      r;t        |j                  D 	ch c]  }	|	j                   c}	      dkD  rt	        d      |ct!        j"                  d      r2t$        j&                  j(                  ft        |j*                        z  }nt-        j.                  dt0        d       t%        j2                  |||      S c c}	w )a  Creates an efficient mesh with the shape and axis names specified.

  This function attempts to automatically compute a good mapping from a set of
  logical axes to a physical mesh. For example, on a TPU v3 with 8 devices:

  >>> mesh = jax.make_mesh((8,), ('x'))  # doctest: +SKIP
  >>> [d.id for d in mesh.devices.flat]  # doctest: +SKIP
  [0, 1, 2, 3, 6, 7, 4, 5]

  The above ordering takes into account the physical topology of TPU v3.
  It orders the devices into a ring, which yields efficient all-reduces on a
  TPU v3.

  Now, let's see another example with 16 devices of TPU v3:

  >>> mesh = jax.make_mesh((2, 8), ('x', 'y'))  # doctest: +SKIP
  >>> [d.id for d in mesh.devices.flat]  # doctest: +SKIP
  [0, 1, 2, 3, 6, 7, 4, 5, 8, 9, 10, 11, 14, 15, 12, 13]
  >>> mesh = jax.make_mesh((4, 4), ('x', 'y'))  # doctest: +SKIP
  >>> [d.id for d in mesh.devices.flat]  # doctest: +SKIP
  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]

  As you can see, logical axes (`axis_shapes`) affect the ordering of the
  devices.

  You can use `jax.experimental.mesh_utils.create_device_mesh` if you want to
  use the extra arguments it provides like `contiguous_submeshes` and
  `allow_split_physical_axes`.

  Args:
    axis_shapes: Shape of the mesh. For example, axis_shape=(4, 2)
    axis_names: Names of the mesh axes. For example, axis_names=('x', 'y')
    axis_types: Optional tuple of :class:`jax.sharding.AxisType` entries
      corresponding to the ``axis_names``. See `Explicit Sharding`_ for more
      information.
    devices: Optional keyword only argument, that allows you to specify the
      devices you want to create a mesh with.

  Returns:
    A :class:`jax.sharding.Mesh` object.

  .. _Explicit Sharding:  https://docs.jax.dev/en/latest/notebooks/explicit-sharding.html
  NzF`axis_shapes` passed to `make_mesh` should be a sequence of ints. Got zNumber of devices z& must be >= the product of mesh_shape r   TF)allow_split_physical_axestpuslice_indexrD   zy`jax.make_mesh` does not support multi-slice topologies. Please use jax.experimental.mesh_utils.create_hybrid_device_meshzjax-make-mesh-default-explicita  The default axis_types will change in JAX v0.9.0 to jax.sharding.AxisType.Explicit. To maintain the old behavior, pass `axis_types=(jax.sharding.AxisType.Auto,) * len(axis_names)`. To opt-into the new behavior, pass `axis_types=(jax.sharding.AxisType.Explicit,) * len(axis_names)   )category
stacklevel)
axis_types)r   r   r   _canonicalize_axis_sizesrG   mathprodr   device_kind_TPU_V5_LITE_TPU_V5Ecreate_device_meshr   r   r|   r  r   is_acceleratedr5  r  Explicitr   warningswarnDeprecationWarningr
  )
axis_shapesr=  r  r   new_axis_shapesr  r  mesh_devicesfirst_dr   s
             r5   	make_meshr+  b  s   \ _jjlG77D/
	}	  ii()W

S\N +()	+, , 3w<jy!GQZ
 7 79L9LMM $ %..w 9;, a '%GG]$C	,"3"3
4Q1==
459
	AB B ""#CD%%..03|7I7I3JJjmmM
 & 
|ZJ	GG# 5s   F;c                  ,    e Zd ZdZddgZddZd Zd Zy)	set_mesha  Sets a concrete mesh in a thread-local context.

  ``jax.set_mesh`` has dual behavior. You can use it as a global setter or as a
  context manager.

  When a mesh is in context via ``jax.set_mesh``, you can use pass
  raw PartitionSpecs to all APIs that accept sharding as an argument.
  Using ``jax.set_mesh`` is also required for enabling explicit sharding mode:
  https://docs.jax.dev/en/latest/notebooks/explicit-sharding.html

  For example::

    mesh = jax.make_mesh((2,), ('x',))
    jax.set_mesh(mesh)  # use the API as a global setter

    with jax.set_mesh(mesh):  # use the API as a context manager
      ...

  Note: ``jax.set_mesh`` can only be used outside of ``jax.jit``.
  prev_abstract_mesh	prev_meshc                ~   t        |t        j                        st        dt	        |             t        j                         st        d      |j                  rt        d| d      t        j                  j                  |j                        | _        t        j                  j                  |      | _        y )Nz/Expected mesh of type `jax.sharding.Mesh`. Got z1`set_mesh` can only be used outside of `jax.jit`.zmesh z contains manual axes which is not allowed when using `jax.set_mesh`. Please use `jax.shard_map` to enter into `Manual` mode instead.)r/   r5  r
  rG   r   r
   trace_state_cleanr  r	   abstract_mesh_context_manager
swap_localrr  r.  device_contextr/  )r[   r   s     r5   rn   zset_mesh.__init__  s    dHMM*;DJ<
HJ J!!#JKK$   
 %BBMMD**55d;DNr@   c                     y r.   ra   rp   s    r5   	__enter__zset_mesh.__enter__  s    r@   c                    t         j                  j                  | j                         t         j                  j                  | j
                         y r.   )r	   r2  	set_localr.  r4  r/  )r[   exc_type	exc_value	tracebacks       r5   __exit__zset_mesh.__exit__  s4    
((2243J3JK
##DNN3r@   Nr   rb  )r]   r^   r_   r   	__slots__rn   r6  r<  ra   r@   r5   r-  r-    s"    ( $[1)< 	4r@   r-  c                 h    t        j                         st        d      t        j                         S )Nzd`get_mesh` can only be used outside of `jax.jit`. Maybe you want `jax.sharding.get_abstract_mesh()`?)r
   r1  rG   r5  get_concrete_meshra   r@   r5   get_meshrA    s1    				!
	/0 0 
	#	#	%%r@   c              #    K   t        | t        j                        sJ t        j                  j                  |       }	 d  t        j                  j                  |       y # t        j                  j                  |       w xY wwr.   )r/   r5  r
  r	   r4  r3  r8  )r   prev_vals     r5   _internal_use_concrete_meshrD    s`     	D(--	((	(""--d3(.	
##H-F##H-s   <BA#  B#!BBr   )rK   r   r\   zMapping[Device, int]r   )F)r  rC  r   %mesh_lib.Mesh | mesh_lib.AbstractMeshr\   zSequence[PartitionSpec])r   r0   )r  jsharding.Shardingr  r   r  r   r\   ztuple[int, int])r   rF  r  r   r\   ztuple[int | None, ...])r  rF  r  r   rK   r   r\   r   )r  r   r\   r   )r   rF  r\   r   )r   rF  r\   rF  )r   rF  )r\   rF  r.   )r   rE  r  r(   re   rk   r\   r   )r  r  r   rE  r\   r   )T)r   z$NamedSharding | PartitionSpec | Noner  r   r  r   r\   zNamedSharding | None)
r'  zSequence[int]r=  zSequence[str]r  z$tuple[mesh_lib.AxisType, ...] | Noner   r  r\   rb  )r\   rb  r=  )x
__future__r   rH   
contextlibcollections.abcr   r   dataclassesr  r  r$  typingr   r   r   jax._srcr	   r
   r   r5  r   rJ  r   r   r   r   r   r   r   r   jax._src.libr   r  jax._src.lib.mlir.dialectsr   jax._src.named_shardingr   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   jax._src.op_shardingsr%   r&   r'   jax._src.partition_specr(   jax._src.utilr)   r*   r+   numpyr   _xla
config_extr<   r   r   ri   r0   r  r   rK  XLACompatibleShardingr?   cacherS   	dataclassrU   r  	replicater   rg   rf   r^   r   r   r  r  MeshAxisNamerX  rZ  ra  ro  rq  ry  r  r  r  r  r  rG   r  r  r  r  r  r  r  r  r  r  r  r  r:  r  r+  r-  rA  contextmanagerrD  ra   r@   r5   <module>r\     s   #   -     ( (   % * #   % %  ! ) *( ( ( (    2 A A WW^^
c3h	eSjFCK( !** R T6 7& ; ; ; ..224 ? r&&'O9-- O (Ob #1  T644"84 74 r]9%% ]  ]~ ) F r l(I&& l( !l(^  5:"8Nj  d#? ? $?6 d#  $ d#0 0 $04<(4!F8-Q0-Q
/-Q4K-Q`
>j > T6pM'pM.1pM:=pMBQpM 7pMf8 8/489O8v(('((.1((AF((KN((V4N
K*6G3 K6  #=
/=8E=='4= =>> E>> HL4$'4AE434 4p BFXH7;XH>XH4XH@MXHt,4 ,4^& . .r@   