
    uki	e                       d Z ddlmZ ddlmZmZ ddlZddlZddlm	Z	 ddl
Z
ddlmZ ddlmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ  ddlm!Z! ddl"m#Z# ddlm$Z$ ddl%m&Z& ddlZ'd2dZ(d3d4dZ)d Z*d Z+d5d6dZ,d7dZ-d8d9dZ.d:dZ/ed        Z0ed         Z1ed!        Z2	 	 	 	 	 	 d;d"Z3	 	 	 	 	 	 d<d#Z4 ejj                  d$      Z6e6jo                  e3       d% Z8e6js                  e8        ejt                  e6d&        d' Z; ee;d      ejx                  e6<   d( Z= ej|                  e6e=       	 	 	 	 	 	 d;d)Z?	 	 	 	 	 	 d=d*Z@ ejj                  d+      ZAeAjo                  e?       d, ZBeAjs                  eB        ejt                  eAd-         ej                  eA       d. ZD ej|                  eAeD       d>d/ZE G d0 d1      ZF eF       ZGy)?zDUtilities for synchronizing and communication across multiple hosts.    )annotations)partial	lru_cacheN)Any)tree_flattentree_unflatten)core)dtypes)ad)batching)mlir)array)sharding_impls)pxla)pjit)prng)PartitionSpec)distributed)safe_zip)
xla_bridge)
xla_clientc                D    t         j                  j                  d |       S )Nc                F    t        j                  | | j                  d      S )Nr   )dtypeaxis)jnpsumr   xs    [/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/jax/experimental/multihost_utils.py<lambda>z_psum.<locals>.<lambda>-   s    q A     jaxtreemap)xss    r    _psumr(   ,   s    	A2	FFr"   c                
   t        j                         dk(  r.t         j                  j                  t        j
                  |       S t        j                         dk(  t	        j                  t        j                               j                  t        j                         t        j                               }t         j                  j                  |d      t        d      fd}d }t         j                  j                  ||       } t        j                        5   t        j                  t         t                     |       }ddd       t         j                  j                  |      S # 1 sw Y   )xY w)	a  Broadcast data from a source host (host 0 by default) to all other hosts.

  Args:
    in_tree: pytree of arrays - each array *must* have the same shape across the
      hosts.
    is_source: optional bool denoting whether the caller is the source. Only
      'source host' will contribute the data for the broadcast. If None, then
      host 0 is used.

  Returns:
    A pytree matching in_tree where the leaves now all contain the data from the
    first host.
     Nr   	processeslocal_devicesr,   c                    r| }nt        j                  |       }t        j                  |d      }t        |      S Nr   r   )np
zeros_likeexpand_dims host_local_array_to_global_array)r   inpglobal_mesh	is_sourcepspecs     r    pre_jitz%broadcast_one_to_all.<locals>.pre_jitI   s8    cMM!c
..1
%C+CeDDr"   c                J    t        j                  | j                  d            S )Nr   )r$   
device_getaddressable_datar   s    r    post_jitz&broadcast_one_to_all.<locals>.post_jitQ   s    >>!,,Q/00r"   out_shardings)r$   process_countr%   r&   r1   asarrayprocess_indexr   devicesreshapelocal_device_countshardingMeshPset_meshjitr(   )in_treer7   rC   r9   r=   out_treer6   r8   s    `    @@r    broadcast_one_to_allrM   0   s    	A88<<

G,,!!#q(I	kkmWS..0#2H2H2JK 
!!'+IJ+
K.%E1 HHLL'*'
||K  :0swwuAC09H: 
h	)): :s   '*E99Fc                    | S N r   s    r    _identity_fnrQ   ]   s    	
(r"   c                   t        | t        j                        r| j                  s|st	        d      t        | j
                  t        j                        r%| j
                  j                  t                     }nIt        j                  j                  | j
                  j                  | j
                  j                        } t        j                  t         |      |       }n1t        j"                         dk(  r0t%        j&                  |       }|st%        j(                  |d      S |S t%        j                  t        j*                               j-                  t        j"                         t        j.                               }t        j
                  j1                  |d      }t        d	      }t        j
                  j                  ||      }t%        j&                  |       }|j2                  dk(  s|st%        j(                  |d      }t5        j6                  |j8                  |j:                        }	t        j<                  |d
      }t?        j@                  |t        jB                  |      |	      }
t        jD                         D cg c]  }t        jF                  ||       }}t        jH                  |
j8                  ||      }t        jJ                  |      5   t        j                  t         t                     |      }d d d        t%        j&                  jM                  d            S c c}w # 1 sw Y   2xY w)NzFGathering global non-fully-addressable arrays only supports tiled=True)spec)memory_kindr>   r*   r   r0   r+   r,   pspec to array_mapping)'
isinstancer   	ArrayImplis_fully_addressable
ValueErrorrF   r   NamedShardingupdaterH   GSPMDShardingget_replicated_device_assignmentrT   r$   rJ   rQ   r@   r1   rA   r3   rC   rD   rE   rG   ndimr	   ShapedArrayshaper   prepare_axis_resourcesr   mesh_local_to_globalget_array_mappingr-   
device_put$make_array_from_single_device_arraysrI   r<   )r5   tiledrepsoutrC   r6   r8   shost_np_arravalglobal_avaldbufs
global_arrs                 r    _handle_array_process_allgatherrq   a   sr   U__%c.F.F  #,, < <=\\  ac *d))88
,,
)
)s||7O7O 9 Qd
3#'',d
3C
8C aJJsOc05R^^Ca(>3>hhs{{}%--c.?.?.A.1.D.D.FHG,,##G-KLKkNE"";6A**S/K1ENN;Q7kK--{/@/@AD11%9QRE++^55e<dDK 584E4E4GHqCNN;*HDH;;1d$J	k	" A4CGGL4Z@cA	C((+	,, IA As   L>(*MMFc                N    fd}t         j                  j                  ||       S )a  Gather data from across processes.

  Args:
    in_tree: pytree of arrays - each array _must_ have the same shape across the
      hosts.
    tiled: Whether to stack or concat the output. Defaults to False i.e. stack
      into a new positional axis at index 0.

  Returns:
    Pytrees of numpy arrays.
      * If the input is a non-fully addressable jax.Array, then the data is
        fully replicated.
      * If the input is numpy array or fully addressable jax.Array, then the
        output shape is dependent on the `tiled` argument.
        If its False, then the output will be stacked else concatenated.
      * If the input is a scalar, then the output will be stacked.
  c                    t        |       S rO   )rq   )r5   rg   s    r    _pjitz process_allgather.<locals>._pjit   s    *366r"   r#   )rK   rg   rt   s    ` r    process_allgatherru      s    &7	eW	%%r"   c                    t        j                  t        j                  | j	                                     }t        |d|  d       y)z+Creates a barrier across all hosts/devices.z$sync_global_devices name mismatch ('z')N)r1   uint32zlibcrc32encodeassert_equal)namehs     r    sync_global_devicesr~      s3    ii

4;;=)*!q8bABr"   c                
   d }t        | d      }t        j                  j                  ||       }t        j                  j	                  t        j                  j                  d ||            st        | d| d|  d      y)	z9Verifies that all the hosts have the same tree of values.c                b   t        | t        j                        r0| j                  s$t	        j
                  | j                  d            S t	        j
                  |       } | j                  dk(  rt	        j                  | d      } t	        j                  | gt        j                         z        S r/   )rV   r   rW   rX   r1   rA   r<   r_   r3   concatr$   r@   r   s    r    concat_in_treez$assert_equal.<locals>.concat_in_tree   sy    !U__%a.D.DZZ**1-..
**Q-a	
1NN11%YYsS..0011r"   T)rg   c                 L    t        j                  t        j                  |        S rO   )r1   allequalr   s    r    r!   zassert_equal.<locals>.<lambda>   s    bffRXXq\2 r"   z. Expected: z; got: .N)ru   r$   r%   r&   r   AssertionError)rK   fail_messager   ri   expected_in_trees        r    r{   r{      sz    2 	'.#XX\\.':		hhll24DcJ
L
.SE	;= =
Lr"   c                    t         j                  j                  yt         j                  j                  }|t	        d      |j                  |       S )a  Determine whether all hosts have reached a preemption sync step.

  When any host receives a preemption notice, the notice is propagated to all
  hosts and triggers a synchronization protocol in the background. The
  synchronization protocol calculates the maximum step ids from all hosts, and
  uses the next step id (i.e., max + 1) as the safe step to save a checkpoint.
  All hosts should continue training more steps until this method returns True,
  indicating that the `step_id` is equal to the safe step and the hosts should
  start saving a checkpoint.

  To use this API, all hosts must start training from the same step and call it
  at every training step. Example usage:

  ```
  def should_save(step_id: int) -> bool:

    # Should save an on-demand checkpoint for preemption
    if multihost_utils.reached_preemption_sync_point(step_id):
      return True

    # Should save a regular checkpoint
    return step_id - last_saved_checkpoint_step >= save_interval_steps
  ```

  Preemption notice is provided by the cluster scheduler to notify the
  application in advance before it gets evicted. By default, we use SIGTERM as
  the signal for preemption notice.

  TODO(b/230630494): Add instructions for customized preemption notice.

  Returns:
    A boolean indicating whether all hosts have reached a synchronization step
    after some hosts are preempted.

  Raises:
    RuntimeError: if preemption sync manager has not been initialized.
  FzrPreemption sync manager has not been initialized. Make sure the 'jax_enable_preemption_service' config is enabled.)r   global_stateclientpreemption_sync_managerRuntimeErrorreached_sync_point)step_idsync_managers     r    reached_preemption_sync_pointr      sV    L $$,))AA,
	>  
	(	(	11r"   c                >    t        j                  | | |       d      S )NT)tupled_args)pjit_libflatten_axis_resources)r|   rK   pspecs_thunks      r    _flatten_pspecsr      s     		(	(
G\^
7 7r"   c                    t        j                  |d      }t        j                  |t        j                  |      |       S NrU   )r   rb   r   rc   rd   )
local_avalmeshr8   s      r    _local_to_global_avalr      s<    

/
/7O
P%		"	"
N,,U3Z
A Ar"   c                    t        j                  |d      }t        j                  |t        j                  |      |       S r   )r   rb   r   mesh_global_to_localrd   )rm   r   r8   s      r    _global_to_local_avalr      s<    

/
/7O
P%		"	"
N,,U3[
B Br"   c                  |t        d      t        | t        j                        r| j                  s| S t        | t        j                        r.t        | j
                  t        j
                  j                        st        | d      st        j                  |       } | j                  t        j                  k(  r8t        j                  | j                  t        j                  t                    } | j                  }t        | t         j"                        x}r| j$                  } t        j
                  j'                  |j(                  |      }t        | t        j                        rJ| j
                  j+                  || j,                        r$| j.                  D cg c]  }|j0                   }}nNt        j2                  |       } |j5                  | j                        j7                         D cg c]  }| |   	 }}t9        t;        j<                  | j                  | j                        ||      }	t?        j@                  |	t        j
                  j'                  ||      |tC        |j(                  jD                  jF                              }
|r t!        j"                  |jH                  |
      S |
S c c}w c c}w )N`None` is not a valid input to the pspecs argument. Please use jax.sharding.PartitionSpec() if you wanted to replicate your input.ra   r   )%rY   rV   r   rW   rX   rF   r$   PmapShardinghasattrr1   r   r
   float0zerosra   boolr   PRNGKeyArray_base_arrayrZ   
local_meshis_equivalent_tor_   addressable_shardsdatacanonicalize_valuedevices_indices_mapvaluesr   r	   r`   r   batched_device_putlistrC   flat_impl)arrr6   r8   r   is_prng_key_arraylocal_shardingr   arraysirm   ri   s              r    %host_local_array_to_global_array_implr      s   
]
	NO O U__%c.F.FJeoo&:	llCLL--,/7>sG7L
((3-CYY&--
((399BHHTN
3C
))%$S$*;*;<<<
//C<<--k.D.DeL.
 eoo&	ll##NCHH=!445aff5F5

#
#C
(C&::399ELLNAF  &
syy#)),k5B+ 	3<<--k5Ad;))11667	9# U[[#..	* 6s    J:6J?c           	         t        |       \  }}t        d|t        j                  |            }t	        ||      D cg c]  \  }}t
        j                  |||       }}}t        ||      S c c}}w )a  Converts a host local value to a globally sharded jax.Array.

  This function takes host-local data (which might be different
  across hosts), and populates a global array with this data, where each
  device on each host, get the appropriate slice of the data according to
  sharding defined by the global_mesh/pspects.

  For example:

  >>> global_mesh = jax.sharding.Mesh(jax.devices(), 'x')
  >>> pspecs = jax.sharding.PartitionSpec('x')
  >>> host_id = jax.process_index()
  >>> arr = host_local_array_to_global_array(np.arange(4) * host_id, mesh, pspecs)  # NB: assumes jax.local_device_count() divides 4.   # doctest: +SKIP

  The resulting array will have the shape (4 * num_processes) and will
  have distributed value of: (0, 1, 2, 3, 0, 2, 4, 6, 0, 3, 6, 9, ... ),
  where each slice np.arange(4) * host_id will be partitioned across the
  corresponding host's devices.

  Similarly:

  >>> mesh = jax.sharding.Mesh(np.array(jax.devices()).reshape(jax.process_count(), jax.local_device_count()), ['host', 'dev'])
  >>> pspecs = jax.sharding.PartitionSpec('host')
  >>> host_id = jax.process_index()
  >>> arr = host_local_array_to_global_array(np.arange(4) * host_id, mesh, pspecs)  # doctest: +SKIP

  will create the same distributed value (0, 1, 2, 3, 0, 2, 4, 6, ...),
  however each slice np.arange(4) * i will be *replicated* across corresponding
  host devices.

  On the other hand, if pspecs = PartitionSpec(), which means
  replication across all axes, then this snippet:

  >>> pspecs = jax.sharding.PartitionSpec()
  >>> arr = host_local_array_to_global_array(np.arange(4), mesh, pspecs)  # doctest: +SKIP

  will have the shape (4,) and the value (0, 1, 2, 3) will be replicated
  across all hosts and devices.

  It is an undefined behavior to have not identical local_inputs with pspec
  indicating data replication.

  You can use this function to transition to jax.Array. Using jax.Array with
  pjit has the same semantics of using GDA with pjit i.e. all jax.Array
  inputs to pjit should be globally shaped.

  If you are currently passing host local values to pjit, you can use this
  function to convert your host local values to global Arrays and then pass that
  to pjit.


  Example usage.

  >>> from jax.experimental import multihost_utils # doctest: +SKIP
  >>>
  >>> global_inputs = multihost_utils.host_local_array_to_global_array(host_local_inputs, global_mesh, in_pspecs) # doctest: +SKIP
  >>>
  >>> with mesh: # doctest: +SKIP
  >>>   global_out = pjitted_fun(global_inputs) # doctest: +SKIP
  >>>
  >>> host_local_output = multihost_utils.global_array_to_host_local_array(global_out, mesh, out_pspecs) # doctest: +SKIP

  Please note this function requires global mesh to be a continuous mesh, meaning
  that  devices that belong to each host should form a subcube in this mesh.
  To move local data to global array with non-continuous mesh use
  jax.make_array_from_callback or jax.make_array_from_single_device_arrays
  instead.

  Args:
    local_inputs: A Pytree of host local values.
    global_mesh: A jax.sharding.Mesh object. The mesh must be a contiguous mesh,
    that is all hosts' devices must form a subcube in this mesh.
    pspecs: A Pytree of jax.sharding.PartitionSpec's.

  Returns:
    A pytree of global arrays.
  zinput pspecsr6   r8   )r   r   r   hashable_pytreer   "host_local_array_to_global_array_pbindr   )	local_inputsr6   pspecs	flat_inpsrK   	in_pspecsr5   in_specout_flats	            r    r4   r4   )  s    ^ $L1)Wng&66v>@)
 #9i8 #w )--c{4; . =( 
 
	**   "A/r4   c               l    t        t        j                  | j                  | j                        ||      S rO   )r   r	   r`   ra   r   r   r6   r8   s      r    ltg_abstract_evalr     -    	
syy#)),k5
B Br"   c                0    t        j                  | fi |fS rO   )r   r   ct_paramss      r    r!   r!         499"GG'J r"   c                    ~ |\  }|\  }|j                   d n|j                   }t        |      }	||	j                  ||       t        |	 }	t        j                  |||	      }
|
|fS )Nr   )	spmd_namer   insertrH   r   r   )insert_axis	axis_datavals_indims_inr6   r8   r   rn   	new_parts	new_pspecys              r    ltg_batcherr     sw    "!"!))1dy7J7J)5k)]Q	"m)(--[	 . 3!	
A+r"   c                   |gS rO   rP   ctxr   r6   r8   s       r    _ltg_loweringr     	    
*r"   c               :   |t        d      t        | t        j                        r| j                  r| S t        | d      st        j                  |       } | j                  t        j                  k(  r8t        j                  | j                  t        j                  t                    } | j                  }t        | t        j                        x}r| j                  } t         j"                  j%                  ||      }t         j"                  j%                  |j&                  |      }t)        t+        j,                  | j                  | j                        ||      }t        | t        j                        r| j"                  j/                  || j0                        r| j2                  }n"t!        j4                  | |      }	|	j2                  }t        j                  |||d      }
|r t        j                  |j6                  |
      S |
S t        j8                  |       } |j;                  | j                        j=                         D cg c]  }| |   	 }}t?        j@                  |||tC        |j&                  jD                  jF                              S c c}w )Nr   ra   r   T)	committed)$rY   rV   r   rW   rX   r   r1   r   r
   r   r   ra   r   r   r   r   r$   rF   rZ   r   r   r	   r`   r   r_   _arraysre   r   r   r   r   r   r   r   rC   r   )r   r6   r8   r   r   global_shardingr   r   r   resharded_arrayri   r   s               r    %global_array_to_host_local_array_implr     s   
]
	NO O U__%#*B*BJ	g	
((3-CYY&--
((399BHHTN
3C
))%$S$*;*;<<<
//CLL..{EB/<<--k.D.DeL.$
syy#)),k5B* U__%
||$$_chh?{{fsO<o&&f
//*nf
MCu{{C00J 
#
#C
(C&::399ELLNAF  
	 	 .&
;!!))../
1 1s   Jc           	         t        |       \  }}t        d|t        j                  |            }t	        ||      D cg c]  \  }}t
        j                  |||       }}}t        ||      S c c}}w )a  Converts a global `jax.Array` to a host local `jax.Array`.

  You can use this function to transition to `jax.Array`. Using `jax.Array` with
  pjit has the same semantics of using GDA with pjit i.e. all `jax.Array`
  inputs to pjit should be globally shaped and the output from pjit will also
  be globally shaped jax.Array's

  You can use this function to convert the globally shaped `jax.Array` output
  from pjit to host local values again so that the transition to jax.Array can
  be a mechanical change.

  Example usage:

  >>> from jax.experimental import multihost_utils # doctest: +SKIP
  >>>
  >>> global_inputs = multihost_utils.host_local_array_to_global_array(host_local_inputs, global_mesh, in_pspecs) # doctest: +SKIP
  >>>
  >>> with mesh: # doctest: +SKIP
  ...   global_out = pjitted_fun(global_inputs) # doctest: +SKIP
  >>>
  >>> host_local_output = multihost_utils.global_array_to_host_local_array(global_out, mesh, out_pspecs) # doctest: +SKIP

  Args:
    global_inputs: A Pytree of global jax.Array's.
    global_mesh: A :class:`jax.sharding.Mesh` object. The mesh must be contiguous
      meaning all local devices of the host must form a subcube.
    pspecs: A Pytree of :class:`jax.sharding.PartitionSpec` objects.

  Returns:
    A Pytree of host local arrays.
  zoutput pspecsr   )r   r   r   r   r   "global_array_to_host_local_array_pr   r   )	global_inputsr6   r   r   rL   
out_pspecsr5   or   s	            r     global_array_to_host_local_arrayr     s    B %]3)X'77?A*
 Y
3 #q )--c{45 . 7( 
 
(	++r   r   c               l    t        t        j                  | j                  | j                        ||      S rO   )r   r	   r`   ra   r   r   s      r    gtl_abstract_evalr     r   r"   c                0    t        j                  | fi |fS rO   )r   r   r   s      r    r!   r!     r   r"   c                   |gS rO   rP   r   s       r    _gtl_loweringr     r   r"   c                   |D ch c]  }|j                    }}t        j                          |vrt        d      | j                  t	        |            }|D ci c]   }|j                   |v r|||j                      " c}S c c}w c c}w )zEReturns the subset of the provided devices that are live and healthy.z/Provided devices do not have any local devices.)rB   r   rY   get_live_nodesr   )r   rC   rn   process_idslive_process_idss        r    _live_devicesr   
  s    *12Q2+2{2 F
GG**4+<= 

	
,	, 	!//	**
  3
s   B%Bc                  :    e Zd ZdZd Zej                  d        Zy)_LiveDevicesa  A context manager for atomically running code on the set of live devices.

  THIS API IS UNDER ACTIVE DEVELOPMENT AND IS NOT STABLE.

  # Overview

  `live_devices` is a low-level primitive that can be used to make
  multi-controller JAX programs fault tolerant. A multi-controller JAX program
  runs across many devices, and the machines that host these devices might fail.
  `live_devices` is a context manager that yields the current set of healthy
  devices, allowing you to run JAX code on the healthy devices while ignoring
  the failed ones.

  Concretely, `live_devices` is a context manager. You provide it the set of
  devices you are interested in, and it yields the subset of these devices that
  are live. In the body of the `with` statement, you can execute arbitrary JAX
  code using the set of live devices.

  # Example Usage

      try:
        with jax.live_devices(jax.devices()) as devices:
          # Run JAX code here with devices.
          pass
      except:
        # A device died while executing the with statement above.
        pass
      else:
        # The with statement executed successfully.
        pass

  # Barrier Semantics

  It's important that every process agrees on which devices are live to avoid
  the processes' behavior from diverging. For example, imagine a set of
  processes trying to run an AllGather, but they all disagree on which devices
  should be participating in the AllGather. This is buggy.

  To ensure that every process agrees on the set of live devices, the
  `live_devices` context manager has barrier-like semantics. Consider an
  invocation `with live_devices(devices)` where `devices` includes devices
  across a set of processes P. The invocation acts as a barrier, waiting for
  every process in P to call `with live_devices(devices)`. Afterwards,
  `live_devices` returns the same set of live devices `A` to all the processes
  in P. This ensures that every process agrees on the set of live devices.

  `live_devices` does not actually act as a barrier for *every* process in P
  because some processes in P might have failed. Instead, the `live_devices`
  function waits only for the processes with a device in the returned set of
  live devices A.

  # An Example

  Imagine we have four processes, each with two devices:

    Process A: Devices 1 and 2
    Process B: Devices 3 and 4
    Process C: Devices 5 and 6
    Process D: Devices 7 and 8

  Further imagine that process D fails and that every process calls `with
  live_devices(jax.devices())`. The invocation returns devices 1, 2, 3, 4, 5,
  and 6. Because these devices are hosted by processes A, B, and C, the call to
  `live_devices` acts as a barrier across processes A, B, and C. Process D,
  which failed, is ignored.

  # Atomicity

  `live_devices` also provides the following transaction-like atomicity
  property. When a process exits the body of a `with jax.live_devices(...) as
  devices:` block, there are two possibilities.

    1. All processes in `devices` successfully executed all code in the block
       without any exceptions being raised.
    2. All processes in `devices` did not successfully execute the code in the
       block, and all the processes will raise an exception.

  Consider the following code.

      try:
        with jax.live_devices(...) as devices:
          pass
      except:
        pass # A
      else:
        pass # B

  The atomicity property says that either every process with devices in
  `devices` will enter the except branch (A) or every process with devices in
  `devices` will enter the else branch (B). It is impossible for some processes
  to enter A and others to enter B.

  TODO: mwhittaker - Link to formal live devices semantics.

  Args:
    devices: A list of devices. The provided devices must include at least one
    local device.

  Returns:
    The subset of the provided devices that are live and healthy.

  Raises:
    RuntimeError: If the distributed runtime was not initialized.
    ValueError: If no local devices are provided.
  c                    d | _         y rO   )rC   )selfs    r    __init__z_LiveDevices.__init__  s	    DLr"   c              #    K   t         j                  j                  }|t        d      |st	        d      | j
                  t        ||      | _        d }	 t        | j
                  j                               }|j                  d        | | j
                  }t        ||      }|| _        |r||j                         |j                         k  st	        | d|       y # t        $ r}|}Y d }~ed }~ww xY w# | j
                  }t        ||      }|| _        |r||j                         |j                         k  st	        | d|       w xY ww)Nz Distributed JAX not initialized.zNo devices provided.c                    | j                   S rO   )id)rn   s    r    r!   z'_LiveDevices.__call__.<locals>.<lambda>  s
    qtt r"   )keyz is not a subset of )r   r   r   r   rY   rC   r   r   keyssort	Exceptionitems)r   rC   r   	exceptionaliveeold_devicesnew_devicess           r    __call__z_LiveDevices.__call__  sU    %%,,F~;<< -..||"673dlIL4<<$$&'ejj^j$k LLk!&'2k dl	 K$5$5$77K=(<[MJKK 8  i LLk!&'2k dl	 K$5$5$77K=(<[MJKK 8s>   AE:C& AE&	C;/C61C> 6C;;C> >AEEN)__name__
__module____qualname____doc__r   
contextlibcontextmanagerr
  rP   r"   r    r   r     s*    hT L Lr"   r   )r'   r   returnr   rO   )rK   r   r7   zbool | Noner  r   )F)rK   r   rg   r   r  r   )r|   str) )r   r  )r   intr  r   )r   r   r6   jax.sharding.Meshr8   r   )r   r   r6   r  r   r   )r   r   r6   r  r   r   )rC   zlist[xla_client.Device]r  zdict[xla_client.Device, int])Hr  
__future__r   	functoolsr   r   rx   r  typingr   r$   	jax.numpynumpyr   jax.tree_utilr   r   jax._srcr	   r
   jax._src.interpretersr   r   r   r   r   r   r   r   r   jax.shardingr   rH   r   jax._src.utilr   r   jax._src.libr   r1   r(   rM   rQ   rq   ru   r~   r{   r   r   r   r   r   r4   	Primitiver   def_implr   def_abstract_eval
deflinear2r   fancy_primitive_batchersr   register_loweringr   r   r   r   defvectorizedr   r   r   live_devicesrP   r"   r    <module>r)     se   K " (    
  6   $ * &  # & %  +   "  # G(*Z&-R&0C=&.2b 7 7 A A
 B B(	(/(8;(VW+W+$5W+?BW+r &4T^^4V%W " " + +,Q RB # 4 45F G 0JK IPI ! !"D E   9= I(1	(1/(18;(1V),),%6),@C),V &4T^^4V%W " " + +,Q RB # 4 45F G 0JK   9 :   9= I IL ILV ~r"   