
    ukie                    t   d Z ddlmZ ddlmZmZ ddlZddlZddlZddl	Z	ddl
mZ ddlZddlZddlZddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZm Z m!Z!m"Z"m#Z# ddl$m%Z& ddl'm(Z( eZ) ejT                  dd       G d d             Z+ ejT                  dd       G d d             Z,d%dZ-d&dZ.	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d'dZ/	 	 	 	 	 	 	 	 d(dZ0	 	 	 	 	 	 	 	 d(dZ1	 	 	 	 	 	 d)dZ2	 	 	 	 	 	 d)dZ3 G d d      Z4 G d  d!      Z5 G d" d#      Z6 e6d      Z7	 	 	 	 	 	 d*d$Z8y)+z-Colocated Python function API implementation.    )annotations)CallableSequenceN)Any)api)	tree_util)util)pxla)
xla_client)api_boundary)wraps)func_backend)_deserialize_deserialize_specs _make_specs_for_serialized_specs
_serialize_serialize_specs)register_backend_cache)ifrt_programsT)frozenslotsc                  0    e Zd ZU dZded<   ded<   ded<   y)	FunctionInfoz*User function wrapped by colocated_python.Callable[..., Any]fun
str | Nonefun_sourceinfoinspect.Signature | Nonefun_signatureN)__name__
__module____qualname____doc____annotations__     a/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/jax/experimental/colocated_python/func.pyr   r   +   s    2))r&   r   c                      e Zd ZU dZdZded<   dZded<   dZded<   dZded	<   dZ	ded
<   dZ
ded<   ddddddd	 	 	 	 	 	 	 	 	 	 	 ddZy)Specializationz/Specialization for a colocated_python function.Ntree_util.PyTreeDef | Nonein_specs_treedef'tuple[api.ShapeDtypeStruct, ...] | Nonein_specs_leaves*Callable[..., ShapeDtypeStructTree] | Noneout_specs_fnout_specs_treedefout_specs_leavesxc.DeviceList | Nonedevices)r+   r-   r/   r0   r1   r3   c               Z   || j                   }n| j                   t        d      || j                  }n| j                  t        d      || j                  }n| j                  t        d      || j                  }n| j                  t        d      || j
                  }n| j
                  t        d      || j                  }nO| j                  t        d      t        |t        j                        st        j                  t        |            }t        ||||||      S )z,Creates a new specialization with overrides.zin_specs already specifiedzout_specs_fn already specifiedzout_specs already specifiedzdevices already specified)r+   
ValueErrorr-   r/   r0   r1   r3   
isinstancexc
DeviceListtupler)   )selfr+   r-   r/   r0   r1   r3   s          r'   updatezSpecialization.update?   s7    ..				*344,,o				)344&&l				&788 00				+455..				*455g		!233/eGn-g r&   )r+   r*   r-   r,   r/   r.   r0   r*   r1   r,   r3   z+Sequence[jax.Device] | xc.DeviceList | None)r    r!   r"   r#   r+   r$   r-   r/   r0   r1   r3   r;   r%   r&   r'   r)   r)   4   s    715.5=A/:A=A,:A26/6>B;B"&'&
 6:AEAE6:BF=A0 30 ?	0
 ?0 40 @0 ;0r&   r)   c                    t        | t        j                        st        dt	        |        d      t        j                  | j                  | j                  | j                        S )z7Extracts a spec for a value, which must be a JAX Array.zFcolocated_python only supports jax.Array as input and output, but got .)shapedtypesharding)
r6   jaxArrayr5   typer   ShapeDtypeStructr>   r?   r@   )xs    r'   	_get_specrF   r   sW     
Asyy	!
	G9A	  
		AGG177QZZ	PPr&   c                    t               }| D ]7  }t        |dd      }||j                  |j                  j                         9 |syt        |      dk7  rt        d| d      |j                         S )zBReturns a representative device list from function call arguments.r@   N   zLAll arguments must use the same device list, but got multiple device lists: r=   )setgetattraddr@   _internal_device_listlenr5   pop)argsdevice_list_setrE   r@   s       r'   _infer_devices_from_argsrQ   ~   s    (+/ <aq*d+H!**::;< 
Q
	##2"31	6  
			r&   c                   |||||f}t        |      }t        j                  | ||||      }	|d   j                  }
t	        d |D              }t	        d |D              }	 t        j
                         }|
j                  |	|      t        j                  ||d      j                  fd}|S # t        j                  j                  $ r&}dt        |      v rt        |      d   cY d}~S  d}~ww xY w)	z5Compiles a Python function into a runtime executable.r   c              3     K   | ]8  }t         j                  j                  |j                  |j                         : y wN)rA   coreShapedArrayr>   r?   .0sdss     r'   	<genexpr>z)_compile_to_executable.<locals>.<genexpr>   s.      58chh399cii0s   >A c              3  4   K   | ]  }|j                     y wrT   )r@   rW   s     r'   rZ   z)_compile_to_executable.<locals>.<genexpr>   s     AA   T)	committedc                     t        j                  | |f      }j                  |d      }|j                        }t        j                  |      S )NF)with_tokens)r   tree_leavesexecute_shardedconsume_with_handlerstree_unflatten)rO   kwargsargs_leavesexecute_resultresultsloaded_executableout_handlersr0   s        r'   callz$_compile_to_executable.<locals>.call   sW    ))4.9k(88
5 9 n 44\Bg%%&7AAr&   z#PjRtCompiler requires an HloProgramN)r   r   make_colocated_python_programclientr9   %make_colocated_python_compile_optionscompile_ifrt_programr
   global_avals_to_results_handlerhandlersrA   errorsJaxRuntimeErrorstrr   )namer   r+   r-   r0   r1   r3   fun_and_specializationpickled_functionprogramifrt_clientout_sdssout_shardingscompile_optionsrj   erh   ri   s       `           @@r'   _compile_to_executabler}      s    
   6777
g8H' 
!!+ <L ( A0@AA-
#IIKO#88 77-4h B K		#	# 
 -A6*+A..	
s%   $AB5 5C8C3,C82C33C8c           
         |j                   J |j                  J |j                  J |j                  J |j                  J |j                  d fd}t        j                  t        |j                              \  }}t         j                  dd      }| d}t        |||j                   |j                  |t        |      |j                        S )zXCreates a function that computes output specs and pushes the result to the result store.c                      j                   | i |}t        j                  |      \  }}t        d |D              }t        j
                  j                  |       t        ||      S )Nc              3  2   K   | ]  }t        |        y wrT   rF   rX   rE   s     r'   rZ   zN_make_output_specs_and_push_result_fun.<locals>.lowered_fun.<locals>.<genexpr>   s     @QIaL@   )r   r   tree_flattenr9   r   SINGLETON_RESULT_STOREpushr   )	rO   rd   resultresult_leavesout_treedefout_spec_leavesr3   infouids	         r'   lowered_funz;_make_output_specs_and_push_result_fun.<locals>.lowered_fun   sb    TXXt&v&F!*!7!7!?M;@-@@O'',,S-@K'BBr&   r    unknown_output_specs_and_push_resultrt   r   r+   r-   r0   r1   r3   )returnz	jax.Array)r+   r-   r0   r1   r3   r   r   r   rJ   r   r}   r9   )r   specializationr   r   r1   r0   rt   r3   s   ` `    @r'   &_make_output_specs_and_push_result_funr      s     
	(	(	44	4		'	'	33	3		)	)	11	1		(	(	00	0				++	+""'C )2(>(>&~'='=>)%% 
:y	1$6.	/$	
%66$44)-.$$
 r&   c           	     \   |j                   J |j                  J |j                  J |j                   fd}t        j                  di f      \  }}t        | j                  dd      }| d}t        |||t        |      |j                   |j                  |j                        S )z9Makes a function that pops results from the result store.c                 n    t         j                  j                        } t        j                  |       S rT   )r   r   rN   r   rc   )r   r0   r   s    r'   r   z)_make_pop_result_fun.<locals>.lowered_fun   s-     77;;C@M##$5}EEr&   r%   r    r   _pop_resultr   )	r0   r1   r3   r   r   rJ   r   r}   r9   )r   r   r   r   r-   r+   rt   r0   s     `    @r'   _make_pop_result_funr      s     
	)	)	55	5		(	(	44	4				++	+$66F '0&<&<	> '#/# 
:y	1$6	$	
'O,&88%66$$
 r&   c           	     X   |j                   J |j                  J |j                  J |j                  J |j                  J t        | j                  dd      }t        || j                  |j                   |j                  |j                  |j                  |j                        S )z;Makes a function that asynchronously executes the function.r    r   r   )r+   r-   r0   r1   r3   rJ   r   r}   )r   r   rt   s      r'   _make_async_execution_funr     s    
 
	(	(	44	4		'	'	33	3		)	)	55	5		(	(	44	4				++	+	:y	1$	
((%66$44&88%66$$
 r&   c                     t        j                  d       j                  J j                  J j                  J t        j                  d      t        j                         d fd}|S )z<Returns a specialized function for the given specialization.z+colocated_python_func._get_specialized_funcN?   c                 |   5  
j                   j                  dt              } || i |}t        |      \  }}j	                  ||      t              
|
_         t                     cddd       S t        j                  t        | |f      \  }} j                  |i |}t        j                  |      \  }}j	                  |t        |            t              
nt              
ddd        
| i |}	5  d
_        ddd       |	S # 1 sw Y   %xY w# 1 sw Y   |	S xY w)z?Specialized function to be executed with given args and kwargs.N)r0   r1   )r0   r/   r   r   r;   r    output_specs_and_push_result_funr   r   tree_maprF   r   r9   )rO   rd   r   serialized_out_specsr0   r1   
args_specskwargs_specs	out_specsr   async_execution_funcr   mutexr   r   s             r'   specialized_funcz8_uncached_get_specialized_func.<locals>.specialized_func2  s    
 9Q		%++3((06.# -
 $D$$ 
 3E$3// ,22"3!1 3 N $=n$  1 !A C'ncBDG9Q 9QL (1'9'9D&>($J 433ZP<PI 3<2H2H3// ,22"3!&'7!8 3 N $=n$ 
 ";4!P
s9Q| "4262F	 C>B;CMC9Q 9Q~CMs   A5D%A>D%D1%D.1D;)	r	   
test_eventr+   r-   r3   randomgetrandbits	threadingLock)r   r   r   r   r   r   s   `` @@@r'   _uncached_get_specialized_funcr   #  s    
 //?@		(	(	44	4		'	'	33	3				++	+2#
..
%D DL 
r&   c                      e Zd ZdZ ej
                  dd       G d d             Z ej
                  dd       G d d             Zd Z	 	 	 	 	 	 	 	 	 	 dd	Z	y
)_SpecializedCollectiona   Collection of specialized functions for a single unspecialized function.

  The `get()` method retrieves the specialized function for the provided input
  spec, either by looking up a cache or by compiling the specialized function.

  Looking up a cache with an input spec as a key can be slow, because
  `Sharding`'s equivalence comparison is slow. Instead, we maintain two caches
  for the same value: we use the ID of the sharding object (via `WeakSpec`) as
  the key in one cache, and the corresponding strong references to the sharding
  object (via `StrongSpec`) as the key in another cache. Looking up the
  `WeakSpec`-keyed cache is fast. Note that the ID integer in the `WeakSpec`
  cache will remain valid as long as a strong-ref exists in the `StrongSpec`
  cache.

  The `StrongSpec`-keyed cache is unbounded, while the `WeakSpec`-keyed cache
  is LRU(1): if there is a miss in the `WeakSpec` cache but a hit in the
  `StrongSpec` cache, the strong-ref is the `StrongSpec` cache and the ID
  integer in the `WeakSpec` cache are both updated.
  T)r   unsafe_hashc                  J    e Zd ZU dZded<   ded<   ded<   ded	<   	 	 	 	 dd
Zy)_SpecializedCollection.WeakSpecz;WeakSpec stores just the `id()` of the input spec sharding.ztuple[jax.numpy.dtype, ...]dtypesztuple[tuple[int, ...], ...]shapesztuple[int, ...]sharding_idstree_util.PyTreeDeftreedefc                    t        d |D              | _        t        d |D              | _        t        d |D              | _        || _        y )Nc              3  4   K   | ]  }|j                     y wrT   )r?   r   s     r'   rZ   z;_SpecializedCollection.WeakSpec.__init__.<locals>.<genexpr>       7a!''7r\   c              3  4   K   | ]  }|j                     y wrT   )r>   r   s     r'   rZ   z;_SpecializedCollection.WeakSpec.__init__.<locals>.<genexpr>  r   r\   c              3  F   K   | ]  }t        |j                          y wrT   )idr@   r   s     r'   rZ   z;_SpecializedCollection.WeakSpec.__init__.<locals>.<genexpr>  s     D11::Ds   !)r9   r   r   r   r   )r:   re   r   s      r'   __init__z(_SpecializedCollection.WeakSpec.__init__  sB     7;77dk7;77dkDDDddlr&   N)re   Sequence[jax.Array]r   r   )r    r!   r"   r#   r$   r   r%   r&   r'   WeakSpecr     s2    E''''!!  .9Lr&   r   c                  >    e Zd ZU dZdZded<   dZded<   	 	 	 	 ddZy)	!_SpecializedCollection.StrongSpecz/StrongSpec stores the full input spec sharding.Nr*   r+   r,   r-   c                @    t        d |D              | _        || _        y )Nc              3  2   K   | ]  }t        |        y wrT   r   r   s     r'   rZ   z=_SpecializedCollection.StrongSpec.__init__.<locals>.<genexpr>  s     "EA9Q<"Er   )r9   r-   r+   )r:   re   	pytreedefs      r'   r   z*_SpecializedCollection.StrongSpec.__init__  s     #"E"EEd'dr&   )re   r   r   r   )r    r!   r"   r#   r+   r$   r-   r   r%   r&   r'   
StrongSpecr     s1    93707?CO<C(.(;N(r&   r   c                    t         }i | _        i | _        i | _        i | _        d| _        t        j                         | _        y )Nr   )	int_weak_to_id_id_to_weak_strong_to_id_id_to_compiled_counterr   r   _mu)r:   
CompiledIds     r'   r   z_SpecializedCollection.__init__  s<    JJLDJLDNPDACDDM~~DHr&   c                   t         j                  ||      }| j                  j                  |      }|| j                  |   S | j
                  5  t         j                  ||      }| j                  j                  |d       }|m| j                  j                  |      }| j                  |= || j                  |<   || j                  |<   || j                  |<   | j                  |   cd d d        S |j                  <t        ||j                  |j                  |j                  t        |                  }	n1t        ||j                  |j                  |j                              }	| j                   }| xj                   dz  c_        || j                  |<   || j                  |<   || j                  |<   |	| j                  |<   |	cd d d        S # 1 sw Y   y xY w)N)r+   r-   r3   )r+   r-   rH   )r   r   r   getr   r   r   r   rN   r   r3   r   r;   r+   r-   rQ   r   )
r:   re   r   	func_infor   	weak_speccompiled_idstrong_specold_weakr   s
             r'   r   z_SpecializedCollection.get  s    '//YGI""&&y1K!!+..	 **55k9Mk&&**;=k		  ##''4X&*5;'&1#(1%##K0* *" 
			'/!!!,!=!= + ; ;0= " 
 0!!!,!=!= + ; ; " 
 MMk
mmqm$/dy!(3d%&/d{#*0d;'U* * *s   BG1CGGN)
re   r   r   r   r   r   r   r)   r   r   )
r    r!   r"   r#   dataclasses	dataclassr   r   r   r   r%   r&   r'   r   r   {  s    ( ;t6  7  ;t6
( 
( 7
(	 :&: %: 	:
 %: :r&   r   c                  6    e Zd ZdZddZd Z	 	 	 	 ddZd	dZy)
_JaxSecondLevelCacheszBManages second-level caches registered as a single cache with JAX.c                \    t        j                         | _        i | _        t	        | |       y rT   )r   r   _lock
_callbacksjax_register_backend_cache)r:   rt   s     r'   r   z_JaxSecondLevelCaches.__init__  s!    !DJ57DOtT*r&   c                    | j                   j                         D ]	  } |         | j                   j                          y)z%Meant to be invoked by JAX internals.N)r   valuesclear)r:   callbacks     r'   cache_clearz!_JaxSecondLevelCaches.cache_clear  s3    OO**, jOOr&   c                "    || j                   |<   y rT   )r   )r:   r   cache_clear_callbacks      r'   register_second_levelz+_JaxSecondLevelCaches.register_second_level  s     0DOOCr&   c                Z    	 | j                   j                  |       y # t        $ r Y y w xY wrT   )r   rN   KeyError)r:   r   s     r'   remove_second_levelz)_JaxSecondLevelCaches.remove_second_level	  s+    
oo# 
s    	**N)rt   rs   )r   r   r   r   )r   r   )r    r!   r"   r#   r   r   r   r   r%   r&   r'   r   r     s)    J+
00,>0
r&   r   c                  R    e Zd ZdZ ed      ZddZd Z	 	 	 	 d	dZ	 	 	 	 	 	 d
dZ	y)_CachedColocatedFunctionMakerzFunction maker for colocated Python functions.

  Generated functions are stored (cached) indefinitely so that they can be
  reused, until the cache is dropped.
  'colocated_python_specialized_func_cachec                    ||nt        j                         j                  | _        g g fd}t        j
                  j                  | j                  |       | _        | _        y )Nc                 F     j                          j                          y rT   )r   )specialized_collectionsspecialized_functionss   r'   clear_cachesz<_CachedColocatedFunctionMaker.__init__.<locals>.clear_caches  s    ##%!!#r&   )	uuiduuid4r   held_byr   	JAX_CACHEr   r   r   )r:   r   r   r   r   s      @@r'   r   z&_CachedColocatedFunctionMaker.__init__  s`    %17tzz|7G7GDL<>68$ "++AA $;D !6Dr&   c                    | j                   j                          | j                  j                          	 t        j                  j                  | j                         y # t        $ r Y y w xY wrT   )r   r   r   r   r   r   r   AttributeError)r:   s    r'   __del__z%_CachedColocatedFunctionMaker.__del__)  sU      &&($$&#--AA$,,O 
s   )A   	A,+A,c                     	 	 	 d	 	 	 	 	 d fd}d d t        j                         t         fd       } t        j                        |      }||_        |S )z)Internal implementation of make_callable.c           	         | d\  }}n#t        j                  |       \  }}t        |      }j                  j	                  ||||            S )a7  Returns a colocated Python callable with extra specialization.

      Args:
        in_specs: Optionally specifies the expected input specs. Input specs are
          expressed as a `PyTree[ShapeDtypeStruct]` for `(args, kwargs)` of a
          function call.
        out_specs_fn: Optionally specifies a function that computes the output
          specs from input specs. If unspecified, colocated Python will compute
          the output specs during the very first execution, and this execution
          will be synchronous.
        devices: Optionally specifies the devices to execute the function on.
          Must be provided if `in_specs` has no leaves because devices cannot be
          inferred from input specs or arguments.

      Returns:
        A colocated Python callable with extra specialization.
      )NN)r+   r-   r/   r3   )r   r   r9   _make_callabler;   )	in_specsr/   r3   r-   r+   in_specs_leaves_listr   r:   r   s	         r'   
specializez@_CachedColocatedFunctionMaker._make_callable.<locals>.specialize9  sr    2 
	,6))1:1G1G2
..   45  



/-'	    r&   c                      y rT   r%   r%   r&   r'   <lambda>z>_CachedColocatedFunctionMaker._make_callable.<locals>.<lambda>g      r&   c                      y rT   r%   r%   r&   r'   r   z>_CachedColocatedFunctionMaker._make_callable.<locals>.<lambda>h  r   r&   c            	     B   t        j                  | |f      \  }}t        |      dk(  }|rj                  t	        d      j
                  duxr j                  du}|sm|sk5          }|:t               }
j                  j                  |       t        j                  |      ddd        j                  ||	      | i |}~|S 5          }||rj                  t        	      }nO|r&t        	j                  t        |                  }n'|r%t        	j                  t!               |            }
j"                  j                  |       t        j                  |      ddd        | i |}~|S # 1 sw Y   xY w# 1 sw Y    xY w)a  Executes the given Python function on the same devices as the arguments or as specialized.

      If the callable has not been specialized with output shapes and shardings
      (see `specialize` above), the very first call will run synchronously to
      discover output shapes and shardings, and will run asynchronously after.
      If specialized with output shapes and shardings, every execution of the
      callable will be asynchronous.
      r   NzkNo devices found. colocated_python function without input arguments must be first specialized with devices.)r3   )r-   r+   )r   r   rM   r3   r5   r+   r-   r   r   appendweakrefrefr   r   r;   rQ   r9   r   )rO   rd   re   r+   no_inputfully_specified_in_spec
collectionr   funcr   r:   r   specialized_collections_wrefspecialized_functions_wrefwref_mus            r'   __call__z>_CachedColocatedFunctionMaker._make_callable.<locals>.__call__k  s    '0&<&<dF^&L#k#[!Q&h	n,,4A
 	
 
)
)
5 9,,D8 
 %X  	C/K/M*/1J((//
;+2;;z+B(	C
)4
   9#=#?<$)?)?)K1$GD&1%%4[A & D 1%%$)G%5 & D 
$
$
+
+D
1'.{{4'8
$-9. T$V$f
mM	C 	C9 9s   'AF	B$F	FF)NNN)r   zShapeDtypeStructTree | Noner/   r.   r3   zSequence[jax.Device] | None)r   r   r   r   r   r   )r:   r   r   r   r  r	  r
  r  s   ```  @@@r'   r   z,_CachedColocatedFunctionMaker._make_callable2  s     15CG/3(-(@( -(\ $0 !-nnG@ @ @D uTXXx(H$HOr&   c                L    | j                  t        |||      t                     S )z"Makes a colocated Python callable.)r   r   r)   )r:   r   r   r   s       r'   make_callablez+_CachedColocatedFunctionMaker.make_callable  s(     S.-8.:J r&   N)r   z
int | None)r   r   r   r)   r   r   r   r   r   r   )
r    r!   r"   r#   r   r   r   r   r   r  r%   r&   r'   r   r     sZ     $$MN)7 ~~ %~@		 !	 .		r&   r   c                0    t         j                  | ||      S rT   )_DEFAULT_FUNCTION_MAKERr  )r   r   r   s      r'   r  r    s    
 
!	.	.	>=
 r&   )rE   r   r   zapi.ShapeDtypeStruct)rO   zSequence[Any]r   r2   )rt   rs   r   r   r+   r   r-    tuple[api.ShapeDtypeStruct, ...]r0   r   r1   r  r3   zxc.DeviceListr   r   )r   r   r   r)   r   r   r   r   )r   r   r   r)   r   r   r  )9r#   
__future__r   collections.abcr   r   r   inspectr   r   typingr   r   r  rA   jax._srcr   r   r	   jax._src.interpretersr
   jax._src.libr   r7   jax._src.traceback_utilr   jax._src.utilr   !jax.experimental.colocated_pythonr   /jax.experimental.colocated_python.serializationr   r   r   r   r   jax.extend.backendr   r   jax.extend.ifrt_programsr   ShapeDtypeStructTreer   r   r)   rF   rQ   r}   r   r   r   r   r   r   r   r  r  r%   r&   r'   <module>r!     s   4 " .        
    & ) 0  : ]  ] S 2  d$/* * 0* d$/: : 0:z	Q"2

2
	2
 *2
 6	2

 +2
 72
 2
 2
j"
""" 
" 	"J 
 "  
  	 F
" .U
U"U Upx xv 4k k\ 8= 	 ,r&   