
    ukiwT                       d Z ddlmZ ddlmZ ddlZddlZddlmZ ddlm	Z	m
Z
 ddlZddlZddlZddlmZ ddlZddlZddlZddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmc m c m!Z" ddlm#Z$ ddl%Z& ejN                  e(      Z) ejT                         Z+ddgZ,dZ-dZ.dZ/dZ0dZ1dZ2dZ3dZ4dZ5eZ6g dZ7d4dZ8d5dZ9d6d7dZ:d Z;d8dZ<d9dZ=d:d Z>d;d!Z?d<d"Z@	 	 	 	 	 	 	 	 d=d#ZA	 	 	 	 	 	 d>d$ZB	 	 	 	 	 	 d?d%ZCd@d&ZD	 	 dAd'ZEd( fd)ZF ee4*      ZGddd+	 	 	 	 	 dBd,ZHddd+	 	 	 	 	 dBd-ZI	 	 	 	 dCd.ZJdDd/ZKddd0	 	 	 	 	 dEd1ZLddd+	 	 	 	 	 dFd2ZMddd0	 	 	 	 	 dGd3ZNy)HzR
Serializations routines for pytrees including array and non-array serialization.
    )annotations)PathLikeN)Any)uuid4UUID)ThreadPoolExecutor)distributed)flatten_axes)Format)multihost_utils)tensorstore_implpathzgs://zs3://zpytreedef.jsonzarchive.zipTi   array_storezArray({dtype}[{shape}])z&Array\(([a-zA-Z0-9_]+)\[([0-9, ]*)\]\)       )saveloadload_pytreedefnonblocking_loadnonblocking_savec                 <   t        j                         dk(  ryt        j                  t	        j
                  t               j                  t        j                              } t        t	        j                  |       j                               }dt        |       S )zHGenerate a thread-local key for ensuring all host finish (de)serializing   Ndtype)bytesjax_sync_key_)jaxprocess_countr   broadcast_one_to_allnp
frombufferr   r   int32r   arraytobytesstr)sync_key_uniquesync_key_ids     t/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/jax/experimental/array_serialization/pytree_serialization.py_get_unique_sync_keyr*   A   sm    A#88mmEGMM24/288O4<<>?+[)*	++    c           	        t        j                         dk  ryt        |       j                  d      }t	        |      t
        kD  rt        dt
         d      t        j                  t        j                  |t        j                        t        j                  t
        t	        |      z
  t        j                        g      }t        j                  |      }t        t        j                  |d   |dd k(              S )	zEAll-gather the location of the checkpoint and check if it's the same.r   Fzutf-8zPath exceeds maximum length of z in multiprocess case.r   r   N)r   r   r&   encodelen_MAX_PATH_LENGTH
ValueErrorr!   concatenater"   uint8zerosr   process_allgatherboolall)r   path_b
path_arrays      r)   _is_str_same_on_all_hostsr9   L   s    At9G$&[##
67G6H I+ + , ,~~mmF"((+RXX
S[
(.:; <* 00<*	bffZ]jn45	66r+   c                    | y |r|  d| n| }t         j                  j                  x}|j                  |t        dz         y y )N-i  )timeout_in_ms)r	   global_stateclientwait_at_barrier_TIMEOUT_SEC)key	extra_tagfull_keyr>   s       r)   _sync_on_keyrD   [   sR    [
%.uAi[!C(((///f<
8<$3FG =r+   c                V    t        | t        j                  t        j                  f      S N)
isinstancer   Arrayr!   ndarrayxs    r)   _is_array_likerL   c   s    	A		2::.	//r+   c           	         | yt        |       rRt        j                  | j                  j                  dj                  t        t        | j                                    S t        |       j                  S )Nnullz, )r   shape)rL   _ARRAY_TYPE_FORMATformatr   namejoinmapr&   rO   type__name__)leafs    r)   _leaf_to_descrX   g   s_    	\d$$jjooTYYs3

/C%D % F F :r+   c                   | y t        j                  t        |       s| S t        j                  t        |       }|J |j                         \  }}|j	                  d      j	                         j                  d      D cg c]7  }t        |j	                               dkD  rt        |j	                               9 }}t        j                  |t        j                  j                  |            S c c}w )N],r   )rematch_ARRAY_TYPE_REGEXgroupsstripsplitr.   intr   ShapeDtypeStructnumpyr   )	leaf_descshape_dtype_match	dtype_str	shape_strrK   rO   s         r)   _desc_to_leafri   q   s    	#Y	/hh0)<		&&	&*113)Y#,??3#7#=#=#?#E#Ec#J #a1779~! qwwy> #% #			eSYY__Y%?	@@#s   <C2c                4     t         fdt        D              S )z7Check whether a path is remote by examining the prefix.c              3  X   K   | ]!  }t              j                  |d d        # y w)N)r&   
startswith).0prefixr   s     r)   	<genexpr>z"_is_remote_path.<locals>.<genexpr>   s.      1 Y!!&"+. 1s   '*)any_REMOTE_URL_PREFIXESr   s   `r)   _is_remote_pathrs   ~   s     
 1/1 
1 1r+   c                    t        |       rt        j                  |       S t        j                  |       j                         j	                         S rF   )rs   pathlibPath
expanduserresolver   s    r)   
_norm_pathry      s:    T<<	d		&	&	(	0	0	22r+   c                f    t        |       r| j                          y t        j                  |        y rF   )rs   rmtreeshutil)roots    r)   _rm_dirr~      s    TKKM
MM$r+   c                n   t        |       } |r| j                         rt        t        | j	                                     dkD  rt        | j	                               D cg c]"  }|j
                  t        t        t        fvs!|$ }}t        |      dk7  rt        d| d|  d      t        j                         dk(  s|r| j                         rt        |        t        |d       |S | j                         r7t        t        | j	                                     dkD  rt        d|  d|d      |S c c}w )	zOInspect the destination, set it up for writing, potentially read existing data.r   zWRefusing to work on a directory that is not a previous checkpoint. Unrecognized paths: z6. Remove them manually if you're sure you want to use z as the checkpoint directory.	overwritezFiles already exist at path: `z `, but you specified `overwrite=`)ry   existsr.   listiterdirrR   _PYTREEDEF_FILE_ARCHIVE_NAME_ARRAY_STORE_DIRNAMERuntimeErrorr   process_indexr~   rD   r0   )r}   r   pytree_reprdistinct_locationssync_keyr   extra_member_pathss          r)   _set_up_destinationr      s5    
D	${{}T$,,.12Q6  /B499M+?
@4A$B B 
	 A	%$$6#7 8//3f 5 	 



"&8dkkm;'#d4<<>23a77v >1&/\4 5 5'Bs   "D2D2c                   t        |       } t        | ||||      }t        |       sY|st        j                         dk(  r@| j                  d       | j                         r| j                         st        d|        t        |d       |S )zPrepare the directory: check destination, potentially read existing data
  and overwrite.

  Raises:
    RuntimeError: If the destination directory cannot be created.
  r   T)exist_okz*Could not create destination directory at mkdir)
ry   r   rs   r   r   r   r   is_dirr   rD   )r}   r   pytreedef_reprr   r   s        r)   _prepare_directoryr      s     
D	$&
I~'98E. 
	$6'*'8'8':a'?JJJ;;=EdVLMMx!	r+   c           
     r   |D cg c]  }| t        |      z   }}d }|s+t        j                         dkD  rt        j                         }t	        |      D 	cg c]"  \  }}	t        j                  |t        ||	      $ }
}}	t	        |
      D cg c]  \  }}t        j                  ||       c}}t              dkD  r@d   d   d   d   }t	              D ]#  \  }}	t        j                  ||	|t        d       % fd	}t        j                   |              y c c}w c c}	}w c c}}w )
Nr   )ocdbtprocess_idxarrr   kvstorebaser   T)r   check_metadatac                    K   t        j                  t              D  cg c]  \  } }t        j                  | |d        c}}   d {    y c c}} w 7 w)N)primary_host)asynciogatherzipts_implasync_serialize)r   ts_specarrsts_specss     r)   _serialize_arraysz(_write_arrays.<locals>._serialize_arrays   sW     
..!$13S' 	W4@3 4 4 4 3 4s   A"A
AAA)r&   r   r   r   r   r   get_tensorstore_spec
_USE_OCDBTmerge_nested_ts_specsr.   verify_tensorstore_specr   run)array_store_pathr   arr_leaf_idsr   r   leaf_idpathsr   r   r   default_ts_specsdefault_ts_specr   expected_pathr   s    ` `           r)   _write_arraysr      sD    ;G
Gwc'l*
G%
G+	 1 1 3a 7##%K *-UD)9; &4 224z?J7:< ; ;
 144Dh0OQ,?G ++OWE Q( 	]QQK	*626:MHd+ M%%gsM,6tMM4
 
++!"/ H;Qs   D('D- D3c                &   |s.t        j                         dk(  st        j                         dk7  ryt        j                  j                  | d      }t        j                  |dd      d   }t        t        j                               D cg c]  }t        j                  |d|      d    }}|j                  d      }|D cg c]  }|j                  d       }}t        j                  t        j                  ||             yc c}w c c}w )	zWhen multiple processes are writing, they must write to a per-process
  location followed by combining them via no-copy links to the final location.
  r   r   N	dummy_keyTr   r   r   r   )r   r   r   osr   rS   r   r   rangepopr   r   combine_kvstores)kvstore_pathr   dummy_key_pathcombined_kvstoreichildren_kvstores_r   s           r)   _finalize_array_storer      s    
 3,,.!3s7H7H7Ja7O
77<<k:.11Dd44=? S&&()+
 33Da11:< + + 6"!*;<ww{{6<!<	++g&&'79JKL+ =s   "D	Dc                    t        j                         dk(  s|syt        |       }|t        z  j	                  t        j                  |d             y)zMWrite the pytreedef to the destination directory and aux data to the archive.r   N   )indent)r   r   ry   r   
write_textjsondumps)	directoryr   r   r}   s       r)   _write_pytreedefr      sC     



"&8
	I	$/%%djjQ&GHr+   c                
    | d u S rF    rJ   s    r)   <lambda>r      s
    AI r+   c                n   t         j                  j                  | |      \  }}t        t	        |            t         j                  j                  |j                               }t        dt         j                  j                  ||      |      }t         j                  j                  fd|      S )zBroadcast the prefix tree `a` to the full tree `b`

  Uses `flatten_axes` for better error messages on mismatched arity but allowing
  for custom is_leaf in the `a` and `b` trees.
  is_leaftree_broadcastc                    |    S rF   r   )r   a_idx2leaf_maps    r)   r   z!_tree_broadcast.<locals>.<lambda>  s    q 1 r+   )
r   treeflattendict	enumerate	unflattenkeysr
   	structurerT   )abr   a_leavesa_structa_idxa_idx_broadcastr   s          @r)   _tree_broadcastr      s     xx''7';(H	(+,.
((

X~':':'<
=% !1!$!3!3Aw!3!GP/	1?	CCr+   )max_workersr   r   c               X    t         5  t        | |||      cddd       S # 1 sw Y   yxY w)a  Saves the given data structure to the provided directory path.

  This function provides functionality to serialize and save a data structure
  comprising JAX arrays, along with its structure to a given directory. It
  leverages `PyTree` for flattening and reconstructing the data structure.

  This is a simple experimental array serialization API, for anything more
  complex and for all checkpointing prefer: https://github.com/google/orbax

  Args:
    data: The data structure to be saved. Arbitrary composition of JAX arrays,
      including nested structures.
    directory: The directory path where the data will be saved. A local path or
      a remote URL (e.g., gs://, s3://). For remote URLs, `etils` is required.
    overwrite: If True, any existing directory with the same name will be
      overwritten.
    ts_specs: Optional tensorstore specs to use for serialization. If None,
      defaults to using the default tensorstore specs.

  Example:
    >>> data = {"a": jnp.array([1, 2]), "b": None}
    >>> save(data, directory)
  r   N)_THREADING_SAVE_LOCK_save)datar   r   r   s       r)   r   r     s.    2  JyIIJ J Js    )c          
     &   t               }t        |      rt        j                  st	        d      t        || t        j                        }t        j                  j                  | d       \  }}t        d |D              st        d      t        |       }t        j                         dkD  r|rt        d      t        |      }t!        j"                  |      }	t        j                  j%                  t&        |      |	t         j(                  <   t+        |||	||      }	g }
|
j-                  t.        j1                  t2        ||	|             |t4        z  }|D  cg c]  } t7        |       s|  }} t9        |      D  cg c]  \  }} t7        |       s| }}} t        j                  j;                  |t        j                        }|D cg c]  }||   	 }}|
j-                  t.        j1                  t<        |||||             |
D cg c]  }|j?                          }}tA        |d       tC        |      d	kD  r/tD        r)t.        j1                  tF        ||      j?                          tA        |d
       y c c} w c c} }w c c}w c c}w )Nz}For saving to remote URLs (e.g., gs, s3) you need the `etils` module installed. You can install it using `pip install etils`.r   c                
    | d u S rF   r   rJ   s    r)   r   z_save.<locals>.<lambda>8  s
    !t) r+   c              3  >   K   | ]  }|d u xs t        |        y wrF   )rL   )rn   rK   s     r)   rp   z_save.<locals>.<genexpr>9  s!     ?Q$Y+.++?s   zLFor serialization, all leaves must be either None or jax.Array-like objects.r   zSaving to different locations on different hosts is not supported, because it is extremely fragile. Consider using a single location.array_serializationr   end)$r*   rs   ru   epath_installedr   r   r   is_tensorstore_spec_leafr   r   r   r6   r0   r9   r   ry   utilsserialize_pytreedefrT   rX   _LEAF_IDS_KEYr   append_serialization_executorsubmitr   r   rL   r   leavesr   resultrD   r.   r   r   )r   r   r   r   r   	data_flat	pytreedefr   r}   r   futuresr   r   r   r   ts_specs_flatfutr   s                     r)   r   r   .  s\   !#(Y(?(?
 / 0 0 Xt%,%E%EG())$8K)L)Y	?Y?	?
 0 1 14Y??1!3
	NO O 
I	$ ,,Y7.(+]I(N.$$%%
I~'98E.'	..(//n.@B C 00$	=4t(<$	=$	=#,Y#7P4>$;O!P,P((//(*1*J*J " L--9:=#:-:	..(//%t\= 
  ''szz|'!'x./ 	z""/1CEEKVXx% 
>P ; (s$   I>.I>JJJ	Jc           	     @   t        |       }|D cg c]  }|t        |      z   }}t        j                  d      |D cg c]  }t        j                  |t
        d         }}t        |      D 	
cg c]  \  }	}
t        j                  |	|
       c}
}	t              dkD  r3d   d   d   d   }D ]   }
t        j                  |
d |t
        d       " fd	}t        t        |t        j                   |                         S c c}w c c}w c c}
}	w )
Nl       
 r   r   r   r   r   F)r   r   r   r   c                    K   t        j                  t              D  cg c]  \  } }t        j                  | |       c}}   d {   S c c}} w 7 
w)N)byte_limiter)r   r   r   r   async_deserialize)shardingr   r   	shardingsr   s     r)   _deserialize_arraysz)_read_arrays.<locals>._deserialize_arraysy  sW     #&y(#;"=Xw 	!!(G,O"= > > > "= >s   A"A
AAA)ry   r&   r   _LimitInFlightBytesr   r   r   r   r.   r   r   r   r   )r   r   r   r   arr_store_pathr   	arr_pathsr   r   r   r   r   r  r   s     ``         @r)   _read_arraysr  d  s.    ./.<HI~G,I)I ,,^<, #,- 224z?CE - - 144Dh0OQ,?G ++OWE Q( 	]QQK	*626:M N%%g4m,6uNN>
 
c,,?,A BC	DD- J
-Qs   D#D7 Dc                X   t        |       rt        j                  sJ d       t        |       t        z  j                         }t        j                  |      }t        t        |t        j                           }t        j                  j                  t        j                  |      |      S )aL  Loads a pytree from the given directory.

  This is a simple experimental array serialization API, for anything more
  complex and for all checkpointing prefer: https://github.com/google/orbax

  Args:
    directory: Directory path to load from.
  Returns:
    The loaded pytree with arrays represented as jax.ShapeDtypeStruct's.
  For checkpointing using remote URLs (e.g., gs, s3) you need `etils` module installed. You can install it using `pip install etils`.)rs   ru   r   ry   r   	read_textr   loadsrT   ri   r   r   r   r   r   deserialize_pytreedef)r   json_contentraw_treer   s       r)   r   r     s     Y'7+B+B HGH	B Y'/9DDF,ZZ%(}hu':':;<&			E77A6	JJr+   maskr   c               \   t        |       rt        j                  sJ d       t        |       }|j	                         sJ d| d       d }t        |       }t        ||      }t        ||t        j                        }|$d }t        j                  j                  |||      }t        j                  j                  ||      }t        j                  j                  ||      }	t        j                  j                  ||      }
t        d |
D              rt        d	      t        j                  j                  |t        j                        }t!        |	      D cg c]	  \  }}|| }}}|D cg c]  }|
|   	 }
}|D cg c]  }||   	 }}t"        j%                  t&        |t(        z  |||
      }|j+                         }t!        |	      D cg c]  \  }}|j-                  |d       }}}t        j                  j/                  ||      S c c}}w c c}w c c}w c c}}w )
a  Loads and reconstructs a data structure from a directory.

  This is a simple experimental array serialization API, for anything more
  complex and for all checkpointing prefer: https://github.com/google/orbax

  Args:
    directory: Directory path where the data is stored.
    shardings: Sharding strategy for array objects, either a Sharding or a
      ShapeDtypeStruct with a Sharding/Format.
    mask: boolean prefix tree for partial loading, will return None for False
      leaves.
    ts_specs: Optional tensorstore specs to use for deserialization. If None,
      defaults to using the default tensorstore specs.

  Returns:
    Reconstructed data.

  Example:
    >>> save(data, directory)
    >>> restored_data = load(directory, SingleDeviceSharding(jax.devices()[0]))
  r  zCheckpoint directory z does not existc                
    | d u S rF   r   rJ   s    r)   r   zload.<locals>.<lambda>  s
    a4i r+   r   Nc                L    | s!t         j                  j                  d |      S |S )Nc                     y rF   r   )r   s    r)   r   z(load.<locals>.<lambda>.<locals>.<lambda>  s    r+   )r   r   rT   )mrK   s     r)   r   zload.<locals>.<lambda>  s    q^Q ? a r+   c              3  <   K   | ]  }t        |t                y wrF   )rG   r   )rn   r   s     r)   rp   zload.<locals>.<genexpr>  s     G9Iv	&Gs   zDeserialization with `Format` instead of `Sharding` is not currently supported. Pass ShapeDtypeStruct(shape, dtype, sharding=format) instead.)rs   ru   r   ry   r   r   r   r   r   r   r   rT   r   r   rq   NotImplementedErrorr   r   r   r  r   r   getr   )r   r   r  r   r}   r   pytree_prefix_maskr   leaf_ids_flatshardings_flatr   r   r   r   arrs_futr   r   filled_valuess                      r)   r   r     s   0 Y'7+B+B HGH	B 
I	$	E/v_EE' )$&i0)Xv%,%E%EG(	OLXX\\,f5Fhh   9)((//&'/:-88??9g?>.GGG
	  ((//(*1*J*J " L- '0&> *
7(  *, */;<!N1%<.<-9:=#:-:$++D//}( 
	$1:=1IJA488At$J-J			I}	55*<: Ks   "H7H	H#H(c          	         t        j                  t        j                  t        | |||            }t
        j                  j                  d |       |_        |S )a  Nonblocking alias of save, return an awaitable future with a pytree stub.

  This is a simple experimental array serialization API, for anything more
  complex and for all checkpointing prefer: https://github.com/google/orbax

  Examples:
    >>> fut = nonblocking_save(data, directory)
    >>> print(fut.pytree)  # a pytree of jax.ShapeDtypeStruct's
    >>> print(fut.result())  # None, blocking until the serialization is done
  r   c                p    t        |       r*t        j                  | j                  | j                        S | S rF   )rL   r   rc   rO   r   rJ   s    r)   r   z"nonblocking_save.<locals>.<lambda>  s.    -a0 '*&:&:177AGG&L 67 r+   )	r   PyTreeFuturer   r   r   r   r   rT   r  )r   r   r   r   r   s        r)   r   r     sW     	299
D)y8 : E 	F# xx|| 89=?#*	*r+   c          	         t        j                  t        j                  t        | |||            }t        |       |_        |S )a  Nonblocking alias of load, return an awaitable future with a pytree stub.

  This is a simple experimental array serialization API, for anything more
  complex and for all checkpointing prefer: https://github.com/google/orbax

  Examples:
    >>> fut = nonblocking_load(directory)
    >>> print(fut.pytree)  # a pytree of jax.ShapeDtypeStruct
    >>> print(fut.result())  # the fully populated pytree
  r  )r   r  r   r   r   r   r  )r   r   r  r   r   s        r)   r   r     sE      	299
Iyth : @ 	A#i(#*	*r+   )return
str | None)r   str | PathLike[str]r!  r5   ) )rA   r"  rB   r&   r!  None)r!  r&   )re   r"  r!  z!str | None | jax.ShapeDtypeStruct)r   r#  )r   r#  r!  r   )r}   r   r!  r%  )r}   r#  r   r5   r   dict[str, Any]r   r5   r   r"  r!  r&  )
r}   r#  r   r5   r   r&  r   r5   r   r"  )
r   r   r   	list[Any]r   	list[int]r   zlist[Any | None]r   r5   )r   r5   )r   r   r   r&  r   r5   )
r   PyTreeTr   r#  r   r5   r   PyTreeT | Noner!  r%  )r   r#  r   r(  r   r'  r   r'  )r   r#  r!  r)  )
r   r#  r   r)  r  r*  r   r*  r!  r)  )
r   r)  r   r#  r   r5   r   r*  r!  utils.PyTreeFuture)
r   r#  r   r)  r  r*  r   r*  r!  r+  )O__doc__
__future__r   r   r   r\   typingr   uuidr   r   r   r   	threadingconcurrent.futuresr   r|   loggingr   jax._srcr	   jax._src.api_utilr
   jax._src.layoutr   jax.experimentalr   $jax.experimental.array_serializationr   r   ?jax.experimental.array_serialization.pytree_serialization_utilsexperimentalr   pytree_serialization_utilsr   r   ru   rd   r!   	getLoggerrV   loggerLockr   rr   r   r   r   r/   r   rP   r^   _MAX_CONCURRENCYr@   r)  __all__r*   r9   rD   rL   rX   ri   rs   ry   r~   r   r   r   r   r   r   r   r   r   r  r   r   r   r   r   r+   r)   <module>r@     sQ   #  	 	      1   
   * " , L O O $ 			8	$%y~~' ) "
 $ . =  
3,7H0
A13%3IM",1?>'5KO!+.# )#5E#&*#:M$I)-I #6 D -9IJ   DJJ+9JEIJ< !T3 3 ,:3 FJ3 lE$E1:E:K* !%@6@60>@6@6H (, $7E,. -104)-9Kr+   