
    biD                        d Z ddlZddlZddlmZ ddlmZmZ ddlZddl	m
Z
 ddlmZ  G d d	ej                        Zd
 Zd Zd Zd Zej$                   e
j&                  d      k  rd Zn
 e       rd Z ee      d        Zd Zd Zd Zd Zd Zd Zej$                   e
j&                  d      k  r ee      d        Zy e       r ee      d        Zyy)zPExtends `dill` to support pickling more types and produce more consistent dumps.    N)BytesIO)CodeTypeFunctionType)version   )configc                        e Zd Zej                  j                  ej                  j                  j                               ZdZ	ddZ
 fdZd Z xZS )PicklerFc                    t        |      }|| j                  vrdt        j                  v r'dd l}||j
                  u r t        |      t               dt        j                  v r/dd l}t        ||j                        r t        |      t               dt        j                  v r'dd l}||j                  u r t        |      t               dt        j                  v rdd l}t        ||j                         r t        |      t"               ||j$                  u r t        |      t&               t        ||j(                  j*                        rt-        |d|      }dt        j                  v r/dd l}t        ||j0                        r t        |      t2               |t4        u rt-        |d|      }t6        j8                  j;                  | ||	       y )
Nregexr   spacytiktokentorch	_orig_modtransformers_torchdynamo_orig_callable)save_persistent_id)typedispatchsysmodulesr   Patternpklregister_save_regexPatternr   
issubclassLanguage_save_spacyLanguager   Encoding_save_tiktokenEncodingr   Tensor_save_torchTensor	Generator_save_torchGeneratornnModulegetattrr   PreTrainedTokenizerBase)_save_transformersPreTrainedTokenizerBaser   dillr
   save)	selfobjr   obj_typer   r   r   r   r   s	            O/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/datasets/utils/_dill.pyr*   zPickler.save   si   94==(#++%u}},)K)*<=#++%h7)K)*=>S[[(x000)K)*@A#++%h5)K)*;<u.)K)*>? h8!#{C8C,#h(L(LM)K)*ST |##;SAC$8JK    c                     | j                   rt        | 	  |      S 	 t        |      }t        j                  j                  | |       y # t        $ r ddlm t        |fd      }Y Bw xY w)Nr   Hasherc                 ,    j                  | d         S )Nr   )hash)xr2   s    r.   <lambda>z)Pickler._batch_setitems.<locals>.<lambda>R   s    AaD0A r/   key)	_legacy_no_dict_keys_sortingsuper_batch_setitemssorted	Exceptiondatasets.fingerprintr2   r)   r
   )r+   itemsr2   	__class__s     @r.   r;   zPickler._batch_setitemsH   sd    ,,7*511	C5ME
 	$$T51	  	C35&ABE	Cs   A A-,A-c                 h    t        |      t        ur!t        j                  j	                  | |       y y N)r   strr)   r
   memoize)r+   r,   s     r.   rD   zPickler.memoizeU   s'    9CLL  s+  r/   )T)__name__
__module____qualname__r)   _dillMetaCatchingDictr
   r   copyr9   r*   r;   rD   __classcell__)r@   s   @r.   r
   r
      sA    zz**4<<+@+@+E+E+GHH#( 'LR2,r/   r
   c                       fd}|S )z'Register a custom reducer for the type.c                 .    | t         j                  <   | S rB   )r
   r   )functs    r.   proxyzpklregister.<locals>.proxy^   s    "r/    )rO   rP   s   ` r.   r   r   [   s     Lr/   c                  l   t         j                  j                  dd t        j                  d      j                  t        j                  d      j                  t        j                  d      j                  t        j                  d      j                  t        j                  d      j                  fv S )z<Check if the current dill version is in the supported range.N   0.3.6z0.3.7z0.3.8z0.3.9z0.4.0)r   DILL_VERSIONreleaser   parserQ   r/   r.   _is_supported_dill_versionrX   e   s    &&r*g&&g&&g&&g&&g&&/  r/   c                 <    t        |d      j                  |        y)zPickle an object to a file.T)recurseN)r
   dumpr,   files     r.   r[   r[   p   s    D$$$S)r/   c                 N    t               }t        | |       |j                         S )zPickle an object to a string.)r   r[   getvaluer\   s     r.   dumpsr`   u   s    9DdO==?r/   rT   c                 V    t         j                  j                  j                  |       y rB   )r)   rH   loginfopicklermsgs     r.   rb   rb   ~   s    

C r/   c                 X    t         j                  j                  j                  | |       y rB   )r)   rH   loggertracerd   s     r.   rb   rb      s    

-r/   c                     t        | d|        	 t        |      f}| j                  t        ||       t        | d       y # t        $ r! ddlm} t        ||j
                        f}Y Nw xY w)NzSe: r   r1   r7   r,   z# Se)rb   r<   r=   r>   r2   r4   save_reduceset)re   r,   argsr2   s       r.   	_save_setro      sj    4u/s~ Ts+  //s,./s   A 'A,+A,c                     dd l }t        | d|        |j                  |j                  f}| j	                  |j
                  ||       t        | d       y )Nr   zRe: rk   z# Re)r   rb   patternflagsrl   compile)re   r,   r   rn   s       r.   r   r      sJ    4uKK#Dt5r/   c                     dd l }t        | d|        |j                  |j                  |j                  |j
                  f}| j                  |j                  ||       t        | d       y )Nr   zEnc: rk   z# Enc)r   rb   name_pat_str_mergeable_ranks_special_tokensrl   r   )re   r,   r   rn   s       r.   r   r      s\    5HHcllC$8$8#:M:MND))4S9r/   c                    dd l dfd	}t        | d|        |j                  j                  k(  rR|j	                         j                  j                        j                         j                         j                  f}n-|j	                         j                         j                         f}| j                  |||       t        | d       y )Nr   c                 P    j                  |       }|r|j                  |      }|S rB   )
from_numpyr   )np_arraydtypetensorr   s      r.   create_torchTensorz-_save_torchTensor.<locals>.create_torchTensor   s)    !!(+[['Fr/   zTo: rk   z# TorB   )
r   rb   r}   bfloat16detachtofloatcpunumpyrl   )re   r,   r   rn   r   s       @r.   r!   r!      s     4u
yyENN"

,00288:ENNK

  "((*,*Dc:r/   c                     dd l fd}t        | d|        |j                         f}| j                  |||       t        | d       y )Nr   c                 J    j                         }|j                  |        |S rB   )r"   	set_state)state	generatorr   s     r.   create_torchGeneratorz3_save_torchGenerator.<locals>.create_torchGenerator   s#    OO%	E"r/   zGe: rk   z# Ge)r   rb   	get_staterl   )re   r,   r   rn   r   s       @r.   r#   r#      sK    
 4uMMOD-t=r/   c                     dd l fd}t        | d|        |j                  |j                         f}| j	                  |||       t        | d       y )Nr   c                     j                   j                  | d   d         }|j                  |       }|j                  |      S )Nnlplang)utilget_lang_classfrom_config
from_bytes)r   byteslang_cls	lang_instr   s       r.   create_spacyLanguagez1_save_spacyLanguage.<locals>.create_spacyLanguage   sA    ::,,VE]6-BC((0	##E**r/   zSp: rk   z# Sp)r   rb   r   to_bytesrl   )re   r,   r   rn   r   s       @r.   r   r      sQ    +
 4uJJ'D,d<r/   c                     t        | d|        |j                  }d|v rt        |d   t              ri |d<   | j	                  t        |      d||       t        | d       y )NzTok: cacherQ   )r   r,   z# Tok)rb   __dict__
isinstancedictrl   r   )re   r,   r   s      r.   r(   r(      s^    5LLE%JuW~t<gS	2U<r/   c                    t         j                  j                  j                  d|        |j                  j                  d      st        |j                  j                  t        j                  j                              dkD  rE|j                  j                  t        j                  j                        d   j                  d      s|j                  dk(  rdn(t        j                  j                  |j                        }d}t         j                  j                  r_t        |d      r|j                  |j                   |j"                  |j$                  |j&                  |j(                  |j*                  |j,                  |j.                  |j0                  ||j                  |t2        j4                  d	k\  r|j6                  n|j8                  |j:                  |j<                  f}n|j                  |j"                  |j$                  |j&                  |j(                  |j*                  |j,                  |j.                  |j0                  ||j                  ||j8                  |j:                  |j<                  f}n|j                  |j$                  |j&                  |j(                  |j*                  |j,                  |j.                  |j0                  ||j                  ||j8                  |j:                  |j<                  f}| j?                  t@        ||
       t         j                  j                  j                  d       y)z
        From dill._dill.save_code
        This is a modified version that removes the origin (filename + line no.)
        of functions created in notebooks or shells for example.
        zCo: <   
ipykernel_<lambda> co_posonlyargcount)rS   
   rk   # CoN)!r)   rH   rb   rc   co_filename
startswithlensplitospathsepco_namebasenamePY3hasattrco_argcountr   co_kwonlyargcount
co_nlocalsco_stacksizeco_flagsco_code	co_constsco_namesco_varnamesr   version_infoco_linetable	co_lnotabco_freevarsco_cellvarsrl   r   re   r,   r   co_firstlinenorn   s        r.   
_save_coder      sZ    	

d3%L)& ))#.COO))"''++67!;OO))"''++6r:EElS{{j(  !!#//2 	 ::>>s01OO**))NN$$LLKKMMLLOOKK"(+(8(8G(CC$$OOOO!( OO))NN$$LLKKMMLLOOKK"MMOOOO&   D  	Hd4

F#r/   c                 
   t         j                  j                  j                  | d|       |j                  j                  d      st        |j                  j                  t        j                  j                              dkD  rE|j                  j                  t        j                  j                        d   j                  d      s|j                  dk(  rdn(t        j                  j                  |j                        }d}t        |d      r|j                  |j                  |j                   |j"                  |j$                  |j&                  |j(                  |j*                  |j,                  |j.                  |j0                  ||j                  |j2                  ||j                  |j4                  |j6                  |j8                  |j:                  |j<                  f}nt        |d	      r|j                  |j                  |j                   |j"                  |j$                  |j&                  |j(                  |j*                  |j,                  |j.                  |j0                  ||j                  |j2                  ||j                  |j8                  |j:                  |j<                  f}nt        |d
      r|j                  |j                  |j                   |j"                  |j$                  |j&                  |j(                  |j*                  |j,                  |j.                  |j0                  ||j                  ||j                  |j:                  |j<                  f}n>t        |d      r|j                  |j                   |j"                  |j$                  |j&                  |j(                  |j*                  |j,                  |j.                  |j0                  ||j                  ||j>                  |j:                  |j<                  f}n|j                  |j"                  |j$                  |j&                  |j(                  |j*                  |j,                  |j.                  |j0                  ||j                  ||j>                  |j:                  |j<                  f}| jA                  t         j                  jB                  ||       t         j                  j                  j                  | d       y )NzCo: %sr   r   r   r   r   r   co_endlinetableco_exceptiontabler   r   rk   r   )"r)   rH   rh   ri   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   co_qualnamer   co_columntabler   r   r   r   rl   _create_coder   s        r.   	save_coder   D  s   

37, ))#.COO))"''++67!;OO))"''++6r:EElS{{j(  !!#//2 	  3)*  &&%%    ##""%%+D. S-.  &&%%    %%'D* S.)  &&%%    #D& S./&&%%  !D( %%  D$ 	DJJ33TsC

0r/   )__doc__r   r   ior   typesr   r   r)   	packagingr   r   r   r
   r   rX   r[   r`   rU   rW   rb   rm   ro   r   r   r!   r#   r   r(   r   r   rQ   r/   r.   <module>r      s   W 	 
  (   =,dll =,@*
 
w//!  !. S & 
w//\ \|  !P P "r/   