
    bi                      D   d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZm	Z	 d dl
Zd dlZd dlZd dlZd dlmZmZ ej$                  j&                  j)                  e      Zd Zdedee   dee   d	ed
eej4                     deeeef   fdZ G d dej:                        Zy)    N)Path)OptionalUnion)camelcase_to_snakecasefilenames_for_dataset_splitc                 H    t        |       j                         j                  S N)r   statst_mtime)cached_directory_paths    `/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/datasets/packaged_modules/cache/cache.py_get_modification_timer      s    &'--/888    dataset_nameconfig_name	cache_dirconfig_kwargscustom_featuresreturnc                    |s|s|r+t        j                  |xs d      j                  ||      }nd }t        j                  j                  t        |xs t         j                  j                              }| j                  d      }t        |d         |d<   dj                  |      }t        j                  j                  ||      }t        j                  t        j                  j                  ||xs ddd            D 	cg c]r  }	t        j                  j                  |	      rQ|sM|sKt        j                  t!        |	d      j#                  d	            d
   t!        |	      j$                  d   k(  r|	t }
}	|
st        j                  t        j                  j                  |ddd            D 	cg c]#  }	t        j                  j                  |	      r|	% }
}	t'        |
D 	ch c]  }	t!        |	      j$                  d    c}	      }t)        d|  |rd| dndz   |rd| z         dz         t!        t'        |
t*              d         }	|	j$                  dd  \  }}t        j                  t        j                  j                  |d||            D cg c]  }t        j                  j                  |      rg|sM|sKt        j                  t!        |d      j#                  d	            d
   t!        |      j$                  d   k(  rt!        |      j$                  d    }}|s8t-        |      dkD  r*t)        d|  ddj                  |       d|  d|d    d	      |	j$                  d   }d| d|	 dt/        j0                  t+        |	             d}t2        j5                  |       |||fS c c}	w c c}	w c c}	w c c}w )Ndefault)r   r   /___*zdataset_info.jsonzutf-8)encodingr   zCouldn't find cache for z for config '' z!
Available configs in the cache: )key   zThere are multiple 'z' configurations in the cache: z, zR
Please specify which configuration to reload from the cache, e.g.
	load_dataset('z', 'r   z')z/Found the latest cached dataset configuration 'z' at z (last modified on z).)datasetsBuilderConfigcreate_config_idospath
expanduserstrconfigHF_DATASETS_CACHEsplitr   joinglobisdirjsonloadsr   	read_textpartssorted
ValueErrorr   lentimectimeloggerwarning)r   r   r   r   r   	config_idnamespace_and_dataset_namecached_relative_path#cached_datasets_directory_path_rootr   cached_directory_pathsavailable_configsversionhash_cached_directory_pathother_configswarning_msgs                    r   _find_hash_in_cacherF      s    m**;+C)DUU' V 
	 	""3y'UHOO4U4U#VWI!-!3!3C!8%;<VWY<Z%[r" ::&@A*,'',,yBV*W' &*YYGGLL<i>N3PSUXY&
! 77==./zz$46IJTT^eTfghuv)*0045 	  " *.277<<@cehjmor3s)t"
%ww}}23 ""
 "

 #Pfg7LT'(..r2g
 &|n5/8yk+bBK\34E3FGf
 	
 cef
 	
 !(>DZ![\^!_`)//4MGT '+ii=`begnpt0u&v
"77==/0zz$57JKUU_fUghivw*+11"56 	#$**2.
M 
 ]+a/"<.0OPTPYPYZgPhOi!!-d=3C2DBH
 	

 (--b1K
9+eLaKb c!ZZ(>?T(UVWWY	[  NN;%%k"
 h
s   A7N:8(N?+O BO	c                   r    e Zd Z	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddee   dee   dee   dee   dee   dee   deej                     deej                     d	eee	ef      d
ee   deeee
eej                  j                  f      dee   dee   dee   f fdZdej                  fdZddee   fdZd Zd Z xZS )Cacher   r   r   rA   rB   	base_pathinfofeaturestokenrepo_id
data_filesdata_dirstorage_optionswriter_batch_sizec                     |
|t        d      |||d<   |||d<   |dk(  r|dk(  rt        |
xs |||||      \  }}}n|dk(  s|dk(  rt        d      t        |   ||||||||	|
||       y )NzArepo_id or dataset_name is required for the Cache dataset builderrN   rO   auto)r   r   r   r   r   z0Pass both hash='auto' and version='auto' instead)r   r   r   rA   rB   rI   rJ   rL   rM   rP   rQ   )r5   rF   NotImplementedErrorsuper__init__)selfr   r   r   rA   rB   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   r   	__class__s                   r   rV   zCache.__init__c   s    $ ?|3`aa!*4M,'(0M*%6>g/)<$4'#+ (*&K$ V^w&0%&XYY%#+/ 	 	
r   r   c                 *    t        j                         S r	   )r#   DatasetInfo)rW   s    r   _infozCache._info   s    ##%%r   
output_dirc                    t         j                  j                  | j                        s%t	        d| j
                   d| j                         |1|| j                  k7  r!t        j                  | j                  |       y y y )NzCache directory for z doesn't exist at )r&   r'   existsr   r5   r   shutilcopytree)rW   r\   argskwargss       r   download_and_preparezCache.download_and_prepare   sk    ww~~dnn-3D4E4E3FFXY]YgYgXhijj!jDNN&BOODNNJ7 'C!r   c                    t        | j                  j                  t        j                        r.t        | j                  j                  j                               }n%t        d| j                   d| j                         |D cg c][  }t        j                  |j                  dt        | j                  | j                  |j                  d|j                        i      ] c}S c c}w )NzMissing splits info for z in cache directory filesarrow)r   r,   filetype_suffixshard_lengths)name
gen_kwargs)
isinstancerJ   splitsr#   	SplitDictlistvaluesr5   r   r   SplitGeneratorri   r   rh   )rW   
dl_managersplit_infos
split_infos       r   _split_generatorszCache._split_generators   s    dii&&(:(:;489I9I9P9P9R4SK78I8I7JJ^_c_m_m^nopp *
  ##__8%)%6%6(oo(/&0&>&>
 	
 
s   A C)c              #     K   t        |      D ]w  \  }}t        |d      5 }	 t        t        j                  j	                  |            D ]0  \  }}t        j
                  j                  |g      }| d| |f 2 	 d d d        y y # t        $ r-}t        j                  d| dt        |       d|         d }~ww xY w# 1 sw Y   xY ww)Nrb_zFailed to read file 'z' with error z: )	enumerateopenpaipcopen_streamTablefrom_batchesr5   r9   errortype)	rW   re   file_idxfilef	batch_idxrecord_batchpa_tablees	            r   _generate_tableszCache._generate_tables   s     '. 	NHddD! 
Q	3<RVV=O=OPQ=R3S B/	<#%88#8#8,#H "*
!I;7AAB
 
	 " LL#8mDQRG9TVWXVY!Z[
 
s:   CCAB	>C		B?(B::B??CC	C)NNNz0.0.0NNNNNNNNNNr	   )__name__
__module____qualname__r   r)   r#   rZ   Featuresr   boolrn   dictrN   DataFilesDictintrV   r[   rc   rt   r   __classcell__)rX   s   @r   rH   rH   b   sV    $(&*%)!("#'/304,0!%Z^"&*.+/.
C=.
 sm.
 c]	.

 #.
 sm.
 C=.
 x++,.
 8,,-.
 dCi().
 #.
 U3dH4G4G4U4U#UVW.
 3-.
 "$.
 $C=.
`&x++ &8x} 8
,r   rH   )r.   r0   r&   r_   r7   pathlibr   typingr   r   pyarrowrz   r#   datasets.configdatasets.data_filesdatasets.namingr   r   utilslogging
get_loggerr   r9   r   r)   r   r   tuplerF   ArrowBasedBuilderrH    r   r   <module>r      s      	    "     O 
			*	*8	49G&G&#G& }G& 	G&
 h//0G& 3S=G&T]H&& ]r   