
    bi$              	          d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlm	Z	m
Z
mZ d dlZd dlmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZ ddlmZ  ee      Z G d dej8                        Zdedeee   ef   fdZ  G d de!ee!ee	f   f         Z"i dg dg dg dg dg dg dg dg dg dg dg dg d g d!g d"g d#g d$g i d%g d&g d'g d(g d)g d*g d+g d,g d-g d.g d/g d0g d1g d2g d3g d4g d5g g g g d6Z#y)7    N)Counter)groupby)
itemgetter)AnyClassVarOptional)DatasetCardData   )METADATA_CONFIGS_FIELD)Features)DatasetInfoDatasetInfosDict)	_split_re)
get_loggerc                   &     e Zd Zd Zd fd	Z xZS )_NoDuplicateSafeLoaderc                 4   |j                   D cg c]  \  }}| j                  |    }}}|D cg c]  }t        |t              rt	        |      n|! }}t        |      }|D cg c]  }||   dkD  s| }}|rt        d|       y c c}}w c c}w c c}w )N   zGot duplicate yaml keys: )valueconstructed_objects
isinstancelisttupler   	TypeError)selfnodekey_node_keyskeycounterduplicate_keyss           R/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/datasets/utils/metadata.py(_check_no_duplicates_on_constructed_nodez?_NoDuplicateSafeLoader._check_no_duplicates_on_constructed_node   s    FJjjQ{x((2QQHLMjd3c
<MM$-)0E#GCL14D#EE77GHII 	 RMEs   B
$B%B3Bc                 L    t         |   ||      }| j                  |       |S )N)deep)superconstruct_mappingr$   )r   r   r&   mapping	__class__s       r#   r(   z(_NoDuplicateSafeLoader.construct_mapping   s*    '+Dt+<55d;    )F)__name__
__module____qualname__r$   r(   __classcell__)r*   s   @r#   r   r      s    J r+   r   readme_contentreturnc                    t        | j                               }|rS|d   dk(  rKd|dd  v rD|dd  j                  d      dz   }dj                  |d|       }|dj                  ||dz   d        fS d dj                  |      fS )Nr   z---r   
)r   
splitlinesindexjoin)r0   full_contentsep_idx	yamlblocks       r#   _split_yaml_from_readmer:   $   s    1134LQ50Ul12>N5Nqr"((/!3IIl1W56	$))L1$?@@@<(((r+   c            	           e Zd ZU dZeZee   ed<   e	de
fd       Zededee
eef      dedd fd	       Zed
edd fd       Zd
eddfdZdee   fdZy)MetadataConfigsz5Should be in format {config_name: {**config_params}}.
FIELD_NAMEmetadata_configc                    | j                  d      }|t        j                  d| d      }t        |t        t
        f      st        |      t        |t              r|D ]  }t        |t
        t        f      rft        |t              s*t        |      dk(  rGd|v rCt        j                  t        |d         r&t        |j                  d      t
        t        f      rt        |       y y y )N
data_filesz
                Expected data_files in YAML to be either a string or a list of strings
                or a list of dicts with two keys: 'split' and 'path', but got a  
                Examples of data_files in YAML:

                   data_files: data.csv

                   data_files: data/*.png

                   data_files:
                    - part0/*
                    - part1/*

                   data_files:
                    - split: train
                      path: train/*
                    - split: test
                      path: test/*

                   data_files:
                    - split: train
                      path:
                      - train/part1/*
                      - train/part2/*
                    - split: test
                      path: test/*

                PS: some symbols like dashes '-' are not allowed in split names
                r
   splitpath)gettextwrapdedentr   r   str
ValueErrordictlenrematchr   )r>   yaml_data_filesyaml_error_messageyaml_data_files_items       r#   $_raise_if_data_files_field_not_validz4MetadataConfigs._raise_if_data_files_field_not_valid3   s    )--l;&!)OO^N_ `"> oc{; !344/40,; =(&';c4[I%&:DA 45: '+? ? "4H4Q R *+?+C+CF+KcSW[ Y ));<<= 1E 'r+   parquet_commit_hashexported_parquet_filesdataset_infosr1   c                 r   t        |t        d            D ci c]  \  }}|t        |t        d            D cg c]+  \  }}||D cg c]  }|d   j                  d|       c}d- c}}}t        |j	                  |t                     j                  xs d      d }	}}}}}|r_|j                         D 
ci c]@  \  }}
||
j                  D cg c]  }|	|   d   D ]  }|d   |k(  r|  c}}|	|   d	   dB }	}}
}} | |	      S c c}w c c}}}w c c}}}}}w c c}}w c c}}}
}w )
NconfigrA   urlzrefs%2Fconvert%2Fparquet)rA   rB   z0.0.0)r@   versionr@   rV   )	r   r   replacerF   rC   r   rV   itemssplits)clsrP   rQ   rR   config_nameparquet_files_for_config
split_nameparquet_files_for_splitparquet_filemetadata_configsdataset_info	data_files               r#   ._from_exported_parquet_files_and_dataset_infosz>MetadataConfigs._from_exported_parquet_files_and_dataset_infosf   s   * :AAWYcdlYm9n
 
 65  @GG_aklsat?u	 	 <
$; ", 1H! , )/778RTgh!	 }00kmLTT_X_` 
 
   2?1D1D1F    .K  +7*=*=#&)9+)F|)T# &$W-; "#!#  0<YG     #$$3!	
(# s@   "D"DD 'D.9D"D1
D+:D1
DD"+D1
dataset_card_datac                    |j                  | j                        r|| j                     }t        |t              st	        d| j                   d| d      |D ]&  }d|vrt	        d| d      | j                  |       (  | |D ci c]b  }|j                         x}rN|j                  d      |j                         D ci c]"  \  }}||dk7  r|nt        j                  |      $ c}}d c}}}      S  |        S c c}}w c c}}}w )Nz	Expected z to be a list, but got ''r[   zUEach config must include `config_name` field with a string name of a config, but got z. features)rC   r=   r   r   rG   rO   copypoprX   r   _from_yaml_list)rZ   rd   r`   r>   paramr   rT   s          r#   from_dataset_card_dataz&MetadataConfigs.from_dataset_card_data   s6     00@.5 9S^^,<<TUeTffg!hii#3 J 7$##2"327  88IJ  ,< 
 ("1"6"6"888 JJ}-,2LLN0(E5 (;uAYAYZ_A``0 	 	 u0s   
:D'D+DDNc                    | r| j                         D ]  }| j                  |        | j                  |      }t        t	        i || j                                     }|j                         D ]  \  }}|j                  dd         |j                         D cg c]  \  }}d|i| c}}|| j                  <   y y c c}}w )Nr[   )valuesrO   rl   rH   sortedrX   ri   r=   )r   rd   r>   current_metadata_configstotal_metadata_configsr[   config_metadatas          r#   to_dataset_card_dataz$MetadataConfigs.to_dataset_card_data   s    #';;= K99/JK'+'B'BCT'U$%)&1U4L1UPT1U1[1[1]*^%_"0F0L0L0N 9,_##M489 5K4P4P4R20K ??2doo. 2s   C c                     d }| j                         D ]@  \  }}t        |       dk(  s|dk(  s|j                  d      s+||}0t        d| d| d       |S )Nr   defaultz&Dataset has several default configs: 'z' and 'z'.)rX   rI   rC   rG   )r   default_config_namer[   r>   s       r#   get_default_config_namez'MetadataConfigs.get_default_config_name   s{    ",0JJL 	(K4yA~	!9_=P=PQZ=[&.*5'$@AT@UU\]h\iikl 	 #"r+   )r,   r-   r.   __doc__r   r=   r   rF   __annotations__staticmethodrH   rO   classmethodr   r   r   rc   r	   rl   rs   r   rw    r+   r#   r<   r<   .   s    ? 6J60=d 0= 0=d $% $% !%T#s(^ 4$% (	$%
 
$% $%L  K\  0o $ 
## 
#r+   r<   zimage-classificationtranslationzimage-segmentationz	fill-maskzautomatic-speech-recognitionztoken-classificationzsentence-similarityzaudio-classificationzquestion-answeringsummarizationzzero-shot-classificationztable-to-textzfeature-extractionotherzmultiple-choiceztext-classificationztext-to-imageztext2text-generationzzero-shot-image-classificationztabular-classificationztabular-regressionzimage-to-imageztabular-to-textzunconditional-image-generationztext-retrievalztext-to-speechzobject-detectionzaudio-to-audioztext-generationconversationalztable-question-answeringzvisual-question-answeringzimage-to-textzreinforcement-learning)zvoice-activity-detectionztime-series-forecastingzdocument-question-answering)$rJ   rD   collectionsr   	itertoolsr   operatorr   typingr   r   r   yamlhuggingface_hubr	   rT   r   rg   r   infor   r   namingr   utils.loggingr   r,   logger
SafeLoaderr   rF   r   r:   rH   r<   known_task_idsr|   r+   r#   <module>r      s@   	     * *  + +  0  & 
H	T__ )C )E(3-:L4M )O#d3S#X./ O#j&B&2& "& 	&
 #B& B& 2& B& "& R& & R& "& R& r&  2!&" R#&$ B%&& %b'&( b)&* "+&, b-&. r/&0 %b1&2 b3&4 b5&6 7&8 b9&: r;&< b=&> ?&@  A&B RC&D bE&F !#!#%K&r+   