
    bi=                     0   d Z ddlZddlmZmZ ddlmZmZ ddlm	Z	 ddl
mZ ddlmZ dd	lmZ dd
lmZmZmZ ddlmZ ddlmZ  ee      Z G d de      Z	 	 	 	 	 ddedeeeeef      dee	   deeeef      deeeef      deeeef      fdZ 	 	 	 	 ddedeeeef      dee	   deeeef      deeeeef      f
dZ!	 	 	 	 ddedeeeef      dee	   deeeef      deeeeef      dee   fdZ"	 	 	 	 	 	 ddedee   deeeee   eeeeee   f   f   f      dee	   deeeef      deeeef      deeeef      defdZ#	 	 	 	 	 	 ddedee   deeeee   eeeeee   f   f   f      dee	   deeeef      deeeef      deeeef      fdZ$y)zList and inspect datasets.    N)MappingSequence)OptionalUnion   )DownloadConfig)DownloadMode)StreamingDownloadManager)DatasetInfo)dataset_module_factoryget_dataset_builder_classload_dataset_builder)
get_logger)Versionc                       e Zd Zy)SplitsNotFoundErrorN)__name__
__module____qualname__     K/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/datasets/inspect.pyr   r   &   s    r   r   path
data_filesdownload_configdownload_moderevisiontokenc                 r    t        | |||||      }|D ci c]  }|t        d| ||||||d| c}S c c}w )a6  Get the meta information about a dataset, returned as a dict mapping config name to DatasetInfoDict.

    Args:
        path (`str`): path to the dataset repository. Can be either:

            - a local path to the dataset directory containing the data files,
                e.g. `'./dataset/squad'`
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`huggingface_hub.list_datasets`]),
                e.g. `'rajpurkar/squad'`, `'nyu-mll/glue'` or``'openai/webtext'`
        revision (`Union[str, datasets.Version]`, *optional*):
            If specified, the dataset module will be loaded from the datasets repository at this version.
            By default:
            - it is set to the local version of the lib.
            - it will also try to load it from the main branch if it's not available at the local version of the lib.
            Specifying a version that is different from your local version of the lib might cause compatibility issues.
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        download_mode ([`DownloadMode`] or `str`, defaults to `REUSE_DATASET_IF_EXISTS`):
            Download/generate mode.
        data_files (`Union[Dict, List, str]`, *optional*):
            Defining the data_files of the dataset configuration.
        token (`str` or `bool`, *optional*):
            Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If `True`, or not specified, will get token from `"~/.huggingface"`.
        **config_kwargs (additional keyword arguments):
            Optional attributes for builder class which will override the attributes if supplied.

    Example:

    ```py
    >>> from datasets import get_dataset_infos
    >>> get_dataset_infos('cornell-movie-review-data/rotten_tomatoes')
    {'default': DatasetInfo(description="Movie Review Dataset.
This is a dataset of containing 5,331 positive and 5,331 negative processed
sentences from Rotten Tomatoes movie reviews...), ...}
    ```
    )r   r   r   r   r   r   )r   config_namer   r   r   r   r   r   )get_dataset_config_namesget_dataset_config_info)	r   r   r   r   r   r   config_kwargsconfig_namesr    s	            r   get_dataset_infosr%   *   sv    X ,'#L& (  	, 	
#!+'	
 	
 		
  s   4c                    t        | f||||d|}t        |t        j                  j	                  |             }t        |j                  j                               xs+ |j                  j                  d|j                  xs d      gS )aG  Get the list of available config names for a particular dataset.

    Args:
        path (`str`): path to the dataset repository. Can be either:

            - a local path to the dataset directory containing the data files,
                e.g. `'./dataset/squad'`
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`huggingface_hub.list_datasets`]),
                e.g. `'rajpurkar/squad'`, `'nyu-mll/glue'` or``'openai/webtext'`
        revision (`Union[str, datasets.Version]`, *optional*):
            If specified, the dataset module will be loaded from the datasets repository at this version.
            By default:
            - it is set to the local version of the lib.
            - it will also try to load it from the main branch if it's not available at the local version of the lib.
            Specifying a version that is different from your local version of the lib might cause compatibility issues.
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        download_mode ([`DownloadMode`] or `str`, defaults to `REUSE_DATASET_IF_EXISTS`):
            Download/generate mode.
        data_files (`Union[Dict, List, str]`, *optional*):
            Defining the data_files of the dataset configuration.
        **download_kwargs (additional keyword arguments):
            Optional attributes for [`DownloadConfig`] which will override the attributes in `download_config` if supplied,
            for example `token`.

    Example:

    ```py
    >>> from datasets import get_dataset_config_names
    >>> get_dataset_config_names("nyu-mll/glue")
    ['cola',
     'sst2',
     'mrpc',
     'qqp',
     'stsb',
     'mnli',
     'mnli_mismatched',
     'mnli_matched',
     'qnli',
     'rte',
     'wnli',
     'ax']
    ```
    r   r   r   r   dataset_namer    default)r   r   osr   basenamelistbuilder_configskeysbuilder_kwargsgetDEFAULT_CONFIG_NAME)r   r   r   r   r   download_kwargsdataset_modulebuilder_clss           r   r!   r!   m   s    h ,'# N ,NIYIYZ^I_`K++0023 %%))-9X9X9e\ef8 r   returnc                    t        | f||||d|}t        |t        j                  j	                  |             }t        |j                  j                               }|rt        |      dk(  r|d   nd}	nd}	|j                  xs |	S )aW  Get the default config name for a particular dataset.
    Can return None only if the dataset has multiple configurations and no default configuration.

    Args:
        path (`str`): path to the dataset repository. Can be either:

            - a local path to the dataset directory containing the data files,
                e.g. `'./dataset/squad'`
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`huggingface_hub.list_datasets`]),
                e.g. `'rajpurkar/squad'`, `'nyu-mll/glue'` or``'openai/webtext'`
        revision (`Union[str, datasets.Version]`, *optional*):
            If specified, the dataset module will be loaded from the datasets repository at this version.
            By default:
            - it is set to the local version of the lib.
            - it will also try to load it from the main branch if it's not available at the local version of the lib.
            Specifying a version that is different from your local version of the lib might cause compatibility issues.
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        download_mode ([`DownloadMode`] or `str`, defaults to `REUSE_DATASET_IF_EXISTS`):
            Download/generate mode.
        data_files (`Union[Dict, List, str]`, *optional*):
            Defining the data_files of the dataset configuration.
        **download_kwargs (additional keyword arguments):
            Optional attributes for [`DownloadConfig`] which will override the attributes in `download_config` if supplied,
            for example `token`.

    Returns:
        Optional[str]: the default config name if there is one

    Example:

    ```py
    >>> from datasets import get_dataset_default_config_name
    >>> get_dataset_default_config_name("openbookqa")
    'main'
    ```
    r'   r(   r   r   Nr*   )
r   r   r+   r   r,   r-   r.   r/   lenr2   )
r   r   r   r   r   r3   r4   r5   r.   default_config_names
             r   get_dataset_default_config_namer:      s    Z ,'# N ,NIYIYZ^I_`K;66;;=>O474HA4Moa0SW'**A.AAr   r    c           
         t        | f||||||d|}|j                  }	|	j                  |r|j                         n	t	               }|||_        |j                  t        |j                  |             	 |j                  t        |j                  |            D 
ci c]  }
|
j                  |
j                  | d c}
|	_        |	S |	S c c}
w # t        $ r}t        d      |d}~ww xY w)a  Get the meta information (DatasetInfo) about a dataset for a particular config

    Args:
        path (`str`): path to the dataset repository. Can be either:

            - a local path to the dataset directory containing the data files,
                e.g. `'./dataset/squad'`
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`huggingface_hub.list_datasets`]),
                e.g. `'rajpurkar/squad'`, `'nyu-mll/glue'` or``'openai/webtext'`
        config_name (:obj:`str`, optional): Defining the name of the dataset configuration.
        data_files (:obj:`str` or :obj:`Sequence` or :obj:`Mapping`, optional): Path(s) to source data file(s).
        download_config (:class:`~download.DownloadConfig`, optional): Specific download configuration parameters.
        download_mode (:class:`DownloadMode` or :obj:`str`, default ``REUSE_DATASET_IF_EXISTS``): Download/generate mode.
        revision (:class:`~utils.Version` or :obj:`str`, optional): Version of the dataset to load.
            As datasets have their own git repository on the Datasets Hub, the default version "main" corresponds to their "main" branch.
            You can specify a different version than the default "main" by using a commit SHA or a git tag of the dataset repository.
        token (``str`` or :obj:`bool`, optional): Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If True, or not specified, will get token from `"~/.huggingface"`.
        **config_kwargs (additional keyword arguments): optional attributes for builder class which will override the attributes if supplied.

    )namer   r   r   r   r   N)	base_pathr   )r<   r)   z<The split names could not be parsed from the dataset config.)r   infosplitscopyr   r   _check_manual_downloadr
   r=   _split_generatorsr<   	Exceptionr   )r   r    r   r   r   r   r   r#   builderr>   split_generatorerrs               r   r"   r"      s   > #	'#	 	G <<D{{4C/..0IY$)O!&&$w/@/@Rab	
	o (/'@'@,w7H7HZij(#  $$/C/CUY&ZZDK K4K  	o%&deknn	os*   8(C  !CC C 	C,C''C,c           
      n    t        | f||||||d|}t        |j                  j                               S )ae  Get the list of available splits for a particular config and dataset.

    Args:
        path (`str`): path to the dataset repository. Can be either:

            - a local path to the dataset directory containing the data files,
                e.g. `'./dataset/squad'`
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`huggingface_hub.list_datasets`]),
                e.g. `'rajpurkar/squad'`, `'nyu-mll/glue'` or``'openai/webtext'`
        config_name (`str`, *optional*):
            Defining the name of the dataset configuration.
        data_files (`str` or `Sequence` or `Mapping`, *optional*):
            Path(s) to source data file(s).
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        download_mode ([`DownloadMode`] or `str`, defaults to `REUSE_DATASET_IF_EXISTS`):
            Download/generate mode.
        revision ([`Version`] or `str`, *optional*):
            Version of the dataset to load.
            As datasets have their own git repository on the Datasets Hub, the default version "main" corresponds to their "main" branch.
            You can specify a different version than the default "main" by using a commit SHA or a git tag of the dataset repository.
        token (`str` or `bool`, *optional*):
            Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If `True`, or not specified, will get token from `"~/.huggingface"`.
        **config_kwargs (additional keyword arguments):
            Optional attributes for builder class which will override the attributes if supplied.

    Example:

    ```py
    >>> from datasets import get_dataset_split_names
    >>> get_dataset_split_names('cornell-movie-review-data/rotten_tomatoes')
    ['train', 'validation', 'test']
    ```
    )r    r   r   r   r   r   )r"   r-   r?   r/   )	r   r    r   r   r   r   r   r#   r>   s	            r   get_dataset_split_namesrH   *  sM    Z #	'#	 	D   "##r   )NNNNN)NNNN)NNNNNN)%__doc__r+   collections.abcr   r   typingr   r   download.download_configr   download.download_managerr	   #download.streaming_download_managerr
   r>   r   loadr   r   r   utils.loggingr   utils.versionr   r   logger
ValueErrorr   strdictr-   boolr%   r!   r:   r"   rH   r   r   r   <module>rW      sa    ! 	 - " 4 3 I  
 & " 
H		* 	 48048<.2(,@
@tT3/0@ n-@ E,"345	@
 uS'\*+@ E$)$%@J /3048<37?
?uS'\*+? n-? E,"345	?
 tT3/0?H /3048<37;B
;BuS'\*+;B n-;B E,"345	;B
 tT3/0;B c];B@ "&_c048<.2(,:
:#: sHSM73c8TW=FX@Y;Y3ZZ[\: n-	:
 E,"345: uS'\*+: E$)$%: :~ "&_c048<.2(,7$
7$#7$ sHSM73c8TW=FX@Y;Y3ZZ[\7$ n-	7$
 E,"3457$ uS'\*+7$ E$)$%7$r   