
    bi                         d dl Z d dlmZ d dlmZ d dlZd dlZd dlm	Z	 d dl
mZ ej                  j                  j                  e      Ze G d dej"                               Z G d d	ej&                        Zy)
    N)	dataclass)Optionalrequire_storage_cast)
table_castc                   \    e Zd ZU dZdZeej                     ed<   dZ	e
ed<   dZee
   ed<   y)	XmlConfigzBuilderConfig for xml files.Nfeatureszutf-8encodingencoding_errors)__name__
__module____qualname____doc__r
   r   datasetsFeatures__annotations__r   strr        \/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/datasets/packaged_modules/xml/xml.pyr	   r	      s2    &,0Hhx(()0Hc%)OXc])r   r	   c                   Z    e Zd ZeZd Zd Zdej                  dej                  fdZ	d Z
y)Xmlc                 V    t        j                  | j                  j                        S )N)r
   )r   DatasetInfoconfigr
   )selfs    r   _infoz	Xml._info   s    ##T[[-A-ABBr   c                    | j                   j                  s"t        d| j                   j                         d|j                  _        |j                  | j                   j                        }g }|j                         D ]^  \  }}t        |t              r|g}|D cg c]  }|j                  |       }}|j                  t        j                  |d|i             ` |S c c}w )a  The `data_files` kwarg in load_dataset() can be a str, List[str], Dict[str,str], or Dict[str,List[str]].

        If str or List[str], then the dataset returns only the 'train' split.
        If dict, then keys should be from the `datasets.Split` enum.
        z=At least one data file must be specified, but got data_files=Tfiles)name
gen_kwargs)r   
data_files
ValueErrordownload_configextract_on_the_flydownload_and_extractitems
isinstancer   
iter_filesappendr   SplitGenerator)r   
dl_managerr#   splits
split_namer    files          r   _split_generatorszXml._split_generators   s     {{%%\]a]h]h]s]s\tuvv8<
""544T[[5K5KL
!+!1!1!3 	aJ%%=BCTZ**40CECMM(11zwX]N^_`		a
  Ds   C$pa_tablereturnc                    | j                   j                  u| j                   j                  j                  }t        d | j                   j                  j	                         D              r|j                  |      }|S t        ||      }|S |j                  t        j                  dt        j                         i            S )Nc              3   4   K   | ]  }t        |         y w)Nr   ).0features     r   	<genexpr>z"Xml._cast_table.<locals>.<genexpr>3   s     b+G44bs   xml)
r   r
   arrow_schemaallvaluescastr   paschemastring)r   r2   r?   s      r   _cast_tablezXml._cast_table0   s    ;;+[[))66FbDKKDXDXD_D_Dabb#==0 O &h7O==E299;+?!@AAr   c              #   (  K   | j                   j                  t        | j                   j                        ndg}t        t        j
                  j                  |            D ]  \  }}t        || j                   j                  | j                   j                        5 }|j                         }t        j                  j                  t        j                  |g      g|      }|| j                  |      f d d d         y # 1 sw Y   xY ww)Nr9   )r   errors)names)r   r
   list	enumerate	itertoolschainfrom_iterableopenr   r   readr>   Tablefrom_arraysarrayrA   )r   r    pa_table_namesfile_idxr0   fr9   r2   s           r   _generate_tableszXml._generate_tables=   s     7;{{7K7K7Wdkk223^c]d'	(E(Ee(LM 	;NHddT[[%9%9$++B]B]^ ;bcffh88//3%0A/X 0 0 :::; ;	;; ;s   BDAD;DD	DN)r   r   r   r	   BUILDER_CONFIG_CLASSr   r1   r>   rL   rA   rR   r   r   r   r   r      s6    $C$BBHH B B;r   r   )rG   dataclassesr   typingr   pyarrowr>   r   datasets.features.featuresr   datasets.tabler   utilslogging
get_loggerr   loggerBuilderConfigr	   ArrowBasedBuilderr   r   r   r   <module>r_      si     !    ; % 
			*	*8	4 *&& * *,;(
$
$ ,;r   