
    bi                         d dl Z d dlmZ d dlmZ d dlZd dlZd dlm	Z	 ej                  j                  j                  e      Ze G d dej                               Z G d dej"                        Zy)	    N)	dataclass)Optional)
table_castc                   L     e Zd ZU dZdZeej                     ed<    fdZ	 xZ
S )ArrowConfigzBuilderConfig for Arrow.Nfeaturesc                 "    t         |           y N)super__post_init__)self	__class__s    `/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/datasets/packaged_modules/arrow/arrow.pyr   zArrowConfig.__post_init__   s        )__name__
__module____qualname____doc__r   r   datasetsFeatures__annotations__r   __classcell__)r   s   @r   r   r      s%    ",0Hhx(()0   r   r   c                   Z    e Zd ZeZd Zd Zdej                  dej                  fdZ	d Z
y)Arrowc                 V    t        j                  | j                  j                        S )N)r   )r   DatasetInfoconfigr   )r   s    r   _infozArrow._info   s    ##T[[-A-ABBr   c                    | j                   j                  s"t        d| j                   j                         d|j                  _        |j                  | j                   j                        }g }|j                         D ]  \  }}t        |t              r|g}|D cg c]  }|j                  |       }}| j                  j                  t        j                  j                  |      D ]p  }t        |d      5 }	 t         j"                  j%                  |      }ddd       t.        j0                  j3                  j4                        | j                  _         n |j7                  t/        j8                  |d|i             
 |S c c}w # t&        t         j(                  j*                  f$ r" t         j"                  j-                  |      }Y w xY w# 1 sw Y   xY w)z-We handle string, list and dicts in datafilesz=At least one data file must be specified, but got data_files=TNrbfiles)name
gen_kwargs)r   
data_files
ValueErrordownload_configextract_on_the_flydownload_and_extractitems
isinstancestr
iter_filesinfor   	itertoolschainfrom_iterableopenpaipcopen_streamOSErrorlibArrowInvalid	open_filer   r   from_arrow_schemaschemaappendSplitGenerator)	r   
dl_managerr$   splits
split_namer!   filefreaders	            r   _split_generatorszArrow._split_generators   s   {{%%\]a]h]h]s]s\tuvv8<
""544T[[5K5KL
!+!1!1!3 	aJ%%=BCTZ**40CECyy!!)%OO99%@ DdD) 9Q9%'VV%7%7%:F9
 *2):):)L)LV]])[DII& MM(11zwX]N^_`	a   D !()<)<= 9%'VV%5%5a%8F99 9s1   F=G?FAG	GG	GG$pa_tablereturnc                     | j                   j                  *t        || j                   j                  j                        }|S r
   )r-   r   r   arrow_schema)r   rD   s     r   _cast_tablezArrow._cast_table7   s5    99) "(DII,>,>,K,KLHr   c              #     
K   t        t        j                  j                  |            D ]  \  }}t	        |d      5 }	 	 t
        j                  j                  |      }t        |      D ]?  \  }}t
        j                  j                  |g      }| d| | j!                  |      f A 	 d d d         y # t        t
        j                  j                  f$ r@ t
        j                  j                  |      

fdt        
j                        D        }Y w xY w# t"        $ r-}	t$        j'                  d| dt)        |	       d|	         d }	~	ww xY w# 1 sw Y   6xY ww)Nr    c              3   @   K   | ]  }j                  |        y wr
   )	get_batch).0irB   s     r   	<genexpr>z)Arrow._generate_tables.<locals>.<genexpr>F   s     "a16#3#3A#6"as   _zFailed to read file 'z' with error z: )	enumerater.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   rangenum_record_batchesTablefrom_batchesrH   r%   loggererrortype)r   r!   file_idxr@   rA   batches	batch_idxrecord_batchrD   erB   s             @r   _generate_tableszArrow._generate_tables>   sO    '	(E(Ee(LM 	NHddD! Qb"$&&"4"4Q"7 4=W3E T/	<#%88#8#8,#H "*
!I;79I9I(9SSST 	
 $RVV%8%89 b!#!1!1!!4"afF_F_@`"ab " LL#8mDQRG9TVWXVY!Z[ sZ   ;E%EB9 AD.E%9A DDDD	E((EEEE"	E%N)r   r   r   r   BUILDER_CONFIG_CLASSr   rC   r2   rS   rH   r]    r   r   r   r      s3    &C2BHH  r   r   )r.   dataclassesr   typingr   pyarrowr2   r   datasets.tabler   utilslogging
get_loggerr   rU   BuilderConfigr   ArrowBasedBuilderr   r_   r   r   <module>ri      sf     !    % 
			*	*8	4  (((    7H&& 7r   