
    bi                         d dl Z d dlZd dlmZmZmZ d dlZddlmZm	Z	m
Z
mZ ddlmZ ddlmZ ddlmZ ddlmZmZ d	d
lmZ  G d de      Z G d d      Zy)    N)BinaryIOOptionalUnion   )DatasetFeatures
NamedSplitconfig)query_table)Csv)tqdm)NestedDataStructureLikePathLike   )AbstractDatasetReaderc                   h     e Zd Z	 	 	 	 	 	 d
dee   dee   dee   dede	de	dee
   f fdZd	 Z xZS )CsvDatasetReaderpath_or_pathssplitfeatures	cache_dirkeep_in_memory	streamingnum_procc           
          t        	|   |f||||||d| t        |t              r|n| j                  |i}t        d|||d|| _        y )N)r   r   r   r   r   r   )r   
data_filesr    )super__init__
isinstancedictr   r   builder)
selfr   r   r   r   r   r   r   kwargs	__class__s
            J/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/datasets/io/csv.pyr   zCsvDatasetReader.__init__   sv     			
)		
 		
 *4M4)Htzz[hNi 
$
 	
    c                 6   | j                   r(| j                  j                  | j                        }|S d }d }d }d }| j                  j	                  ||||| j
                         | j                  j                  | j                  || j                        }|S )N)r   )download_configdownload_modeverification_mode	base_pathr   )r   r+   	in_memory)r   r"   as_streaming_datasetr   download_and_preparer   
as_datasetr   )r#   datasetr)   r*   r+   r,   s         r&   readzCsvDatasetReader.read-   s    >>ll77djj7IG$  #O M $ILL-- /+"3# .  ll--jj4EQUQdQd . G r'   )NNNFFN)__name__
__module____qualname__r   r   r   r	   r   strboolintr   r2   __classcell__)r%   s   @r&   r   r      sx     '+'+$"&
.x8
 
#
 8$	

 
 
 
 3-
:r'   r   c                   n    e Zd Z	 	 	 ddedeeef   dee   dee   dee	   f
dZ
defd	Zd
 ZdedefdZy)CsvDatasetWriterNr1   path_or_buf
batch_sizer   storage_optionsc                     ||dk  rt        d| d      || _        || _        |r|nt        j                  | _        || _        d| _        |xs i | _        || _	        y )Nr   z	num_proc z must be an integer > 0.zutf-8)

ValueErrorr1   r<   r
   DEFAULT_MAX_BATCH_SIZEr=   r   encodingr>   to_csv_kwargs)r#   r1   r<   r=   r   r>   rC   s          r&   r   zCsvDatasetWriter.__init__F   sh     HMy
2JKLL&(2*8U8U .4"*r'   returnc                 .   | j                   j                  dd       }| j                   j                  dd      }| j                   j                  dd      }t        | j                  t        t
        t        j                  f      rZt        j                  | j                  dfi | j                  xs i 5 } | j                  d|||d| j                   }d d d        |S  | j                  d| j                  ||d| j                   }|S # 1 sw Y   S xY w)	Nr<   headerTindexFwb)file_objrF   rG   r   )rC   popr    r<   r6   bytesosr   fsspecopenr>   _write)r#   _rF   rG   bufferwrittens         r&   writezCsvDatasetWriter.writeZ   s   ""=$7##''$7""&&w6d&&eR[[(ABT--tT8L8L8RPRT iX^%$++hvfEhUYUgUghi  "dkkn4+;+;FRWn[_[m[mnG	i s   3!D

Dc                 6   |\  }}}}t        | j                  j                  t        ||| j                  z         | j                  j
                        } |j                         j                  dd |dk(  r|nd|d|}|j                  | j                        S )N)tablekeyindicesr   F)r<   rF   rG   r   )
r   r1   dataslicer=   _indices	to_pandasto_csvencoderB   )r#   argsoffsetrF   rG   rC   batchcsv_strs           r&   
_batch_csvzCsvDatasetWriter._batch_csvf   s    /3,},,##fft67LL))

 +%//#** 
!V%u
Xe
 ~~dmm,,r'   rI   c                    d}| j                   | j                   dk(  rft        t        dt        | j                        | j
                        dd      D ]+  }| j                  ||||f      }||j                  |      z  }- |S t        | j                        | j
                  }	}t        j                  | j                         5 }
t        |
j                  | j                  t        d||	      D cg c]  }||||f
 c}      ||	z  r||	z  dz   n||	z  dd      D ]  }||j                  |      z  } 	 ddd       |S c c}w # 1 sw Y   |S xY w)zWrites the pyarrow table as CSV to a binary file handle.

        Caller is responsible for opening and closing the handle.
        r   Nr   bazCreating CSV from Arrow format)unitdesc)totalre   rf   )r   hf_tqdmrangelenr1   r=   rb   rS   multiprocessingPoolimap)r#   rI   rF   rG   rC   rR   r_   ra   num_rowsr=   pools              r&   rO   zCsvDatasetWriter._writes   sY   
 == DMMQ$6!aT\\*DOO<5 3
 //665-*PQ8>>'223,  $'t||#4doojH %%dmm4 
7&IINSTUW_akNlmF&&%?m ;CZ:O8z1Q6U]akUk9  	7G x~~g66G	7
7  n	
7 s   +E/D?<8E?EE)NNN)r3   r4   r5   r   r   r   r   r   r8   r!   r   rS   rb   rO   r   r'   r&   r;   r;   E   s|    
 %)"&*.++ 8X-.+ SM	+
 3-+ "$+(
s 
-x C r'   r;   )rk   rL   typingr   r   r   rM    r   r   r	   r
   
formattingr   packaged_modules.csv.csvr   utilsr   rh   utils.typingr   r   abcr   r   r;   r   r'   r&   <module>rw      sC     	 , ,  4 4 $ * # < &3, 3lL Lr'   