
    bi)                         d dl Z d dlZd dlmZmZmZ d dlZddlmZm	Z	m
Z
mZ ddlmZ ddlmZ ddlmZ ddlmZmZ d	d
lmZ  G d de      Z G d d      Zy)    N)BinaryIOOptionalUnion   )DatasetFeatures
NamedSplitconfig)query_table)Json)tqdm)NestedDataStructureLikePathLike   )AbstractDatasetReaderc                   t     e Zd Z	 	 	 	 	 	 	 ddee   dee   dee   dede	de	dee   dee
   f fd	Zd
 Z xZS )JsonDatasetReaderpath_or_pathssplitfeatures	cache_dirkeep_in_memory	streamingfieldnum_procc	           
          t        
|   |f||||||d|	 || _        t        |t              r|n| j
                  |i}t        d||||d|	| _        y )N)r   r   r   r   r   r   )r   
data_filesr   r    )super__init__r   
isinstancedictr   r   builder)selfr   r   r   r   r   r   r   r   kwargs	__class__s             K/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/datasets/io/json.pyr    zJsonDatasetReader.__init__   s     			
)		
 		
 
)3M4)Htzz[hNi 
$	

 
    c                 6   | j                   r(| j                  j                  | j                        }|S d }d }d }d }| j                  j	                  ||||| j
                         | j                  j                  | j                  || j                        }|S )N)r   )download_configdownload_modeverification_mode	base_pathr   )r   r,   	in_memory)r   r#   as_streaming_datasetr   download_and_preparer   
as_datasetr   )r$   datasetr*   r+   r,   r-   s         r'   readzJsonDatasetReader.read0   s    >>ll77djj7IG$  #O M $ILL-- /+"3# .  ll--jj4EQUQdQd . G r(   )NNNFFNN)__name__
__module____qualname__r   r   r   r	   r   strboolintr    r3   __classcell__)r&   s   @r'   r   r      s     '+'+$#"&
.x8
 
#
 8$	

 
 
 
 }
 3-
@r(   r   c                   n    e Zd Z	 	 	 ddedeeef   dee   dee   dee	   f
dZ
defd	Zd
 ZdedefdZy)JsonDatasetWriterNr2   path_or_buf
batch_sizer   storage_optionsc                     ||dk  rt        d| d      || _        || _        |r|nt        j                  | _        || _        d| _        |xs i | _        || _	        y )Nr   z	num_proc z must be an integer > 0.zutf-8)

ValueErrorr2   r=   r
   DEFAULT_MAX_BATCH_SIZEr>   r   encodingr?   to_json_kwargs)r$   r2   r=   r>   r   r?   rD   s          r'   r    zJsonDatasetWriter.__init__I   sh     HMy
2JKLL&(2*8U8U .4",r(   returnc                    | j                   j                  dd       }| j                   j                  dd      }| j                   j                  d|dk(  rdnd      }d| j                   vr|dv rd| j                   d<   t        | j                  t        t
        t        j                  f      rd	nd }| j                   j                  d
|      }|dvrt        d| d      |s.| j                  | j                  j                  k  rt        d      t        | j                  t        t
        t        j                  f      r\t        j                  | j                  dfd
|i| j                  xs i 5 } | j                  d|||d| j                   }d d d        |S |rt        d| d       | j                  d| j                  ||d| j                   }|S # 1 sw Y   S xY w)Nr=   orientrecordslinesTFindex)r   tableinfercompression)NrL   gzipbz2xzz&`datasets` currently does not support z compressionzOutput JSON will not be formatted correctly when lines = False and batch_size < number of rows in the dataset. Use pandas.DataFrame.to_json() instead.wb)file_objrG   rI   zUThe compression parameter is not supported when writing to a buffer, but compression=z1 was passed. Please provide a local path instead.r   )rD   popr!   r=   r7   bytesosr   NotImplementedErrorr>   r2   num_rowsfsspecopenr?   _write)r$   _rG   rI   default_compressionrM   bufferwrittens           r'   writezJsonDatasetWriter.write]   s   ##M48$$((9=##''99LRWX$---&<N2N+0D( *4D4D4DsESUS^S^F_)`gfj))--m=PQBB%(N{m[g&hii4<<+@+@@% i  d&&eR[[(AB  $4?DHDXDXD^\^ j%$++ivfEiUYUhUhij  )klwkxGH  "dkko4+;+;FRWo[_[n[noGj s   /!GG!c                 T   |\  }}}}t        | j                  j                  t        ||| j                  z         | j                  j
                        } |j                         j                  dd ||d|}|j                  d      s|dz  }|j                  | j                        S )N)rK   keyindices)r=   rG   rI   
r   )r   r2   dataslicer>   _indices	to_pandasto_jsonendswithencoderC   )r$   argsoffsetrG   rI   rD   batchjson_strs           r'   _batch_jsonzJsonDatasetWriter._batch_json~   s    04-~,,##fft67LL))

 -5??$,,lfTYl]kl  &Ht}}--r(   rR   c                    d}| j                   | j                   dk(  rft        t        dt        | j                        | j
                        dd      D ]+  }| j                  ||||f      }||j                  |      z  }- |S t        | j                        | j
                  }	}t        j                  | j                         5 }
t        |
j                  | j                  t        d||	      D cg c]  }||||f
 c}      ||	z  r||	z  dz   n||	z  dd      D ]  }||j                  |      z  } 	 ddd       |S c c}w # 1 sw Y   |S xY w)zWrites the pyarrow table as JSON lines to a binary file handle.

        Caller is responsible for opening and closing the handle.
        r   Nr   bazCreating json from Arrow format)unitdesc)totalrr   rs   )r   hf_tqdmrangelenr2   r>   ro   r_   multiprocessingPoolimap)r$   rR   rG   rI   rD   r^   rl   rn   rW   r>   pools              r'   rZ   zJsonDatasetWriter._write   s]    == DMMQ$6!aT\\*DOO<6 4
  ++VVUN,ST8>>(334*  $'t||#4doojH %%dmm4 
8 'II((OTUVX`blOmnV&&%@n ;CZ:O8z1Q6U]akUk:! 	8H x~~h77G	8
8  o	
8 s   +E/D?<8E?EE)NNN)r4   r5   r6   r   r   r   r   r   r9   r"   r    r_   ro   rZ   r   r(   r'   r<   r<   H   s    
 %)"&*.-- 8X-.- SM	-
 3-- "$-(s B.## 
#r(   r<   )rx   rU   typingr   r   r   rX    r   r   r	   r
   
formattingr   packaged_modules.json.jsonr   utilsr   ru   utils.typingr   r   abcr   r   r<   r   r(   r'   <module>r      sC     	 , ,  4 4 $ - # < &6- 6rf fr(   