
    bi$                     *   d dl Z d dlZd dlmZ d dlmZ d dlZd dlZ	d dl
mZ d dlZd dlZd dlmZ d dlmZ ej&                  j(                  j+                  e      Zd Zd Zd Ze G d	 d
ej6                               Z G d dej:                        Zy)    N)	dataclass)Optional)
table_cast)readlinec                      	 t        j                  j                  j                  | i |S # t        $ r, t        j                  j                  j
                  | i |cY S w xY wN)pdiojsonujson_dumpsAttributeErrordumpsargskwargss     ^/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/datasets/packaged_modules/json/json.pyr   r      R    1uuzz%%t6v66 1uuzz0001   (+ 2A A c                      	 t        j                  j                  j                  | i |S # t        $ r, t        j                  j                  j
                  | i |cY S w xY wr   )r	   r
   r   ujson_loadsr   loadsr   s     r   r   r      r   r   c                     t         j                  j                  j                  dk\  rd|d<   t	        j
                  | fi |S )N   pyarrowdtype_backend)datasetsconfigPANDAS_VERSIONmajorr	   	read_json)path_or_bufr   s     r   pandas_read_jsonr"   #   s8    %%++q0"+<<.v..    c                        e Zd ZU dZdZeej                     ed<   dZ	e
ed<   dZee
   ed<   dZee
   ed<   dZeed	<   dZee   ed
<   dZeed<   dZee   ed<    fdZ xZS )
JsonConfigzBuilderConfig for JSON.Nfeaturesutf-8encodingencoding_errorsfieldTuse_threads
block_sizei   	chunksizenewlines_in_valuesc                 "    t         |           y r   )super__post_init__)self	__class__s    r   r1   zJsonConfig.__post_init__6   s    r#   )__name__
__module____qualname____doc__r&   r   r   Features__annotations__r(   strr)   r*   r+   boolr,   intr-   r.   r1   __classcell__)r3   s   @r   r%   r%   )   s|    !,0Hhx(()0Hc%)OXc])E8C=K $J$Is)--   r#   r%   c                   Z    e Zd ZeZd Zd Zdej                  dej                  fdZ	d Z
y)Jsonc                    | j                   j                  :t        j                  d       | j                   j                  | j                   _        | j                   j
                  durt        j                  d       | j                   j                  t        d      t        j                  | j                   j                        S )NzTThe JSON loader parameter `block_size` is deprecated. Please use `chunksize` insteadTzZThe JSON loader parameter `use_threads` is deprecated and doesn't have any effect anymore.zEThe JSON loader parameter `newlines_in_values` is no longer supported)r&   )r   r,   loggerwarningr-   r+   r.   
ValueErrorr   DatasetInfor&   )r2   s    r   _infoz
Json._info=   s    ;;!!-NNqr$(KK$:$:DKK!;;""$.NNl ;;))5dee##T[[-A-ABBr#   c                    | j                   j                  s"t        d| j                   j                         d|j                  _        |j                  | j                   j                        }g }|j                         D ]^  \  }}t        |t              r|g}|D cg c]  }|j                  |       }}|j                  t        j                  |d|i             ` |S c c}w )z-We handle string, list and dicts in datafilesz=At least one data file must be specified, but got data_files=Tfiles)name
gen_kwargs)r   
data_filesrC   download_configextract_on_the_flydownload_and_extractitems
isinstancer:   
iter_filesappendr   SplitGenerator)r2   
dl_managerrJ   splits
split_namerG   files          r   _split_generatorszJson._split_generatorsI   s    {{%%\]a]h]h]s]s\tuvv8<
""544T[[5K5KL
!+!1!1!3 	aJ%%=BCTZ**40CECMM(11zwX]N^_`		a
  Ds   C$pa_tablereturnc           
         | j                   j                  t        | j                   j                        t        |j                        z
  D ]o  }| j                   j                  j                  j                  |      j                  }|j                  |t        j                  d gt        |      z  |            }q t        |j                        D ]  \  }}t        j                  j                  ||   j                        s3| j                   j                  j                  |d       t        j                   d      k(  sp||   j#                  t$        j&                        j)                  dd      }t        j                  d d|z   j+                  d	      D        t        j,                               }|j/                  |||      } t1        || j                   j                  j                        }|S )
N)typestring)types_mapperrecordsT)orientlinesc              3   H   K   | ]  }|sd |j                         z     yw){N)rstrip).0xs     r   	<genexpr>z#Json._cast_table.<locals>.<genexpr>h   s     TaRSqxxz)Ts   ""
z
{)r   r&   setcolumn_namesarrow_schemar*   r[   append_columnpaarraylen	enumeratetypes	is_structgetr   Value	to_pandasr	   
ArrowDtypeto_jsonsplitr\   
set_columnr   )r2   rX   column_namer[   ijsonlstring_arrays          r   _cast_tablezJson._cast_tableW   s   ;;+"4;;#7#783x?T?T;UU l{{++88>>{KPP#11+rxxQTU]Q^H^ei?jkl #,H,A,A"B Q;88%%h{&;&@&@AdkkFZFZF^F^G^^H-G. !-"> 	> 
 $&88TD5L3G3G3NT[][d[d[f$L  (221k<PHQ "(DKK,@,@,M,MNHr#   c              #   
  K   t        t        j                  j                  |            D ]  \  }}| j                  j
                  +t        || j                  j                  | j                  j                        5 }t        |j                               }d d d        | j                  j
                     }t        t        j                  t        |                  }|j                  j!                         dgk(  r=| j                  j"                  rt%        | j                  j"                        ndg|_        t&        j(                  j+                  |d      }|| j-                  |      f It        |d      5 }d}t/        | j                  j0                  dz  d      }	| j                  j                  | j                  j                  nd	}
	 |j                  | j                  j0                        }|sn	 ||j3                         z  }| j                  j                  d
k7  r6|j9                  | j                  j                  |
      j;                  d
      }	 	 	 t=        j>                  t        j@                  |      t=        jB                  |	            }	 ||f| j-                  |      f |dz  }d d d         y # 1 sw Y   4xY w# t4        t        j6                  f$ r |t3        |      z  }Y w xY w# t&        jD                  t&        jF                  f$ rp}tI        |t&        jD                        rdtK        |      vs|	tM        |      kD  r tN        jQ                  dtM        |       d|	 d|	dz   d       |	dz  }	Y d }~nd }~ww xY w;# t&        jD                  $ r}	 t        || j                  j                  | j                  j                        5 }t        |      }d d d        n# 1 sw Y   nxY wn6# tR        $ r* tN        jU                  d| dtW        |       d|        |w xY wj                  j!                         dgk(  r=| j                  j"                  rt%        | j                  j"                        ndg|_        	 t&        j(                  j+                  |d      }nR# t&        jD                  $ r<}tN        jU                  d| dtW        |       d|        tS        d| d      d d }~ww xY w|| j-                  |      f Y d }~ld }~ww xY w# 1 sw Y   xY ww)N)r(   errorsr   textF)preserve_indexrb    i @  strictr'   )r   )r,   )read_options
straddlingz	Batch of z* bytes couldn't be parsed with block_size=z. Retrying with block_size=r   .zFailed to load JSON from file 'z' with error z: z=Failed to convert pandas DataFrame to Arrow Table from file 'z<Failed to convert pandas DataFrame to Arrow Table from file    ),ro   	itertoolschainfrom_iterabler   r*   openr(   r)   r   readr"   r
   StringIOr   columnstolistr&   listrl   Tablefrom_pandasr}   maxr-   r   r   UnsupportedOperationdecodeencodepajr    BytesIOReadOptionsArrowInvalidArrowNotImplementedErrorrO   r:   rn   rA   debugrC   errorr[   )r2   rG   file_idxrV   fdatasetdfrX   	batch_idxr,   r)   batches                r   _generate_tableszJson._generate_tablesp   s    '	(E(Ee(LM O	'NHd{{  ,$)=)=dkkFaFab 4fg)!&&(3G4 "$++"3"34%bkk+g2F&GH::$$&1#-?C{{?S?Sdkk&:&:!;Z`YaBJ88//5/I 0 0 ::: $% @' !I "%T[[%:%:b%@(!KJ7;{{7R7R7^33dl $  !t{{'<'< =$!1!QZZ\1E  ;;//7:$)LL1E1EoL$^$e$efm$nE*""&!8/2}}(*

5(9cmHn0&H %*J  (3T5E5Eh5OOO!Q	q @' @'O	'4 46 !/0G0GH 1!Xa[0E1 )+9T9T'U !8(21boo(F,8A,F+5E
+B(- )/.7E
|Cmnxmy  zU  V`  cd  Vd  Ue  ef  -g)* )3a
!8 #'(  " "(%)$(4;;3G3GPTP[P[PkPk&" !=%&)9!)<B!= != != $. ( &/NtfTabfghbiajjlmnlo-p q&'(  "zz002qc9KO;;K_K_T$++2F2F-Gflem
,+-88+?+?SX+?+Y#%?? , &&cdhciivw{|}w~v  @B  CD  BE  %F!" '1&bcgbhhi$j'"'+!,	, #+D,<,<X,F"FF!+"S@' @'s   A=U ?KC'U  A=T3>KAT3!N#>L!T3?U K	U 'L?T3LT3#N(A&NNNNT026P
(O=	4	P
=PP
	T+
3P=	=AT+!R>=T+>T	7T	T	T+%T3+T00T33T=	8U N)r4   r5   r6   r%   BUILDER_CONFIG_CLASSrE   rW   rl   r   r}   r    r#   r   r?   r?   :   s4    %
CBHH  2P'r#   r?   )r
   r   dataclassesr   typingr   pandasr	   r   rl   pyarrow.jsonr   r   r   datasets.configdatasets.tabler   datasets.utils.file_utilsr   utilslogging
get_loggerr4   rA   r   r   r"   BuilderConfigr%   ArrowBasedBuilderr?   r   r#   r   <module>r      s    	  !       % . 
			*	*8	411/  ''     F'8%% F'r#   