
    bi=                        d dl Z d dlZd dlmZ 	 d dlmZ d dlmZm	Z	m
Z
mZ 	 d dlZd dlmZ e j$                  j
                  Ze j$                  j                  d        Zd Zd Ze j$                  j                  d        Ze j$                  j                  d        Ze j$                  j                  e j$                  j3                  d	d ej4                         g      d
               Ze j$                  j8                  e j$                  j                  d               Ze j$                  j                  e j$                  j3                  d	d ej4                         g      d               Ze j$                  j                  e j$                  j>                  d               Z e j$                  j                  e j$                  j>                  d               Z!e j$                  j                  e j$                  j>                  d               Z"e j$                  jF                  d        Z$d Z%d Z&d Z'd Z(y# e$ r dZY w xY w# e$ r dxZZY w xY w)    N)fs)_read_table_test_dataframe_test_table_range_integersc                    t        d      }d|d<   t        j                  j                  |d      }t        j                         }t        j                  ||j                  d      }g }t        d      D ]X  }||d<   t        j                  j                  |d      }|j                  |       |j                  |j                                Z |j                          |j                         }t        t        j                  |            }t!        j"                  |d	
      }	t%        j&                  |j)                         |	       y Nd   r   	unique_idFpreserve_index2.6version
   Tignore_index)r   paTablefrom_pandasBufferOutputStreampqParquetWriterschemarangewrite_tableappendcopyclosegetvaluer   BufferReaderpdconcattmassert_frame_equal	to_pandas
tempdirdfarrow_tableoutwriterframesibufresultexpecteds
             d/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/pyarrow/tests/parquet/test_parquet_writer.py#test_parquet_incremental_file_buildr3   ,   s   		BB{O((&&r%&@K



!Cc;#5#5uEFF2Y !;hh**2e*D;'bggi ! LLN
,,.C-.Fyyd3H&**,h7    c                 P   t        j                  dt        j                               t        j                  dt        j                               g}t        j                  |      }t        j
                  dg      t        j
                  dg      g}t         j                  j                  |ddg      }| dz  }t        j                  ||ddd	      5 }t        j                  t              5  |j                  |       d d d        d d d        y # 1 sw Y   xY w# 1 sw Y   y xY w)
NPOSdesc   blazsimple_validate_schema.parquetr   snappyspark)r   compressionflavor)r   fielduint32stringr   arrayr   from_arraysr   r   pytestraises
ValueErrorr   )r(   simple_fieldssimple_schemasimple_from_arraysimple_tablepathws          r2    test_validate_schema_write_tablerL   G   s     			$
%M
 IIm,M 1#%(9:88''(9E6?KL55D			$"'&.w
@ (CD]]:& 	(MM,'	(( (	( 	(( (s$   D-D?DD	DD%c                    t        j                  t              5  t        j                  t        j
                  dt        j                               g      }t        j                  d |       d d d        t        j                  t              5  t        j                  | dz  d        d d d        y # 1 sw Y   ExY w# 1 sw Y   y xY w)Nx	some_path)	rC   rD   	TypeErrorr   r   r>   int32r   r   )r(   some_schemas     r2   test_parquet_invalid_writerrS   ]   s    	y	! ,ii#rxxz!: ;<
{+, 
y	! 6
;.56 6	, ,6 6s   AB1B=1B:=Cc                    t        d      }d|d<   t        j                  j                  |d      }t        j                         }t        j                  ||j                  d      5 }g }t        d      D ]X  }||d<   t        j                  j                  |d      }|j                  |       |j                  |j                                Z 	 d d d        |j                         }t        t        j                  |            }t        j                   d	
      }	t#        j$                  |j'                         |	       y # 1 sw Y   sxY wr	   )r   r   r   r   r   r   r   r   r   r   r   r   r    r   r!   r"   r#   r$   r%   r&   r'   s
             r2   test_parquet_writer_context_objrU   g   s   		BB{O((&&r%&@K



!C			#{115	A %Vr 	%AB{O((..r%.HK{+MM"'')$	%% ,,.C-.Fyyd3H&**,h7% %s   (A)EEc                    t        d      }d|d<   t        j                  j                  |d      }t        j                         }d}	 t        j                  ||j                  d      5 }g }t        d	      D ]h  }||d<   t        j                  j                  |d      }|j                  |       |j                  |j                                |d
k(  s_t        |       	 d d d        |j                         }	t!        t        j"                  |	            }
t%        j&                  d      }t)        j*                  |
j-                         |       y # 1 sw Y   sxY w# t        $ r}t        |      |k(  sJ Y d }~d }~ww xY w)Nr
   r   r   Fr   zArtificial Errorr   r   r      Tr   )r   r   r   r   r   r   r   r   r   r   r   r   rE   	Exceptionstrr    r   r!   r"   r#   r$   r%   r&   )r(   r)   r*   r+   
error_textr,   r-   r.   er/   r0   r1   s               r2   .test_parquet_writer_context_obj_with_exceptionr\      sR   		BB{O((&&r%&@K



!C#J$c)00&+- 	106F2Y 1"#; hh222e2L"";/bggi(6$Z001	1 ,,.C-.Fyyd3H&**,h7'	1 	1  $1v###$s7   	"E# +A,EE%E# E E# #	F,FF
filesystemc                    t        d      }t        j                  j                  |d      }t        j                  j                  |d      }t        | dz        }t        | dz        }t        j                  ||j                  |d      5 }|j                  |       d d d        t        |      j                         }t        j                  ||       t        j                  ||j                  |d      5 }|j                  |       d d d        t        |      j                         }t        j                  ||       t        j                  ||j                  |d      5 }|j                  |       d d d        t        |      j                         }t        j                  ||       t        j                  ||j                  |d      5 }|j                  |       d d d        t        |      j                         }t        j                  ||       y # 1 sw Y   ~xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   _xY w)Nr
   Fr   zdata_table.parquetzdata_batch.parquetr   r]   r   )r   r   r   r   RecordBatchrY   r   r   r   r   r   r&   r$   r%   write_batchwrite)	r(   r]   r)   tablebatch
path_table
path_batchr,   r0   s	            r2   "test_parquet_writer_write_wrappersrg      s    
	BHH  E :ENN&&r%&@EW334JW334J			ELLZ
 "	5!"
 $..0F&"%			ELLZ
 "	5!"
 $..0F&"%			ELLZ
 	U
 $..0F&"%			ELLZ
 	U
 $..0F&"%=" "" "  s0   H9H(%H5IH%(H25H>I
c                      dd fd} |dz  dz
  d        |dz  d        |dz  dz   d        |dz   dz   d        |dz  dz  d        |d d        |dz   d d       y )Ni   i   c                 X   t         j                  j                  t        | d      gdg      }|t	        j
                  |dz         nt	        j
                  |dz  |       t	        j                  dz        }|
n|}|j                  |k(  sJ t        |	      }t        |dz
        D ]"  }|j                  |      j                  |k(  r"J  | ||dz
  z  z
  }|dk(  r$|j                  |dz
        j                  |k(  sJ y |j                  |dz
        j                  |k(  sJ y )NbrN   )namesztest.parquet)row_group_sizer8   r   )r   r   rB   r   r   r   read_metadatanum_row_groupsminr   	row_groupnum_rows)	data_size
chunk_sizeexpect_num_chunksrc   metadataexpected_chunk_sizelatched_chunk_size	chunk_idx	remainderabs_max_chunk_sizedefault_chunk_sizer(   s            r2   check_chunk_sizez8test_parquet_writer_chunk_size.<locals>.check_chunk_size   sY   $$Is+&
 %  NN5'N":;NN5'N"::V##Gn$<=4>4F0J&&*;;;; !46HI0145 	PI%%i099=OOOO	P !48IA8M!NO	>%%!A%''/x3EF F F %%&7!&;<EERRRr4      r
      r8    )r(   r|   rz   r{   s   ` @@r2   test_parquet_writer_chunk_sizer      s     %)S. '!+-?#-EqI'!+-?C'!+-?#-EqI'#-/AC/GK '!+-?!-CQG 'q1'!+T15r4   c                 h   t        d      }t        j                  j                  |d      }t	        | dz        }t        j                  ||j                  |d      5 }|j                  |       d d d        t        |      j                         }t        j                  ||       y # 1 sw Y   9xY w)Nr
   Fr   zdata.parquetr   r_   )r   r   r   r   rY   r   r   r   r   r   r&   r$   r%   )r(   r]   r)   rc   rJ   r,   r0   s          r2   $test_parquet_writer_filesystem_localr      s     
	BHH  E :Ew'(D			ellz5
 "	5!"
 ((*F&"%" "s   B((B1c                 X   t        d      }t        j                  j                  |d      }| \  }}}t	        j
                  ||j                  |d      5 }|j                  |       d d d        t        |      j                         }t        j                  ||       y # 1 sw Y   9xY w)Nr
   Fr   r   r_   r   r   r   r   r   r   r   r   r   r&   r$   r%   s3_example_fsr)   rc   r   urirJ   r,   r0   s           r2   !test_parquet_writer_filesystem_s3r     s     
	BHH  E :E!MBT			ellr5
 "	5!"
 '')F&"%" "s   B  B)c                 Z   t        d      }t        j                  j                  |d      }| \  }}}t	        j
                  ||j                  d      5 }|j                  |       d d d        t        ||      j                         }t        j                  ||       y # 1 sw Y   ;xY w)Nr
   Fr   r   r   r]   r   r   s           r2   %test_parquet_writer_filesystem_s3_urir     s     
	BHH  E :E!MBT			#u||U	; "v5!" "-779F&"%	" "s   B!!B*c                 d   t        d      }t        j                  j                  |d      }| \  }}|dz   }t	        j
                  ||j                  |d      5 }|j                  |       d d d        t        ||      j                         }t        j                  ||       y # 1 sw Y   ;xY w)Nr
   Fr   z/test.parquetr   r_   r   r   )s3_example_s3fsr)   rc   r   	directoryrJ   r,   r0   s           r2   #test_parquet_writer_filesystem_s3fsr   +  s     
	BHH  E :E#MB	&D			ellr5
 "	5!"
 "-779F&"%" "s   B&&B/c                  
   t        d      } t        j                         }t        j                  t
        d      5  t        j                  t        j                         | j                  |       d d d        y # 1 sw Y   y xY w)Nr
   zspecified path is file-likematchr   )r   r   LocalFileSystemrC   rD   rE   r   r   r   r   r   )rc   r]   s     r2   ,test_parquet_writer_filesystem_buffer_raisesr   =  sb    E##%J 
z)F	G 

!!#U\\j	

 
 
s   5A99Bc                    t        j                  dg di      }| dz  }t        j                  ||j                        5 }|j                  |       d d d        t        j                  |      }d|j                  v sJ |j                  d   sJ | dz  }t        j                  ||j                  d      5 }|j                  |       d d d        t        j                  |      }|j                  J y # 1 sw Y   xY w# 1 sw Y   9xY w)Na)r8   r}   r~   ztest_with_schema.parquets   ARROW:schemaztest_without_schema.parquetF)store_schema)r   rc   r   r   r   r   rm   ru   )r(   rc   path1r,   metapath2s         r2    test_parquet_writer_store_schemar   I  s    HHc9%&E 00E			%	. "&5!" E"Ddmm+++==))) 33E			%E	B "f5!" E"D==   " "" "s   C46D 4C= D	c                    t         j                  j                  t        j                  g d      gdg      }| dz  }t	        j
                  ||j                        5 }|j                  |       |j                  ddd       |j                  dd	d
       d d d        t	        j                  |      }|j                  j                  }|d   dk(  sJ |d   dk(  sJ |d   dk(  sJ y # 1 sw Y   SxY w)NrQ   )typef0zmetadata.parquet1rN   )key1key223)r   key3s   key1   1s   key2   2s   key3   3)r   r   rB   rA   r   r   r   r   add_key_value_metadataParquetFileru   )r(   rc   rJ   r,   readerru   s         r2   -test_parquet_writer_append_key_value_metadatar   ^  s    HH  "((2G"<!=vFE''D			$	- B5!%%sC&@A%%sC&@AB ^^D!F''HG$$$G$$$G$$$B Bs   :C))C2c           	      v   t        j                  dt        d      i      }t        j                  || dz  dd       t        j                  || dz  ddd	       t        j                  || d
z  ddddd	       t        j
                  | dz        }t        j
                  | dz        }t        j
                  | d
z        }|j                  |      sJ |j                  |      sJ t        j                  | dz        }t        j                  | dz        }t        j                  | d
z        }|j                  |j                  k(  sJ |j                  |j                  k(  sJ t        |j                        D ]  }|j                  |      }	|j                  |      }
|j                  |      }|	j                  |
j                  k(  sJ |	j                  |j                  k(  sJ |	j                  |
j                  k  sJ |	j                  |j                  k  sJ |
j                  |j                  k  rJ  y )Nr   i zunchunked.parquetFPLAIN)use_dictionarycolumn_encodingzchunked-default.parquetT)r   r   use_content_defined_chunkingzchunked-custom.parquet      min_chunk_sizemax_chunk_size)r   rc   r   r   r   
read_tableequalsrm   rn   rp   rq   total_byte_size)r(   rc   	unchunkedchunked_defaultchunked_customunchunked_metadatachunked_default_metadatachunked_custom_metadatar.   rg_unchunkedrg_chunked_defaultrg_chunked_customs               r2   %test_parquet_content_defined_chunkingr   m  s>   HHc5>*+E NN5'$77"'#*, NN5'$=="'#*046 NN5'$<<"'#*CICI1KL g(;;<ImmG.G$GHO]]7-E#EFNO,,,N+++ ))'4G*GH!//:S0ST ..w9Q/QR,,0H0W0WWWW,,0G0V0VVVV%445 
V)33A65??B3==a@$$(:(C(CCCC$$(9(B(BBBB ++.@.P.PPPP++.?.O.OOOO!114E4U4UUUU
Vr4   c                 Z   t        j                  dt        d      i      }| dz  }d}t        j                  t
        |      5  ddd}t        j                  |||	       d d d        dd
ddfddidfddidfg}|D ]A  \  }}t        j                  t        |      5  t        j                  |||	       d d d        C t        j                  ||d
	       ddd}t        j                  |||	       dddd}t        j                  |||	       y # 1 sw Y   xY w# 1 sw Y   xY w)Nr   r
   zchunked-invalid.parquetz2max_chunk_size must be greater than min_chunk_sizer   r   r   r   )r   T)r   unknown_optionzEUnknown options in 'use_content_defined_chunking': {'unknown_option'}r   zEMissing options in 'use_content_defined_chunking': {'max_chunk_size'}r   zEMissing options in 'use_content_defined_chunking': {'min_chunk_size'}r8   )r   r   
norm_level)	r   rc   r   rC   rD   rX   r   r   rE   )r(   rc   rJ   msgcdc_optionscasess         r2   0test_parquet_content_defined_chunking_parametersr     sK   HHc5:&'E..D ?C	y	, N)/6J
udMN  )DAS	

 y)S	

 y)S	
E " RS]]:S1 	RNN5$[Q	R 	RR
 NN5$TB &,vFKNN5$[I &,vUVWKNN5$[I?N N&	R 	Rs   DD!D!D*	))rC   pyarrowr   r   pyarrow.parquetparquetr   pyarrow.tests.parquet.commonr   r   r   r   ImportErrorpandasr"   pandas.testingtestingr$   mark
pytestmarkr3   rL   rS   rU   r\   parametrizer   rg   large_memoryr   r   s3r   r   r   numpyr   r   r   r   r   r   r4   r2   <module>r      s_  $    L L [[  
 8 84(,6 8 80 8 8> B( %&	 
%&P &6  &6R B( &	 
& &  & 
&  
& &  &  
 
!*%+V\&JA  	B  NBs"   H2 
I  2H=<H= 	II