
    bi5                     Z   d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlZ		 d dl
mZ d dlmZ 	 d dlZd dlmZ d dlmZ ej,                  j                  Zej,                  j                   d        Zej,                  j                   d        Zej,                  j                   d        Zej,                  j                   d        Zej,                  j                   d	        Zej,                  j                   d
        Zd Zd Zej,                  j                   ej,                  jA                  dg d      d               Z!ej,                  j                   ej,                  jA                  ddg      d               Z"ej,                  j                   ej,                  jA                  dddg      d               Z#d Z$ej,                  jJ                  ej,                  jA                  dd      d               Z&d Z'd Z(d Z)d Z*d Z+d  Z,d! Z-y# e$ r dZY w xY w# e$ r dxZZY w xY w)"    N)mock)_write_table)alltypes_samplec                     t        d      } t        j                  j                  |       }t	        j
                         }t        ||dd       |j                  d       t        j                  |      }|j                  d       t        j                  ||      }t        j                  | |j                         j                                y )N'  sizesnappy2.6)compressionversionr   )metadata)r   paTablefrom_pandasioBytesIOr   seekpqread_metadataParquetFiletmassert_frame_equalread	to_pandas)dfa_tablebufr   filehs        b/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/pyarrow/tests/parquet/test_parquet_file.pytest_pass_separate_metadatar!   1   s     
e	$Bhh""2&G
**,C#8UCHHQK$HHHQKNN32E"ejjl4467    c                     d\  } }t        |       }t        j                  j                  |      }t	        j
                         }t        ||| |z  dd       |j                  d       t        j                  |      }|j                  |k(  sJ t        |      D cg c]  }|j                  |       }}t        j                  |      }t        j                  ||j!                                y c c}w Nr      r   r
   r   row_group_sizer   r   r   )r   r   r   r   r   r   r   r   r   r   num_row_groupsrangeread_row_groupconcat_tablesr   r   r   )	NKr   r   r   pfi
row_groupsresults	            r    test_read_single_row_groupr3   E   s     DAq	a	 Bhh""2&G
**,C#a!e%u6 HHQK		B!!!05a91"##A&9J9j)F"f..01 :s   C0c                     d\  } }t        |       }t        j                  j                  |      }t	        j
                         }t        ||| |z  dd       |j                  d       t        j                  |      }t        |j                  d d       }t        |      D cg c]  }|j                  ||       }}t        j                  |      }	t        j                   ||   |	j#                                t        |      D cg c]  }|j                  |||z          }}t        j                  |      }	t        j                   ||   |	j#                                y c c}w c c}w 	Nr%   r   r
   r   r'   r      )columns)r   r   r   r   r   r   r   r   r   r   listr7   r*   r+   r,   r   r   r   )
r-   r.   r   r   r   r/   colsr0   r1   r2   s
             r    -test_read_single_row_group_with_column_subsetr:   \   s6   DAq	a	 Bhh""2&G
**,C#a!e%u6 HHQK		B

2AD>CAhG"##At#4GJGj)F"T(F$4$4$67 FK1XN"##Atd{#;NJNj)F"T(F$4$4$67 H Os   #E$E)c                     d\  } }t        |       }t        j                  j                  |      }t	        j
                         }t        ||| |z  dd       |j                  d       t        j                  |      }|j                  |k(  sJ |j                  t        |            }t        j                  ||j                                y r$   )r   r   r   r   r   r   r   r   r   r   r)   read_row_groupsr*   r   r   r   )r-   r.   r   r   r   r/   r2   s          r    test_read_multiple_row_groupsr=   u   s    DAq	a	 Bhh""2&G
**,C#a!e%u6 HHQK		B!!!a)F"f..01r"   c                  @   d\  } }t        |       }t        j                  j                  |      }t	        j
                         }t        ||| |z  dd       |j                  d       t        j                  |      }t        |j                  d d       }|j                  t        |      |      }t        j                  ||   |j!                                |j                  t        |      ||z         }t        j                  ||   |j!                                y r5   )r   r   r   r   r   r   r   r   r   r   r8   r7   r<   r*   r   r   r   )r-   r.   r   r   r   r/   r9   r2   s           r    0test_read_multiple_row_groups_with_column_subsetr?      s    DAq	a	 Bhh""2&G
**,C#a!e%u6 HHQK		B

2ADa$7F"T(F$4$4$67 a$+>F"T(F$4$4$67r"   c                  n   d\  } }t        |       }t        j                  j                  |      }t	        j
                         }t        ||| |z  dd       |j                  d       t        j                  |      }|j                         dk(  sJ |j                  |j                  d d       dk(  sJ y )	Nr%   r   r
   r   r'   r   r   r&   )r   r   r   r   r   r   r   r   r   r   scan_contentsr7   )r-   r.   r   r   r   r/   s         r    test_scan_contentsrB      s    DAq	a	 Bhh""2&G
**,C#a!e%u6 HHQK		B&&&BJJrN+u444r"   c                 b   | dz  }t        j                  t        |             dt        |       d}t        j                  t
              5 }t        j                  |       d d d        j                  t              rt        j                  dk(  ry |j                  |       y # 1 sw Y   DxY w)N	directoryzCannot open for reading: path 'z' is a directorywin32)osmkdirstrpytestraisesIOErrorr   r   errisinstancePermissionErrorsysplatformmatch)tempdirpathmsgexcs       r    0test_parquet_file_pass_directory_instead_of_filerU      s    [ DHHSY+CI;6F
GC	w	 3
t
)cllg.EIIcN	 s   B%%B.c                     t        j                  t        j                  ddg      t        j                  ddg      gddg      } t        j                         }t	        j
                  | |       t	        j                  |j                               }|j                  j                  d      j                         ddgk(  sJ |j                  j                  d	      j                         ddgk(  sJ d
D ]E  }t        j                  t        t        f      5  |j                  j                  |       d d d        G y # 1 sw Y   RxY w)Nr&      foobarintsstrs)namesr      )r6   )r   tablearrayBufferOutputStreamr   write_tabler   getvaluereaderread_column	to_pylistrI   rJ   
ValueError
IndexError)r_   biofindexs       r    test_read_column_invalid_indexrl      s	   HHbhh1v&%(@A"F+-E



!CNN5#
s||~&A88",,.1a&88888",,.5%.@@@ (]]J
34 	(HH  '	( 	((	( 	(s   D>>E	
batch_size)i,    i  c           	      6   d}d}t        |      }| dz  }t        j                  j                  |      }t	        ||d|       t        j                  |      }|j                  d d |j                  dd  fD ]  }|j                  ||      }	t        d	||z   |      }
t        |	|
      D ]l  \  }}t        |||z         }t        j                  |j                         |j                  ||d d f   j                   d d |f   j#                  d
             n  y )Ni  rn   r   pandas_roundtrip.parquetr   r   
chunk_size
   )rm   r7   r   Tdrop)r   r   r   r   r   r   r   r7   iter_batchesr*   zipminr   r   r   iloclocreset_index)rQ   rm   
total_sizerr   r   filenamearrow_tablefile_r7   batchesbatch_startsbatchstartends                 r     test_iter_batches_columns_readerr      s    JJ	j	)B33H((&&r*Kh&( NN8$EJJsORZZ_5 $$
G$LQ
: 5zB6 	LE5j%*"45C!!!c	1%))!W*5AAtAL	r"   rr   rn   c                    t        dd      }| dz  }t        j                  j                  |      }|j                  j
                  J t        ||d|       t        j                  |      }d }t         ||            }d}t        |j                        D ]  }	t        j                  ||   j                         |j                  |	g      j                         j!                  d	             |d
z  }t        j                  ||   j                         j#                  d      |j                  |	g      j                         j$                  d	d  j#                  d             |d
z  } y )Nr   T)r	   categoricalrp   r   rq   c              3   ~   K   t        | j                        D ]!  }| j                  d|g      }|D ]  }|  # y w)N  )rm   r1   )r*   r)   rv   )rj   	row_groupr   r   s       r    get_all_batchesz1test_iter_batches_reader.<locals>.get_all_batches   sQ     q//0 	Inn%; % G
 ! 	s   ;=r   r   r]   rt   )r   r   r   r   schemapandas_metadatar   r   r   r8   r*   r)   r   r   r   r<   headr{   ry   )
rQ   rr   r   r}   r~   r   r   r   batch_nor0   s
             r    test_iter_batches_readerr      s]    
e	6B33H((&&r*K--999h&( NN8$E ?5)*GH5''( 
H'')!!1#&00277<	

 	A
H'')5545@!!1#&00277=II J 	
 	Ar"   
pre_bufferFTc                 @   d\  }}t        |      }t        j                  j                  |      }t	        j
                         }t        ||||z  dd       |j                  d       t        j                  ||       }|j                         j                  |k(  sJ y )Nr%   r   r
   r   r'   r   )r   )r   r   r   r   r   r   r   r   r   r   r   num_rows)r   r-   r.   r   r   r   r/   s          r    test_pre_bufferr     s     DAq	a	 Bhh""2&G
**,C#a!e%u6 HHQK	
	3B779"""r"   c                    | j                  d      }t        j                  ddgddgd      }t        j                  ||       t        |d      5 }t        j                  |      5 }|j                          |j                  rJ |j                  rJ 	 ddd       |j                  rJ j                  rJ 	 ddd       j                  sJ j                  sJ t        j                  |      5 }|j                          |j                  rJ 	 ddd       |j                  sJ y# 1 sw Y   xY w# 1 sw Y   }xY w# 1 sw Y   0xY w)z
    Unopened files should be closed explicitly after use,
    and previously opened files should be left open.
    Applies to read_table, ParquetDataset, and ParquetFile
    zfile.parquetr   r]   )col1col2rbN)	joinpathr   r_   r   rb   openr   r   closed)rQ   fnr_   rj   ps        r    #test_parquet_file_explicitly_closedr   "  s"    
		.	)BHHq!fq!f56ENN5" 
b$ 1^^A 	 !FFHxx<xx<x	  88|88|8 88O888O8 
	 q	88|8 88O8	  	   s0   D5&-D)$D52E)D2	.D55D>E
use_uri)TFc                    | \  }}}|r|n|f}|ri nt        |      }t        j                  dt        d      i      }t	        j
                  |||       t	        j                  |i |}|j                         |k(  sJ |j                  rJ |j                          |j                  sJ t	        j                  |i |5 }	|	j                         |k(  sJ |	j                  rJ 	 d d d        	j                  sJ y # 1 sw Y   xY w)N
filesystemars   )
dictr   r_   r*   r   rb   r   r   r   close)
s3_example_fsr   s3_fss3_uris3_pathargskwargsr_   parquet_filerj   s
             r    !test_parquet_file_with_filesystemr   ?  s     +E67F7,DR 6FHHc59%&ENN5'e4>>4262L%'''""""		(	( Avvx5   88|8 88O8 s   $C==Dc                  F   t        j                  dt        j                  g d      i      } t        j                         }t        | |       |j                  d       t        j                  |      j                         j                  d   j                  d   j                  }|j                  dk(  sJ |j                  J |j                  du sJ |j                   dk(  sJ |j"                  sJ |j$                  dk(  sJ |j&                  sJ t)        |      dk(  sJ y )	Nvalue)r^   N   r   r]   Fr^   r   zmarrow.ArrayStatistics<null_count=1, distinct_count=None, min=-1, is_min_exact=True, max=3, is_max_exact=True>)r   r_   r`   r   r   r   r   r   r   r   r7   chunks
statistics
null_countdistinct_countis_distinct_count_exactrx   is_min_exactmaxis_max_exactrepr)r_   r   r   s      r    test_read_statisticsr   V  s   HHgrxx678E
**,CHHQK$))+33A6==a@KKJ  A%%%$$,,, --666>>R"""">>Q""""
 !< = = =r"   c                     |  d}t        j                  |      j                         }|j                  ddgk(  sJ |d   j	                         g dk(  sJ y )Nz/unknown-logical-type.parquetzcolumn with known typezcolumn with unknown type)s   unknown string 1s   unknown string 2s   unknown string 3)r   r   r   column_namesrf   )parquet_test_datadir	test_filer_   s      r     test_read_undefined_logical_typer   l  sh    '((EFINN9%**,E":<V!WWWW+,668 =   r"   c                     t        j                  d       t        j                  dt	        d      i      } t        j                  | d       t        j                  d      }| j                  |      sJ d}t        j                  t        j                  |      5  t        j                  d       d d d        y # 1 sw Y   y xY w)Nfsspecr   rs   fsspec+memory://example.parquetz#Unrecognized filesystem type in URIrP   znon-existing://example.parquet)rI   importorskipr   r_   r*   r   rb   
read_tableequalsrJ   ArrowInvalid)r_   table2rS   s      r     test_parquet_file_fsspec_supportr   x  s    
!HHc59%&ENN5;<]]<=F<<
/C	rc	2 8
678 8 8s   B::Cc                  x   	 ddl m}  t        j                  dt        d      i      }         }|j                  dd       |j                  d      sJ d	}t        j                  |d
|       t        j                  d      }|j                  |      sJ y # t        $ r t        j                  d       Y w xY w)Nr   MemoryFileSystemz&fsspec is not installed, skipping testbrs   z/path/to/prefixT)create_parentszfsspec+memory://path/to/prefixz	b.parquetr   z(fsspec+memory://path/to/prefix/b.parquet)fsspec.implementations.memoryr   ImportErrorrI   skipr   r_   r*   rG   existsr   rb   r   r   )r   r_   fsfs_strr   s        r    <test_parquet_file_fsspec_support_through_filesystem_argumentr     s    >B HHc59%&E		BHHtH499&'''-FNN5+&9]]EFF<<  ><=>s   B B98B9c                     	 ddl m}  t        j                  d      } |_        t        j                  j                  dd|i      5  d}t        j                  dt        d      i      }t        j                  ||       t        j                   |      }|j#                  |      sJ 	 d d d        y # t        $ r t        j                  d       Y w xY w# 1 sw Y   y xY w)	Nr   r   z3fsspec is not installed, skipping Hugging Face testhuggingface_hubzsys.modulesz'hf://datasets/apache/arrow/test.parquetr   rs   )r   r   r   rI   r   types
ModuleTypeHfFileSystemr   patchr   r   r_   r*   r   rb   r   r   )r   fake_hf_moduleurir_   r   s        r    $test_parquet_file_hugginface_supportr     s    KB %%&78N"2N	):N(K	L $7#uRy)*
uc"s#||F###$ $  KIJK
$ $s   B1 A!C1CCCc                  
   	 dd l } t        j                  d       t	        j
                  d      }t        j                  t        |      5  t        j                  d       d d d        y # t        $ r Y Zw xY w# 1 sw Y   y xY w)Nr   z"fsspec is available, skipping testzI`fsspec` is required to handle `fsspec+<filesystem>://` and `hf://` URIs.r   r   )	r   rI   r   r   reescaperJ   r   r   )r   rS   s     r    1test_fsspec_uri_raises_if_fsspec_is_not_availabler     st    : 	89
))SUC	{#	. 9
789 9  9 9s   A* A9*	A65A69Bc                 P   t        j                  g       }t         j                  j                  g |      }| dz  }t	        j
                  ||       t	        j                  |      }t        j                  t              5  |j                  d       d d d        y # 1 sw Y   y xY w)N)r   zempty_file.parquetr   )rm   )r   r   r   from_batchesr   rb   r   rI   rJ   rg   rv   )rQ   r   empty_tableparquet_file_pathr   s        r    (test_iter_batches_raises_batch_size_zeror     s    YYr]F((''6':K"66NN; 12>>"34L	z	" 0!!Q!/0 0 0s    BB%).r   rF   r   rN   r   rI   unittestr   pyarrowr   pyarrow.parquetparquetr   pyarrow.tests.parquet.commonr   r   pandaspdpandas.testingtestingr   r   mark
pytestmarkr!   r3   r:   r=   r?   rB   rU   rl   parametrizer   r   r   r   s3r   r   r   r   r   r   r   r    r"   r    <module>r      sA  $ 
 	 	 
     9< [[  
 8 8& 2 2, 8 80 2 2( 8 8, 5 5 

( '89 : . v.( / (V t}5# 6 #: M2 3 *=,	
8 $$ 90m  	B  NBs"   H H HH	H*)H*