
    bi                    '   d dl mZ d dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
 d dlmZ d dlZd dlZd dlZd dlmZ d dlZd dlmZ d dlZd dlmZmZ d dlmZ d d	lmZm Z  d d
l!m"Z" 	 d dl#Z$ ee$jJ                        Z&	 d dl(m)Z* e* Z+dZ,ejZ                  j]                  e+e,      Z/dZ0dZ1 ejd                   e3e0      D  cg c]
  } | dz  dz   c}  e3e0      D  cg c]  } | dz  	 c} d ejh                   e3e0      D  cg c]  } d| z  	 c} d            Z5 ejl                  e5e1      Z7 ejp                   ejr                  de/g      g      Z:e:d        Z;e:d        Z<e:d        Z=e/d         Z>ejZ                  j]                  e$d!      d"        Z?d# Z@d$ ZAejZ                  j                  d%dd&g      d'        ZCejZ                  j                  d(dd&g      d)        ZDd* ZEd+ ZFd, ZGd- ZHd. ZId/ ZJd0 ZKd1 ZLd2 ZMd3 ZNd4 ZOd5 ZPd6 ZQd7 ZRd8 ZSe/d9        ZTe/d:        ZUd; ZVejZ                  j                  d<dd&g      d=        ZWd> ZXe/d?        ZYd@ ZZe/ejZ                  j                  dAdd&g      ejZ                  j                  dBdd&g      dC                      Z[dD Z\dE Z]ejZ                  j                  d<dd&g      dF        Z^ejZ                  j                  d<dd&g      ejZ                  j                  dG ej                  dHdIdJK       ej                  dLM      f ej                  d dNdL      dOfg      dP               ZbdQ ZcejZ                  j                  d<dd&g      dR        Zde/ejZ                  j                  dS               Zfe/dT        ZgdU ZhdV ZiejZ                  j                  dW ejd                  dXg dYi      i i f ejd                  dXg dZi      i i f ejd                  dXg d[i      i i f ejd                  dXg d\i      d]e$rdX e$j                         indii f ejd                  dX ej                  g d^      i      i d_dXgif ejd                  dX ej                  g d`      i      i d_dXgif ejd                  dX el emej                  g da            i      i i f ejd                  dXg dai      j                  db      i i f ejd                  dXg dYi      j                  db      i i f ejd                  dXg dYi      j                  dc      i i f ejd                  dXg dYi      j                  dd      i i f ejd                  dXg dai      j                  de      i i f ejd                  dXg dai      j                  df      i i f ejd                  dXg dai      j                  dg      i i f ejd                  dXg dYi      j                  dh      i i f ejd                  dXg dYi      j                  di      i i f ejd                  dXg djig dY      i i f ejd                  dXg dki ejh                  g dldm            i i f ejd                  g dlg dYd      i i f ejd                  g dlg dYddndXgo      i i f ejd                  dg dYi      i i f ejd                  dXg dpi      i i f ejd                  dqg dri      i i f ejd                  dsg dri      i i f ejd                  dtg dri      i i fg      ejZ                  j                  du               Zpdv ZqejZ                  j                  dw        Zsdx ZtejZ                  j                  dyd&dg      dz        Zue/ejZ                  j                  d{               Zve/ejZ                  j                  d(dd&g      ejZ                  j                  d]d|d}g      d~                      Zwe/d        Zxe/d        Zyd Zzd Z{e/ejZ                  j                  dddgg      d               Z|d Z}d Z~d Ze/d        Zd Ze/d        ZejZ                  j                  dddg      d        Ze/ejZ                  j                  ddd&g      d               Zd Zd Zd ZejZ                  j                  dg d      d        ZejZ                  j                  dg d      d        Z ejp                  ddddddddddgdgddddddddddddgdgdddddiddddgddddddddddddgdgddg      d        Ze/d        Zd Zd Zd Zd Ze/d        Zd Zd Ze/ejZ                  j                  d ejd                  dXg di       ejd                  dXg dZi       ejd                  dXg d[i       ejr                   ejd                  dX ej                  g d^      i             ejr                   ejd                  dX ej                  g d`      i             ejd                  dX el emej                  g d            i       ejd                  dX el emej                  g da            i       ejd                  dXg dai      j                  db       ejd                  dXg dYi      j                  dc       ejd                  dXg dYi      j                  dd       ejd                  dXg dYi      j                  dh       ejd                  dXg dYi      j                  di       ejd                  dXg djig dY       ejd                  dXg di ejh                  g ddm             ejd                  g dlg dYd       ejd                  g dlg dYddndXgo       ejd                  dg dYi       ejd                  dXg dpi       ejd                  dqg dri       ejd                  dsg dri       ejd                  dtg dri      g      d               Zd Zd Zd Zd ZejZ                  j                  dd&dg      ejZ                  j                  dd&dg      d               Zd ZejZ                  j                  d]d|dg      d        Zd Zd Ze/d        Ze/ejZ                  j                  ddLdg      ejZ                  j                  dd&dg      dÄ                      Ze/ejZ                  j                  dddg      dƄ               Ze/ejZ                  j                  d ejr                  dgdɄ dejZ                  j                        dgd̄ dfg      ejZ                  j                  dd&dg      d΄                      Ze/ejZ                  j                  dd&dg      dτ               Ze/dЄ        Zdф Zd҄ Zdӄ Ze/ejZ                  j                  dd&dg      ejZ                  j                  dddg      ejZ                  j                  dddg      dل                             Ze/ejZ                  j                  dddg      d܄               Ze/ejZ                  j                  dd&dg      ejZ                  j                  dg dݢ      dބ                      Ze/ejZ                  j                  dd&dg      ejZ                  j                  dg dߢ      d                      Zd ZejZ                  j                  ddd&g      d        Ze/d        Ze/d        Ze/d        ZejZ                  j_                  edd&      ZejZ                  j                  dddg ejr                  g de      g      d        Ze/ejZ                  j                  ddg ejr                  ddge      g      d               Ze/ed               Zd Zd Zd Zd Zd Zd Ze/d        Ze/ejZ                  j                  dd&dg      d               Ze/ejZ                  j                  dddg      d               Ze/d        Ze/ejZ                  j                  dd&dg      d               Zd Ze/d        Ze/d        Ze/ejZ                  j                  dddg      d               Zd ZÐd  ZĐd ZŐd ZejZ                  j_                  ej                  dk(  d      d        ZȐd Zɐd Ze/d        Ze/d	        Z̐d
 Z͐d ZejZ                  j                  dg d      d        ZejZ                  j                  dd&dg      ejZ                  j                  ddd g      d               Ze/ejZ                  j                  dd      d               Zѐd ZҐd ZӐd Ze/d        ZՐd Z֐d Ze/d        ZejZ                  j                  ddLhdLgdfd      d        Zِd ZejZ                  j                  dd gd ggd!d gd!d ggfd"      d#        ZejZ                  j                  d$        Ze/ejZ                  j                  ejZ                  j                  ejZ                  j]                  e&j                  dk  d%      d&                             ZejZ                  j                  d'      e/d(               Zd) Zd* Ze/ejZ                  j                  d+d&dg      d,               Ze/d-        Ze/ejZ                  j                  d.d/dg      d0               Ze/d1        Ze/d2        Ze/ejZ                  j                  d3dd4ggg      d5               Ze/d6        Ze/d7        Zd8 Zy# e'$ r dZ$ ed      Z&Y ?w xY w# e'$ r dZ*Y Fw xY wc c} w c c} w c c} w (9      )annotationsN)date)Decimal)	MagicMock)Version)PANDAS_GE_202PANDAS_GE_300)
get_engine)	assert_eqpyarrow_strings_enabled)natural_sort_keyF0zpyarrow not foundreason(            g      @xy
   myindexnameindexnpartitionspyarrow)marks)paramsc                    | j                   S Nparamrequests    _/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/dask/dataframe/io/tests/test_parquet.pyenginer*   >       ==    c                    | j                   S r$   r%   r'   s    r)   write_enginer.   C   r+   r,   c                    | j                   S r$   r%   r'   s    r)   read_enginer0   H   r+   r,   c                 L    ddl m}  t        d      | u sJ t        d      | u sJ y )Nr   ArrowDatasetEngineautor    )dask.dataframe.io.parquet.arrowr3   r
   r2   s    r)   test_get_engine_pyarrowr6   M   s-    Bf!3333i $6666r,   zpyarrow are installedc                 z    t        j                  t        d      5  t        d       d d d        y # 1 sw Y   y xY w)Nz`pyarrow` not installedmatchr4   )pytestraisesRuntimeErrorr
    r,   r)   test_get_engine_no_enginer>   U   s.    	|+D	E 6  s   1:c                 D    ddl m}   G d d|       }t        |      |u sJ y)zTest extensibility by dask_cudfr   )Enginec                      e Zd Zy)-test_get_engine_third_party.<locals>.MyEngineN)__name__
__module____qualname__r=   r,   r)   MyEnginerB   _   s    r,   rF   N)dask.dataframe.io.parquet.utilsr@   r
   )r@   rF   s     r)   test_get_engine_third_partyrH   [   s&    66  h8+++r,   c                 v    t        j                  t              5  t        d       d d d        y # 1 sw Y   y xY w)N{   )r:   r;   
ValueErrorr
   r=   r,   r)   test_get_engine_invalidrL   e   s*    	z	" 3  s   /8has_metadataTc           	     @   t        |       }t        j                  t        j                  dt        j
                        t        j                  dt        j                        t        j                  dt        j                        t        j                  j                  g dd      j                  d      d      }t        j                  |d      }|rd	d
ini } |j                  |fd|d| t        j                  |      }d|v |k(  sJ d|v |k(  sJ d|v sJ t        j                   |d|      }	t#        |	j$                        dkD  sJ |	j'                  d      j)                         }
|j*                  D ]  }||   |
|   k(  j-                         rJ  y )N  dtypehelloyopeoplesizeOi32i64fbhello  	chunksizewrite_metadata_fileTFwrite_indexr*   _common_metadata	_metadatapart.0.parquetr   r*      sync	scheduler)strpd	DataFramenparangeint32int64float64randomchoiceastypeddfrom_pandas
to_parquetoslistdirread_parquetlen	divisionscomputereset_indexcolumnsall)tmpdirr.   r0   rM   tmpdatadfkwargsfilesdf2outcolumns               r)   
test_localr   j   sj   
f+C<<99T299T24rzz2ii&&'@t&LSS		
	D 
	,B.:#T*FBMM#H5HHJJsOE%'L8885 \111u$$$
//#U;
?Cs}}!!!
+++
'
3
3
5C** 3VF+002223r,   r   c                    t        |       }t        j                  g dg dd      d d }|r|j                  dd      }t	        j
                  |d	      }|j                  |||d
       t	        j                  ||      }t        ||       y )Nabr      r      r   r   r   r   Tdrop   r   rc   r*   ra   r*   	rl   rm   rn   	set_indexrw   rx   ry   r|   r   )r   r.   r0   r   fnr   ddfread_dfs           r)   
test_emptyr      sy    	VB	O)<	=bq	AB\\#D\)
..
+CNN25SWNXoob5Gc7r,   c                   t        |       }t        j                  g dg dd      }|j                  dd      }t	        j
                  |d      }|j                  ||	       t	        j                  |dg|d
      }t        ||       y )Nr   r   r   r   Tr   r   r   r   r   r*   calculate_divisionsr   )r   r.   r0   r   r   r   r   s          r)   test_simpler      su    	VB	O)<	=B	c	%B
..
+CNN2lN+oo
3%G c7r,   c                   t        |       }t        j                  g dg dd      }|j                  dd      }t	        j
                  |d      }|j                  ||d	d	
      j                          t        j                  |      }d|vsJ t	        j                  t        j                  j                  |d      dg|d      }t        ||       y )Nr   r   r   r   Tr   r   r   F)r*   r   ra   re   	*.parquetr   )rl   rm   rn   r   rw   rx   ry   r   rz   r{   r|   pathjoinr   )r   r.   r0   r   r   r   r   r   s           r)   test_delayed_no_metadatar      s    	VB	O)<	=B	c	%B
..
+CNN
<E  giJJrNEe###oo
R%e 	G c7r,   c                   t        |       }t        j                  ||       t        j                  j                  t        j                  j                  |d            r3t        j                  t        j                  j                  |d             t        j                  |      }d|vsJ t        j                  t        j                  j                  |d      |dd      }t        t        |       y )Nr   re   r   r   Tr*   r   r   rl   r   ry   rz   r   existsr   unlinkr{   rw   r|   r   )r   r.   r0   tmp_pathr   ddf2s         r)   test_read_globr      s    6{HNN8LN1	ww~~bggll8[9:
		"'',,x56JJx Ee###??
X{+ 	D c4r,   c                    t        |       }t        j                  |d|       t        j                  ||dd      }t        t        |dd       y )NFrb   r   check_indexcheck_divisionsrl   r   ry   rw   r|   r   r   r.   r0   r   r   s        r)   test_calculate_divisions_falser      sG    6{HNN8|ND??!	D c4UEBr,   c                     t                t        j                   |       t         fdt	        j
                         D        t              }t        j                  ||dd      }t        t        |       y )Nr   c              3     K   | ]5  }|j                  d       s"t        j                  j                  |       7 yw)re   N)endswithrz   r   r   ).0r\   r   s     r)   	<genexpr>z!test_read_list.<locals>.<genexpr>   s4      	
::k* GGLL#	
s   ;>)keyr   Tr   )
rl   r   ry   sortedrz   r{   r   rw   r|   r   )r   r.   r0   r   r   s   `    r)   test_read_listr      sf    [FNN6,N/	
ZZ'	

 E ??kD c4r,   c                   t        |       }t        j                  ||       t        t	        j
                  |g |d      t        g           t        t	        j
                  |g |d      t        g    j                         d       t        t	        j
                  |dg|d      t        dg          t        t	        j
                  |dg|d      t        dg   j                         d       y )Nr   Tr   r*   r   Fr   r   rl   r   ry   r   rw   r|   clear_divisionsr   r.   r0   r   s       r)   test_columns_auto_indexr      s    	VBNN2lN+ 
B{PTUB 
B{PUVB! 
kt	
 	SE
	 
ku	
 	SE
""$r,   c           	        t        |       }t        j                  ||       t        t	        j
                  |g |dd      t        g           t        t	        j
                  |g |dd      t        g    j                         d       t        t	        j
                  |ddg|d      t        dg          t        t	        j
                  |ddg|d      t        dg   j                         d       t        t	        j
                  |ddd	g|d      t               t        t	        j
                  |ddd	g|d      t        j                         d       y )
Nr   r   T)r   r*   r   r   Fr   r   r   r   r*   r   r   r   r   s       r)   test_columns_indexr     s[   	VBNN2lN+ 
 $	
 	B	 
 %	
 	B!
 
E $	
 	SE
	 
E %	
 	SE
""$
 
#J $	
 		 
#J %	
 	
r,   c                   t        |       }t        j                  ||       t        j                  t
        t        f      5  t        j                  |dg|       d d d        t        j                  t        t        f      5  t        j                  |dgt        t        j                        z   |       d d d        y # 1 sw Y   exY w# 1 sw Y   y xY w)Nr   nonsenser   r*   )rl   r   ry   r:   r;   rK   KeyErrorrw   r|   	Exceptionlistr   r   r*   r   s      r)   test_nonsense_columnr   a  s    	VBNN2fN%	
H-	. A
ZL@A		8,	- U
ZL43D$DVTU UA AU Us   C 4C C	Cc           	     t   t        |       }t        j                  ||       t        j                         }t	        t        j                  |d|d      |dd       t	        t        j                  |dddg|d      |ddg   dd       t	        t        j                  |dd	dg|d      |d	dg   dd       y )
Nr   FTr   r   r   r   r   r   )rl   r   ry   r   r   rw   r|   )r   r.   r0   r   r   s        r)   test_columns_no_indexr   j  s    	VBNN2lN+??D
 
%QUV	 
#J $	
 	c3Z 
$ $	
 	ir,   c                    t        |       }t        j                  ||d       t        j                  ||d      }|j
                  j                  J |j                  rJ y )NFr*   rc   r*   r   )rl   r   ry   rw   r|   r   r   known_divisions)r   r.   r0   r   r   s        r)   !test_calculate_divisions_no_indexr     sU    	VBNN2lN>	Ku	=B88==   !!!!!r,   c           	     v   t         j                  j                  t        |       d      }t        j
                  j                  t        j                  d      t        j                  d      dz   gddg      }t	        j                  t        j                  j                  dd      dd	g|
      }|j                  d      }t        j                  t        j                   j#                  |j                         d      |       t%        j&                  |||j(                        }t+        ||       t%        j&                  |d||j(                        }t+        ||d          t%        j&                  |dd	gddg|      }t+        ||j-                  dd	g      ddg          t%        j&                  |d|      }t+        ||       t%        j&                  |d	gdg|      }t+        ||j-                  d      d	g          t%        j&                  |dd	gdg|      }t+        ||j-                  d      dd	g          t%        j&                  |ddgdg|      }t+        ||j-                  d      ddg          t%        j&                  |ddd	g|      }t+        ||dd	g          dD ]=  }t%        j&                  ||ddg|      }t+        ||j-                  |      ddg          ? dD ]E  }t/        j0                  t2        t4        f      5  t%        j&                  ||ddg|      }d d d        G dd|j-                  d      fdd	|fdd|dg   fdd|j-                  d      dg   fdd	|j-                  d      ffD ].  \  }}	}
t%        j&                  |||	|      }t+        ||
|	          0 y # 1 sw Y   xY w)Ntest.parquetr   rh   x0x1namesr   r   r   )r   r   Fr   preserve_indexr   )r   r*   r   )r   r   r*   rg   )r   r   r*   )r   r   )r   r   )rz   r   r   rl   rm   
MultiIndexfrom_arraysro   rp   rn   rt   randnr   pqwrite_tablepaTablerx   rw   r|   r   r   r   r:   r;   rK   r   )r   r*   r   r   r   r   r   dindcolsol_dfs              r)   #test_columns_index_with_multi_indexr     sZ   	c&k>	2BMM%%	2		")*4, & E 
biioob!,sCj	NB
..e.
$CNN288''(8'OQST
//"V5;;
?Cc2
CekkJAaC
3*tTl6RAaSz*D$<89 	%7Aa
SE#vFAas#SE*+
S#JtfVLAat$c3Z01 	T3KvfMAat$dC[12 	%$VLAadC[!" 8OOBedC[P!S]]5)4+678
  U]]J12 	U%$VTA	U 	UU 
tS]]4()	S	c4&k"	dCMM#&v./	c3==%& "S& OOBc3vF!VC[!"		U 	Us   N//N8	c                    t        |       }t        j                  g dg dd      }t        j                  |d      }|j                  ||       t        j                  ||      }t        ||d       y )	Nrh   r      r   r   r   r   r   Fr   )rl   rm   rn   rw   rx   ry   r|   r   )r   r.   r0   r   r   r   r   s          r)   test_no_indexr     sZ    	VB	II6	7B
..
+CNN2lN+??2k2Db$E*r,   c                   t        |       }t        j                  ||       t        j                  |dgd|d      }t        t        dg   |       t        j                  |dd|d      }t        t        j                  |       y )Nr   r   r   T)r   r   r*   r   )rl   r   ry   rw   r|   r   r   )r   r*   r   r   s       r)   test_read_seriesr     sq    	VBNN2fN%??
SE6tD c3%j$??
CyTD ceeTr,   c                   t        |       }t        j                  |       fd}t         ||      j                        t         ||      j                        k(  sJ t         ||      j                        t         ||dg      j                        k7  sJ t         ||d      j                        t         ||dg      j                        k(  sJ y )Nr   c                4    t        j                  | fdi|S )Nr*   rw   r|   )r   r   r*   s     r)   readztest_names.<locals>.read  s    r;&;F;;r,   r   r   r   )rl   r   ry   setdask)r   r*   r   r   s    `  r)   
test_namesr     s    	VBNN2fN%< tBx}}T"X]]!3333tBx}}T"se%<%A%A!BBBBtB',,-T"se5L5Q5Q1RRRRr,   c                    t        | j                  d            }t        j                         }d|j                  _        |j                  ||       t        j                  |d|      }t        ||       y )Nr   r   r   rg   
rl   r   r   copyr   r   ry   rw   r|   r   )r   r.   r0   r   dfpr   s         r)   test_roundtrip_from_pandasr    sW    	V[[(	)B
'')CCIINNN2lN+
//"GK
@Cc3r,   c                
   t        j                  t        j                  ddt         j                  ddgd      t        j                  dt         j                  dddgd	      t        j                  d
ddt         j                  dgd      t        j                  ddddt         j                  gd      d      }t	        j
                  |d      }|j                  |        t	        j                  |       }t        ||       y)zsTest round-tripping nullable extension dtypes. Parquet engines will
    typically add dtype metadata for this.
    rh   r   r   r   Int64rP   TFboolean皙?皙?333333?皙?Float64r   r   cr   stringr   r   r
  r   r   N)	rm   rn   SeriesNArw   rx   ry   r|   r   )r   r   r   r   s       r)   test_roundtrip_nullable_dtypesr    s    
 
Aq"%%A.g>D"%%e<INCc255#6iHCc36hG		

B ..
+CNN8??8$Db$r,   c                ,   t        j                  t        j                  ddt         j                  ddgd      t        j                  dt         j                  dddgd	      t        j                  d
ddt         j                  dgd      t        j                  ddddt         j                  gd      d      }t	        j
                  |d      }|j                  | |       t        j                         t        j                         i}t	        j                  | ddd|j                  i      }|j                  dt        j                         i      }|j                  j                  t        j                               |_        t        ||       y )Nrh   r   r   r   r  rP   TFr  r  r  r  r  r	  r   r   r
  r   r  r  r   r   r    numpy_nullabletypes_mapperr*   dtype_backendarrow_to_pandas)rm   rn   r  r  rw   rx   ry   r   rr   Float32Dtyper|   getrv   r   r   )r   r*   r   r   r  resultexpecteds          r)   *test_use_nullable_dtypes_with_types_mapperr    s2    
Aq"%%A.g>D"%%e<INCc255#6iHCc36hG		

B ..
+CNN8FN+ 	
BOO%L __&')9)9:	F yy#r012H^^**2??+<=HNfhr,   c                   t        |       }t        j                  dg ddz  id      }t        j                  |d      }t        j
                  |||       t        j                  |d|	      }|j                         j                  j                  j                  j                         g dk(  sJ t        j                  |dg|	      }|j                         j                  j                  j                  j                         g dk(  sJ t        j                  |g |	      }|j                  d d
 j                          |j                  |j                  j                         k(  j                         sJ y )Nr   r   r   r
  d   categoryrP   r   r   r   
categoriesr*   rO   )rl   rm   rn   rw   rx   ry   r|   r   r   catr   tolistlocr   r   r.   r0   r   r   r   r   s          r)   test_categoricalr%  7  s   
f+C	sOc12*	EB
..
+CMM#s<0??33{CD<<>**113FFF??3C5ED<<>**113FFF ??32kBDHHUdODDDFFNN$$))+++r,   metadata_filec           	        t        |       }t        j                  t        j                  dt        j
                        t        j                  dt        j                        t        j                  dt        j                        t        j                  j                  g dd      j                  d      d      }d|j                  _        t        |      dz  }t        j                  |j                   d	| d
      }t        j                  |j                   |d	 d
      }|j#                  |||       |r=t%        t        | j'                  d            d      5 }|j)                         }	d	d	d	       |j#                  |d|       |rDt%        t        | j'                  d            d      5 }|j)                         }
d	d	d	       
	k7  sJ t        j*                  ||      }t-        ||       y	# 1 sw Y   xY w# 1 sw Y   @xY w)5Test that appended parquet equal to the original one.rO   rP   rR   rV   rX   rY   r   r   Nr  r_   r*   ra   re   rbTappendr*   r   )rl   rm   rn   ro   rp   rq   rr   rs   rt   ru   rv   r   r   r}   rw   rx   ilocry   openr   r   r|   r   )r   r*   r&  r   r   halfddf1r   r\   	metadata1	metadata2ddf3s               r)   test_appendr4  J  s    f+C	99T299T24rzz2ii&&'@t&LSS		
	
B BHHMr7a<D>>"''%4.C8D>>"''$%.C8DOOCMOJ#fkk+./6 	!!I	!OOCVO4#fkk+./6 	!!I	!I%%%??3v.Db$	! 	!	! 	!s    G73H7H Hc           	        t        |       }t        j                  t        j                  dt        j
                        t        j                  dt        j                        t        j                  dt        j                        t        j                  j                  g dd      j                  d      d      }d|j                  _        t        |      dz  }t        j                  |j                   d	| d
      }t        j                  |j                   |d	 d
      }|j#                  |d|       |j#                  |d|       t        j$                  ||      }t'        ||       y	)r(  rO   rP   rR   rV   rX   rY   r   r   Nr  r_   Tr+  r   )rl   rm   rn   ro   rp   rq   rr   rs   rt   ru   rv   r   r   r}   rw   rx   r-  ry   r|   r   )r   r*   r   r   r/  r0  r   r3  s           r)   test_append_creater6  k  s   6{H	99T299T24rzz2ii&&'@t&LSS		
	
B BHHMr7a<D>>"''%4.C8D>>"''$%.C8DOOHT&O9OOHT&O9??8F3Db$r,   c           	     N   t        |       }t        j                  t        j                  ddd      t        j                  ddd      t        j                  ddd      d      }d	|j
                  _        t        j                  t        j                  ddd      t        j                  ddd      t        j                  d
dd      d      }d	|j
                  _        |d   j                  d      |d<   t        j                  |j                  |j
                  d   df<   |d   j                  d      |d<   t        j                  |d      }t        j                  |d      }t        j                  ||dgd       t        j                  ||dgddd       t        j                  |dd	d      j                         }|j                  j                  d      |d<   t!        |j#                  d      t        j$                  ||g      |j&                     d       y )Nr   r   rr   rP      r  n   )latlonvaluer   x      r:  r  rh   r   r;  r    partition_onr*   T)r@  r,  ignore_divisionsr*   r   r<  Fr   )rl   rm   rn   ro   rp   r   r   rv   nanr#  rw   rx   ry   r|   r   r;  r   sort_valuesconcatr   )r   r   df0df1dd_df0dd_df1r   s          r)   test_append_with_partitionrI    s   
f+C
,,99Q'299R73YYsCw7	
C CIIN
,,99R7399R73YYsCw7	
C CIIN U""7+CJ#%66CGGCIIaL% U""7+CJ^^CQ/F^^CQ/FMM&#UGIFMMW //IW$gi  (CJ "))S#J"7"DRWr,   c                   t        |       }t        j                  t        j                  j                  d      t        j                  j                  g dd      t        j                  j                  g dd      d      }t        j                  |d      }|j                  |dg|       t        j                  ||      }t        |j                  j                  j                        h d	k(  sJ y )
N2   r   r   zrV   r  r   r   r?  r   >   r   r   rM  rl   rm   rn   ro   rt   randru   rw   rx   ry   r|   r   r   r!  r   r   r*   r   r   r   s        r)   test_partition_on_catsrQ    s    
f+C
#!!/!;!!/!;	
	A 	q!ALLC5L8	V	,Brttxx""#666r,   metastatsc                   t        |       }t        j                  t        j                  j                  d      t        j                  j                  g dd      t        j                  j                  g dd      d      }t        j                  |d      }|j                  |dg|       t        j                  ||      }t        |j                  j                  j                        h d	k(  sJ y )
NrK  rL  rV   r  r   r   )r@  ra   r   >   r   r   rM  rN  )r   rS  rR  r   r   r   s         r)   test_partition_on_cats_pyarrowrV    s     f+C
#!!/!;!!/!;	
	A 	q!ALLC5dLC	%	8Brttxx""#666r,   c                   t        |       }t        j                  t        j                  j                  d      t        j                  j                  g dd      t        j                  j                  g dd      d      }t        j                  |d      }|j                  |dg|d       t        j                  ||d	d
      }t        |j                  j                  j                        h dk(  sJ y )NrK  rL  rV   r  r   r   F)r@  r*   ra   Trh   r*   r   metadata_task_size>   r   r   rM  rN  rP  s        r)    test_partition_parallel_metadatarZ    s     f+C
#!!/!;!!/!;	
	A 	q!ALLC5ULS	F
B rttxx""#666r,   c                P   t        |       }t        j                  t        j                  j                  d      t        j                  j                  g dd      t        j                  j                  g dd      d      }t        j                  |d      }|j                  |ddg|       t        j                  ||	      }t        |j                  j                  j                        h d
k(  sJ t        |j                  j                  j                        h d
k(  sJ t        j                  |ddg|      }t        |j                  j                  j                        h d
k(  sJ d|j                   vsJ t#        ||j%                                t        j                  |d|      }t        |j&                  j                        h d
k(  sJ d|j                   vsJ t        j                  |d|      }t        |j                  j                        h d
k(  sJ y )NrK  rL  rV   r  r   r   r
  r?  r   >   r   r   rM  r   r   rg   )rl   rm   rn   ro   rt   rO  ru   rw   rx   ry   r|   r   r   r!  r   r
  r   r   r   r   rP  s        r)   test_partition_on_cats_2r\    s   
f+C
#!!/!;!!/!;	
	A 	q!ALLC:fL=	V	,Brttxx""#666rttxx""#666	sCj	@Brttxx""#666bjj   b"**,	C	7Brxx""#666bjj   	c&	9Brvv  !_444r,   c           	        t        | j                  d            }t        j                  t	        j
                  dt        j                        t	        j
                  dt        j                        t	        j
                  dt        j                        t        j                  j                  g dd      j                  d      d      }t        |      dz  }t        j                  |j                  d	| d
      }t        j                  |j                  |d	 d
      }|j!                  |||       t#        j$                  t&              5 }|j!                  |dd|       d	d	d	       dt        j(                        v sJ t        | j                  d            }|j!                  |d||       |j!                  |dd|       t        j*                  |d|      }	t-        |j/                  d      |	       y	# 1 sw Y   xY w)z#Test append with write_index=False.ztmp1.parquetrO   rP   rR   rV   rX   rY   r   Nr  r_   r)  FT)rc   r,  r*   Appended columnsztmp2.parquetr   r\   rg   )rl   r   rm   rn   ro   rp   rq   rr   rs   rt   ru   rv   r}   rw   rx   r-  ry   r:   r;   rK   r<  r|   r   r   )
r   r*   r&  r   r   r/  r0  r   excinfor3  s
             r)   test_append_wo_indexr`    s    fkk.)
*C	99T299T24rzz2ii&&'@t&LSS		
	
B r7a<D>>"''%4.C8D>>"''$%.C8DOOCMOJ	z	" LgtFKLW]]!3333
fkk.)
*COOv=   	OOCU4OG??3c&9Dbll3&L Ls   H  H	)r   offsetz
2022-01-01z
2022-01-31D)freqrh   )daysr^   i  c                :   t        |       }t        j                  t        j                  t        |      t        j                        t        j                  t        |      t        j                        t        j                  t        |      t        j                        t        j                  j                  g dt        |            j                  d      d|      }t        j                  |d      }t        j                  |j                  |j                  |z         d      }|j!                  |||	       t#        j$                  t&        d
      5  |j!                  ||d       ddd       |j!                  ||dd       y# 1 sw Y   xY w)z1Test raising of error when divisions overlapping.rP   rR   rV   rX   rY   r   r  r_   r)  z)overlap with previously written divisionsr8   Tr*   r,  N)r*   r,  rA  )rl   rm   rn   ro   rp   r}   rq   rr   rs   rt   ru   rv   rw   rx   r   r   ry   r:   r;   rK   )	r   r*   r&  r   ra  r   r   r0  r   s	            r)   !test_append_overlapping_divisionsrg  %  s"    f+C	99SZrxx899SZrxx83u:RZZ8ii&&)E
 ' fSk	
 

B >>",D>>",,rxx&'89SIDOOCMOJ	z)T	U 9F489 	OOCtdOK9 9s   FFc                   t        |       }t        j                  t        j                  d      t        j                  dd      dt        j                  ddd            }t        j                  |dd	      }t        j                  t        j                  dd      t        j                  dd
      d      }|j                  j                  |j                  j                        |_        t        j                  |d      }|j                  ||d       |j                  ||d       t        j                  ||      }t        j                  ||g      }t        ||       y )Nr     r   r   r   r   F)r   sort,  r   Tr)  rf  r   )rl   rm   rn   ro   rp   rw   rx   r   rv   rQ   ry   r|   rD  r   )	r   r*   r   rF  r0  r   r   ressols	            r)   6test_append_known_divisions_to_unknown_divisions_worksro  K  s   
f+C
,,iin299S##67ryyaQS?TC >>#159D
,,RYYsC0ryyc7JK
LC 		  1CI>>#1-D 	OOCDOAOOCtO4
//#f
-C
))S#J
Cc3r,   c                   t        |       }t        j                  dt        j                  dt        j
                        i      }t        j                  dt        j                  dt        j                        i      }t        j                  dt        j                  dt        j                        i      }t        j                  |d      }t        j                  |d      }t        j                  |d      }	|j                  |||       t        j                  t              5 }
|j                  ||d	       d
d
d
       dt        
j                        v sJ t        j                  t              5 }
|	j                  ||d	       d
d
d
       dt        |
j                        v sJ y
# 1 sw Y   rxY w# 1 sw Y   /xY w)z-Test raising of error when non equal columns.rZ   r  rP   r[   r   r_   r)  Trf  Nr^  zAppended dtypes)rl   rm   rn   ro   rp   rq   rr   rw   rx   ry   r:   r;   rK   r<  )r   r*   r&  r   rF  r   df3r0  r   r3  r_  s              r)   test_append_different_columnsrr  g  sM    f+C
,,ryyBHH=>
?C
,,ryyBHH=>
?C
,,ryyBHH=>
?C>>#+D>>#+D>>#+DOOCMOJ	z	" 9gF489W]]!3333	z	" 9gF489GMM 22229 99 9s   ,F2;F>2F;>Gc                .   t        |       }t        j                  dd      }t        j                  dt	        t        |            D cg c]  }d|i c}i|      }t        j                  |d      }dt        j                  dt        j                         fg      i}|j                  |d|	       |j                  |d|d
       t        j                  |      }t        j                  ||g      }|j                         }	t        ||	       yc c}w )z-See: https://github.com/dask/dask/issues/7492z
2020-01-01
2021-01-01r<  r   r   rh   r   T)r,  schema)r,  ru  rA  N)rl   rm   
date_rangern   ranger}   rw   rx   r   structrq   ry   r|   rD  r   r   )
r   r   dtsr   r   r0  ru  r   expectr  s
             r)   test_append_dict_columnr{  ~  s     f+C
--l
3C		U3s8_5C856
B >>"!,Dryy3
"3!456F 	OOCVO4OOCVdOK ??3D YYBx F\\^Fff# 6s   D
c                   t         j                  j                  t        j                  dddddddddg      t        j                  g d      gt        j                  dt        j
                  dt        j                         fd	t        j                         fg      fd
t        j                         fg            }t        |       dz   }t        j                  ||       |j                         }t        t        j                  |      |       t        t        j                  |d
ddgfg      ||d
   dk7     d       t        t        j                  |d
ddgfg      ||d
   dk(     d       y )Nr      )	subfield1	subfield2r8     )aabbr  nested_columnr~  r  idru  z	file.parqnot inr  filtersFr   in)r   r   r   arrayru  rx  rq   r  rl   r   r   	to_pandasr   rw   r|   )r   tabler   pdfs       r)   test_filter_with_struct_columnr    sY    HH  HH"$26"$26"$26 HH'(		
 yy $IIRXXZ8;
:STU ryy{#
 ! E* 
V{	"BNN5"
//
Cboob!3'
dHtf%=$>?CI
 
dD4&%9$:;CIr,   c                :   t        |       }t        j                  g dg dg ddt        j                  g dd      g d	      }t	        j
                  |d
      }t	        j                  |||       t	        j                  |d|      }t        ||d       y )Nr   )r   r8  r  )r  ri  rl  r  )rj  r   r   r
  r   r   )r   r   r   r   r   rg   Fr   )	rl   rm   rn   Indexrw   rx   ry   r|   r   r$  s          r)   test_orderingr    st    
f+C	lAhh|)4
B
 ..
+CMM#s<0??3iDDc4/r,   c                   t        |       }t        j                  t        j                  dt        j
                        t        j                  dt        j                        d      }t        j                  |d      }|j                  ||       t        j                  |ddg|d	
      }t        |ddg   |d       t        j                  t        j                  j                  |d            }t        j                  |dg|      j!                         }|j#                  dd	       t        |dg   |dd       t        j                  |ddg|d	
      }t        |ddg   |d       y )NrO   rP   rZ   r\   rK  r_   r   rZ   r\   Tr   Fr   r   r   )inplacer   )rl   rm   rn   ro   rp   rq   rs   rw   rx   ry   r|   r   globrz   r   r   r   rC  )r   r*   r   r   r   r   fnsrq  s           r)    test_read_parquet_custom_columnsr    s#   
f+C<<		$bhh/biiBJJ6WXD 
	+BMM#fM%
//eS\&dC b%7
))BGGLLk2
3C
//#wv
>
F
F
HCOOE4O(b%k3E5I
//c5\&dC b#u7r,   zdf,write_kwargs,read_kwargsr   )r   r   rh   r  )ccr   bbb)   a   b   cru  )r   r   r   r   )rh   r   rh   )i  i  rO   M8[ns]M8[us]M8[ms]zdatetime64[ns]zdatetime64[ns, UTC]zdatetime64[ns, CET]uint16float32)r   rh   r   )r   rh   r   r   foor   r   )r   r   N-)      @       @N. c                P   d|v r4|j                   j                  dk(  r|dk(  rt        j                  d       d|v rL|j                   j                  dk(  s|j                   j                  dk(  r|dk(  rt        j                  d       t	        |       }|j
                  j                  d	|j
                  _        t        j                  |d
      }t        j                  ||fd|i| t        j                  |f|j
                  j                  |dd|}t        ||d       y )Nr   r  r    z7Parquet pyarrow v1 doesn't support nanosecond precisionr   r  r  z,https://github.com/apache/arrow/issues/15079r   r   r   r*   Tr   Fr   )r   rQ   r:   xfailrl   r   r   rw   rx   ry   r|   r   )r   r   write_kwargsread_kwargsr*   r   r   r   s           r)   test_roundtripr    s    P byRTTZZ8+)0CUV bybddjjH,

h0FYLLGH
f+C	xx}}
..
+CMM#s:6:\:??88==TMXD c4/r,   c                   t        |       }t        j                  g dt        d      d      }t        j
                  } |ddi      5  t        j                  |d      }|j                  j                  d      |d	<   d d d        j                  ||
       t        j                  |d	g|d      }t        j                  ||d      }t        ||       t        j                  t              5  |j                  j                   j"                   d d d        t%        |j                  j'                         j                   j"                        h dk(  sJ |j)                  d       j'                         }|j+                         g dk(  sJ t        j                  t,              xs t        j.                  t0              5  t        j                  |dg|       d d d        y # 1 sw Y   ixY w# 1 sw Y   xY w# 1 sw Y   y xY w)N)rh   r   r   r   r   caaabr   dataframe.convert-stringFr   r   r  r   r   T)r   r*   r   r*   r   >   r   r   r
  c                ^    | j                   j                  j                  j                         S r$   )r   r!  r   rC  r   s    r)   <lambda>z!test_categories.<locals>.<lambda>:  s    QSSWW-?-?-K-K-M r,   )r   r
  r   r   r  r  )rl   rm   rn   r   
contextlibnullcontextrw   rx   r   rv   ry   r|   r   r:   r;   NotImplementedErrorr!  r   r   r   map_partitionsr"  rK   warnsFutureWarning)	r   r*   r   r   ctxr   r   r3  cats_sets	            r)   test_categoriesr  %  s   	VB	O$w-@	AB

 
 C	(%0	1 ,nnRQ/55<<
+C, NN2fN%??
uVD
 ??2f$GDdD	*	+ 

tvv~~##../?BBB""#MNVVXH?? 4444	z	"	Afll=&A ?
wv>? ?', , ? ?s$   	6G&1!G3G?&G03G<?Hc                F   t        |       } t        j                  g dg ddg d      }t        j                  |d      }|j                  dg	      }|j                  | |
       t        j                  | |
      }t        |j                  |j                  d       y )Nr   )r   r   r   AB)r   r   r   )r   r   rh   r   r  r   r   Fr   )
rl   rm   rn   rw   rx   
categorizery   r|   r   r   r   r*   r   r   r   s        r)   test_categories_unnamed_indexr  B  s}    
 [F	?3?
B ..
+C
..#.
'CNN6&N)??6&1DciiU;r,   c                v   t        |       }t        j                  t        d      t        d      d      }t	        j
                  |d      }||j                  dk     }|j                  ||       t	        j                  ||d      }|j                  dk  sJ |j                         }t        ||dd	       y )
Nr   r   r   r   r   Tr  F)check_namesr   )rl   rm   rn   rw  rw   rx   r   ry   r|   r   r   r   )r   r*   r   r   r   r   r3  rn  s           r)   test_empty_partitionr  U  s    	VB	E"IE"I6	7B
..
+Csuuz?DOOBvO& ??2f$GDa
,,.Cc4U>r,   write_metadatac                   t        |       }t        j                  j                         }d|j                  _        t        j                  |d      }|j                  |||       t        j                  ||d      }t        ||       y )Nr  r   r   r)  Tr  )
rl   rw   _compatmakeTimeDataFramer   r   rx   ry   r|   r   )r   r*   r  r   r   r   r   s          r)   test_timestamp_indexr  e  sf    	VB		%	%	'BBHHM
..
+CNN2f.NI??2f$GDc4r,   c           
     d   g dddgt         j                  t         j                  g}g dddgd d g}ddt         j                  t         j                  g}g d}t        j                  dd	      }||t        j                  t        j                  g}|j                         |j                         t        j                  d
      t        j                  d
      g}d}t        j                  dd|      }	|	|	t        j                  t        j                  g}
|	j                  d       j                         |	j                  d       j                         t        j                  d
      t        j                  d
      g}t        j                  g d||||
d      }t        j                  |d      }t        j                  dt        j                  t        j                               fdt        j                         fdt        j                   d      fdt        j                   d|      fdt        j                         fg      }|j#                  t%        |       d|       t        j&                  t%        |       d      j)                         j+                  d      }|j,                  j.                  }t1        t3        |            D ]1  }t        j4                  ||   ||         r J t7        ||                 |j8                  j.                  }t1        t3        |            D ]B  }t        j:                  ||         rt        j:                  ||         r4J ||   ||   k(  rBJ  |j<                  j.                  }t1        t3        |            D ]B  }t        j:                  ||         rt        j:                  ||         r4J ||   ||   k(  rBJ  t        j4                  |j>                  j.                  |      sJ t        j4                  |j@                  |j@                        sJ y )N)r   rh   r   r   r   r   r   )r   r   NNiˌ4ZsunitNaTz
US/Eastern)r  tz)r   r   rh   rh   )partition_columnarraysstringststamps
tz_tstampsr   r   r  r  r  nsr  r  )r@  ru  FrU  Tr   )!ro   rB  rm   	Timestampr  to_datetime64
datetime64
tz_convertrn   rw   rx   r   ru  list_rr   r  	timestampry   rl   r|   r   r   r  valuesrw  r}   array_equaltyper  isnatr  r  r  )r   	in_arrays
out_arrays
in_stringsout_stringststamp
in_tstampsout_tstampstimezone	tz_tstampin_tz_tstampsout_tz_tstampsr   r   ru  ddf_after_writearrays_after_writeitstamps_after_writetz_tstamps_after_writes                       r)   Stest_to_parquet_pyarrow_w_inconsistent_schema_by_partition_succeeds_w_manual_schemar  p  s    QFBFFBFF3IaVT40JsBFFBFF+J(K\\*3/F&"&&"&&1J 	
e
eK HZch?I	266266:M 	T"002T"002
e
eN 
 ,!!'	

B ..
+CYYrxx
+,		$T*+2<<h78,	
F NN3v;-?NO
F?		$	  )//663r7^ Y~~03Z]CXT*UV-EXXCY *11883r7^ <88'*+88KN+++&q)[^;;;< -77>>3r7^ B88*1-.88N1-...)!,q0AAAAB >>/1188+FFF >>/::B<O<OPPPr,   infercomplexc                   |dk(  r)t        j                         t        j                         d}t        |       } t	        j
                  g dt	        j                  g dt        sd nd      g ddt        d	d
            }|r't        j                  |d      j                  d      }nt        j                  |d      }|j                  | |       t        j                  | d      }t        ||       y )Nr  )r   amount)123r  r  r  4)
2017-01-01r  r  
2017-01-02r  z
2017-01-06z
2017-01-09msr  )r  ri  rl    r^   X  i  )r   r   r  r      r   r   r   r   r  TrU  )r   r  rr   rl   rm   rn   to_datetimer	   rw  rw   rx   r   ry   r|   r   )r   r   ru  r   df_outs        r)   test_pyarrow_schema_inferencer    s     99;"((*=[F	8NN "/TD :	
  Arl#
B& ^^BA.88A^^BA.MM&M(__V>Fb&r,   c                ^   t        j                  g dg dd      }t        j                  g dg dd      }t        j                  t	        j
                  |      t	        j
                  |      g|d      }t        j                  t              5 }|j                  t        |              d d d        t        j                        }d|v sJ d	t        |j                        v sJ t        rd
t        |j                        v sJ y dt        |j                        v sJ y # 1 sw Y   wxY w)Nr   g      @r   r   r   r   r  FrR  verify_metaz6Failed to convert partition to expected pyarrow schemaz	y: doublezy: large_stringz	y: string)rm   rn   rw   from_delayedr   delayedr:   r;   rK   ry   rl   r<  r	   )r   rF  r   r   recmsgs         r)   "test_pyarrow_schema_mismatch_errorr    s    
,,Y[9
:C
,,Y_=
>C
//	c	DLL-.SeC 
z	" $cs6{#$ cii.CCsJJJ#cii.((( C		N222c#))n,,,$ $s   D##D,c                   t        j                  g dg dd      }t        j                  g dg dd      }t        j                  t	        j
                  |      t	        j
                  |      g|d      }|j                  t        |       d        t        j                  |       }t        j                  ||g      }t        ||d	       y )
Nr   r  r   r   r  Fr  r  )check_dtype)rm   rn   rw   r
  r   r  ry   rl   r|   rD  r   )r   rF  r   r   rm  rn  s         r)   1test_pyarrow_schema_mismatch_explicit_schema_noner    s    
,,Y[9
:C
,,Y_=
>C
//	c	DLL-.SeC NN3v;tN,
//&
!C
))S#J
C c3E*r,   c           
         t        |       } t        j                  t        j                  j                  g dd      t        j                  j                  g dd      t        j                  j	                  d      t        j                  j                  ddd      t        j                  dd      d      }t        j                  |d	
      }|j                  | ddg|       t        j                  | |dd      j                         }|j                  j                         D ]O  }t        |j                   |j                  |k(           t        |j                   |j                  |k(           k(  rOJ  t        j                  | |ddg      j                         }|j"                  j                         D ]O  }t        |j                   |j"                  |k(           t        |j                   |j"                  |k(           k(  rOJ  y )Nr  r  Cr  rV   XYZrh   r   r   )a1a2r   r
  r   r   r   r  r  r?  Fr   r   )r*   r   )rl   rm   rn   ro   rt   ru   randintrp   rw   rx   ry   r|   r   r  uniquer   r   r  )r   r*   r   r   r   vals         r)   test_partition_onr    s   [F	))""?"=))""?"=!!s!+""1ac"21c"	

B 	rq)ALLtTl6LB //vUgi  uu||~ D244%&#ceeCFFcM.B*CCCCD //&#t
E
M
M
OCuu||~ D244%&#ceeCFFcM.B*CCCCDr,   c                X   t        |       } t        j                  t        j                  j                  g dd      t        j                  j                  g dd      t        j                  j	                  d      d      }t        j                  |d      }t        d      D ]  }|j                  | dd	g|
        t        j                  | |      j                         }t        |      t        |      k(  sJ t        j                  |       D ]  \  }}}|D ]  }|dv rJ   y )Nr  r  rV   r  )r  r  r   r   r   r  r  r?  r   )rf   zpart.1.parquetrd   re   )rl   rm   rn   ro   rt   ru   rw   rx   rw  ry   r|   r   r}   rz   walk)r   r*   r   r   _r   r   files           r)   test_partition_on_duplicatesr#  ,  s   [F	))""?"=))""?"=II$$#$.	

B 	rq)A1X G	V4,vFG //&
0
8
8
:Cr7c#hwwv 1e 	D    	r,   r@  r  c           
        t        |       } t        j                  j                  d      5  t        |       } t	        j
                  t        j                  j                  g dd      t        j                  j                  d      t        j                  j                  ddd      d      }t        j                  |d	
      }|j                  | |d       t        j                  | dd      }d d d        j                         }j                  j!                         D ]O  }t        |j"                  |j                  |k(           t        |j"                  |j                  |k(           k(  rOJ  y # 1 sw Y   xY w)Nzsingle-threadedrj   r  r  rV   rh   r   )r  r  r  r   r   F)r@  rc   )r   r   )rl   r   configr   rm   rn   ro   rt   ru   r  rw   rx   ry   r|   r   r  r  r  )r   r@  r   r   r   r  s         r)   test_partition_on_stringr&  H  s2    [F	#4	5 NV\\ii&&S&Aii&&C&0ii''13'7
 NN21-	V,EJoofEuMN ++-Cuu||~ F255#&'3svvcffm/D+EEEEFN Ns   CE88Fc                L   t        |       } g d}t        j                  g dt        j                  ||d      d      }t	        j
                  |d      j                  d      }|j                  | d	|
       t	        j                  | d|dgd      }t        |      dk(  sJ y )N)z
2018-01-01
2018-01-02z
2018-01-03z
2018-01-04)rh   rh   rh   rh   T)r   ordered)dummyDatePartr   r   r*  r+  r?  )r+  <=r(  )r   r*   r  r   r   )
rl   rm   rn   Categoricalrw   rx   r   ry   r|   r}   )r   r.   r0   catsdftestddftestddftest_reads          r)   test_filters_categoricalr2  ^  s    [FCD\\!tdK	
F nnV3==gFGvJ|L??12 L |!!!r,   c                   t        |       }t        j                  t        d      t	        d      d      }t        j                  |d      }|j                  dk(  sJ |j                  ||d       t        j                  ||dg	      }|j                  d
k(  sJ |j                  d
kD  j                         j                         sJ t        j                  ||dg	      }|j                  dk(  sJ |j                  dk(  j                         j                         sJ t        j                  ||ddg	      }|j                  dk  sJ t        |      rJ t        ||       t        j                  ||ddgddgg	      }	|	j                  d
k(  sJ |	j                  dkD  |	j                  dk  z  j                         j                         sJ t        j                  ||dg	      }
|
j                  dk(  sJ |
j                  dk  |
j                  dkD  z  j                         j                         sJ t        j                  ||dg	      }|j                  dk(  sJ t        |      sJ |j                  dk(  j                         j                         sJ t        j                  ||dg	      }|j                  dk(  sJ y )Nr   
aabbccddeer   r   r   Tr)  )r   >r   r*   r  r   )r   ==r
  rh   r
  )r   r5  r   )r   r5  rh   )r   <r   r   r5  r   )r   r8     r:  )r   r  )r   	   r   r   )r   =r
  )r   !=rh   )rl   rm   rn   rw  r   rw   rx   r   ry   r|   r   r   r   r   r}   r   )r   r.   r0   r   r   r   r   r   r
  r   er\   gs                r)   test_filtersr@  s  ss   6{H	E"ID,>?	@B
..
+C??aNN8LdNK
}oNA==ACC!G==?""$$$
?O>PQA==ACC3J%%'''
/?.O	A ==A1v:aO
 M*M*
	A ==ASS1Wq!&&(00222
?R>STA==ASS1Wq!&&(00222
>OPA==Aq6M6CC3J%%'''
~>NOA==Ar,   c                   t        |       }t        j                  dg di      }t        j                  |d      }|j                  dd      j                  |d|       t        j                  |d|d	g
      j                         }t        j                  |d|dg
      j                         }t        |||d   dk(     d       t        |||d   dk(     d       |j                  dd      j                  ||       t        j                  ||      j                         }t        ||       |j                  dd      j                  ||       t        j                  ||d	g      j                         }t        j                  ||dg      j                         }t        |      dkD  sJ t        |      dkD  sJ t        ||       y )Nat)abr  badar  rh   r   T)r   forceFrb   )rB  r7  r  r   r*   r  )rB  r<  r  r  r   r   r   r6  r   )rl   rm   rn   rw   rx   repartitionry   r|   r   r   r}   )r   r.   r0   r   r   r   r   r3  s           r)   test_filters_v0rI    s   	VB	t;<	=B
..
+C OOO.99
l :  ??
%6H5Igi 	 ??
%6G5Hgi 	
 dCD	T)*>dCD	T)*> OOO.99"\9R??2k2::<DdC OOO.99"\9R??
;);(<gi 	 ??
;):(;gi 	 t9q==t9q==dDr,   c                   t        |       }t        j                  t        d      ddgdz  d      }t	        j
                  |d      }|j                  |d|d	
       d}d}dd|fdd|fgg}t	        j                  |dd|      }d}	||d   |k     j                  D ]*  }
|	t        t        |
j                               dkD        z  }	, |j                  |	k(  sJ ||d   |k     }||d   |k(     }t        ||j                         d       y )Nr  r!  dogrK  )r  r  r   r   FTr   r   r  r8  r  r7  r    rG  r   r   )rl   rm   rn   rw  rw   rx   ry   r|   
partitionsintr}   r   r   r   )r   r*   r   r   r   aa_limbb_valr  r   nonemptyparts              r)   test_filtering_pyarrow_datasetrR    s!   	VB	U3Zu~/BC	DB
..
,CNN25TNR FFsF#dD&%9:;G??2U9gND HCI&'22 1CDLLN+a/001x''' 
BtHv	B	BtH	Bb$,,.e4r,   c                   t        j                  t        d      t        d      d      }t	        j
                  |d      }|j                  t        |       |       t        | j                  d            }t	        j                  |d|d	g
      }|j                  dk(  sJ t        ||d   dkD     |j                         d       t	        j                  t        | j                  d            d|d	g
      }t        |      dk(  sJ t        j                  t        j                  j                  | d      |      t!        |j"                           j                  t        j                  j                  | d      |       t	        j                  t        | j                  d            d|d	g
      }|j                  dk(  sJ t        ||d   dkD     |d       y )Nr   r4  r   r   r   r   r   Tr9  )r   r*   r  r   r   Fr   rf   r   zpart.4.parquet)rm   rn   rw  r   rw   rx   ry   rl   r   r|   r   r   r   r}   rz   r   reversedr   )r   r*   r   r   r   ddf_outr   r3  s           r)   test_filters_file_listrV    s   	E"ID,>?	@B
..
+CNN3v;vN.K()Eoo4G !###bC1ow0eD ??FKK()* 	D t9>> OOBGGLL)9:6Jjf&67jG??FKK$% 	D q   bC1ot7r,   c                   t        j                  g dt        d      d      j                  d      }|j                  d d j                  t        | j                  d            d       |j                  dd  j                  t        | j                  d	            d       t        j                  t        |       d
ddg      }|j                  dk(  sJ t        j                  t        |       dddg      }|j                  dk(  sJ y )N)r   rh   r   r}  r   r   r:  r;  r:  r   r   r   zfile.0.parquetr   row_group_sizezfile.1.parquetFT)r   r,  r   )split_row_groupsr   r  )r   r   r   )rm   rn   rw  r   r-  ry   rl   r   rw   r|   r~   )r   r   r   s      r)   test_pyarrow_filter_divisionsr[    s     
6U1XF	G	Q	QRU	VBGGBQK3v{{+;<=aPGGABK3v{{+;<=aP
 //F  	C ==I%%%
//F  	C ==I%%%r,   rk   threads	processesc                   t        |       } t        j                  g dg dd      }d|j                  _        t        j                  |d      }|j                  | d|      }t        |d	      sJ |j                  |
       t        j                  j                  |       sJ t        j                  | |d      }t        ||d       y )Nrh   r   r   r   )g      ?r  r  g      @r   r   r   r   F)r   r*   r   rj   Tr  r   )rl   rm   rn   r   r   rw   rx   ry   hasattrr   rz   r   r   r|   r   )r   rk   r*   r   r   r<  r   s          r)   test_to_parquet_lazyra  $  s    [F	L/CD	EBBHHM
..
+CNN65N@E5&!!!	MMIM&77>>&!!!??6&dKDc4/r,   r   c                   ddl m} t               }|j                  |d|       t        j                  | |       |j                  t        |             }|j                  sJ |j                  j                  d   |k(  sJ y )Nr   )LocalFileSysteminvalidate_cache)r   )fsspec.implementations.localrc  r   setattrr   ry   _strip_protocolrl   called	call_argsargs)r   monkeypatchr   rc  rd  r   s         r)   &test_to_parquet_calls_invalidate_cacherl  5  su     = {);=MNNN67N+**3v;7D""""%%**1-555r,   c                   t        |       }t        j                  t        j                  t        j
                  j                  g dd      d      t        j                  t        t        dd            d      t        j                  t        t        dd            d      d	      }t        j                  |d
      }|j                  ||       t        j                  |dg|      }t        |j                        dgk(  sJ t        j                  ||      }t        |j                        t        |      k(  sJ y )N)r   r   r
  r   r>  r\   r  rV   r  rP   r   rM  float)r   intsfloatsrh   r   ro  r   )rl   rm   rn   r  ro   rt   ru   r   rw  rw   rx   ry   r|   r   )r   r*   r   r   r   rddfs         r)   test_parquet_select_catsrr  B  s    	VB	))		  !?c J  IId5C=1?iiU1c] 37C	
	
B ..Q
CNN2fN%??2x?D&)))??2f-Db)))r,   c                <   t        |       }t        j                  dddgit        j                  ddgd            }d	|j                  _        t        j                  |d      }|j                  ||
       t        j                  ||dg      }t        ||       y )Nr  rh   r   r   r   idxr   r   colsr   r   )rl   rm   rn   r  r   r   rw   rx   ry   r|   r   )r   r*   r   r   r   r  s         r)   test_columns_namerv  W  s{    6{H	sQFm288S#JU+K	LBBJJO
..Q
CNN8FN+__XfUGDFfbr,   c                @   | dk(  sJ t         j                  j                  t        j                  j                  |d            }|j                  j                  }t        |j                        D ]  }|j                  |      }t        t        |            D ]  }|j                  |      }||j                  |j                  k(  r0J |}	|dk(  rd}	|	j                         |j                   j                         k(  sJ |j                  |j                  k7  rJ   y )Nr    re   defaultsnappy)r   parquetread_metadatarz   r   r   ru  r   rw  num_row_groups	row_groupr}   r   total_compressed_sizetotal_uncompressed_sizelowercompression)
r*   filenamer  metadatar   r  r}  jr   compress_expects
             r)   check_compressionr  b  s
   Yzz''X{(KLHOO!!E8**+ V&&q)	s5z" 		VA%%a(F"33v7U7UUUU"-)+&.O&,,.&2D2D2J2J2LLLL33v7U7UUUU		VVr,   zcompression,)Ngzipry  c                :   t        |       }t        j                  g ddz  g ddz  d      }d|j                  _        t        j                  |d      }|j                  |||d	       t        j                  ||d
      }t        ||       t        |||       y )Nr  r   r   r   r   r   r   T)r  r*   ra   r  )rl   rm   rn   r   r   rw   rx   ry   r|   r   r  )r   r  r*   r   r   r   r   s          r)   %test_writing_parquet_with_compressionr  t  s    	VB	Ob0y2~F	GBBHHM
..
+CNN2;vSWNX
//"V
FCc3fb+.r,   c                    t        |       }t        j                  g ddz  g ddz  d      }d|j                  _        t        j                  |d      }|j                  |||dgd	
       t        |||       y )Nr  r   r   r   r   r   r   r   T)r  r*   r@  ra   )	rl   rm   rn   r   r   rw   rx   ry   r  )r   r  r*   r   r   r   s         r)   6test_writing_parquet_with_partition_on_and_compressionr    sr    	VB	Ob0y2~F	GBBHHM
..
+CNN
U    fb+.r,   rt  rr   )r  r   
numpy_typepandas_typer  z0.21.0)r   index_columnspandas_versionencodingzUTF-8objectunicode)
field_namer  r   r  r  __index_level_0__)column_indexesr   r  r  c                    | j                   S r$   r%   r'   s    r)   pandas_metadatar    s    P ==r,   c                h   t        |       dz   }t        j                  j                  t        j                  g d      t        j                  g d      gddg      }t        j                  ||       t        j                  ||      }t        j                  g dg dd      }t        ||       y )	Nz
table.parqr   )r   r   r   r  r  r   r   r  )rl   r   r   r   r  r   r   rw   r|   rm   rn   r   )r   r*   r   r  r  r  s         r)   test_read_no_metadatar    s     f+
$CHH  	)	bhhy123* ! E NN5#__S0F||))<=Hfhr,   c           	     &   t        |       }t        j                  j                  |d      }t        j                  j                  |d      }t	        j
                  t        j                  j                  g dd      t        j                  j                  d      t        j                  j                  ddd      d      }d	|j                  _        t        j                  |d
      }ddd ddi} |j                  |fd|i||    t        j                  ||d      }t!        ||d       t"        j$                  j'                  d      5   |j                  |f|dgd||    d d d        t        j                  ||      j)                         }|j*                  j-                         D ]O  }	t'        |j.                  |j*                  |	k(           t'        |j.                  |j*                  |	k(           k(  rOJ  y # 1 sw Y   xY w)Nnormalpartitionedr  r  rV   rh   r   r  r   r   r   r    ry  T)r  coerce_timestampsuse_dictionaryr*   r  r   ri   rj   r   r*   r@  r   )rl   rz   r   r   rm   rn   ro   rt   ru   r  r   r   rw   rx   ry   r|   r   r   r%  r   r   r   r  r   )
r   r*   r   path1path2r   r   r  r   r  s
             r)    test_writing_parquet_with_kwargsr    s   	VBGGLLX&EGGLL]+E	!!/!<!!s!+""1ac"2	

B BHHM
..
+C 	#!%"
L CNN5@@<+?@
//%D
ICc3D) 
6	* YuXV3%X<PVCWXY
//%
/
7
7
9Ctt{{} B244$%SUU355C<-@)AAAABY Ys   HHc                    t        |       }t        j                  t              5  t        j                  ||d       d d d        y # 1 sw Y   y xY w)Nunknown_value)r*   unknown_key)rl   r:   r;   	TypeErrorr   ry   r   s      r)   (test_writing_parquet_with_unknown_kwargsr    sB    	VB	y	! Gr&oFG G Gs   AAc                b   ddl m t        |       } dgfd}t        j                  g dg dd      }t        j                  |d	      }|j                  | |d
|i       d   sJ t        j                  t        j                  j                  | d      |      }t        ||d       y )Nr   )r  Fc                     dd<    | i |S )NTr   r=   )rj  r   flagmp_gets     r)   my_getz(test_to_parquet_with_get.<locals>.my_get  s    Qt&v&&r,   r  r_  r   r   r   rk   )r*   compute_kwargs*r   r   )dask.multiprocessingr  rl   rm   rn   rw   rx   ry   r|   rz   r   r   r   )r   r*   r  r   r   r  r  r  s         @@r)   test_to_parquet_with_getr    s    2[F7D' 
0|D	EB
..
+CNN6&+v9NNO7N7__RWW\\&#6vFFfbe,r,   c                    t        |       }d}t        j                  j                  dd|      j	                         dz   t        j                  j                  g d|      t        j                  j                  g d|      d}t        j                  t        j                  |      d	      }|j                  |d
d|ddg       t        j                  ||      }||j                  dk(     j                          y )Nr8  r   r  rV   rK  r  )rb  EF)signal1fake_categorical1fake_categorical2r   ry  Fr  r  )r  rc   r*   r@  r   r  )rl   ro   rt   r  cumsumru   rw   rx   rm   rn   ry   r|   r  r   )r   r*   r   rW   r   r   df_partitioneds          r)   test_select_partitioned_columnr  -  s    	VBD99##As#6==?"DYY--oD-IYY--oD-I	A
 
Q	+BMM
)+>?   __R7N>33s:;CCEr,   c                   t        |       }t        j                  t        j                  d      d      j	                  t        j
                        t        j                  dd      j	                  t        j                        t        j                  dd      j	                  t        j                        t        j                  dd      j	                  t        j                        d}t        j                  |      }t        j                  |d      }|j                  |d	d
       t        j                  |d	      }|j	                  dt        j                  i      j                          y )Nr   r   rj  r   r  r  )pr   r
  r   r   Fr  )rc   r@  r   r   )rl   ro   repeatrp   rv   int8int16r  rs   rm   rn   rw   rx   ry   r|   r   )r   r   r   r  r   s        r)   test_arrow_partitioningr  B  s     v;DYYryy|Q'..rww7YYr1$$RXX.YYr1$$RZZ0YYr1$$RZZ0	D ,,t
C
..!
,CNN4UN=
//$e
,CJJRZZ !))+r,   c                     t        j                  t        d      5  t        j                  dd       d d d        y # 1 sw Y   y xY w)NzUnsupported engine: \"foo\"r8   r  r   )r:   r;   rK   rw   r|   r=   r,   r)   test_informative_error_messagesr  U  s4    	z)G	H -
e,- - -s	   =Ac                   t        |       }t        j                  dg di      }|d   j                  d      |d<   t	        j
                  |d      }t	        j                  |||       t	        j                  ||dd|       t	        j                  ||      j                         }|d   j                         g dd	z  k(  sJ y )
Nr   )r   r   r   r   r   r  rh   r   r   T)r,  rA  r*   r   )
rl   rm   rn   rv   rw   rx   ry   r|   r   r"  )r   r*   r   r   r   r   s         r)   test_append_cat_fpr  Z  s    v;D	s56	7BgnnZ(BsG
..
+CMM#tF+MM#tD4O
V,446AS6==?7!;;;;r,   r   r   r   r   rh   r   r   )i- i @B )rh   r   r   r   r   r   c                   t        |       }|j                  j                  sd|j                  _        t        j                  |d      }t        j
                  ||d       t        j                  |d      }t        ||       y )Nr   r   r   T)rc   rU  )rl   r   r   rw   rx   ry   r|   r   )r   r   r   r   r   s        r)   test_roundtrip_arrowr  h  s^    @ 6{H88==
..
+CMM#xT2??8>Dc4r,   c                    t        |       }t        j                  j                  ddd      j	                         }|j                  ||       t        j                  ||d      }t        ||       y )N
2000-01-01z
2000-01-101Dstartendrc  r   Tr  )	rl   r   datasets
timeseriespersistry   rw   r|   r   )r   r*   r   r   r   s        r)   test_datasets_timeseriesr    sc    6{H		!	!4 
" 
gi  MM(6M*
//(6t
LCb#r,   c                0   dd l }t        j                  dg di      }d|j                  _        t        j                  |d      }|j                  t        |             }|j                  ||       t        j                  ||d	      }t        ||       y )
Nr   r   r  r   r   r   r   Tr  )pathlibrm   rn   r   r   rw   rx   Pathrl   ry   r|   r   )r   r*   r  r   r   r   r   s          r)   test_pathlib_pathr    sr    	s./	0BBHHM
..
+C<<F$DNN4N'??4DIDc4r,   c                    t        |       }t        j                  ||       t        j                  t
        j                  j                  |d      |d      }t        t        |d       y )Nr   r   Fr  r   )	rl   r   ry   rw   r|   rz   r   r   r   r   s        r)   test_read_glob_no_metar    sO    6{HNN8LN1??
X{+!D
 c4/r,   c                b   t        |       }t        j                  ||d       t        j                  t        j
                  j                  |d            }|j                  t        j
                  j                  |d             t        j                  ||d      }t        t        |d       y )NTr)  r   re   Fr  r   )rl   r   ry   r  rz   r   r   r,  rw   r|   r   )r   r.   r0   r   pathsr   s         r)   test_read_glob_yes_metar    ss    6{HNN8LdNKIIbggll8[9:E	LLh45??5%PDc4/r,   r~   remove_commonc                   t        |       }t        j                  ||d       t        j                  j                  t        j                  j                  |d            r3t        j                  t        j                  j                  |d             t        j                  |      }d|vsJ |rpt        j                  j                  t        j                  j                  |d            r3t        j                  t        j                  j                  |d             t        j                  |||      }t        t        ||       y )NTr)  re   rd   r  r   r   )r   r.   r0   r~   r  r   r   r   s           r)   test_read_dir_nometar    s     6{HNN8LdNK	ww~~bggll8[9:
		"'',,x56JJx Ee###X?Q(RS
		"'',,x);<=??8KYWDc43r,   c                    t        |       }t        j                  ||d       t        j                  ||d      }t        t        |       y )NFr)  Tr  r   r   s        r)   test_statistics_nometar    s9    6{HNN8LeNL??8KTRDc4r,   c                &   t        | j                  d            }t        j                  j	                  |dd      }t
        j                  j                  ddd      j                         j                  d       }|j                  d      j                         j                         }|j                  j                  |j                  dk(  d       |_        |j                  ||d	|
       t!        j"                  ||      }t%        ||d	d	       y )Nr   z../r  z
2000-01-031hr  c                     | j                   d d S )Nr   )r#  r   s    r)   r  z1test_timeseries_nulls_in_schema.<locals>.<lambda>  s    !%%) r,   r   F)r*   ra   ru  r   r   r   )rl   mkdirrz   r   r   r   r  r  r   r  r   r  r   wherer  ry   rw   r|   r   )r   r*   ru  r   r   ddf_reads         r)   test_timeseries_nulls_in_schemar    s     6<<()Hww||HeW5H 	  |D Q		+	, 	
 >>#**,446D		, >EDI 	OOHVvOVx7HheGr,   c                   dd l }t        |       }t        j                  j	                  dddd      }|j                  ||       t        j                  ||      }t        |j                  |j                                     dk  sJ y )	Nr   r  z
2000-01-0260sr  )r  r  rc  partition_freqr   ia  )picklerl   r   r  r  ry   rw   r|   r}   dumps__dask_graph__)r   r*   r  r   r0  r   s         r)   test_graph_size_pyarrowr    sw    	VB==##5 $ D 	OOBvO&??2f-Dv||D//123e;;;r,   c                T   t        j                  dgdz  dgdz  dgdz  dgdz  d      }t        j                  |d      }t        j
                  j                  t        |             }|j                  ||       t        j                  ||      d   }t        j                  ||      d	g   }t        j                  ||      d	d
g   }t        j                  |||      \  }}	}
t        j                  |||d      \  }}}t        ||       t        |	|       t        |
|       y )Nrh   r  r   r   r   r  r  r  rb  r   r  r  r  F)optimize_graph)rm   rn   rw   rx   rz   r   r   rl   ry   r|   r   r   r   )r   r*   r   r   r   r   r   r
  r  r  a3b1b2b3s                 r)   test_getitem_optimization_multir    s    	QC#IQC#IQC#IQCRUIV	WB
..Q
C	c&k	"BNN2fN%
6*3/A
6*C51A
6*C:6AaA&JBBaAe<JBBb"b"b"r,   c                <   t        |       }t        j                  t        j                  dt        j
                        t        j                  dt        j                        d      }d|j                  _        t        |      dz  }t        j                  |j                  d| d      j                  |dd	
       t        j                  ||d      }|j                  dk(  sJ t        j                  ||dd      }|j                  dk(  sJ t        j                  |j                  |d d      j                  |ddd       t        j                  ||dd      }|j                  dk(  sJ t        j                  ||dd      }|j                  dk(  sJ y)z(Test split_row_groups read_parquet kwarg   rP   r  r   r   Nr   r    r  r*   rY  Tr*   rZ  r   F)r*   r   rZ  rK  r,  r*   rY  r}  )rl   rm   rn   ro   rp   rq   rs   r   r   r}   rw   rx   r-  ry   r|   r   )r   r*   r   r   r/  r3  s         r)   test_split_row_groupsr    so    f+C			#RXX.RYYs"**5UV
B BHHMr7a<DNN2775D>q1<<Ic =  ??3vEDq   ??FuD q   NN27745>q1<<D2 =  ?? 	D r!!!??FuD q   r,   rZ  r}  r   c                   t        |       }d}d}d}t        j                  t        j                  d|z  t        j
                        t        j                  d|z  t        j                        d      }t        |      dz  }	t        j                  |j                  d |	 |      j                  |d|	       t        j                  |j                  |	d  |      j                  |d
d|       t        j                  ||||      }
t        ||z        }|
j                  dt        j                   ||z        z  k(  sJ y )Nr   r   r   r   rP   r  r   r    r  Tr  )r*   rZ  r   )rl   rm   rn   ro   rp   rq   rs   r}   rw   rx   r-  ry   r|   rM  r   mathceil)r   rZ  r   r*   r   rY  r   	half_sizer   r/  r   expected_rg_couts               r)   test_split_row_groups_intr  4  s%    f+CNKI	99Q]"((;1y=

;	

B r7a<DNN2775D>{;FFIn G  NN27745>{;FFD> G  ??)/	D 9~56q499-=@P-P#QQQQQr,   r:     c                   d}d}t        j                  t        j                  |t        j                        t        j                  |t        j
                        d      }t        j                  |d      j                  t        |       d|d	       t        j                  t        |       ||d
      }t        j                  ||z  |z        }|j                  |k(  sJ t        |      |k(  sJ t        ||d       y )Nr   r  rP   r  r   r   r    F)r*   rY  rc   T)r*   rZ  aggregate_filesr   )rm   rn   ro   rp   rq   rs   rw   rx   ry   rl   r|   r  r  r   r}   r   )r   r*   rZ  rY  rW   r   r   npartitions_expecteds           r)   )test_split_row_groups_int_aggregate_filesr  U  s    
 ND	99T24rzz2	

B NN21%00FInRW 1  ??F)	D  99d^&;?O%OP3333t9b$E*r,   zfilters,op,length)r
  r=  r   c                    | | d   dk7     S Nr
  r   r=   r   s    r)   r  r  |  s    a##& r,      )r
  r7  r   c                    | | d   dk(     S r	  r=   r   s    r)   r  r    s    q33'7 r,   r   c                   | j                  d      }t        j                  dd gdz  d gdz  z   t        j                  d      j                         d gz   dd gdz  d gdz  z   d      }|j                  |d	d
       t        j                  ||||      }t         ||            |k(  sJ t         ||       ||      d       y )Nr   rh   r   r  r   r      r  r    r   r  )r*   r  rZ  Fr   )r   rm   rn   ro   rp   r"  ry   rw   r|   r}   r   )	r   r  oplengthrZ  r*   r   r   r  s	            r)   test_filter_nullsr  v  s     ;;~&D	TQ$!+2%%'4&0tqD6B;.	

B MM$yM<__)	F r&z?f$$$bj"R&e4r,   c                   | j                  d      }t        j                  ddd gdz  d gdz  z   i      j                  |d       t	        j
                  |ddt        j                  fg|      }t        |      dk(  sJ t        |d   j                         j                               sJ t	        j
                  |dd	t        j                  fg|      }|d   j                         j                         dgdz  k(  sJ y )
Nr   r   rh   r   r   rX  is)r  rZ  is not)r   rm   rn   ry   rw   r|   ro   rB  r}   r   r   isnar"  )r   rZ  r   result_isnaresult_notnas        r)   test_filter_isnar    s     ;;~&DLL#4y1}vz123>>tTV>W//tRVV$%)K
 {r!!!{3'')..0111??x())L
 $$&--/A37:::r,   c                ~   t        |       }t        j                  t        j                  dt        j
                        t        j                  dt        j                        d      }d|j                  _        d}dd|fg}t        j                  |d	      j                  |d
dd       t        j                  ||      }t        j                  ||d
d
|      }|d   |k(  j                         j                         sJ t        ||d   |k(     j                         ||d   |k(     j                                y )Nr  rP   r  r   r  r\   r7  r   r   Tr    rK  r  r   )r*   r   rZ  r  rZ   )rl   rm   rn   ro   rp   rq   rs   r   r   rw   rx   ry   r|   anyr   r   )r   r*   r   r   
search_valr  r   r3  s           r)   test_split_row_groups_filterr    s   
f+C			#RXX.RYYs"**5UV
B BHHMJT:&'GNN21%00D2 1  ??3v.D?? D K:%**,44666T%[J&'//1T%[J&'//1r,   c                H   t         j                  j                  | d      }t        j                  g dg dg dg ddg d      }|j                  ||	       t        j                  ||	      }|d
dg   j                  d      j                         j                          y )Npath.parquetr   r   r   rh   r   r   r   r   r   r   r  r  r   r   r   r   r   )rz   r   r   rm   rn   ry   rw   r|   rollingmaxr   )r   r*   r   r   r   s        r)   &test_optimize_getitem_and_nonblockwiser#    s}    77<</D	iiiH
B MM$vM&
//$v
.Cc
OA""$,,.r,   c                   t         j                  j                  | d      }t        j                  g dg dg dg ddg d      }|j                  ||	       t        j                  ||	      }|d
   j                  |d         j                         j                         }|d   j                  |d         j                         j                         }|d
dg   j                  d      j                         j                         }|j                  d      j                         j                         }t        j                  ||z   |z   |z         \  }	t        j                  |      d   d   t        j                  |      d   d   t        j                  |      d   d   t        j                  |      d   d   g}
t        |	|
      D ]  \  }}t!        ||        y )Nr  r  r  r   r   r  r  r   r   r   r
  r   r   r   )rz   r   r   rm   rn   ry   rw   r|   groupbyfirst
to_delayedr!  r"  r   r   zipr   )r   r*   r   r   r   df2adf2bdf2cdf2dr  r  r   r   s                r)   test_optimize_and_notr-    s   77<</D	iiiH
B MM$vM&
//$v
.Cs8CH%++-88:Ds8CH%++-88:DSz?""1%))+668D;;q>**,DTD[4/$67IV 	T1a T1a T1a T1a 	H FH% 1!Qr,   c                &   t        j                  t        j                  d      t        j                  d      d      }t        j                  |d      }|j                  | |d       t        j                  | |d	
      }t        ||d       y )NrM  rP   rn  r   rh   r   Tr)  adaptiver  Fr   )rm   rn   r  rw   rx   ry   r|   r   )r   r.   r0   r   r0  r   s         r)   test_split_adaptive_emptyr0    sn    	BIIE29QR	SB>>"!,DOOF<TOJ??#D
 dDe,r,   r  r   	blocksize   1MiBc           	        d}t        j                  t        j                  j	                  g d|      t        j                  j                  |      t        j                  j                  dd|      d      }t        j                  |d      }|j                  t        |       d	||d
       |r|nd}t        j                  t        |       d	|d|      }|dk(  r|j                  |j                  k  s+J |dk(  r$|r|j                  dk(  sJ |j                  dk(  sJ |rM|j                         j                  ddg      }	|j                  ddg      }t        |ddg   |	ddg   d
       y t        ||d
d
       y )Nr  applebananacarrotrV   rh   r   r  r;  r   r    F)r*   r@  ra   rc   Tr/  r*   r1  rZ  r  r2  r3  r   r   r
  r   r  rm   rn   ro   rt   ru   r  rw   rx   ry   rl   r|   r   r   rC  r   )
r   r1  r@  r  df_sizerF  r0  r  r   r   s
             r)   test_split_adaptive_filesr<    sv   
 G
,,!!"?g!N!!w!/""1ag"6	
C >>#1-DOOF!$   '3lO??F#'D D$"2"2222	f	##q(((##q((( lln((#s4oosCj)#sCj/3Sz?F$eGr,   r  r   c           
        d}ddg}d}t        j                  t        j                  j	                  g d|      t        j                  j	                  ddg|      t        j                  j                  |      t        j                  j                  d	d|      d
      }t        j                  |d      }|j                  t        |       d|d       t        j                  t        |       d|d|      }|dk(  r|j                  dk(  sJ |dk(  r|j                  dk(  sJ |j                         j                  ddg      }|j                  ddg      }t        |ddg   |ddg   d       y )Nr3  r   r   r  r5  rV   smalllargerh   r  r;  r   r    Fr*   r@  rc   r/  r9  r   r   r
  r   r   r:  )	r   r  r1  r@  r;  rF  r0  r   r   s	            r)   #test_split_adaptive_aggregate_filesrA  0	  sf    I:LG
,,!!"?g!N!!7G"47!C!!w!/""1c"8		
C >>#1-DOOF!	   ??F#'D #1$$$	C	1$$$ ,,.
$
$c3Z
0C
//3*
%Cc3*osC:EBr,   )N   r2  r3  c           	     D   d}d}d}t        j                  t        j                  j	                  g d|      t        j                  j                  |      t        j                  j                  dd|      t        j                  d|      d      j                  d	      }t        j                  ||
      }|j                  t        |       d||       |rt        |       }	nFt        |       }
t        j                  |
      }d|vsJ t        j                  j                  |
d      }	t        j                   |	||ddd	d      }t#        ||d       ||z  }|s|j$                  |j$                  k(  sJ y |j$                  |k  sJ |dk(  r|j$                  dk(  sJ y y )Nr   r  r   r5  rV   rh   r   r   r   r
  r   r   r   r    r*   rY  ra   re   r   r/  T)r*   r1  rZ  r   r   r  Fr   r3  )rm   rn   ro   rt   ru   r  rp   r   rw   rx   ry   rl   rz   r{   r   r   r|   r   r   )r   r1  r*   r  npartsr;  rY  r   r0  r   dirnamer   r   r|  s                 r)   test_split_adaptive_blocksizerH  Z	  s    FGN	!!"?g!N!!w!/""1ag"6YYq'*		

 i  >>"&1DOOF%$	   6{f+

7#%'''ww||G[1??# D dD%0.N4#3#3333 .000 ##q((( r,   )rx     rB  r3  c           	        d}d}d}t        j                  t        j                  j	                  g d|      t        j                  j                  |      t        j                  j                  dd|      t        j                  d|      d      j                  d	      }t        j                  ||
      }|j                  t        |       d||       |rt        |       }	nFt        |       }
t        j                  |
      }d|vsJ t        j                  j                  |
d      }	t        j                   |	||d	      }t#        ||       |dv r|j$                  |j$                  kD  sJ t        j                  j                  t        |       d      }|j                  ||       t'        |j$                        D ]  }t        j                  j                  |d| d      }t)        j*                  |      j,                  }t/        t'        |j0                        D cg c]  }|j3                  |      j4                   c}      }||k  rJ  y |j$                  |j$                  k(  sJ y c c}w )Nr   r  rh   r5  rV   r   r   rD  r   r   r    rE  re   r   )r*   r1  r   )rI  rB  r   r   part..parquet)rm   rn   ro   rt   ru   r  rp   r   rw   rx   ry   rl   rz   r{   r   r   r|   r   r   rw  r   ParquetFiler  sumr|  r}  total_byte_size)r   r1  r*   r  rF  r;  rY  r   r0  r   rG  r   r   outpathr  r   mdrgsizep0s                      r)   test_blocksizerT  	  s$    FGN	!!"?g!N!!w!/""1ag"6YYq'*		

 i  >>"&1DOOF%$	   6{f+

7#%'''ww||G[1 ??	D b$K$"2"2222'',,s6{E2/t''( 	'Agqc':;B#,,B<A"BSBS<TUbb!11UF Y&&&	' 4#3#3333 Vs   $"I3
c                    t        | j                  d            }t        j                         }d|j                  _        |j                  ||       t        j                  ||dddd      }t        ||       y )Nr   r   r   z10 kiBT)r*   r1  r   rZ  r   r   )r   r.   r0   r   r  r  s         r)   test_roundtrip_pandas_blocksizerV  	  sg    v{{>*+D
'')CCIINNN4N- H c8r,   c                   t        |       } t        j                  g ddz  t        j                  d      t        j
                  j                  ddgd      d      }t        j                  |d      }|j                  | d	d
g|       t        j                  | d	||dg      }|j                         }t        |      t        ||d   dk           k(  sJ |d   j                         dk  sJ y )Nr_  r      r!  rK  rV   )r  timert   r   Fr  )rc   r@  r*   )rY  r8  r   )r   r*   r   r  rY  r   )rl   rm   rn   ro   rp   rt   ru   rw   rx   ry   r|   r   r}   r"  )r   r.   r0   r   df_write	ddf_writer  df_reads           r)    test_filter_nonpartition_columnsr]  	  s     [F||"IIbMii&&u~B&?	
H xQ7IE|   /!"H  Gw<3wwv':;<<<<6? 1$$$r,   c           	     <   t        |       } t        j                  t        j                  t        j
                  g dd      t        j
                  g dd      d      d      }|j                  |        t        j                  | d	
      }t        ||d       y )N)rh   Nr   r  rP   )rK  r!  Nrl   r  rh   r   TrU  Fr   )	rl   rw   rx   rm   rn   r  ry   r|   r   )r   r0  r   s      r)   %test_pandas_metadata_nullable_pyarrowr_  	  sv    [F>>
XXl':XX2%@	
 D 	OOF??6t<DdDe,r,   c                   t        j                  t        j                  d            }t        j                  t
        d      5  t        j                  |j                  dz   |j                  dd      }d d d        j                  d      }t        j                  j                  t        j                  |      gdg	      }t        j                  j                  ||  d
d       t!        j"                  t%        |             j'                          ddlm}  G d d|      }t!        j"                  t%        |       |      j'                          y # 1 sw Y   xY w)Nrr   z!invalid value encountered in castr8   r   rB  )r  stopnumrQ   zdatetime64[ms]tsr   z/file.parquetF)use_deprecated_int96_timestampsr   r2   c                  H     e Zd Zedd       Ze	 	 d	 	 	 d fd       Z xZS )Mtest_pandas_timestamp_overflow_pyarrow.<locals>.ArrowEngineWithTimestampClampc                &   g }|j                   D ]U  }t        j                  j                  |j                        r|j                  j
                  dv rdddd|j                  j
                     }|j                  }|j                  t        j                               j                         }t        j                  t        j                  d            }|j                  |j                  |z  dz   |j                  |z  d       t        j                  |t        j                               }|j                  |      }|j!                  |       E|j!                  |       X t        j"                  j%                  ||j&                  	      S )
zConstrain datetimes to be valid for pandas

            Since pandas works in ns precision and arrow / parquet defaults to ms
            precision we need to clamp our datetimes to something reasonable)r  r  usl    d(	 r  rO   rr   rh   T)r  upperr  r   )r   r   typesis_timestampr  r  castrr   r  ro   iinforQ   clipminr"  r  r,  r   r   column_names)	clsarrow_tablenew_columnsr   
multiplieroriginal_typeseriesinfo	new_arrays	            r)   clamp_arrow_datetimeszctest_pandas_timestamp_overflow_pyarrow.<locals>.ArrowEngineWithTimestampClamp.clamp_arrow_datetimes&
  s5    K"** ,88((2HHMM%66'5Ye!T"J %(HHM(+(<(F(F(HF88BHHW$56DKK"hh*4q8"hh*4 $   
 !# <I )} =I&&y1&&s+-,0 88'';;S;S'TTr,   c                N    | j                  |      }t        |   ||f||d|S )N)r  convert_string)ry  super_arrow_table_to_pandas)rq  rr  r   r  r{  r   fixed_arrow_table	__class__s          r)   r}  zdtest_pandas_timestamp_overflow_pyarrow.<locals>.ArrowEngineWithTimestampClamp._arrow_table_to_pandasH
  sE     !$ 9 9+ F71! ,-	
  r,   )rr  pa.Tablereturnr  )NF)rr  r  r  zpd.DataFrame)rC   rD   rE   classmethodry  r}  __classcell__)r  s   @r)   ArrowEngineWithTimestampClamprf  %
  sF    		U 
	UB 

  	!	 	 
	r,   r  r   )ro   rm  rQ   r:   r  RuntimeWarninglinspacero  r"  rv   r   r   r   r  rz  r   rw   r|   rl   r   r5   r3   )r   rw  arr_numeric	arr_datesr  r3   r  s          r)   &test_pandas_timestamp_overflow_pyarrowr  
  s   88BHHW%&D 
n,O	P 
kk((Q,TXX4w

 ""#34IHH  "((9"5!6tf EEJJ&'  
 OOCK ((*B3(: 3l OOCK(EFNNPM
 
s   1EEc                   t        j                  dt        j                  d      gi      }t        | j	                  d            }|j                  ||       ddi} t        j                  |      j                         j                  d	i |}t        j                  |d|      }t        ||       |j                  j                  |j                         j                  j                  k(  sJ y )
Nr  r  r   r   timestamp_as_objectTr    )r*   r  r=   )rm   rn   r  rl   r   ry   r   rM  r   r  rw   r|   r   r  rQ   r   )r   r*   r   r   r  rz  gots          r)   test_arrow_to_pandasr  ^
  s    
 
sR\\,789	:Bv{{>*+DMM$vM&,d3O2R^^D!&&(22E_EF
//$y/
RCfc55;;#++-///////r,   z,https://github.com/apache/arrow/issues/47177)	conditionr   strict
write_colsrQ  r   rQ  kindr   c                d   | j                  d       | j                  d       t        | j                  d            }t        | j                  d            }t        j                  j	                  |d      }t        j                  j	                  |d      }t        j                  ddt        d      d	      }t        j                  d
dt        d      d	      }||   }||   }|j                  |d       |j                  |d       t        |       }	t        j                  ||gd      }
|g d	k(  r&t        j                  |	|      }t        ||
d       y t        j                  t        t         j"                  f      5  t        j                  |	|       d d d        y # 1 sw Y   y xY w)Npart=apart=bzpart=a/kind=xzpart=b/kind=xdata.parquetr   r   r   r  r   Fr   Tignore_indexr   r   )r  rl   rz   r   r   rm   rn   rw  ry   rD  rw   r|   r   r:   r;   rK   r   ArrowTypeError)r   r*   r  path0r  _df1_df2rF  r   r   rz  r  s               r)   test_partitioned_column_overlapr  v
  sN    LL
LL_-.E_-.EGGLL/EGGLL/E<<c%(CDD<<c%(CDD
z
C
z
CNN5N&NN5N&v;DYYd|$7F,,f5&&e4 ]]J(9(9:; 	1OOD0	1 	1 	1s   F&&F/c                \   | j                  d      }| j                  d      }t        j                  j                  |d      }t        j                  j                  |d      }t	        j
                  dt        d      d      }t	        j
                  dt        d      d      }t        j                  j                  ||   d	      j                  i 
      }t        j                  ||       t        j                  j                  ||   d	      j                  i 
      }t        j                  ||       t	        j                  ||gd      }	t        j                  t!        |       |      }
|
d   j#                  d      |
d<   t%        |
t'        |	j(                           |	d       y )Nr  r  r  r   r   rQ  r   r   Fr   )r  Tr  r   rQ  r  r   )r  rz   r   r   rm   rn   rw  r   r   rx   replace_schema_metadatar   r   rD  rw   r|   rl   rv   r   r   r   )r   r*   r  r  r  r  r  t1t2rz  r  s              r)   #test_partitioned_no_pandas_metadatar  
  sb    LL"ELL"EGGLL/EGGLL/E
 <<U1X67D<<U1X67D			Z 
 
 r*  NN2u			Z 
 
 r*  NN2u YYd|$7F__S[8FF^**84F6NfT&..)*FFr,   c                   | j                  d      }| j                  d      }t        j                  j                  |d      }t        j                  j                  |d      }t	        j
                  dt        d      d      }t	        j
                  dt        d      d      }|j                  |       |j                  |       t	        j                  ||gd      }t        j                  t        |       dgt        |       d	
      }|d   j                  d      |d<   t        |t        |j                           |d       y )Nr   r   r  r   r  Tr  rQ  )partitioningpartition_base_dir)datasetr  Fr   )r  rz   r   r   rm   rn   rw  ry   rD  rw   r|   rl   rv   r   r   r   )r   r  r  r  r  rz  r  s          r)   #test_pyarrow_directory_partitioningr  
  s     LLELLEGGLL/EGGLL/E<<U1X67D<<U1X67DOOEOOE YYd|$7F__F"(VMF F^**84F6NfT&..)*FFr,   c                $   t        |       }d}d}t        j                  |d      j                  ||z        }t	        j
                  t        j                  |      t        j                  j                  |      t	        j                  |      d      j                  d      }d |j                  _
        t        j                  ||      }|j                  |d	|
       ||d	   dk(     }	t        j                  ||dg      }
t        |	|
       y )NrO   r   rq   rP   rV   )r   r  r  r   r   r  r?  rh   )r  r7  rh   r6  )rl   ro   rp   r  rm   rn   rt   r-  r   r   r   rw   rx   ry   r|   r   )r   r.   r0   r   rW   r   r   r   rF  rz  r  s              r)   test_partitioned_preserve_indexr  
  s    
f+CDK
		+W-44T[5HIA<<yy!!t!,"	
 i	 	 DJJO
..;
7CNN3SN>$s)q.!F
//#kN;K
LCfcr,   c                N   t        | j                  d            }t        j                  ddgddgddgd      j	                  d	      }d |j
                  _        |j                  ||d
       t        j                  |      }t        j                  ||      }t        ||       y )Nr   rh   r   r   r   r   r   r  r
  Tr   r   )rl   r   rm   rn   r   r   r   ry   r|   rw   r   )r   r*   r   r   rz  r  s         r)   $test_from_pandas_preserve_none_indexr  
  s    	V[[(	)B	QF!Qq!f=	>	H	H	MBBHHMMM"V4M0__R F
//"V
,Cfcr,   c                B   t         j                  d      }t        |j                        D ]D  \  }}| j	                  d| d      }|j                         j                  t        |      |       F t        j                  t        |       d|      }t        ||       y )NTr   ztest.rL  r   Frg   )r   r   	enumeraterL  r   r   ry   rl   rw   r|   r   )r   r*   r0  r  rQ  r   r   s          r)   %test_multi_partition_none_index_falser  
  s    ???%DT__- <4{{U1#X./!!#d)F!;<
 ??3v;eFCDdDr,   c                    t        | j                  d            }t        j                  dg dit        j                  dd            }|j                  ||       t        j                  ||      }t        ||j                                y )	Nr   tr   rh   r   )r  ra  r   r   )
rl   r   rm   rn   
RangeIndexry   rw   r|   r   r   )r   r.   r0   r   rE  rF  s         r)   )test_from_pandas_preserve_none_rangeindexr  	  se    	V[[(	)B
,,Y'r}}11/M
NCNN2lN+
//"[
1Cc3;;=!r,   c                    d}t        | j                  d            }t        j                  dddg|ddgi      j	                  d      }d |j
                  _        t        j                  |d      }t        j                  t        |	      5  |j                  ||d
       d d d        t        j                  t              5 }|j                  ||       d d d        |t        j                        v sJ y # 1 sw Y   XxY w# 1 sw Y   /xY w)N__null_dask_index__r   r   rh   r   r   r   r   r8   Fr   r   )rl   r   rm   rn   r   r   r   rw   rx   r:   r  UserWarningry   r;   rK   r<  )r   r*   	null_namer   r   r   r>  s          r)   test_illegal_column_namer    s     &I	V[[(	)B	sQFI1v6	7	A	A#	FBBHHM
..
+C 
k	3 =r&e<=
 
z	" *ar&)*AGG$$$= =
* *s   C8D8DDc                   t        j                  g dg dd      }t        j                  |d      }|j	                  t        |       |d       t        j                  t        |       |d	      }|j                  d
k(  sJ y )N)rh   r   NNr_  r   r   r   Fr   r   r   )NNN)rm   rn   rw   rx   ry   rl   r|   r~   )r   r*   r   r   r  s        r)   "test_divisions_with_null_partitionr  (  sg    	.\B	CB
..
+CNN3v;v5NAs6{6EH!3333r,   c                4   t        |       }t        j                  g dg dd      }|j                  dd      }t	        j
                  |d      }|j                  ||	       t	        j                  |d
d      }|j                          t        ||       y )Nr   r   r   r   Tr   r   r   r   r    r  )
rl   rm   rn   r   rw   rx   ry   r|   r   r   )r   r*   r   r   r   r   s         r)   test_pyarrow_dataset_simpler  1  sv    	VB	IO<	=B	c	%B
..
+CNN2fN%oobMGOOc7r,   test_filterc                   t        |       }t        j                  g dg dd      }|d   j                  d      |d<   t	        j
                  |d      }|j                  ||dd	       t	        j                  |d
|rdgnd d      }|r2t        ||d   dk(     j                         |j                                y t        ||       y )Nr   r   r   r   r  r   r   T)r*   r@  ra   r    r   r7  r   )r*   r  r   r   
rl   rm   rn   rv   rw   rx   ry   r|   r   r   )r   r*   r  r   r   r   r   s          r)    test_pyarrow_dataset_partitionedr  =  s     
VB	IO<	=BgnnZ(BsG
..
+CNN2f3DNQoo
&1!"t 	G #c#h#o&..0'//2CD#wr,   c                   t        j                  t        j                  g dd      t        j                  g dd      d      }t        j                  |d      }|j                  t        |       d	       t        j                  t        j                  j                  | d
            }t        |      dk(  sJ t        j                  t        |       dddt        j                  dt        j                         fg      di      }|j                   j#                  d      |_        t%        |ddg   |ddg   d|       y )N)r   rh   Nr  rP   r   )r  r   rh   r   r  r@  zid=*/*.parquetr   r  r  hiveflavorru  )r  r  r   F)r   rk   )rm   rn   r  rw   rx   ry   rl   r  rz   r   r   r}   r|   r   ru  rr   r   rv   r   )r   rk   r   r   r  r  s         r)   test_null_partition_pyarrowr  R  s	    
))L89G4	

B ..
+CNN3v;TN2
))BGGLL)9:
;Cs8q== F& ))dBHHJ%7$89
	H 		  )CIS$K#t	r,   c                   t        |       }t        j                  g dg dd      }|d   j                  d      |d<   t	        j
                  |d      }|j                  |d       t	        j                  |d	gd
      }t	        j                  |d	g      }t        ||       t        ||d   dk(     j                         |j                                y )Nr   r   r   r   r  r   r   r  r  F)r  read_from_pathsr  r   r  )r   r   r   r   	read_df_1	read_df_2s         r)   $test_pyarrow_dataset_read_from_pathsr  x  s    	VB	IO<	=BgnnZ(BsG
..
+CNN2CN(-=,>PUVI-=,>?Ii#c#c(c/"**,i.?.?.ABr,   c                j   t        |       }t        j                  g dg dg dd      }|d   j                  d      |d<   t	        j
                  |d      }|j                  |dd	g
       t	        j                  ||dg      }t        |j                         dg   ||d   dk(     dg   d       y )Nr   r   )r  r  r  r  r   r  r   r   r
  r  )r   r7  r   )rZ  r  r   r   Fr   r  )r   rZ  r   r   r   r   s         r)   'test_pyarrow_dataset_filter_partitionedr    s     
VB	  	

B gnnZ(BsG
..
+CNN2S#JN/ oo
) G
 3% 
2c7a<#r,   c                   t        j                  t        d      t        d      d      t	        j
                  fdt        d            }|j                  | |dg       t	        j                  | |dg      }|d   j                  d	      |d<   t        j                  d
d |       t	        j                  | |dgg      }|d   j                  d	      |d<   t        j                  d
d |       y )Nr   abcdefg)r  rQ  c                (    j                   | | dz    S )Nrh   )r-  )r  r   s    r)   r  z<test_pyarrow_dataset_filter_on_partitioned.<locals>.<lambda>  s    "''!a!e$ r,   rQ  r  )rQ  r7  r
  r6  r  r   r   )rm   rn   rw  r   rw   from_mapry   r|   rv   r   r-  )r   r*   r   read_ddfr   s       @r)   *test_pyarrow_dataset_filter_on_partitionedr    s    	eAhY@	AB
++$aC NN6&xN@ $%H
  '..x8HVbggalH% %&'H
  '..x8HVbggalH%r,   c                   t        |       }  t        j                  t        j                  j
                        g g dd      } t        j                  t        j                  j
                        g dg ddd      } t        j                  t        j                  j
                        g dg ddd      }t        j                  |||g      }|j                  | dgd	d
       t        j                  |       }d|v sJ d|v sJ t        j                  t        j                  j                  | d            j                  j!                         }|j"                  }|sJ |j%                  dd	      sJ y )Nr   )rM  rM  rP   rh   rh   r   r   rh   r   rh   r   rr   rr   rh   r   rh   r   r   FTr@  r,  ra   re   rd   r  )rl   r   r  rm   rn   	from_dictrw   r
  ry   rz   r{   r   rM  r   r   ru  to_arrow_schemar  r  )r   df_adf_bdf_cr   r   schema_commonr  s           r)   )test_parquet_pyarrow_write_empty_metadatar    sM    [F/4<<../r.D 04<<../.6HD 04<<../.6HD 
$d+	,BMMU 	   JJvE%&&& NN
V/0f__  $33O?666r,   c                   t        |       }  t        j                  t        j                  j
                        g dg ddd      } t        j                  t        j                  j
                        g dg ddd      }t        j                  ||g      }|j                  | dgd	d
        t        j                  t        j                  j
                        g g dd      } t        j                  t        j                  j
                        g dg ddd      }t        j                  ||g      }|j                  | dgd
d
d
       y )Nr  r  r   r  rP   r  )r   r   r   r   r   FTr  )r   r   r   r   )r@  r,  rA  ra   )	rl   r   r  rm   rn   r  rw   r
  ry   )r   r  r  rF  r  df_dr   s          r)   0test_parquet_pyarrow_write_empty_metadata_appendr    s$    [F/4<<../.6HD 04<<../.6HD //4,
'CNNU 	   04<<../r"4D 04<<../.6HD //4,
'CNNU   r,   c                v   t        |       } t        j                  t        d      g ddz  d      }d|j                  _        t        j                  |d      }|j                  | d|d	
       |r7t        j                  t        j                  j                  | |dz               }n3t        j                  t        j                  j                  | d            }t        j                  j                  j                  |d	d       t        j                   | ddd	d      }|rP|j#                  d      }|j%                         j#                  d      }|j&                  j)                  d      |_        t+        ||       t        j                  j                  j                  |d	dd      }t-        j.                  t        j                  j                  | d            j0                  }|j2                  |j2                  k(  sJ |j4                  |j4                  k(  sJ |j6                  |j6                  k(  sJ y )Nr  r  r  r   r   r   r   r   Fr    )ra   r@  r*   z=*/*.parquetr   r   )r*   split_everyT)r   rZ  r*   r   r   r  )r*   r  out_dirre   )rl   rm   rn   rw  r   r   rw   rx   ry   r  rz   r   r   iorz  create_metadata_filer|   rC  r   r   rv   r   r   rM  r  num_rowsnum_columnsr|  )r   r@  rF  r0  r  r   fmdfmd_files           r)   test_create_metadata_filer  
  s    [F ,,U3Z.BR.GH
ICCIIN>>#2.DOO!!	   iiV\N-JKLiiV[9:EEMM&& '  ?? D s#||~))#.x(dD %%--
,
,	 - C ~~bggll6;?@IIH<<8,,,,,??h22222!8!8888r,   c           	        t        j                  t        j                  t        j
                  j                  ddd      g d      d      }|j                  d	
      }t        j                  || |d	       |j                  d      }t        j                  || |d	       t        j                  |       }|D cg c]	  }|dvs| }}t        |      |j                  k(  sJ y c c}w )Nr   r  )r  r   )lowhighrW   )
r  r  r  rb  r  r  GHIJr   r   r   Tr   r*   	overwriter   )rd   re   )rw   rx   rm   rn   ro   rt   r  r   ry   rH  rz   r{   r}   r   )r   r*   r   r   r   r\   s         r)   !test_read_write_overwrite_is_truer  D  s     ..
II!#I>F	
 C //t/
$CMM#vf= ??q?)D MM$v> JJvEL1*K!KQLELu:))))) Ms   >	C)C)c           
        ddl m} t        j                  t	        j
                  t	        j                  dd      t	        j                  dd      t	        j                  dd      f            }g d|_        t        j                  |d	      }t        j                  || |d
dgd        ||       j                  d      }|D cg c]  }|j                          }}|j                  d	      }t        j                  || |d
dgd        ||       j                  d      }	|	D cg c]  }|j                          }
}t        |
      t        |      k  sJ y c c}w c c}w )Nr   )r  )rK  r   rh   )r8  r   r   r  r   r   r  r  T)r*   r@  r  r  r   )r  r  rm   rn   ro   vstackfullr   rw   rx   ry   rglobas_posixrH  r}   )r   r*   r  r   r   files_r\   r   r   files2_files2s              r)   .test_read_write_partition_on_overwrite_is_truer  `  s+    

		###	

B !BJ
..
+CMM#vfC:QUV &\$F#)*aQZZ\*E*??q?)D
 MM$vS#JRVW6l  %G$+,qajjl,F, v;U### + -s   E'Ec                    t        j                  dt        d      i      }t        j                  |d      }t
        j                  j                  t        |       d      }|j                  ||       t        j                  ||d      j                  d	
      }t
        j                  j                  t        |       d      }|j                  ||d       |j                  ||d       t        |t        j                  ||d             y )Nr      r:  r   r   r   r/  r  1GB)partition_sizepath_newTr  F)rm   rn   rw  rw   rx   rz   r   r   rl   ry   r|   rH  r   )r   r*   r   r   r   r   r  s          r)   -test_to_parquet_overwrite_adaptive_round_tripr    s    	sE#J'	(B
..
+C77<<FV,DNN4N'??# kk'	 	
 ww||CK4HOOHVtO<OOHVtO<
"	
r,   c                   t        j                  dt        d      i      }t        j                  |d      }t        j                  t              5  t        j                  |d|d       d d d        t        j                  t              5  t        j                  || |dd       d d d        y # 1 sw Y   FxY w# 1 sw Y   y xY w)	Nr   r}  r   r   z./Tr  )r*   r,  r  )	rm   rn   rw  rw   rx   r:   r;   rK   ry   r   r*   r   r   s       r)    test_to_parquet_overwrite_raisesr    s     
sE"I&	'B
..
+C	z	" @
c4$?@	z	" O
c6&NO O@ @O Os   B0B<0B9<Cwin32zFile not found error on windowsc                8   | j                  d      }t        j                  t        j                  dt        d      i      d      j                  ||       t        j                  |      }|| fD ]  }t        j                  t        d      5  |j                  |d	
       d d d        |j                  |j                  dz         }t        j                  t        d      5  |j                  |d	
       d d d         y # 1 sw Y   bxY w# 1 sw Y   xY w)Nsubdirr   r8  r   r   r   z)same parquet file|Cannot overwrite a pathr8   T)r  rh   )r   )r  rw   rx   rm   rn   rw  ry   r|   r:   r;   rK   assignr   )r   r*   r  r   targetr   s         r)   Etest_to_parquet_overwrite_files_from_read_parquet_in_same_call_raisesr    s     \\(#FNN2<<eBi 01qALLv M  //&
!C 6" 4]]I
 	3 NN6TN2	3
 zzCEEAIz&]]I
 	4 OOFdO3	4 	44	3 	3	4 	4s   D%DD	D	c                <   t        j                  t        d      t        d      d      }t        j                  |d      }t        j                  t        d      5  |j                  t        | j                  d            |       d d d        y # 1 sw Y   y xY w)	Nr   )r   rh   r   r   znon-string column namesr8   tempr   )rm   rn   rw  rw   rx   r:   r;   rK   ry   rl   r   r
  s       r)   .test_to_parquet_errors_non_string_column_namesr    ss    	E"I%)4	5B
..
+C	z)B	C @s6;;v./?@ @ @s   ,BBc                   t         j                  j                  ddddddt        j                  dddd	d
ddt        j                  dddddddddd      }t
        j                  j                  |d      }|j                  | d|       t        j                  | dg|      }|j                  j                  d      |d<   t        |||j                  dk(            y )Ng     u@g     0v@g     `v@g     p@g     p@)r   rh   r   r   r   r   r   g     @g     @g     T@g     @g     @g     @i    )r  r  yearrh   r   r  r?  )r  r7  r  )r  r*   rr   )rm   rn   r  ro   rB  r   	dataframerx   ry   rw   r|   r  rv   r   r  s        r)   test_dir_filterr    s    			 66 66 $4DTdS'	

B. ..
$
$RQ
$
7CNN6vN>??6,@+A&QD99##G,DLdBrww$'(r,   c           	        t        |       } t        d      D cg c]%  }t        j                  dd      t	        d      d' }}t        j                  t        j                  |      d      }|j                  | d	t        j                  d
d      i       t        j                  |       }t               rst        j                  j                  |d	   j                  j                         sJ |j#                  d	t        j$                  t        j                  d
d            i      }n!|d	   j                  |d	   j                  k(  sJ t'        ||dt(                t(        r"|d   j                  |d   j                  k7  sJ y y c c}w )N   rt  Europe/Berlinutcz123.00rc  col1rh   r   r   r   r   r   ru  Fr   r  rc  )rl   rw  rm   r  r   rw   rx   rn   ry   r   
decimal128r|   r   rj  
is_decimalrQ   pyarrow_dtyperv   
ArrowDtyper   r	   )r   r  r   r0  r   s        r)   test_roundtrip_decimal_dtyper'    sL    [F r
  ..?CH%	
D  >>",,t,!<DOOq!1D(EOF??6"D xx""4<#5#5#C#CDDD{{FBMM"--12E$FGHF|!!T&\%7%7777 dD%=NODz4:#3#3333 )s   *Fc           	        t        |       } t        d      D cg c]'  }t        j                  dd      t	        ddd      d) }}t        j                  t        j                  |      d	      }|j                  | d
t        j                         i       t        j                  |       }t               rqt        j                  j                  |d
   j                  j                         sJ |j#                  d
t        j$                  t        j                               i      }n!|d
   j                  |d
   j                  k(  sJ t'        ||dt(                t(        r"|d   j                  |d   j                  k7  sJ y y c c}w )Nr  rt  r  r  r  r   r  rh   r   r   r!  Fr"  rc  )rl   rw  rm   r  r   rw   rx   rn   ry   r   date32r|   r   rj  	is_date32rQ   r%  rv   r&  r   r	   )r   r!  r   r0  r   s        r)   test_roundtrip_date_dtyper+    sD    [F r
  ..?Cr2&	
D  >>",,t,!<DOO(=O>??6"D xx!!$v,"4"4"B"BCCC{{FBMM"))+$>?@F|!!T&\%7%7777dD%=NODz4:#3#3333 's   ,Fc                l   t         j                  j                  t        |       d      }t	        j
                  g dt        j                  j                  d            }|j                  |       t        j                  ||      }g d|_        g d|_        t        ||j                                y )Nr   r  )r   r   rV   )r   r   r   )r   r>  r\   )rz   r   r   rl   rm   rn   ro   rt   uniformry   rw   r|   r   r   r   )r   r*   r   rF  r   s        r)   test_roundtrip_rename_columnsr.  $  sz     77<<F^4D
,,RYY5F5FG5F5T
UCNN4 ??4/D"DL!CKc4<<>"r,   c                R   ddi}t        |       }t        j                  t        d      t        d      d      }t	        j
                  |d      j                  |||d       t        |t	        j                  ||	             t        rt        j                  t        j                  j                  |d
            }|t        j                  j                  |d      gz  }|D ]M  }t        j                  |      j                  j                  }|j!                         D ]  }||   ||   k(  rJ  O ddi}t#        j$                  t&              5 }	t	        j
                  |d      j                  |||       d d d        dt        	j(                        v sJ y # 1 sw Y   #xY w)Ns   my_keys   my_datar   r   r   r   T)r*   custom_metadatara   r   r   re   s   pandass   my_new_pandas_md)r*   r0  zUser-defined key/value)rl   rm   rn   rw  rw   rx   ry   r   r|   r   r  rz   r   r   rM  r  keysr:   r;   rK   r<  )
r   r*   r0  r   r   r   r   _mdkr>  s
             r)   test_custom_metadatar4  3  s|    !*-O v;D	E"IE"I6	7BNN21%00' 	 1  b"//$v67 
 		"'',,t[9:"'',,t[122 	4B..$--66C$))+ 41v!33334	4 !"56O	z	" 
a
rq)44+ 	5 	

 $s177|333
 
s   *FF&)TFNc                   t        |       } t        j                  j                  | d      }t        j                  j                  | d      }t	        j
                  t        d      ddgdz  d      }t        j                  |d	      }|j                  ||d
       |j                  ||d
       dt        j                  |      vsJ t        t        j                  j                  |d      d      5 }|j                  d       d d d        dt        j                  |      v sJ dt        j                  |      vsJ t        j                  ||d|      }t        j                  ||d|      }	t        ||	       y # 1 sw Y   zxY w)Ndata1data2r  rK  r!  rK  r   r   r   Fr   r*   ra   re   wzINVALID METADATAT)r*   ignore_metadata_filer   )rl   rz   r   r   rm   rn   rw  rw   rx   ry   r{   r.  writer|   r   )
r   r*   r   dataset_with_bad_metadatadataset_without_metadatarF  r0  r\   ddf2addf2bs
             r)   test_ignore_metadata_filer@  ]  se   [F "VW =!ww||FG< ,,U3Zuenr.AB
CC>>#1-DOO&v5   	OO%f%  
 bjj)BCCCC	bggll4kBC	H $A	"#$"**%>????bjj)ABBBB OO!!/	E OO !/	E eU'$ $s   4E??Fra   rY  c                2   t        |       } t        j                  t        d      ddgdz  d      }t	        j
                  |d      }|j                  t        |       ||       t	        j                  t        |       |d	
      }t	        j                  t        |       |d	|      }t        ||       t        j                  j                  d|i      5  t	        j                  t        |       |d	
      }d d d        t        |       y # 1 sw Y   xY w)Nr  rK  r!  rK  r   r   r   r8  Tr  rX  z*dataframe.parquet.metadata-task-size-local)rl   rm   rn   rw  rw   rx   ry   r|   r   r   r%  r   )	r   r*   ra   rY  rF  r0  r>  r?  ddf2cs	            r)   test_metadata_task_sizerC    s     [F
,,U3Zuenr.AB
CC>>#2.DOO[=P  
 OOF E
 OOF -	E eU		57IJ
 
 K $

 eU
 
s   "DD)r   Nc                2   t        |       } t        j                  t        d      ddgdz  d      }|j	                  |j
                  j                  d            }t        j                  |d	      }|j                  | |d
|       t        t        j                  j                  | d      d      j                          t        t        j                  j                  | d      d      j                          t        j                  t        j                  j                  | d             t        j                   | |d
      }t#        ||d       t#        |j
                  |j
                  d       dd}t        j                   | fd|i |d      dd
i}t#        ||d       t#        |j
                  |j
                  d       t%        j&                  t(        t*        j,                  j.                  f      5  t        j                   | fd|i |d       j1                          d d d        t%        j&                  t2              5  t        j                   | fd|i |d      j1                          d d d        y # 1 sw Y   XxY w# 1 sw Y   y xY w)Nr  rK  r!  rK  r   r  )r   r   r   T)r*   ra   r@  _SUCCESSr9  zpart.0.parquet.crcre   r  F)check_categorical)check_category_orderc                    |rdd| iiS d| iS )Nr  require_extensionparquet_file_extensionr=   )r  legacys     r)   _parquet_file_extensionz0test_extra_file.<locals>._parquet_file_extension  s+    
  ,c23	
 +C0	
r,   r*   rL  r   .foo)F)rl   rm   rn   rw  r  r   rv   rw   rx   ry   r.  rz   r   r   closeremover|   r   r:   r;   OSErrorr   libArrowInvalidr   rK   )r   r*   r@  r   r   r   rL  s          r)   test_extra_filerS    s   
 [F	E#JeU^b-@A	BB	RTT[[,	-B
..
+CNN !	   	fj	)3/557f2	3S9??AIIbggll6;/0
//&T
JCc2/ceeRTT6
 // "*
- !	C c2/ceeRTT6 
!4!45	6 
	
!	
%<T%B	

') 
z	" 
	
!	
%<V%D	

')   s   =-J-JJ
Jc           	        t         j                  j                  t        |       d      }t	        j
                  dt        d      i      }|j                  ||       t        |t        j                  ||dd             y )Nz	multi.foor   r   r   FTr   )rz   r   r   rl   rm   rn   rw  ry   r   rw   r|   )r   r*   r   rE  s       r)   test_unsupported_extension_filerU    s`     
c&k;	/B
,,U2Y'
(CNN2fN%R__ReQUVr,   c                    t        |       }t        j                  t        j                  dt        d      i      d      }|j                  ||d d       t        |t        j                  ||d             y )Nr   r   rh   c                    d|  dS )NrK  rM  r=   r  s    r)   r  z0test_unsupported_extension_dir.<locals>.<lambda>  s    %s$ r,   T)r*   name_functionra   r  )	rl   rw   rx   rm   rn   rw  ry   r   r|   )r   r*   r   ddf0s       r)   test_unsupported_extension_dirr[    se     v;D>>",,U2Y'78!<DOO/ 	   dBOODTRSr,   c                J   t        |       }t        j                  g dg dd      }t        j                  |d      }|j                  |dd |       t        j                  |      }d	|v sJ d
|v sJ d|v sJ d|v sJ t        |t        j                  ||d             y )Nr_  r   r:  r;  r   num1num2r   r   Tc                    d|  dS )Nhi-rL  r=   r   s    r)   r  z&test_custom_filename.<locals>.<lambda>
  s    #aS 1 r,   )ra   rY  r*   rd   re   hi-0.parquetzhi-1.parquetr  )
rl   rm   rn   rw   rx   ry   rz   r{   r   r|   )r   r*   r   r  r   r   s         r)   test_custom_filenamerd    s    	VB
,,}5C 
	+BMM
 1	   JJrNE&&&%U"""U"""b"//"VNOr,   c                    t        |       }t        j                  g dg dd      }t        j                  |d      }|j                  |dd        t        j                  d	gd
gd      }t        j                  |d      }|j                  |d dd       t        j                  |      }d|v sJ d|v sJ d|v sJ d|v sJ d|v sJ t        j                  g dg dd      }t        j                  |d      }t        ||d       y )Nr_  r]  r^  r   r   Tc                    d| dz   dS Nrb  r   rL  r=   r   s    r)   r  zMtest_custom_filename_works_with_pyarrow_when_append_is_true.<locals>.<lambda>       #a!eWH 5 r,   )ra   rY  !   ,   rh   c                    d| dz   dS rg  r=   r   s    r)   r  zMtest_custom_filename_works_with_pyarrow_when_append_is_true.<locals>.<lambda>)  rh  r,   )rY  r,  rA  rd   re   rc  zhi-2.parquetzhi-4.parquet)rh   r   r   r   ri  )r   r:  r;  r   rj  Fr   r   )
rl   rm   rn   rw   rx   ry   rz   r{   r|   r   )r   r   r  r   r   expected_pdfactuals          r)   ;test_custom_filename_works_with_pyarrow_when_append_is_truern    s"   	VB
,,}5C 
	+BMM
 5   ,,t$C 
	+BMM
5	   JJrNE&&&%U"""U"""U"""<<!+<=L __Ru-Ffl6r,   c                   t        |       }t        j                  g dg dd      }t        j                  |d      }t        j                  t        d      5  |j                  |d|	       d d d        t        j                  t        d
      5  |j                  |d |	       d d d        y # 1 sw Y   CxY w# 1 sw Y   y xY w)Nr_  r]  r^  r   r   z7``name_function`` must be a callable with one argument.r8   whatever.parquet)rY  r*   z0``name_function`` must produce unique filenames.c                     y)Nrp  r=   r   s    r)   r  zAtest_throws_error_if_custom_filename_is_invalid.<locals>.<lambda>H  s    r,   )	rl   rm   rn   rw   rx   r:   r;   rK   ry   )r   r*   r   r  r   s        r)   /test_throws_error_if_custom_filename_is_invalidrr  :  s    	VB
,,}5C 
	+B	S
 K 	b(:6JK
 
L
 U 	b(DVTU UK K
U Us   B1B=1B:=Cc           	        t        |       }t        j                  g dg dd      }t        j                  |d      }|j                  ||dgd d	       t        j                  |      D ]K  \  }}}|D ]  }|d
v rJ  |D ]3  }	|	g t        |j                        D 
cg c]  }
|
 d	 c}
ddv r3J  M t        j                  ||d      }t        ||ddd       y c c}
w )N)franklimarcelaluis)canadachina	venezuelarz  )
first_namecountryr   r   r|  c                    |  dS )N-cool.parquetr=   r   s    r)   r  z5test_custom_filename_with_partition.<locals>.<lambda>X  s    1#] 3 r,   F)r*   r@  rY  rc   )zcountry=canadazcountry=chinazcountry=venezuelar~  rd   re   r   )r   r  rF  )rl   rm   rn   rw   rx   ry   rz   r   rw  r   r|   r   )r   r*   r   r  r   r!  dirsr   dirr"  r  rm  s               r)   #test_custom_filename_with_partitionr  K  s'   	VB
,,<D	
C 
	+BMM
[3   ''"+ 4 	C    	  	D /4R^^/DE!QC}%E"    	 __Re<FVEU Fs   C#c                   dd l m} ddlm}m} t        j                  ddgddgd      }| j                  d      }t        j                  |d	      }|j                  ||d
gd       | j                  d      }t        j                  j                  |      }	 ||	|dd |t        j                  d
t        j                         fg                   d }
t        j                  ||      } |j                   |      j#                         }t%         |
|       |
|      d       t        j                  ||      } |j                   |      j#                         }t%         |
|       |
|      d       y )Nr   )HivePartitioningwrite_datasetrh   r   r   r   )r   col2zfoo-daskr   r   Fr@  zfoo-pyarrowzpart.{i}.parquetrz  )r   base_dirbasename_templateformatr  c                .    | j                  d      ddg   S )Nr  r   )rC  r   s    r)   _prepz9test_roundtrip_partitioned_pyarrow_dataset.<locals>._prep  s    }}V$ff%566r,   r   r   )pyarrow.parquetrz  pyarrow.datasetr  r  rm   rn   r  rw   rx   ry   r   r   ru  rq   r|   
read_tabler  r   )r   r*   r   r  r  r   	dask_pathr   pa_pathr  r  df_read_dask
df_read_pas                r)   *test_roundtrip_partitioned_pyarrow_datasetr  o  s<    !? 
1vSz:	;B Z(I
..
+CNN9V6(PUNV ll=)GHH  $E,%bii&"((*1E0F&GH7 ??9V<Ly)335JeL!5#4%H ??76:Lw'113JeL!5#4%Hr,   filter_value)rh   )r   r   tuple)idsc                   | dz  }t        j                  g dg dd      }|j                  ||       dd|fg}t        j                  |||      }t        j                  |||      }t        ||       t        j                  |d	
      }| dz  }|j                  ||       t        j                  |||      }t        j                  |||      }t        ||d       y)<Regression test for https://github.com/dask/dask/issues/8720z$in_predicate_iterable_pandas.parquetr_  r  r  r   r  r  r6  r   r   z"in_predicate_iterable_dask.parquetFr   N)rm   rn   ry   rw   r|   r   rx   )	r   r*   r  r   r   r  r  r  r   s	            r)   #test_in_predicate_can_use_iterablesr    s     <<D	.
B MM$vM&T<()G__T&'BFtFGDHfh ..
+C::DNN4N'__T&'BFtFGDHfhE2r,   c                   t        j                  t        d      g ddd      }|j                  | |       dgg}t        j                  | ||      }t        j                  | ||      }t        ||d	
       t        j                  t        d      5  dgg}t        j                  | ||      j                          d d d        y # 1 sw Y   y xY w)Nr:  )rh   rh   r   r   r   r   r   r   r  r   r   r   )r  r  )rh   r   r6  Fr   z"not a valid operator in predicatesr8   )r  znot eqrh   )rw   r  rw  ry   r|   rm   r   r:   r;   rK   r   )r   r*   r   r  r  r  unsupported_ops          r)   test_not_in_predicater    s    
,,Ah56C NN8FN+'()G__XfgFFxHHfhE2	z)M	N S-./
HPPRS S Ss   +C

C)r  r  r   )r  r8  r   )zone-item-single-nestzone-item-double-nestztwo-item-double-nestztwo-item-two-nestc                   | dz  }t        j                  g dg dd      }|j                  ||       t        j                  t
        d      5  t        j                  |||       d	d	d	       t        j                  |d
      }| dz  }|j                  ||       t        j                  t
        d      5  t        j                  |||       d	d	d	       y	# 1 sw Y   uxY w# 1 sw Y   y	xY w)r  zgh_8720_pandas.parquetr_  r  r  r   zValue of 'in' filterr8   r6  Nr   r   zgh_8720_dask.parquet)	rm   rn   ry   r:   r;   r  rw   r|   rx   )r   r*   r  r   r   r   s         r)   &test_in_predicate_requires_an_iterabler    s    " ..D	.
B MM$vM&	y(>	? C
V\BC
 ..
+C,,DNN4N'	y(>	? C
V\BC CC CC Cs   C;C)C&)C2c                n   t        |       }t        j                  d      }t        j                  d      }ddlm} |j                  |j                  f      d        }|j                  ddgdd	gd
      }|j                  |d      }|j                  |       |j                  |      }t        ||       y )Ncudf	dask_cudfr   )pyarrow_schema_dispatchc                6    | j                         j                  S r$   )to_arrowru  )objs    r)   get_pyarrow_schema_cudfz>test_gpu_write_parquet_simple.<locals>.get_pyarrow_schema_cudf  s    ||~$$$r,   abcdefr   rM  r   r   )rl   r:   importorskipdask.dataframe.dispatchr  registerrn   	from_cudfry   r|   r   )	r   r   r  r  r  r  r   r   r  s	            r)   test_gpu_write_parquet_simpler    s    	VBv&D##K0I?%%t~~&78% 9% 
s	

B 

b!
$CNN2

 
 
$Cb#r,   zRequires arrow 15c                 :    t        j                  ddddid        y )Nz$s3://coiled-data/uber/part.0.parquetr    	anonymousT)
filesystemstorage_optionsr1  r   r=   r,   r)   (test_pyarrow_filesystem_option_real_datar    s!     OO.$d+	r,   zignore:Dask annotationsc                   ddl m} d}t        | dz        }t        j                  dt        d      i      }  |d      d	      }|j                  ||
       t        j                  ||
      }t        ||       |j                  ||
       t        t        | j                                     dk(  sJ d       t        |j                  |d            dk(  sJ |j                  |d|       t        |j                  |d            dk(  sJ d       t        j                  ||
      }t        |t        j                  ||g             y )Nr   )get_filesystem_classz/read1write1r   r   memoryF)use_instance_cache)r  zwrote to local fs)detailrh   T)r,  r  r   zshould have two parts)fsspecr  rl   rm   rn   rw  ry   rw   r|   r   r}   r   iterdirlsrD  )r   r  key1key2r   fsr   rq  s           r)   (test_fsspec_to_parquet_filesystem_optionr  
  s,    ,Dx("#D	sE"I&	'B	'	h	'5	ABMM$2M& //$2
.Cc2NN4BN' tH$$&'(A-B/BB- ruuT%u()Q... NN4N4ruuT%u()Q.G0GG.??4B/DdBIIsCj)*r,   c                   t        j                  t        d      dgdz  d      }| dz  }|j                  |dd       t	        j
                  ||dg	      }t        ||       t	        j
                  ||d
g	      }t        ||       y )Nr   r!  r   z#test_select_filtered_column.parquetFT)r   write_statisticsr   r7  r!  r6  r   r  Nrm   rn   rw  ry   rw   r|   r   r   r*   r   r   r   s        r)   test_select_filtered_columnr  )  s|    	E"IUGbL9	:B;;DMM$edM;
//$v8J7K
LCb#
//$v8M7N
OCb#r,   c                   t        j                  t        d      dgdz  d      }| dz  }|j                  |d       t	        j
                  ||dg      }t        ||       t	        j
                  ||d	g      }t        ||       y )
Nr   r!  r   z,test_select_filtered_column_no_stats.parquetF)r  r  r6  r  r  r  s        r)   $test_select_filtered_column_no_statsr  4  sz    	E"IUGbL9	:BDDDMM$M/
//$v8J7K
LCb#
//$v8M7N
OCb#r,   r{  c                   t        j                  g dg dg dd      j                  d      }| dz  }|j                  ||       t        j
                  j                  d|i      5  t        j                  ||      }d d d        |r9|d	k(  r4|j                  d
di      }|j                  j                  d      |_
        n|}t        |       y # 1 sw Y   SxY w)Nr  r  ghir   r   r   rL  r  r  out.parquetr   r  r    r  string[pyarrow])rm   rn   r   ry   r   r%  r   rw   r|   rv   r   r   )r   r{  r*   r   outfiler   r  s          r)    test_read_parquet_convert_stringr  A  s     
#)/J
in  &GMM'&M)	4nE	F 6oogf56 &I-99c#456!../@Ac86 6s   (CCc           	        t        j                  t        j                  g dd      t        j                  g dd      t        j                  g dd      t        j                  g dd      d	      j                  d
      }| dz  }|j	                  ||       t        j                         t        j                         i}t        j                  j                  ddi      5  t        j                  | ddd|j                  i      }ddd       |j                  dt        j                         t        j                         d      }|j                   j                  d      |_        t#        |       y# 1 sw Y   oxY w)zcMake sure that when convert_string, dtype_backend and types_mapper are set,
    all three are used.r  r  rP   r  r  )g?g333333@g @Float32rL  )r  r  r  r  r  r  r   r  Tr    r  r  r  Nr  r  )rm   rn   r  r   ry   r   r  Float64Dtyper   r%  r   rw   r|   r  rv   
Int64Dtyper   r   )r   r*   r   r  r  r   r  s          r)   0test_read_parquet_convert_string_nullable_mapperr  V  s5    
0A9G4?)<?(;		

 in  &GMM'&M) 	

boo'L 
4d;	< 
oo*+\-=-=>	

 yy""	
H ^^**+<=HNc8#
 
s   '&E;;Fr  r  c                    |dk(  rdnd}t        j                  t        j                  ddt         j                  ddgd| 	      t        j                  d
t         j                  dd
dgd| 	      t        j                  dddt         j                  dgd| 	      t        j                  ddddt         j                  gd| 	      d      }t	        j
                  |d      }t        j                   fd       }|j                         }t        j                  t        |      D 	cg c]  \  }}	 ||	|       c}	}       t	        j                   ||      }
t        ||
d       yc c}	}w )zw
    Test reading a parquet file without pandas metadata,
    but forcing use of nullable dtypes where appropriate
    r   z	[pyarrow]rh   r   r   r   r  rP   TFr  r  r  r  r  r	  r   r   r
  r   r  r  r   c                    t         j                  j                  |       j                  i       }t	        j
                  |d| dz         y)z0Write a parquet file without the pandas metadatarK  rL  N)r   r   rx   r  r   r   )r   r  r  r   s      r)   write_partitionz+test_dtype_backend.<locals>.write_partition  s@     $$R(@@D
uh58)<<=r,   )r*   r  r   N)rm   rn   r  r  rw   rx   r   r  r'  r   r  r|   r   )r   r  r*   dtype_extrar   r   r  rL  r  r  r   s   `          r)   test_dtype_backendr  ~  s=    &)99"{K	Aq"%%A.k]6KLruueT517;-9P Cc255#6}>UVCc36{m>TU	
	
B ..
+C	\\> > !JLLIj4IJDAq/!Q'JK??8F-PDb$E* Ks   -E/
c           	     @   t        j                  ddgddgd      }| dz  }|j                  |dg       t        j                  |      }t        j                  ddgt        j
                  t        j                  ddgd	
            dddg      }t        ||       y )Nrh   r   r   r   r   r  r   )partition_colsrq   rP   r  r   r   )rm   rn   ry   rw   r|   r-  r  r   )r   r   outdirr   r  s        r)   3test_read_parquet_preserve_categorical_column_dtyper    s    	QF#s4	5B%FMM&#M/
//&
!C||Cjr~~bhh1vW.MNO!fH c8r,   c                   t        j                  t        j                  ddgd      ddgd      }| dz  }|j                  |       t	        j
                  |d	
      }t        j
                  |d	
      }t        ||t               y )Nr   r   r  rP   rh   r   r   r  r    )r  )sort_results)rm   rn   r  ry   rw   r|   r   r   )r   r   r  r   r  s        r)   test_dtype_backend_categoricalsr    sj    	BIIsCj
C1a&Q	RB%FMM&
//&	
:C
//&	
:Cc3]3r,   r  )r   r7  rK  c                   ddl m} t        j                  t	        d      ddgdz  d      }t        j                  |d	      }|j                  | d
gd       t        j                  d
t        j                         fg      }t        d|      }t        j                  | d|i|      }t        j                  |  |di ||      }t        ||d       |d
   j                  dk7  sJ y )Nr   )r  r  r!  rK  rK  r   r   r   r   F)r   r@  rc   r  r  r  )r  r  )r  r  r   r  r=   )r  r  rm   rn   rw  rw   rx   ry   r   ru  r  dictr|   r   rQ   )	r   r  pd_partitioningrF  r0  ru  r  r   r  s	            r)   )test_non_categorical_partitioning_pyarrowr    s     @
,,U3Zuenr.AB
CC>>#1-DOOseOGYYbiik*+,Fvf5L
//6C //_<|<gC c3E*s8>>Z'''r,   c                    t        j                  ddgddiddigd      }|j                  | dz          t        j                  | dz         j                         }t        ||       y )Nrh   r   r   r   r   r   )rm   rn   ry   rw   r|   r   r   )r   r   r  s      r)   &test_read_parquet_lists_not_convertingr    s]    	QF3(S!H)=>	?BMM&>)*__Vn45==?Fb&r,   c                J   t        j                  dg did      }|j                  | dz          t               st        j                  | dz         }t        j                  | dz         }t        ||       t         j                  j                  |j                         |       y )Nr   r  r  rP   string.parquet)
rm   rn   ry   r   r|   rw   r   testingassert_frame_equalr   )r   r  r   s      r)   test_parquet_string_roundtripr    s~    
,,_-5F
GCNN6,,-"$ oof'778	"22	3Bb#JJ!!"**,4r,   c                t   t        j                  d       ddlm} ddlm t        j                  dg did      }|j                  | d	z          fd
}t        j                  d      5   |d|      5  t        j                  | d	z          d d d        d d d        y # 1 sw Y   xY w# 1 sw Y   y xY w)Nbotocorer   )patch)BotoCoreErrorr   r  r  rP   r  c                    r$   r=   )r*   r  s    r)   mock_get_enginez8test_parquet_botocore_exception.<locals>.mock_get_engine  s	    r,   zAn unspecified error occurredr8   z.dask.dataframe.dask_expr.io.parquet.get_engine)r:   r  unittest.mockr  botocore.exceptionsr  rm   rn   ry   r;   rw   r|   )r   r  r  r  r  s       @r)   test_parquet_botocore_exceptionr    s    

##1
,,_-5F
GCNN6,,- 
},K	L 7C_U 	7OOF%556	77 7	7 	77 7s$   .
B.8B"B."B+	'B..B7)
__future__r   r  r  r  rz   sysdatetimer   decimalr   r  r   numpyro   pandasrm   r:   packaging.versionr   r   dask.dataframer  rw   r  dask.dataframe._compatr   r	   dask.dataframe.io.parquet.corer
   dask.dataframe.utilsr   r   
dask.utilsr   r    r   __version__pyarrow_versionImportErrorr  rz  r   SKIP_PYARROWSKIP_PYARROW_REASONmarkskipifPYARROW_MARKnrowsr   rn   rw  r  r   rx   r   fixturer&   _engine_fixturer*   r.   r0   r6   r>   rH   rL   parametrizer   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r%  r4  r6  rI  rQ  rV  rZ  r\  r`  rv  	Timedeltar  rg  ro  rr  skip_with_pyarrow_stringsr{  r  r  r  binaryr-  r   mapr  rv   r  r  xfail_with_pyarrow_stringsr  r  r  r  r  r  r  r  r#  r&  r2  r@  rI  rR  rV  r[  ra  rl  rr  rv  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r#  r-  r0  r<  rA  rH  rT  rV  r]  r_  r  r  r  PYARROW_LARGE_STRING_XFAILr  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  platformr  r  r  r'  r+  r.  r4  r@  rC  rS  rU  r[  rd  rn  rr  r  r  r  r  r  gpur  networkslowmajorr  filterwarningsr  r  r  r  r  r  r  r  r  r  r  r  rX  s   0r)   <module>r     s   "    	 
   #    %    ? 5 C '#bnn-O
 
 v) {{!!,7J!K
R\\!&u.Aa!eai.$U|,!a#g, "((E%L1qBF1	
B bnnR[1 &..Y|n5       7 7 B67 8
,
 %73 83@ 5$-0	 1		&"
C$"JRjU)X"7"t+S  $    8,& 5$-8 9@2 - -`7 %/5$-07 1 0 77&56 5$-8' 9'@ 5$-8
 BMM,3?BLLa 		
 
q#q	!3'L 9L28 5$-83 93, && ' 2 & &R080 !	sI&	'R0	sO,	-r26	s./	0"b9BLL#123RYRYY[)T:	
 BLL#~r~~o>?@C5!	

 
sNBNN956	7lSE=RS	sDR\\3E!FGH	I2rR	s./	0	7	7	A2rJ	sI&	'	.	.x	8"bA	sI&	'	.	.x	8"bA	sI&	'	.	.x	8"bA	s./	0	7	78H	I2rR	s./	0	7	78M	NPRTVW	s./	0	7	78M	NPRTVW	sI&	'	.	.x	8"bA	sI&	'	.	.y	92rB	sI&i	8"bA	sI&hbhhyu.M	NPRTVW	II6	7R@	II6c
	KRQST	sI&	'R0	sL)	*B3	s,-	.B7	s,-	.B7	s,-	.B7C"%L &&0 'M%N0*?: ''< (<$?  )D%=9 : &&RQ ' RQj 5$-0GY#78 9 1 D - -* + +D68 $8F 9 F("*-`#L 5 50!8H & &: y+&>?0 @0  UDM26 3 6**V$ )AB
/ C
/ )AB/ C/" 
 !%!")#*	 !%")#*	 $W&!	
, !%")#*	 !%!")#*	 $W&!	
, #'!+W 5 "*#, #& $")#* #6 $!")#*  22&7	
ODFNOFN     BFG-*F* , ,$-
< c-./c?+,c-./\R\\3(G"HIJ\R\\3y(A"BCDc4BLL2M NOPQc4BLL2D EFGHc-./66x@c9%&--h7c9%&--h7c9%&--h7c9%&--i8c9%Y7$%XRXX6Hu-U	
 	9956995SzJc9%&c<()c+,-c+,-c+,-1:; <		00 tUm44-84 9 54 GT?3H 4H(<$ $! $!N +aW5.u>R ? 6 R< +aW5+ 6 +> &++88		
 	7; +dE];5 < 5* +dE];; < ;(  :	/2	- dE]3$5tVn5+H 6 6 4 +H\ *S#J7%C 8 %CP dE]3&@A3) B 4 3)l dE]3&DE64 F 4 64r$ .t=% >%6 - -$ JQ JQZ 0 0  $[[..9 /   	#,	
	1	18 	UO,	
	G	 G@ G  G**	"%*4   u6  7  & t[&9:! ; !H C C +dE]; < 4&8 !7 !7H ! !H $559 6 59p*8 $F.	O LLG,  4	44@)@ 4 48 4 46#'4T .0CD$ E$N .u>-1v6 7 ?D 58 6 8v	TP*  7  7FU"!H &I &IR 1#sD)9?WX3 Y3,S& 	
		)
/	*+		  CC&  , O))B.7JK L    56+  7+:
 )D%=9 : & $ $N +;Y*GH+ I +B   4 4 T-?,@+A$BC( D (&   
5 
57S~  #	BclO#  	B /,1sC   *AN- AO AOAO
AON-AO N?AO OAOOAO