
    bik                    <   d dl mZ d dlZd dlZd dlZd dlZd dlZd dlmZ  ej                  d        ej                  d      Z
 ej                  d      Z ej                  d      Zd dlZd dlZd dlmZ ej&                  j)                  ej*                  d	k7  d
      ej&                  j-                  d      ej&                  j.                  gZ ej2                  dg      d        Z ed      j7                         Zej:                  j=                  d      e_        ej?                         Z ej2                  d      d        Z ej&                  jC                  dg d      d        Z"d Z#ej&                  jC                  dg d      d        Z$d Z%d Z&y)    )annotationsN)
timeseriespandaspysparkpyarrowzdask.dataframe)	assert_eqlinuxzAUnnecessary, and hard to get spark working on non-linux platforms)reasonz/pyspark doesn't yet have support for pandas 2.0)paramsc                X    t        j                  | j                         | j                  S )N)pytestimportorskipparam)requests    W/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/dask/tests/test_spark_compat.pyenginer   #   s      &==    1h)freqUTCmodule)scopec               #    K   t        j                  t         j                        } t        j                  j
                  j                  j                  d      j                  d      j                  dd      j                         }| |j                          t        j                         t        j                         u r%t        j                   t         j                  |        y y w)NlocalzDask Testingzspark.sql.session.timeZoner   )signal	getsignalSIGINTr   sqlSparkSessionbuildermasterappNameconfiggetOrCreatestop	threadingcurrent_threadmain_thread)prevsparks     r   spark_sessionr+   4   s     
 FMM*D 	  ((//8		 	,e	4		 
 K	JJL!Y%:%:%<<fmmT* =s   C&C(npartitions)      
   c                z   t        |      }| j                  t              }|j                  |      j                  j                  |d       t        j                  ||      }|j                  |j                  j                  j                  d            }|j                  |k(  sJ t        |t        d       y )N	overwritemoder   r   	timestampFcheck_index)strcreateDataFramepdfrepartitionwriteparquetddread_parquetassignr6   dttz_localizer,   r   )r+   r,   tmpdirr   sdfddfs         r   $test_roundtrip_parquet_spark_to_daskrG   J   s    [F

'
'
,C OOK &&..vK.H
//&/
0C
**s}}//;;EB*
CC??k)))c3E*r   c                   t        |      }| j                  t              }|j                  j	                  |dd       t
        j                  ||      }|j                  |j                  j                  j                  d            }|j                         j                  d      }|j                  |j                  j                  d	      
      }t        |t        j                  d      d       y )Nr1   name)r3   partitionByr4   r   r5   r-   )axisr9   )rI   Fr7   )r9   r:   r;   r=   r>   r?   r@   rA   r6   rB   rC   compute
sort_indexrI   astyper   )r+   rD   r   rE   rF   s        r   )test_roundtrip_hive_parquet_spark_to_daskrO   [   s    [F

'
'
,C IIf;FC
//&/
0C
**s}}//;;EB*
CC ++-
"
"
"
*C **#((//%0*
1Cc3>>q>)u=r   c                T   t        |      }t        j                  t        |      }|j	                  ||d       | j
                  j                  |      }|j                         }|j                  |j                  j                  j                  d            }t        ||d       y )N)r,   F)r   write_indexr   r5   r7   )r9   r?   from_pandasr;   
to_parquetreadr>   toPandasrA   r6   rB   rC   r   )r+   r,   rD   r   rF   rE   s         r   $test_roundtrip_parquet_dask_to_sparkrV   q   s    [F
..+.
6CNN6&eN<



$
$V
,C
,,.C **s}}//;;EB*
CCc3E*r   c                   t        |      }d}d}t        j                  t        |      t        j
                  j                  |      ddg|dz  z  ddg|dz  z  d	      }|j                  d
dddd	      }t        |j                  D cg c]+  }t        j                  j                  j                  |      - c}      sJ | j                  |      }|j                  |      j                  j                  |d       t         j#                  |dd      }t        |j                  D cg c]+  }t        j                  j                  j                  |      - c}      sJ |j                         t%        ||d       y c c}w c c}w )Nr.      )sizeTF   alicebob)abcdInt64Float64booleanstringr1   r2   r   numpy_nullabler   dtype_backendr7   )r9   pd	DataFramerangenprandomrN   alldtypesapitypesis_extension_array_dtyper:   r<   r=   r>   r?   r@   r   )r+   rD   r,   rY   r;   dtyperE   rF   s           r   5test_roundtrip_parquet_spark_to_dask_extension_dtypesrs      s^   [FKD
,,t!!t!,$!),5!TQY/		
C **		
C #**U55e<UVVV

'
'
,C OOK &&..vK.H
//&BR/
SCCF::N%	.	.u	5N 
zz  c3E* V 	Os   
0F+0Fc           	     ,   t        |      }d}d}t        j                  d      t        j                  d      t        j                  d      t        j                  d      t        j                  d      t        j                  d      g}t        j                  t        |      |d	      }| j                  |      }|j                  d
|d
   j                  t        j                  j                  j                  dd                  }|j                  |      j                  j                  |d       t         j#                  |dd      }|j$                  j&                  j(                  t*        j-                  dd      k(  sJ |j$                  j/                         j&                  j(                  t*        j-                  dd      k(  sJ |j1                  dt        j2                  t*        j-                  dd            d	      }t5        ||d       y )N      z8093.234z8094.234z8095.234z8096.234z8097.234z8098.234)r]   r^   r^      r1   r2   r   rf   zint64[pyarrow]Fr7   )r9   decimalDecimalrh   ri   rj   r:   
withColumncastr   r   rp   DecimalTyper<   r=   r>   r?   r@   r^   rr   pyarrow_dtypepa
decimal128rL   rN   
ArrowDtyper   )	r+   rD   r,   rY   decimal_datar;   rE   rF   expecteds	            r   test_read_decimal_dtype_pyarrowr      s   [FKD 	
#
#
#
#
#
#L ,,t	
C 
'
'
,C
..c#hmmGKK,=,=,I,I!Q,OP
QC OOK &&..vK.H
//&)/
LC55;;$$a(;;;;55==?  .."--12EEEEzz!r}}Q23	
H c8/r   )'
__future__r   rx   r   sysr&   r   dask.datasetsr   r   r   r~   r?   numpyrk   r   rh   dask.dataframe.utilsr   markskipifplatformskipskip_with_pyarrow_strings
pytestmarkfixturer   rL   r;   indexrC   reset_indexr+   parametrizerG   rO   rV   rs   r    r   r   <module>r      s   "   
   $   H 
&

i
(V#V)*   * KKR   KK@   KK))

 

 d##%II!!%(		oo h+  +* 
3+ 4+ >, 
3+ 4+%+P#0r   