
    biB                    
   d dl mZ d dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlmZ d dlmZ d dlmZmZmZ d dlmZ d dlmZ d dlmZmZmZ g ZdD ]S  Z ej:                   ej<                   ed	      e
            Z  ejB                  e d      Z"ejG                  e e"f       U  ej:                   e$d       ej<                   ed	                  Z  ejB                  e d      Z"ejG                  e"jK                         jL                  e"jL                  f        ejN                   ed       ed       ejP                  d       ed       ejP                  dd      d      Z) ejN                   ed       ed       ejP                  dd       ed       ejP                  ddd      d      Z* ejN                   ed       ed       ejP                  dd       ed       ejP                  ddd      d      Z+e)e*e+gZ,g Z-e,D ]  Z.e.j^                  ja                  d      e._/        e.jb                  ja                  d      e._1        e-jG                  e.je                  e.j^                  jf                  ji                   ed            e.jb                  jf                  ji                   ed             !              e,D  cg c]  } | jk                  | jb                         c} Z6e-D  cg c]  } | jk                  | jb                         c} Z7e,D  cg c])  } | jk                  | jb                  | jp                  g      + c} Z9e-D  cg c])  } | jk                  | jb                  | jp                  g      + c} Z:d" Z;e	jx                  jz                  d#        Z>e	jx                  j                  d$d% e	j                  d&e	jx                  j                  d'(      g)       e	j                  de	jx                  j                  d'(      )      g      e	jx                  j                  d*ddg      e	jx                  j                  d+d,d-g      e	jx                  j                  d.      e	jx                  j                  d/      d0                                    ZCd1 ZDd2 ZEd3 ZFd4 ZGe	jx                  j                  d5g d6      e	jx                  j                  d*g d6      d7               ZHe	jx                  j                  d*d,d-g      d8        ZId9 ZJd: ZKd; ZLd@d<ZMd= ZN G d> d?      ZOyc c} w c c} w c c} w c c} w )A    )annotationsN)_compat)PANDAS_GE_210PANDAS_GE_300tm)to_pyarrow_string)_concat)	assert_eqget_string_dtypepyarrow_strings_enabled)TFbacbac)ordered   npartitions   indexabcdexxxxx   abcbcf8dtypevwxyzfghijyyyyy
   abbbaklmnozzzzz   bcbcccategoryxyzabc)r   r    c                    t        j                  t        t              t	        j
                  t                     t        j                  t        t        D  cg c]  } | j                   c}       t	        j
                  t        D  cg c]  } | j                   c}              t        j                  t        t        D  cg c]  } | j                   c}       t	        j
                  t        D  cg c]  } |  c}       j                         t        j                  t        t              t	        j
                  t                     t        j                  t        t        D  cg c]	  } | ddg    c}       t	        j
                  t        D  cg c]	  } | ddg    c}              t        j                  t        t        D  cg c]  } | j                   c}       t	        j
                  t        D  cg c]  } | j                   c}              t        j                  t        t        D  cg c]  } | j                   c}       t	        j
                  t        D  cg c]  } | j                   c}              t        j                  t        t        D  cg c]  } | j                   c}       t	        j
                  t         D  cg c]  } |  c}       j                         t        j                  t        t              t	        j
                  t                      y c c} w c c} w c c} w c c} w c c} w c c} w c c} w c c} w c c} w c c} w c c} w c c} w )Nr   r!   )r   assert_frame_equalr	   framespdconcatframes2assert_series_equalr    assert_index_equalframes3r   frames4r!   r   frames5frames6is    `/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/dask/dataframe/tests/test_categorical.pytest_concat_unions_categoricalsr<   M   s#   '&/299W+=> f%%&		2H11332H(I
 '*Q*+RYY77Ka7K-L-R-R
 ''*BIIg,>? 01C:01
		'2Q1c3Z=23 g&&'3IAACC3I)J
 g&&'3IAACC3I)J
 '*Q*+RYY77Ka7K-L-R-R
 ''*BIIg,>?C &2H
 +7K 12
 '3I
 '3I
 +7KsH   L"L'L,1	L1%L6L;M 5M/M
MM		Mc                 4   t        j                  d       t        j                  j	                  ddi      5  t        j                  dg did      } d d d         d   j                  d      | d<   | d   j                  j                  rJ y # 1 sw Y   <xY w)	N	dask_cudfzdataframe.backendcudfa)r      r   rA   r   r*   )
pytestimportorskipdaskconfigsetdd	from_dictastypecatknown)ddfs    r;   test_unknown_categories_cudfrM   w   s     $	-v6	7 <llC+;<3xz*CH3x||!!!!!< <s   BBnumeric_onlyTFz"numeric_only=False not implemented)reason)marksr   	split_outrA      z(ignore:The default value of numeric_onlyzignore:Droppingc                b   t        t              D ci c]
  \  }}d|f| }}}dddddd}t        j                  t	        j
                  |j                               j                  |      d      }	||	j                  |      }	|	j                         }t        |	j                  j                         |j                  j                                t        |	j                  j                         |j                  j                                t        r!t        st!        j"                  t$        d	
      nt'        j(                         }
|i nd|i}|
5   |j+                  |j                        j,                  di |}d d d        |
5   |	j+                  |	j                        j,                  di |}d d d        t               |
5  |j+                  |j                        j.                  j                         }d d d        |
5  |	j+                  |	j                  d      j.                  j                  |      }d d d        t        ||       |
5  |j.                  j+                  |j                        j1                         }d d d        |
5  |	j.                  j+                  |	j                        j1                         }d d d        t        ||       y c c}}w # 1 sw Y   gxY w# 1 sw Y   >xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   [xY w)Nunknownobjectr*   i8r   r   rR   r   zThe default of observed=FalsematchrN   F)sort)rQ    )	enumerater/   rG   from_pandasr0   r1   valuesrI   repartitioncomputer
   r   value_countsnuniquer   r   rB   warnsFutureWarning
contextlibnullcontextgroupbysumr    count)shuffle_methodrN   r   rQ   requestr:   dfdskmetarL   ctxnumeric_kwargsexpectedresults                 r;   test_unknown_categoricalsrr      s   . .7v->
?'1bIq>2
?C
?
JTRD
..3::<077=1
MCoo+o6	Bcee  "BDD$5$5$78ceemmortt||~.  	]*IJ##% 
 (/Rnl5SN	 :'2::bdd#''9.9:	 :'SUU#''9.9:fh	 0::bdd#%%--/0	 OSUU/1199I9NOfh	 .44<<%++-.	 .suu%++-.fhC @": :: :0 0O O. .. .sM   K ,K&,K34L 8L4L4L%&K03K= L
LL"%L.c                 
   t         d   } t               rt        |       } t        t              D ci c]
  \  }}d|f| }}}t        j                  |j                               j                  ddi      j                  ddd      } | j                  j                  d      | _
        t        j                  | dd	
      }|j                  j                  j                         |d<   |j                   j                  j                         |d<   |j                  j                  j                         |_
        |j#                  |j                  j                  j%                  g d            }|j                  j                  j&                  sJ |j                   j                  j&                  rJ |j                  j                  j&                  rJ |j)                         }dD ]`  }|d	u}|j+                  |      }|j                   j                  j&                  sJ |j,                  j                  j&                  sJ |j                  j                  j&                  |k(  sJ t/        ||j                  ddi      d	       |j+                  |d      }|j                   j                  j&                  sJ |j,                  j                  j&                  sJ |j                  j                  j&                  |k(  sJ t/        ||j                  ddi      d	       |j+                  d|      }|j                   j                  j&                  rJ |j,                  j                  j&                  sJ |j                  j                  j&                  |k(  sJ t/        ||j                  ddi      d	       |j+                  d|      }|j                   j                  j&                  sJ |j,                  j0                  t3               k(  sJ |j                  j                  j&                  |k(  sJ t/        ||       c |j+                  g d      }|j                  j                  j&                  sJ t/        ||       |j+                  dgd	      |u sJ |j+                  g d	      |u sJ |j+                  dg      |u sJ |j+                  g       |u sJ t5        j6                  t8              5  |j+                  d       d d d        t5        j6                  t8              5  |j+                  d       d d d        y c c}}w # 1 sw Y   DxY w# 1 sw Y   y xY w)Nr   rT   r    y_)columnsr*   )r   rt   rR   Fr   rY   r   )r   r    r!   )r   )NTFr   r   )check_categoricalr   )r   split_everyT)ru   r   rA   )rx   foo)r6   r   r   r[   r5   r0   r1   r]   renamerI   r   rG   r\   r   rJ   
as_unknownrt   assignset_categoriesrK   r_   
categorizer   r
   r   r   rB   raises
ValueError)	pdfr:   rk   rl   rL   r   known_indexddf2ddf_known_indexs	            r;   test_categorizer      sZ    !*C $-6w-?
@'1bIq>2
@C
@
		#**,	d	$	j
3	4 
 		  ,CI
..!%
8Cuuyy##%CH

%%'CI		((*CI
**suuyy//@*
AC5599???vvzzyy}}""""	B$ 5(~~E~*ww{{    vvzzzz~~##{222$		3
"34N ~~Eq~9ww{{    vvzzzz~~##{222$		3
"34N ~~c~/77;;$$$$vvzzzz~~##{222$		3
"34N~~d%~0ww{{    vv||/1111zz~~##{222$7: nnRtn<O  $$****or" >>3%u>-444>>"E>*c111%%se,???%%b)_<<< 
z	" &1%& 
z	" *5)* *{ At& &* *s   U U4U"U"U+c                 x   t         j                  j                  t        g dd      } t	        | j
                  t        j                  g d             t	        | j                  d       t         j                  j                  t        g d      } t	        | j
                  t        j                  g d             t	        | j                  d       t         j                  j                  t        g dd      } t	        | j
                  t        j                  g d             t	        | j                  d       y )Nr@   bcF)rm   
categoriesr   )rm   r   )rA   d      T)	rG   categoricalcategorical_dtyper@   r
   r   r0   Indexr   )	cat_dtypes    r;   test_categorical_dtyper     s    00?E 1 I i""BHH_$=>i'00aO0TIi""BHH_$=>i'00=$ 1 I i""BHH]$;<i&    c                 $   t        j                         } t        j                  | d      }|j	                         }|j                         }|j                  j                  j                  sJ t        ||j                  t        j                  |j                              dd       |j                  d      |u sJ t        j                  |j                  |j                  j                  d            d      }|j	                         }|j                  d      }|j                  j                  j                  sJ t        ||j                  t        j                  |j                              dd       |j                         |u sJ y )Nr   r   F)check_divisionsrw   r   idxT)r   makeDataFramerG   r\   r_   r~   r   rJ   rK   r
   	set_indexr0   CategoricalIndexArz   )r   rL   rq   r   s       r;   test_categorize_indexr     s8   



!C
..!
,C[[]F>>D::>>,,V\\:;	 >>>&#--- ..))&((//%*@Aq
QC[[]F>>>%D::>>,,V\\:;	 >>s"""r   c                   t        j                  g dg dd      }t        j                  |d   g dd      |d<   t        j                  |d	      }t
        j                  j                  d
      5  |j                  d|j                  	      }|j                  d      |j                  d      }}t        |j                  j                               dgk(  sJ t        |j                  j                               g dk(  sJ |j                  |j                  |j                  	      }|j                  d      |j                  d      }}t        |j                  j                               dgk(  sJ t        |j                  j                               g dk(  sJ |j                  dg d|j                        }|j                  d      |j                  d      }}t        |j                  j                               dgk(  sJ t        |j                  j                               g dk(  sJ 	 d d d        y # 1 sw Y   y xY w)N)rA   r      rR   )r@   r   r   r   r   r    r    r   T)r   r   r   r   sync)	schedulerr   rA   r@   )r   r   r   )	divisionsr   )r0   	DataFrameCategoricalrG   r\   rD   rE   rF   r   r   get_partitionlistr   r_   sortedr    )ri   rk   r@   r   d1d2s         r;   test_categorical_set_indexr   :  s   	L/CD	EBnnRW$OBsG
rq)A	6	* =KKK7#Q__Q%7BBHH$$&'C5000bhh&&()_<<<KKK7#Q__Q%7BBHH$$&'C5000bhh&&()_<<<KKAMMKR#Q__Q%7BBHH$$&'C5000bhh&&()_<<<= = =s   4GIIncategories)rA   r   r   c                   d}||z  }t        |      D cg c]  }dt        |      z    }}t        j                  ||z  t        j
                  j                  |      d      }t        j                  ||       }|d   j                  d      j                  j                         |d<   |j                  d      }yc c}w )z(https://github.com/dask/dask/issues/5343r$   CAT)idvaluer   r   r*   N)rangestrr0   r   nprandomrG   r\   rI   rJ   
as_orderedr   )r   r   rows_per_categoryn_rowsr:   r   r   rL   s           r;   5test_categorical_set_index_npartitions_vs_ncategoriesr   P  s     ,,F*/*<=Q%#a&.=J=
,,--		8H8H8PQC ..+
6CD	  ,00;;=CI
--
C >s   B<c                   t        j                  t        d      t        d      d      }t	               rt        |      }t        j                  |d      }|d   j                  d      |d<   |j                  |       }|j                         }|d   j                  d      |d<   t        ||       t        ||       y )Nr$   
abababcbcbr   r   r   r    r*   )r0   r   r   r   r   r   rG   r\   rI   r^   copyr
   )r   rk   rL   r   s       r;    test_repartition_on_categoricalsr   a  s    	E"ID,>?	@B  r"
..
+C3xz*CH??{?3D	BgnnZ(BsGb#b$r   c                 P   t        j                  t        d      t        d      d      } | j                  j                  d      | _        t        j                  | d      }dt        |j                        v sJ dt        |j                        vsJ t        |j                  d      sJ t        |j                  d      rJ | j                  | j                        }t        j                  |dd	      }t        |j                  d
      sJ t        |j                  d
      rJ y )Naaaaabbbbbcccccr(   r   r*   r   r   rJ   Frv   r   )r0   r   r   r   r   rI   rG   r\   dirr    hasattrr   r   )rk   rL   df2r   s       r;   "test_categorical_accessor_presencer   s  s    	D!<=E"IN	OB44;;z"BD
..
+CCJCEE
"""355%   suue$$$
,,rtt
C>>#159D4::|,,,syy,////r   c                    t        j                  t        j                  ddddt	        d      gi      d      } t        j                  d      5 }| j                         j                          d d d        rJ y # 1 sw Y   xY w)	Nr   r@   r   nanr   r   T)record)	rG   r\   r0   r   floatwarningscatch_warningsr~   r_   )rk   r   s     r;   test_categorize_nanr     ss    	
cCc5<89:
B 
	 	 	- "
!":v" "s   A<<Bc                R    t        | t        j                        r| S | j                  S )N)
isinstancer0   r   rJ   )r   s    r;   get_catr     s     1b1121==r   c                ~    t        | t        |t        j                        rt	        j
                  |      n||       y)z@left and right are equal, treating index and array as equivalentr   N)r
   r   r   ndarrayr0   r   )leftrightr   s      r;   assert_array_index_eqr     s*    %eRZZ8e'r   c                    t        j                  dg di      } | d   j                  d      | d<   t        j                  | d      }|j
                  j                  j                         }t        |t        j                        sJ y )Nr   r   r*   r   )
r0   r   rI   rG   r\   r   rJ   as_knownr   Series)rk   dask_dfret_types      r;   !test_return_type_known_categoriesr     se    	sO,	-BgnnZ(BsGnnR#Gyy}}%%'Hh		***r   c                     e Zd Zej                  j                  de      ej                  j                  ddefdefdefg      d               Z	ej                  j                  de      ej                  j                  dd e
d	d
g      fdi fdi fdi fd e
dg      fd e
g d      fd e
g d      fd e
g d      fdi fg	      d               Zd Zej                  j                  de      d        Zd Zd Zy)TestCategoricalAccessorserieszprop, comparer   r   codesc                x    |\  }}t        t        |      |      }t        t        |      |      } |||d       y NFr   )getattrr   )selfr   propcomparesdsrp   rq   s           r;   test_propertiesz'TestCategoricalAccessor.test_properties  s9     271:t,d+%8r   zmethod, kwargsadd_categoriesde)new_categoriesr   as_unorderedremove_categoriesr@   )removalsrename_categories)r   r   freorder_categoriesr   r}   )r@   r   r   remove_unused_categoriesc                   t        j                  |fi |}|\  }} |t        |            } |t        |            }t        ||d       t        t        |j                        j
                  t        |      j
                  d       t        t        |j                        j                  t        |      j                  d       y r   )operatormethodcallerr   r
   _metar   r   )	r   r   methodkwargsopr   r   rp   rq   s	            r;   test_callablez%TestCategoricalAccessor.test_callable  s      ""64V4 2gaj>GBK&(E:FLL!,,H((!	

 	FLL!))H%%!	
r   c                    d }d }t        j                   t        j                  |              t        j                  |             g      }|j                  j
                  j                   y )Nc                     t        j                  dt        j                  t        j                  t        j                  g      i      S )Nr   )r0   r   r   r   r   rZ   r   r;   
make_emptyzBTestCategoricalAccessor.test_categorical_empty.<locals>.make_empty  s,    <<bnnbffbff5E&F GHHr   c                 Z    t        j                  dt        j                  ddg      i      S )Nr   r@   )r0   r   r   rZ   r   r;   	make_fullzATestCategoricalAccessor.test_categorical_empty.<locals>.make_full  s#    <<bnnc3Z&@ ABBr   )rG   from_delayedrD   delayedr   rJ   r   )r   r   r   r@   s       r;   test_categorical_emptyz.TestCategoricalAccessor.test_categorical_empty  sR    	I	C OO5T\\*579Pi9P9RST	r   c                   |\  }}|j                   j                  sJ |j                   j                         }|j                   j                  rJ t        j                  t
        d      5  |j                   j                   d d d        t        j                  t
        d      5  |j                   j                   d d d        t        j                  t        d      5  |j                   j                   d d d        t        j                  t        d      5  |j                   j                   d d d        |j                   j                  g d      }|j                   j                  sJ t        j                  |j                   j                  t        |      j                         t        |j                   j                  t        |      j                         |j                   j                         }|j                   j                  sJ |j                         }t        j                  |j                   j                  t        |      j                         t        |j                   j                  t        |      j                         y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w)Nzwith unknown categoriesrW   r   )rJ   rK   r{   rB   r   NotImplementedErrorr   r   AttributeErrorr}   r   r4   r   r   r   r_   )r   r   r@   dadbress         r;   test_unknown_categoriesz/TestCategoricalAccessor.test_unknown_categories  s   2vv|||VV 66<<]].6OP 	FF	]].6OP 	FFLL	 ]]>1JK 	FF	]]>1JK 	FFLL	 VV""?3vv|||
bff//1F1FGbffllGAJ,<,<=VV__vv|||jjl
bff//1H1HIbffllGCL,>,>?'	 		 		 		 	s0   +J%JJ*J7JJ'*J47Kc                    t        j                  g dd      }t        j                  |d      }|j                  j                         }|j                  j                         }t        ||       y )N)r@   r@   r   r*   r   r   )r0   r   rG   r\   r   upperr
   )r   r@   r  rq   rp   s        r;   test_categorical_string_opsz3TestCategoricalAccessor.test_categorical_string_ops  sJ    IIoZ8^^Aq!55;;=&(#r   c                    t        j                  g dd      }t        j                  |d      }t	        j
                  t              5  |j                  j                          d d d        y # 1 sw Y   y xY w)N)rA   r   r   r*   r   r   )	r0   r   rG   r\   rB   r   r   r   r  )r   r@   r  s      r;   "test_categorical_non_string_raisesz:TestCategoricalAccessor.test_categorical_non_string_raises  sP    IIiz2^^Aq!]]>* 	FFLLN	 	 	s   	A--A6N)__name__
__module____qualname__rB   markparametrize
cat_seriesr   r
   r   dictr   r   r  r  r	  rZ   r   r;   r   r     s?   [[Xz2[[01	"+,	
9 39 [[Xz2[[tC:>?2R 2 $"67 $o"FG!4#GHt?CD',
	

 3
& [[Xz2@ 3@6$r   r   )F)P
__future__r   rd   r   r   numpyr   pandasr0   rB   rD   dask.dataframe	dataframerG   r   dask.dataframe._compatr   r   r   dask.dataframe._pyarrowr   dask.dataframe.corer	   dask.dataframe.utilsr
   r   r   r  r   r   r   r   r   r\   r   appendr   r_   r   r   aranger@   r   r   r/   r2   rk   r   rI   r    r|   rJ   r}   r   r5   r6   r   r7   r8   r<   r  gpurM   r  paramxfailfilterwarningsrr   r   r   r   r   r   r   r   r   r   r   r   r   r9   s   0r;   <module>r      s   "         " C C 5 ' U U 
 G		."..hABA	q	)Bq"g BIIeAhnbnnT(^<=R^^A1% 
  2::<%%rxx0 1 BLL']']RYYq\']RYYq% BLL']']RYYq"']RYYq"D) BLL']']RYYr2']RYYr2T* Q

 B44;;z"BD44;;z"BDNN
		ddhh%%d5k2ddhh%%d5k2 	 	
 &,
,1;;qss
,%,
-1;;qss
-,2
3q1;;QSSz"
3,3
4q1;;QSSz"
4'@T " " !!)M!N	
 	++##+O#P	
  r
3q!f-FG-.$  / H . 4!($ NE*P'$#B=, 	2	2 3 3 A/ 0"0 >+f fu
 -
-
3
4s   "T39"T8".T=.U