
    bi̱                    r   d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZ	d dl
Zd dlmZ d dlmZ d dlmZmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZ d dlmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2 d d	l3m4Z4m5Z5 d d
l6m7Z7m8Z8 d dl9m:Z:m;Z; d dl<m=Z=m>Z>m?Z?m@Z@mAZAmBZBmCZCmDZDmEZE d dlFmGZGmHZHmIZImJZJmKZKmLZL  G d de      ZM G d deM      ZNd ZO G d deM      ZP G d deeN      ZQ G d deQ      ZR G d deQ      ZS	 	 	 	 	 	 d9dZT G d deQ      ZUd ZVd ZWd  ZXd! ZY G d" d#e      ZZ G d$ d%e      Z[ G d& d'e[      Z\ G d( d)e[      Z] G d* d+e\      Z^ G d, d-e      Z_ G d. d/e      Z` G d0 d1e      Za G d2 d3e      Zb G d4 d5e      Zc e7d6      Zd	 	 	 d:	 	 	 	 	 	 	 d;d7Ze	 	 	 d:	 	 	 	 	 	 	 d;d8Zfy)<    )annotationsN)CategoricalDtype)compute)TaskTaskRef)_concat)	Assign	BlockwiseExprFilterPartitionsFiltered
ProjectionToSeriesIndexdetermine_column_projectionis_filter_pushdown_available)AllAnyCountDropDuplicatesLenMaxMeanMemoryUsageMinModeNBytesNFirstNLargestNLast	NSmallestProdSizeSumUniqueValueCounts)RepartitionRepartitionToFewer)LRU_convert_to_list)is_categorical_dtype	make_meta)	barriercollectensure_cleanup_on_exceptionmaybe_buffered_partdpartitioning_indexset_partitions_preshuffle_groupshuffle_group_2shuffle_group_get)Mdigitget_default_shuffle_methodinsertis_index_likeis_series_likec                      e Zd Zg dZddddddZdZdZd Zd Ze	j                  d        Ze	j                  d	        Zd
 Zd Ze	j                  d        Zd Zy)ShuffleBase)framer0   npartitions_outignore_indexmethodoptionsindex_shuffleoriginal_partitioning_indexFN)r?   r@   rA   rB   rC   Tc                (    d| j                   dd   dS )NzShuffle(i)_nameselfs    \/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/dask/dataframe/dask_expr/_shuffle.py__str__zShuffleBase.__str__^   s    $**RS/*!,,    c                2    | j                   | j                  gS N)r=   r0   rH   s    rJ   _node_label_argszShuffleBase._node_label_argsa   s    

D3344rL   c                P    | j                   }t        |t        t        f      r|g}|S rN   )r0   
isinstancestrint)rI   r0   s     rJ   _partitioning_indexzShuffleBase._partitioning_indexd   s,    !44(3*5"4!5!!rL   c                    | j                   xs | j                  }t        |t              rt	        |      hS t               S rN   )rC   rT   rQ   listtupleset)rI   idxs     rJ   -unique_partition_mapping_columns_from_shufflez9ShuffleBase.unique_partition_mapping_columns_from_shufflek   s4    ..J$2J2J)#t4c
|?#%?rL   c                   t        |t              r#| j                  ||      r| j                  |      S t        |t              rt        t        | ||            }| j                  }| j                  }|j                  D cg c]  }||v s||v r| }}t        |      t        |j                        k  r4 t        |       ||   g| j                  dd   |j                  d         S t        |t        t        t         t"        t$        t&        t(        t*        t,        t.        t0        t2        t4        t6        t8        t:        t<        t>        f      r) t        |      | j                  g|j                  dd   S y c c}w )N   columns) rQ   r   _filter_passthrough_available_filter_simplificationr   r)   r   rT   r=   r]   rX   typeoperandsoperandr$   r   r#   r!   r   r   r   r   r   r"   r   r   r   r   r   r    r%   r   )rI   parent
dependents
projectionr0   targetcolnew_projections           rJ   _simplify_upzShuffleBase._simplify_upp   sg   ff%$*L*LJ+
 ..v66fj) *+D&*EJ "&!9!9ZZF ">>--
1B N 
 >"S%88!tDz&"8M4==;LMNN9-  %
.  4<

AV__QR-@AA/
s    E8c                    t        |  d      )NzC is abstract! Please call `simplify`before generating a task graph.)NotImplementedErrorrH   s    rJ   _layerzShuffleBase._layer   s    !f . /
 	
rL   c                    | j                   j                  }| j                  r!| j                  dk(  r|j	                  d      }|S )NtasksTdrop)r=   _metar?   r@   reset_indexrI   metas     rJ   rq   zShuffleBase._meta   s=    zz!7###.DrL   c                &    d| j                   dz   z  S NrN   r\   )r>   rH   s    rJ   
_divisionszShuffleBase._divisions   s    $..233rL   )__name__
__module____qualname___parameters	_defaults_is_length_preserving_filter_passthroughrK   rO   	functoolscached_propertyrT   rZ   ri   rl   rq   rw    rL   rJ   r<   r<   I   s    	K '+I !-5 " " @ @/Bb
  4rL   r<   c                      e Zd ZdZd Zy)Shuffleau  Abstract shuffle class

    Parameters
    ----------
    frame: Expr
        The DataFrame-like expression to shuffle.
    partitioning_index: str, list
        Column and/or index names to hash and partition by.
    npartitions: int
        Number of output partitions.
    ignore_index: bool
        Whether to ignore the index during this shuffle operation.
    method: str or Callable
        Label or callback function to convert a shuffle operation
        to its necessary components.
    options: dict
        Algorithm-specific options.
    index_shuffle : bool
        Whether to perform the shuffle on the index.
    c                   | j                   }| j                  }| j                  xs
 t               }||j                  k  r|dk7  rt        ||      }| j                  | j                  | j                  | j                  | j                  g}|dk(  rt        |g| S |dk(  rt        |g| S |dk(  rt        |g| S |dk(  rt        |g| S t        | d      )Np2p)new_partitionsdisksimplern   z not supported)r=   r>   r@   r7   npartitionsr&   r0   r?   rA   rC   
P2PShuffleDiskShuffleSimpleShuffleTaskShuffle
ValueError)rI   r=   r>   r@   opss        rJ   _lowerzShuffle._lower   s    
 

..< : <U...6U?oFE ##  LL,,
 U?e*c**vu+s++x ---wu+s++x~677rL   N)rx   ry   rz   __doc__r   r   rL   rJ   r   r      s    *8rL   r   c                    t         j                  j                  j                  |       xsE t	        | t
              xr3 t         j                  j                  j                  | j                        S rN   )pdapitypesis_numeric_dtyperQ   r   
categories)dtypes    rJ   _is_numeric_cast_typer      sN    
%%e, 	<e-. <FFLL))%*:*:;rL   c                      e Zd Zd Zy)RearrangeByColumnc           	     .   | j                   }| j                  }| j                  }| j                  }| j                  }| j
                  }t        |t              r|g}|rn-t        |t        t        f      st        t        |       d      t        |t              sn|sl|D cg c]  }||j                  vs| }}t        |      dk(  r@t        |d|j                        } |j                          }d| |j                  |d         <   t#        ||d||j$                  |      }	t'        |	d||| j(                  || j*                        }
|j,                  dk(  r|
|	j                  d      S |
|
j                  D cg c]	  }|dvs| c}   S c c}w c c}w )Nz, not a supported type for partitioning_indexr\   _partitions_0r   _partitions)rC   )r   r   )r=   r0   r>   r?   rA   rB   rQ   rR   rV   r   r   r`   r]   lenr	   indexcopyAssignPartitioningIndexrq   r   r@   rT   ndim)rI   r=   r0   r>   r?   rA   rB   rg   csindex_addedshuffledcs               rJ   r   zRearrangeByColumn._lower   s   

!44..((,,** (#."4!5.t=*+,,XY  ,d3M!3P#s%--7O#PBP2w!|uou{{C%<%7%<%<%>"FU"#;#5#;#;BqE#BC .KK
 KK(,(@(@
 ::?K//233  ((V1A5U,UQV
 	
? Q@ Ws   "F6F:	FFN)rx   ry   rz   r   r   rL   rJ   r   r      s    5
rL   r   c                  d    e Zd Zg dZddddddZej                  d        Zed        Z	d Z
d	 Zy)
r   )r=   r0   r>   r?   rA   rC   r   Nr   F)r   rC   r0   r?   rA   c                .    | j                   j                  S rN   r=   rq   rH   s    rJ   rq   zSimpleShuffle._meta;      zzrL   c                    |t        | g| S t        | g| j                         D ci c]  \  }}||v s|| c}}S c c}}w )z$Filter the output of `shuffle_group`)r2   items)df_filterargskvs        rJ   _shuffle_groupzSimpleShuffle._shuffle_group?  sN     ? +d++!.r!9D!9!?!?!ARAQ'\1RRRs
   AAc                Z   d| j                   z   }d| j                   z   }| j                  }i }| j                  r| j                  nd}t	        | j                        D ]  \  }}t        | j                  j                        D cg c]  }|||f	 }	}t        |	| j                  f|| j                   |f<   |	D ]l  \  }
}}t        j                  ||f|f||||f<   ||f|vs)| j                  | j                  j                   |f|| j                  d||| j                  |f	|||f<   n  |S c c}w )z/Construct graph for a simple shuffle operation.group-split-Nr   )rG   r>   	_filteredr   	enumerateranger=   r   r   r?   operatorgetitemr   r0   )rI   shuffle_group_name
split_namer   dskr   global_partpart_outpart_in_concat_list_	_part_out_part_ins                rJ   rl   zSimpleShuffle._layerF  sd   %

2

*
**&*nn$""$%.t/?/?%@ 	!K  %TZZ%;%;< Xw/L 
 !!.C[)*
 +7 &9h$$'2:ZH56
 '1<++))84//##))#
;C+X67	: 
9s   D(c                     y rN   r   rH   s    rJ   r   zSimpleShuffle._lowerm  s    rL   )rx   ry   rz   r{   r|   r   r   rq   staticmethodr   rl   r   r   rL   rJ   r   r   (  s\    K '++I     S S%NrL   r   c                  F     e Zd ZdZej
                  d        Z fdZ xZS )r   z(Staged task-based shuffle implementationc                n    | j                   j                  }| j                  r|j                  d      }|S )NTro   )r=   rq   r?   rr   rs   s     rJ   rq   zTaskShuffle._metat  s2    zz###.DrL   c                   | j                   xs i j                  dd       xs d}| j                  j                  }t	        | j
                        |k  s||k  rt        |          S | j                  }t        t        j                  t        j                  |      t        j                  |      z              }|dkD  r%t        t        j                  |d|z  z              n|t        |z        D cg c]   t        fdt        |      D              " }}t        |      D ci c]  \  }}||
 }}}t        t	        |            }	i }
| j                  j                   }t#        | j                  j$                        }t        |      D ]?  }|}||dz
  k(  r.||k(  r)| j                   }| j
                  }	| j&                  r|	nd }nd| d| j                    }d }d|z   }d|z   }t        |	      D ]  \  }}||   }g }t              D ](  t)        ||      }||   }|j+                  |||f       * t,        || j.                  f|
||f<   |D ]y  \  }}}t0        j2                  ||f|f|
|||f<   ||f|
vs)||   }|d	k(  r||k  r||f}n||d
f}||
|<   n||f}| j4                  ||| j6                  ||| j.                  |f	|
||f<   {  B ||k7  rd|z   }t        |      D ci c]&  }||ft8        ||f| j6                  | j.                  |f( }}t        | j
                        D ]"  \  }t:        |||z  f|f|| j                   f<   $ |
j=                  |       |
S c c}w c c}}w c c}w )N
max_branch    r\   c              3  8   K   | ]  }t        |        y wrN   )r6   ).0jinsplitss     rJ   	<genexpr>z%TaskShuffle._layer.<locals>.<genexpr>  s     >1%1g&>s   zstage--r   r   r   emptyzrepartition-group-)rA   getr=   r   r   r   superrl   r>   rS   mathceillogr   rW   r   rG   r+   rq   r   r8   appendr   r?   r   r   r   r0   r3   r4   update) rI   r   npartitions_inputr   stagesr   inputsinpinp_part_map	parts_outr   name
meta_inputstage
name_inputr   r   r   r   partoutr   _inp_idxr   _part	input_keyrepartition_group_namedsk2pr   	__class__s         `                        @rJ   rl   zTaskShuffle._layer{  s   ll(b--lDAGR
 JJ22t J.2Cz2Q
 7>## **TYYtxx(9:TXXj=QQRSA:$))$5!f*$EFGG'G
 7F?+
 >f>>
 
 .7v->?61cQ??#f+&	 zztzz//0
6] ?	EJ!$8I)Izz ,,	'+~~)4wa

|4!)D!DJ%.y%9 1!TTl!w BA!#ua0Du:D ''T4(@A	B  %%,T;'( &2  MAtT ((+T25CT401 +D1< ,T 2 A:$'88-7,?	 .@w,O	1;I)3U(;I !//%# 33!#- --'
;/67- #1?	B ++%9D%@" 01	  (+#1I++%%. 	D 	 "$"2"23 1%+Q1B-BC)djj!_% JJt
E
 @V	s   9%M(.M-.+M3)	rx   ry   rz   r   r   r   rq   rl   __classcell__)r   s   @rJ   r   r   q  s*    2 u urL   r   c                  &    e Zd ZdZed        Zd Zy)r   z!Disk-based shuffle implementationc                    t        |      5  | j                  |      }|j                  D ci c]  }||v s||j                  |       }}|j	                  |d       d d d        y c c}w # 1 sw Y   y xY w)NT)fsync)r.   groupbygroups	get_groupr   )r   rg   r   r   gr   ds          rJ   r   zDiskShuffle._shuffle_group  sn    (+ 	$

3A,-HHEqWAKKN"EAEHHQdH#	$ 	$E	$ 	$s!    A-	A(A(
A-(A--A6c           
     h   ddl m} | j                  }| j                  }t	        j
                         j                  }d|z   f} ||j                        }|t        |      fi}d|z   }t        |j                               D 	
ci c]#  \  }	}
||	f| j                  |
|| j                  |f% }}	}
d|z   f}|t        t        |      fi}t        | j                        D ci c]'  \  }}| j                  |ft         |||j                  |f) }}}t#        j$                  ||||      S c c}
}	w c c}}w )Nr   )partd_encode_dispatchzzpartd-)
encode_clszshuffle-partition-zbarrier-)dask.dataframe.dispatchr   r0   r=   uuiduuid1hexrq   r/   r   __dask_keys__r   r   r,   rV   rG   r-   toolzmerge)rI   r   columnr   always_new_tokenr   r   dsk1r   r   keyr   barrier_tokendsk3r   r   dsk4s                    rJ   rl   zDiskShuffle._layer  sF   A((ZZ::<++))+*2884
(J?AB $&66 $B$4$4$67
3 1I++S&$:J:JANN
 
 $&668d45
 "$"2"23
1 ZZOgq!RXX}EE
 

 {{4tT22

s   (D(!,D.N)rx   ry   rz   r   r   r   rl   r   rL   rJ   r   r     s    +$ $3rL   r   c                "    ddl m}  || ||      S )Nr   )shuffle_transfer)distributed.shuffle._shuffler  )inputidinput_partitionr  s       rJ   _shuffle_transferr    s    
 >
 rL   c                  :    e Zd ZdZej
                  d        Zd Zy)r   z'P2P worker-based shuffle implementationc                b    | j                   j                  j                  | j                        S )N)r]   )r=   rq   rp   r0   rH   s    rJ   rq   zP2PShuffle._meta.  s&    zz$$T-D-D$EErL   c                   ddl m}m}m}m} ddlm}m} i }| j                  j                  d      d   } ||      }	 ||	      }
d|z   }| j                  r| j                  nt        t        | j                              }| j                  rt        | j                        n| j                  }t               }t        | j                   j"                        D ]d  }t%        ||ft&        t)        | j                   j                  |f      ||      }|||j*                  <   |j-                  |j/                                f  ||
||g|d ||	| j                  | j0                  | j                   j2                  |dd	      i}|||j*                  <   | j                  }t5        |      D ]4  \  }}t%        ||f||| |j.                               }|||j*                  <   6 |S )
Nr   )P2PBarrierTask	ShuffleIdbarrier_keyp2p_barrier)DataFrameShuffleSpecshuffle_unpackr   zshuffle-transfer-specT)r  r   r  rt   r   r   drop_column)distributed.shuffle._corer  r  r  r  r  r  r  rG   splitr   r   rV   r   r>   rW   r=   r   r   r  r   r  r   refr0   rq   r   )rI   r  r  r  r  r  r  r   token
shuffle_id_barrier_keyr   r   parts_out_argtransfer_keysr   tr,   r   s                      rJ   rl   zP2PShuffle._layer2  s   	
 	
 	V

  %b)u%
":."U* !%DDt?S?S9T4U 	
 (,~~E$""#4;O;O 	 tzz--. 		*Aq	!))1-.A CJ  )		* !
 	

 & 00..ZZ%%' 
 #GKK zz$Y/ 	KAxq	A CJ	 
rL   N)rx   ry   rz   r   r   r   rq   rl   r   rL   rJ   r   r   +  s$    1F F?rL   r   c                    t        |t              r|n|g}|D cg c]  }t        | |      s| }}| |   }t        | |      r|j	                  | j
                        }|S c c}w )z
    Make a column selection that may include the index

    Parameters
    ----------
    columns_or_index
        Column or index name, or a list of these
    )_index)rQ   rV   _is_column_label_reference_contains_index_nameassignr   )r   columns_or_indexncolumn_namesselected_dfs        rJ   _select_columns_or_indexr/  y  su     ''7>EUDV   0U!3MbRS3TAULU\"KB 01!(((9 Vs
   A!A!c                    t        |t               xrb t        j                  |      xs; t        j
                  j                  j                  |      xs t        |t              xr || j                  v S )z
    Test whether a key is a column label reference

    To be considered a column label reference, `key` must match the name of at
    least one column.
    )
rQ   r   npisscalarr   r   r   	is_scalarrW   r]   )r   r  s     rJ   r(  r(    s]     sD!! 	[[V!7!7!<V
3PU@V	2::rL   c                d     t        |t              rt         fd|D              S t         |      S )zL
    Test whether the input contains a reference to the index of the df
    c              3  6   K   | ]  }t        |        y wrN   )_is_index_level_reference)r   r,  r   s     rJ   r   z'_contains_index_name.<locals>.<genexpr>  s     N,R3N   )rQ   rV   anyr6  )r   r+  s   ` rJ   r)  r)    s0     "D)N=MNNN(-=>>rL   c                   t        | t              r | j                  j                  j                  n| j                  j                  }|duxr} t        |t               xrj t        j                  |      xs; t        j                  j                  j                  |      xs t        |t              xr ||k(  xr |t        | dd      vS )z
    Test whether a key is an index level reference

    To be considered an index level reference, `key` must match the index name
    and must NOT match the name of any column.
    Nr]   r   )rQ   r   r   rq   r   r1  r2  r   r   r   r3  rW   getattr)r   r  
index_names      rJ   r6  r6    s     )32t(<$$"((--J$ 	23%%	2[[V!7!7!<V
3PU@V	2 :	2 wr9b11rL   c                  <    e Zd ZdZg dZdddddZdZed	d       Zy)
r   aU  Assign a partitioning index

    This class is used to construct a hash-based
    partitioning index for shuffling.

    Parameters
    ----------
    frame: Expr
        Frame-like expression being partitioned.
    partitioning_index: Expr or list
        Index-like expression or list of columns to construct
        the partitioning-index from.
    index_name: str
        New column name to assign.
    npartitions_out: int
        Number of partitions after repartitioning is finished.
    index_shuffle : bool, default False
        Whether we are using solely the index for the shuffle
    )r=   r0   r;  r>   rt   rB   NFr   )
cast_dtyperB   r0   r;  Tc                `   fd} |||      } |||       }i }|j                   j                         D ]$  \  }	}
t        |
      st        j                  ||	<   & |r|j                  |d      }t        ||      }| j                  dk(  r| j                         }  | j                  di ||iS )z)Construct a hash-based partitioning indexc                    t        | d      r!| j                  dk(  r| j                         } | S r*| dgk(  rddini } |j                  j                  di |} | S t	        ||       } | S )Nr   r\   r'  r   r   )hasattrr   to_framer   r/  )rY   objr   rB   s      rJ   
_get_indexz5AssignPartitioningIndex.operation.<locals>._get_index  sw    sF#88q=,,.C J -0XJ->)B(cii((040 J /sC8JrL   ignore)errorsr\   r   )
dtypesr   r   r1  float64astyper0   r   rA  r*  )r   r   r   r   rt   rB   rC  
meta_indexrF  rg   r   s        `     rJ   	operationz!AssignPartitioningIndex.operation  s    
	  t,
5"%$++113 	)JC$U+ jjs	) LLL9E"5+677a<Bryy)D%=))rL   )r   rR   r   rS   rB   bool)	rx   ry   rz   r   r{   r|   #_preserves_partitioning_informationr   rJ  r   rL   rJ   r   r     s=    (K +#	I +/'* *rL   r   c                  6    e Zd ZdZd Zed        Zed        Zy)BaseSetIndexSortValuesTc                R   d| j                   v r| j                  | j                  S | j                  dk(  ryt        | j                  j
                        rY| j                  j                  rC| j                  j                  | j                  j                  k(  r| j                  j                  S t        | j                  | j                  | j                  | j                  | j                        \  }}}}|r/t        |      | j                  k(  r|j                         |d   gz   }|S )Nuser_divisionsr\   NNupsampler  )r{   rP  _npartitions_inputr9   _divisions_columnrq   known_divisionsr   r=   other	divisions_get_divisions	ascendingrS  r   r   rI   rX  minsmaxes	presorteds        rJ   rw   z!BaseSetIndexSortValues._divisions  s    t///D4G4G4S&&&""a' $00667&&66&&22djj6L6LL::''',:JJ""##NN]]-
)	4	 Td&=&==		uRyk1IrL   c                T    | j                  d      xs | j                  j                  S )Nr   )rb   r=   r   rH   s    rJ   rT  z)BaseSetIndexSortValues._npartitions_input  s     ||M*Ddjj.D.DDrL   c                `    | j                  d      xs t        | j                               dz
  S )Nr   r\   )rb   r   rw   rH   s    rJ   r   z"BaseSetIndexSortValues.npartitions!  s'    ||M*Hc$//2C.Dq.HHrL   N)rx   ry   rz   r}   rw   propertyrT  r   r   rL   rJ   rN  rN    s9     0 E E I IrL   rN  c            
          e Zd ZdZg dZdddddddddd	ZdZed	        Ze	j                  d
        Zed        Zed        Zd Zd Zd Zy)SetIndexa  Abstract ``set_index`` class.

    Simplifies (later lowers) either to Blockwise ops if we are already sorted
    or to ``SetPartition`` which handles shuffling.

    Parameters
    ----------
    frame: Expr
        Frame-like expression where the index is set.
    _other: Expr | Scalar
        Either a Series-like expression to use as Index or a scalar defining the column.
    drop: bool
        Whether we drop the old column.
    sorted: str
        No need for shuffling if we are already sorted.
    user_divisions: int
        Divisions as passed by the user.
    upsample: float
        Used to increase the number of samples for quantiles.
    )r=   _otherrp   rP  partition_sizerZ  r   rS  shuffle_methodr   rA   TN    A      ?F)	rp   rP  re  rZ  r   rS  rf  rA   r   c                r    | j                   t        | j                  t              s| j                  gz   S g z   S rN   )r]   rQ   rd  r   rH   s    rJ   _projection_columnszSetIndex._projection_columnsV  s6    ||!+DKK!>T[[M
 	
DF
 	
rL   c                    t        | j                  t              r| j                  j                  }n| j                  }| j                  j                  j                  || j                        S Nro   )rQ   rd  r   rq   r=   	set_indexrp   )rI   rW  s     rJ   rq   zSetIndex._meta\  sL    dkk4(KK%%EKKEzz))%dii)@@rL   c                    | j                   S rN   )rW  rH   s    rJ   rU  zSetIndex._divisions_columnd  s    zzrL   c                    t        | j                  t              r| j                  S | j                  | j                     S rN   )rQ   rd  r   r=   rH   s    rJ   rW  zSetIndex.otherh  s-    dkk4(;;zz$++&&rL   c                ~   | j                  d      dk(  sN| j                  j                  dk(  r| j                  t	        | j                        dk(  ro| j                  d      ^| j                  }| j                  j                  dkD  rt        |d      }t        || j                  | j                  d       }t        |      S | j                  | j                         }t        | j                  j                        r| j                  j                  |k(  rd}nEt!        | j                  | j                  | j"                  | j$                  | j&                        d   }|re| j                  | j                  j                  k(  rBt        | j                  | j                  | j                  || j(                        }t        |      S t+        | j                  | j                  | j                  | j                  | j"                  nd | j$                  | j&                  | j                  | j,                  | j.                  	      S )Nr   r\      TrR     )rb   r=   r   rP  r   r'   SetIndexBlockwiserd  rp   SortIndexBlockwiserw   r9   rU  rq   rW  rX  rY  rT  rZ  rS  r   SetPartitionrf  rA   )rI   expr	index_setrX  r^  s        rJ   r   zSetIndex._lowern  s   LL'1,zz%%*$$,D4G4G0HA0M]+3::Dzz%%))$2)$TYYMI%i00&)Id44::;JJ((I5 	*JJJJ++NN!]] 	 T--1G1GG-JJTYY	4;;	 *)44JJKKII'+':':'BD##NNMMLL

 
	
rL   c                   ddl m}m}m} t	        ||      rt	        | j
                  t        t        f      rf| j
                  | j                  j                  v rDt        | j                  |j                  | j
                  d      }t        || j
                        S t	        ||      rt	        | j
                  t        t        f      rf| j
                  | j                  j                  v rDt        | j                  |j                  | j
                  d      }t        || j
                        S t	        |t              rt	        | j
                  t              s| j
                  gng }t!        | |||      }	t#        |	      }	| j                  j                  D 
cg c]	  }
|
|	v s|
 }	}
| j                  j                  |	k(  ry  t%        |       t%        |       | j                  |	   g| j&                  dd   |j)                  d            S t	        ||      r$| j+                  ||      r| j-                  |      S y y c c}
w )	Nr   r   HeadTailTr,  _columnsrZ  )rd  additional_columnsr\   r]   )dask.dataframe.dask_expr._exprr   rz  r{  rQ   rd  rS   rR   r=   r]   r   r,  rc  r   r   r   r   r)   r`   ra   rb   r^   r_   )rI   rc   rd   r   rz  r{  headtailaddition_columnsr]   r   s              rJ   ri   zSetIndex._simplify_up  s   EE vt$4;;c
3tzz111$**4;;RVWDD55 vt$4;;c
3tzz111vxx$++QUVDD55fj)%/T%B  2fj=MG 'w/G"&**"4"4EQWqEGEzz!!W,4<T
4::g.Cqr1BCy)  ff%$*L*LJ+
 ..v66+
% Fs   	I $I c                    t        | ||      r5ddlm |j                  }t	        fd|j                         D               S y)Nr   )Indexc              3  6   K   | ]  }t        |        y wrN   )rQ   )r   xr  s     rJ   r   z9SetIndex._filter_passthrough_available.<locals>.<genexpr>  s     BA:a/Br7  F)r   r  r  	predicater8  walk)rI   rc   rd   r   r  s       @rJ   r^   z&SetIndex._filter_passthrough_available  s:    'fjA<  ABBBBBrL   )rx   ry   rz   r   r{   r|   r~   ra  rj  r   r   rq   rU  rW  r   ri   r^   r   rL   rJ   rc  rc  &  s    *K 
I 
 

 A A   ' '
.
`&7PrL   rc  c            
          e Zd Zg dZdddddddddd	ZdZd	 Zedd
       Zed        Z	ed        Z
ej                  d        Zej                  d        Zd Zd Zy)
SortValues)r=   byrZ  na_positionr   re  sort_functionsort_function_kwargsrS  r?   rf  rA   rg  TNlastrh  F)	re  rZ  r   r  r  r  rS  r?   rf  c                0   | j                   j                  dk(  ryt        | j                   | j                   | j                  d      | j                  | j
                  | j                        \  }}}}|r| j                   j                  S dt        |      z  S )Nr\   rQ  r   rR  rN   )	r=   r   rY  r  rT  _divisions_ascendingrS  rX  r   r[  s        rJ   rw   zSortValues._divisions  s    ::!!Q&,:JJJJtwwqz"##%%]]-
)	4	 ::'''Y''rL   c                l    | j                   }t        |t              s|d   }t        |t              sJ |S Nr   )rZ  rQ   rK  )rI   divisions_ascendings     rJ   r  zSortValues._divisions_ascending  s9    "nn-t4"5a"8-t444""rL   c                f    | j                  d      | j                  d      S t        j                  S )Nr  )rb   r5   sort_valuesrH   s    rJ   r  zSortValues.sort_function  s*    <<(4<<00}}rL   c                    | j                   | j                  | j                  | j                  d}| j	                  d       |j                  | j	                  d             |S )N)r  rZ  r  r?   r  )r  rZ  r  r?   rb   r   )rI   sort_kwargss     rJ   r  zSortValues.sort_function_kwargs	  s[     ''++ --	
 <<./;t||,BCDrL   c                .    | j                   j                  S rN   r   rH   s    rJ   rq   zSortValues._meta  r   rL   c                    | j                   j                  | j                     }t        |      r|j                  d   }|S r  )rq   rF  r  r:   iloc)rI   r   s     rJ   _meta_by_dtypezSortValues._meta_by_dtype  s4    

!!$''*% JJqMErL   c                   | j                   j                  dk(  r+t        | j                   | j                  | j                        S | j                   | j
                  d      }t        | j                   || j                  | j                  | j                        \  }}}}|rN| j                  | j                   j                  k(  r+t        | j                   | j                  | j                        S t        ||j                  j                  |      j                         | j                        }t        | j                   d|      }t        |dt!        |      dz
  | j"                  | j$                  | j&                        }t)        || j                   j*                        }t        || j                  | j                        S )Nr\   r   rR  rZ  r   r>   r?   r@   rA   )r=   r   SortValuesBlockwiser  r  r  rY  rT  r  rS  _SetPartitionsPreSetIndexrq   _constructorr  r	   r   r   r?   rf  rA   r   r]   )rI   _divisions_byrX  r   r^  
partitionsassignedr   s           rJ   r   zSortValues._lower   s|   ::!!Q&&

D..0I0I  

4771:.%3JJ##%%]]&
"	1a ))TZZ-C-CC&

D..0I0I  /,,Y7CCE//


 $**mZ@	NQ.**&&LL
 h

(:(:;"d(($*C*C
 	
rL   c                   ddl m}m}m} t	        ||      r7t        | j                  |j                  | j                  | j                        S t	        ||      r7t        | j                  |j                  | j                  | j                        S t	        ||      r#| j                  ||      r| j                  |      S t	        |t              rt        | ||| j                        }t        |      }| j                  j                   D cg c]	  }||v s| }}| j                  j                   |k(  ry  t#        |       t#        |       | j                  |   g| j$                  dd   |j'                  d            S t	        |t(              rX|j'                  d      F t#        |        t#        |      | j                  g|j$                  dd   g| j$                  dd   S y y c c}w )Nr   ry  r|  r~  r\   r]   r   )r  r   rz  r{  rQ   r   r=   r,  r  rZ  r   r^   r_   r   r   r)   r]   r`   ra   rb   r&   )rI   rc   rd   r   rz  r{  r]   rg   s           rJ   ri   zSortValues._simplify_upF  s   EEfd#

fhhDNN  fd#

fhhDNN  ff%$*L*LJ+
 ..v66fj)1fjTWWG 'w/G&*jj&8&8KsC7NsKGKzz!!W,4<T
4::g.Cqr1BCy) 
 v{+/0<4:VTZZ>&//!"*=>AEqrAR  = , Ls   	G*G*)returnrK  )rx   ry   rz   r{   r|   r~   rw   ra  r  r  r  r   r   rq   r  r   ri   r   rL   rJ   r  r    s    K   $
I (  # #  
 	 	      $
L#rL   r  c                      e Zd ZdZg dZd Zy)ru  a  Shuffles the DataFrame according to its new divisions.

    Simplifies the Expression to blockwise pre-processing, shuffle and
    blockwise post-processing expressions.

    Parameters
    ----------
    frame: Expr
        Frame-like expression where the index is set.
    _other: Expr | Scalar
        Either a Series-like expression to use as Index or a scalar defining the column.
    drop: bool
        Whether to drop the old column.
    new_divisions: int
        Divisions of the resulting expression.
    )	r=   rd  rp   r   rZ  rS  rP  rf  rA   c           	        | j                   j                  j                  | j                               }t	        | j                   |      }t        | j                  d|      }t        | j                  t              rt        |d| j                        }t        |dt        | j                               dz
  d| j                  | j                        }t        ||j                  D cg c]
  }|dk7  s	| c}      }t        | j                  t              rd\  }}n,| j                   | j                   j                  j"                  }}| j                   j$                  | j&                  | j(                  d| j*                  f}t,        j/                  |      }	t1        || j                   j                  j"                  ||| j                  j                  j                  j2                  |	| j4                        }
t7        |
      S c c}w )Nr   r'  r\   Tr  )Tr'  rg  )rW  rq   r  rw   r  r	   r=   rQ   rd  r   r   r   rf  rA   r   r]   rp   r   rG   rT  rZ  rS  divisions_lrur   _SetIndexPostr   rP  rt  )rI   rX  r  r  r   r   rp   set_namelru_keycomputed_divisionsrw  s              rJ   r   zSetPartition._lower  s   JJ$$11$//2CD	.tzz9E
$**mZ@dkk4(h$++>H 12Q6&&LL
 ("2"2IQa=6HqI
 dkk4(+ND(!YY

(8(8(=(=(DJJ##NNMM
 +..w7!JJ!!JJ$$**
	 "),,1 Js   %
H
0H
N)rx   ry   rz   r   r{   r   r   rL   rJ   ru  ru  l  s    "
K'-rL   ru  c                  V    e Zd Zg dZdddZ ee      ZdZe	j                  d        Zy)r  )r=   new_divisionsrZ  r  Tr  )rZ  r  c                `    t        | j                  j                  j                  dg            S r  )r+   r=   rq   r  rH   s    rJ   rq   z_SetPartitionsPreSetIndex._meta  s$    ))66s;<<rL   N)rx   ry   rz   r{   r|   r   r1   rJ  r}   r   r   rq   r   rL   rJ   r  r    s:    HK"6:I/0I = =rL   r  c                  F    e Zd Zg dZdZedd       Zed        Zd Z	d Z
y)	r  )r=   r;  rp   r  column_dtyper  rP  Tc                     | j                   d d S )N   )ra   rH   s    rJ   _argsz_SetIndexPost._args  s    }}Ra  rL   c                    | j                  ||      } || j                  _        | j                  j	                  |      | _        | S rl  )rm  r   r   r]   rH  )r   r;  rp   r  r  s        rJ   rJ  z_SetIndexPost.operation  s:    \\(\."ZZ&&|4
	rL   c           	     r   | j                   j                  t        |      dz
  k  rt        | j                   j	                  t
                    }t        |      dkD  r?t        t        |      D cg c]  \  }}||d   j                  v r| c}}|d   gz         S | j                   j                  S |S c c}}w )Nr\   r   r  )
r=   r   r   rV   find_operationsr   rW   r   r   rX  )rI   rX  part_filterr   divs        rJ   _get_culled_divisionsz#_SetIndexPost._get_culled_divisions  s    ::!!C	NQ$66tzz99:LMNK;!# '0	&:"AsA : :: 
 !}o&  zz+++s   /B3
c                    | j                  d       | j                  | j                  d            S | j                  J | j                  | j                  d         S )NrP  r   )rb   r  r  rH   s    rJ   rw   z_SetIndexPost._divisions  s[    <<()5--dll;K.LMM&&222))$*A*A!*DEErL   N)r  rV   )rx   ry   rz   r{   r}   ra  r  r   rJ  r  rw   r   rL   rJ   r  r    sB    K !! !  "FrL   r  c                  2    e Zd ZdZdgZej                  ZdZy)rt  Tr=   N)	rx   ry   rz   _projection_passthroughr{   r5   
sort_indexrJ  r}   r   rL   rJ   rt  rt    s    ")KI rL   rt  c                  X    e Zd ZdZg dZddgZdZed        Ze	j                  d        Zy)	r  F)r=   r  r  r  r  Tc                 Z    |j                  d      }|j                  d      } || i ||S )Nr  r  )pop)r   kwargs	sort_funcr  s       rJ   rJ  zSortValuesBlockwise.operation  s3    JJ/	jj/$8&8K88rL   c                .    | j                   j                  S rN   r   rH   s    rJ   rq   zSortValuesBlockwise._meta  r   rL   N)rx   ry   rz   r  r{   _keyword_onlyr}   r   rJ  r   r   rq   r   rL   rJ   r  r    sI    #;K$m4M 9 9
    rL   r  c                  L    e Zd Zg dZddddZg dZdZdZed        Z	d Z
d	 Zy)
rs  )r=   rW  rp   r  r   FNT)r   r  rp   )rp   r  r   c               &     | j                   |i |S rN   )rm  )r   r  r   r  s       rJ   rJ  zSetIndexBlockwise.operation  s    r||T,V,,rL   c                |    | j                   d| j                  j                  dz   z  S t        | j                         S rv   )r  r=   r   rW   rH   s    rJ   rw   zSetIndexBlockwise._divisions  s9    %djj44q899T''((rL   c                   t        |t              rt        | ||t        | j                              }| j
                  j                  |k(  ry | j
                  j                  D cg c]	  }||v s| }} t        |       t        |       | j
                  |   g| j                  dd   |j                  d            S y c c}w )Nr~  r\   r]   )
rQ   r   r   r)   rW  r=   r]   r`   ra   rb   )rI   rc   rd   r]   rg   s        rJ   ri   zSetIndexBlockwise._simplify_up  s    fj)1#3DJJ#?	G zz!!W,&*jj&8&8KsC7NsKGK4<T
4::g.Cqr1BCy)  * Ls   %	C/C)rx   ry   rz   r{   r|   r  r}   rL  r   rJ  rw   ri   r   rL   rJ   rs  rs    s@    GK 4FI7M *.'- -)
rL   rs  
   c                |    |j                   ||||f}|t        v r	t        |   S t        | |||||      }|t        |<   |S rN   )rG   r  _calculate_divisions)r=   rW  r   rZ  re  rS  r  results           rJ   rY  rY  *  sS     ;;Y
IC
mS!!!uk9nhF  M#MrL   c           
     	    ddl m}m} t        j                        rt              t        j                  j                        r* |      j                  j                         j                  	 t         | |||             |      j                  t        j                         |      j                  t        j                              \  }}	}
g }t#        j2                  |      j5                         }|rt7        |      }t        t9        j:                  ||z        d      }t        | j<                        }|j>                  }	 tA        jB                  tA        jD                  d|dz
  |dz         tA        jD                  d|dz
  |      |jG                               jG                         }nV|j>                  }tQ        |jN                  d |dz
   jS                               |jN                  |dz
  d  jG                         z   }|	jU                         }	|
jU                         }
tW        j                  j                  t"        jX                        r8j                  j                  }|	jK                  |      }	|
jK                  |      }
|	j3                         j+                         s|
j3                         j+                         rd}n|	j>                  }|r|
jN                  d |dz
   n|
jN                  dd  j[                  d      }|r|	jN                  dd  n|	jN                  d |dz
   j[                  d      }|	jG                         |	j]                  |      jG                         k(  xrF |
jG                         |
j]                  |      jG                         k(  xr ||k  j5                         }||	jG                         |
jG                         |fS # t         $ r}t"        j$                  j&                  j)                  j                  j                        sVt+         fd j,                  D              rddj.                   dfnd\  }}t1        d	| d
j.                   d| d      ||d }~ww xY w# t         tH        f$ rQ tA        jD                  d|dz
  |dz         jK                  tL              }|jN                  |   jG                         }Y w xY w)Nr   )RepartitionQuantilesnew_collectionrR  c              3  V   K   | ]   }j                   |   j                   k(   " y wrN   rF   )r   r   r=   rW  s     rJ   r   z'_calculate_divisions.<locals>.<genexpr>Z  s"     Lu{{eAhnn4Ls   &)r  z`.dropna(subset=['z'])`)seriesz`.loc[series[~series.isna()]]`z-Divisions calculation failed for non-numeric z 'z}'.
This is probably due to the presence of nulls, which Dask does not entirely support in the index.
We suggest you try with .r\   )r  xpfpFTro   r  )/dask.dataframe.dask_exprr  r  r9   rq   r   r*   r   cat
as_ordered_exprr   map_partitionsr5   minmax	TypeErrorr   r   r   r   r8  r]   r   rk   isnaallsumr   r   r   sizer1  interplinspacetolistr   rH  rS   r  rV   uniquebfillrQ   r   rr   r  )r=   rW  r   rZ  re  rS  r  r  rX  r\  r]  erB  suggested_methodsizesempty_dataframe_detectedtotalr,  indexesr   r^  maxes2mins2s   ``                     rJ   r  r  <  s+    NU[[!e$EKK--.u%))446<<!(/{XVW5!0075!007"
	42 E!wwy1557E
$))EN$:;Q?+u'8'89NN	9		++aQa8;;q!a%+##% fh	  NN!a%(//12Y^^AEG5L5S5S5UU 	 ::<DKKME%++##R%8%89!!{{5!U#yy{EJJL,,.	II)2%**Wq1u%

12SS T 
 #,127QU1CPP Q 
 KKMT--	-BIIKK '%"3"3i"3"H"O"O"QQ'%$$& 	
 dkkmU\\^Y>>E  
 vv||,,U[[->->? LemmLL /

|4@AA "C!
 &?uBuzzl S++;*<A? 	 G%D :& 	9kk!QUK!O<CCCHG!w/668I	9s.   5A&O" A'R "	R+BR  RAS('S()r  zpd.DataFramer  rS   r  rS   )Trg  rh  )r   rS   rZ  rK  re  floatrS  r  )g
__future__r   r   r   r   r   numpyr1  pandasr   tlzr  r   daskr   dask._task_specr   r   dask.dataframe.corer   r  r	   r
   r   r   r   r   r   r   r   $dask.dataframe.dask_expr._reductionsr   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   %dask.dataframe.dask_expr._repartitionr&   r'   dask.dataframe.dask_expr._utilr(   r)   r   r*   r+   dask.dataframe.shuffler,   r-   r.   r/   r0   r1   r2   r3   r4   
dask.utilsr5   r6   r7   r8   r9   r:   r<   r   r   r   r   r   r   r  r   r/  r(  r)  r6  r   rN  rc  r  ru  r  r  rt  r  rs  r  rY  r  r   rL   rJ   <module>r     s(   "        #  ) '
 
 
     , R @ C
 
 
 f4$ f4R28k 28j6
 6
rF& FR- D'3- '3T  		F F\2?"D*i D*N!IT !IHf% fRZ' ZzE-8 E-P=	 =,FI ,F^! ! )  "	 B B !  	
  , !X? X? 	X?
 X? X?rL   