
    biF                       d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlZd dlZd dlmZmZ d dlmZ d d	lmZ d d
lmZmZmZ d dlmZmZmZmZ d dlm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z(m)Z)  G d de      Z* G d de*      Z+ G d de*      Z, G d de*      Z- G d de*      Z. G d de*      Z/d Z0 e"d      Z1d Z2d  Z3y)!    )annotationsN)Callable)getitem)pformat)Any)is_datetime64_any_dtypeis_numeric_dtype)unique)methods)_concat_map_freq_to_period_startsplit_evenly)ExprFilter
Projectionplain_column_projection)TotalMemoryUsageFrame)LRU)is_series_like)tokenize)iter_chunksparse_bytesc                       e Zd ZdZg dZdddddZdZdZej                  d        Z
d Ze fd	       Zej                  d
        Zd Zd Zej                  d        Z xZS )Repartitionz"Abstract repartitioning expression)framenew_partitionsnew_divisionsforcepartition_sizeNF)r   r   r   r   Tc                .    | j                   j                  S N)r   _metaselfs    `/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/dask/dataframe/dask_expr/_repartition.pyr"   zRepartition._meta0   s    zz    c                    | j                  d      | j                  "| j                  d      }|j                         S | j                  S )Nr   F)fuse)operandr   optimize
_divisionsr   )r$   xs     r%   r+   zRepartition._divisions4   sF    LL)*6"".5)A<<>!!!!r&   c                    d| j                   v rP| j                  d      ?| j                  d      }t        |t              r || j                  j
                        S |S t        |   S Nr   )_parametersr)   
isinstancer   r   npartitionssuper)r$   r   	__class__s     r%   r1   zRepartition.npartitions=   sb      0 00-.:!\\*:;N.(3%djj&<&<==!!w""r&   c                    d| j                   v rJ| j                  d      9| j                  | j                  j                  k  r| j                  j                  S t               S r.   )r/   r)   r1   r   -unique_partition_mapping_columns_from_shufflesetr#   s    r%   r5   z9Repartition.unique_partition_mapping_columns_from_shuffleI   sQ      0 00-.:  DJJ$:$::::KKK5Lr&   c                   t        |       t        k7  ry | j                  d      | j                  | j                  j
                  k  r%t        | j                  | j                  d            S | j                  | j                  j
                  k(  r| j                  S t        j                  | j                  j                        j                         x}}| j                  j                  rt        |j                        st        |j                        r| j                  }| j                  }t        |j                        r|j                  j!                  d      }t#        |      r|j                  }t%        |      }t'        j(                  t'        j*                  d||dz         t'        j*                  d||      |      }t        |j                        rBt-        j.                  t        j                  |      j!                  |j                              }nIt'        j0                  |j                  t&        j2                        r|j!                  |j                        }t5        |t&        j6                        r|j/                         }t9        |      }|j                  d   |d<   |j                  d   |d<   t9        t;        |d d             |d   gz   }t=        ||| j>                        S tA        | j                  | j                  d            S | jB                  rtE        | jB                        | j                  j                  k(  r| j                  S | j                  j                  d   tG        d      t=        | j                  | jB                  | j>                        S | jH                  !tK        | j                  | jH                        S tM               )	Nr   float64r      )r,   xpfpz6Cannot repartition on divisions with unknown divisions)r   )'typer   r)   r   r   r1   RepartitionToFewerpdSeries	divisionsdrop_duplicatesknown_divisionsr   dtyper	   valuesastyper   lennpinterplinspacer   tolist
issubdtypeintegerr0   ndarraylistr
   RepartitionDivisionsr   RepartitionToMorer   tuple
ValueErrorr   RepartitionSizeNotImplementedError)r$   original_divisionsrA   r1   dfns         r%   _lowerzRepartition._lowerT   s   :$<<()5""TZZ%;%;;)$**dllCS6TUU$$

(>(>>zz!13JJ((2!/#$"Y ::--+IOO<'	8"&"5"5KB.y?$-$4$4$;$;I$F	%i0$-$4$4	IA "		++aK!O<;;q!Q/$!I
 //A/G/GH$+NNIIi0778J8P8PQ%	 '9'?'?L$-$4$45G5M5M$N	!)RZZ8$-$4$4$6	 $YI#%<<?IaL$&LL$4IbM !%VIcrN%; <	" NI/ItzzJJ,TZZFV9WXXT''(DJJ,@,@@zz!%%a(0 L  (

D4F4F

SS  ,"4::d>Q>QRR%''r&   c                    t        |t              rG| j                  ||      r5| j                  |j                  j                  k(  ry | j                  |      S t        |t              rt        | ||      S y r!   )r0   r   _filter_passthrough_available_name	predicate_filter_simplificationr   r   )r$   parent
dependentss      r%   _simplify_upzRepartition._simplify_up   sl    ff%$*L*LJ+
 zzV--333 ..v66fj)*4DD *r&   c                    t        | j                  d      t              r+ | j                  d      | j                  j                        S | j                  d      S r.   )r0   r)   r   r   r1   r#   s    r%   r   zRepartition.new_partitions   sQ     $,,'78(C +DLL)*4::+A+AB	
 ./	
r&   )__name__
__module____qualname____doc__r/   	_defaults_is_length_preserving_filter_passthrough	functoolscached_propertyr"   r+   propertyr1   r5   rY   ra   r   __classcell__r3   s   @r%   r   r      s    ,K 	I !   " 	# 	#  >(@
E 
 
r&   r   c                  X    e Zd ZdZddgZd Zed        Zej                  d        Z
d Zy)	r>   zReduce the partition countr   r   c                @     t         fd j                  D              S )Nc              3  P   K   | ]  }j                   j                  |     y wr!   r   rA   .0ir$   s     r%   	<genexpr>z0RepartitionToFewer._divisions.<locals>.<genexpr>   s      RTZZ))!,R   #&)rR   _partitions_boundariesr#   s   `r%   r+   zRepartitionToFewer._divisions   s    Rd6Q6QRRRr&   c                |    || z  }t        | dz         D cg c]  }t        ||z         }}t        ||      S c c}w Nr9   )rangeint_clean_new_division_boundaries)n_new_partitionsn_old_partitionsnpartitions_rationew_partition_indexnew_partitions_boundariess        r%   _compute_partition_boundariesz0RepartitionToFewer._compute_partition_boundaries   s_    ,/?? (--=-A'B%
# #&778%
! %
 .%'7
 	
	%
s   9c                x    | j                   }| j                  j                  }||kD  sJ | j                  ||      S r!   )r   r   r1   r   )r$   r1   npartitions_inputs      r%   rx   z)RepartitionToFewer._partitions_boundaries   s?    )) JJ22 ;...11+?PQQr&   c                   | j                   }t        t        ||dd              D ci c]K  \  }\  }}| j                  |ft        t        ||      D cg c]  }| j                  j                  |f c}fM c}}}}S c c}w c c}}}}w rz   )rx   	enumeratezipr\   r   r{   r   )r$   r   ru   startendjs         r%   _layerzRepartitionToFewer._layer   s    $($?$?! $--/H/LM$
 

  <E3	 ZZO05eS0AB1$**""A&B 
 	
 C
s   +B
B 4B
 B
N)rc   rd   re   rf   r/   r+   staticmethodr   rj   rk   rx   r    r&   r%   r>   r>      sK    $,-KS 
 
 R R

r&   r>   c                  H    e Zd ZdZddgZd Zej                  d        Zd Z	y)rQ   zIncrease the partition countr   r   c                8    ddt        | j                        z   z  S )Nr!   r9   )sum_nsplitsr#   s    r%   r+   zRepartitionToMore._divisions   s    !c$--0011r&   c                    | j                   }t        | j                  |j                        \  }}|g|j                  z  }|dxx   |z  cc<   t	        |      |j                  k7  rt        d|j                         |S )Nr<   znsplits should have len=)r   divmodr   r1   rG   rS   )r$   rW   divmodnsplitss        r%   r   zRepartitionToMore._nsplits   sp    ZZ$--r~~>S%"..(sw<2>>)77GHIIr&   c                @   i }| j                   }| j                  }| j                  }d| }d}t        |      D ]b  \  }}|dk(  r|j                  |f|||f<   |dz  }$t        |j                  |f|f|||f<   t        |      D ]  }	t        ||f|	f|||f<   |dz  } d |S )Nsplit-r   r9   )r   r   r\   r   r   r{   r   )
r$   dskr   rW   new_name
split_namer   ru   kjjs
             r%   r   zRepartitionToMore._layer   s    --ZZ::hZ(
g& 	DAqAv$&HHa=HaK Q&2RXXqM1%EJM"( B(/*a"'EC!$FA	 
r&   N)
rc   rd   re   rf   r/   r+   rj   rk   r   r   r   r&   r%   rQ   rQ      s4    &,-K2  r&   rQ   c                  ,    e Zd ZdZg dZddiZd Zd Zy)rP   z!Repartition to specific divisions)r   r   r   r   Fc                    | j                   S r!   )r   r#   s    r%   r+   zRepartitionDivisions._divisions   s    !!!r&   c           
        | j                   j                  d      d   }| j                  j                  }| j                  }| j                  j                   }d|z   }| j                   }| j
                  }t        |      dk  rt        d      |r0|d   |d   k  rd}t        |      |d   |d   kD  r=d}t        |      |d   |d   k7  rd	}t        |      |d   |d   k7  rd
}t        |      d }	|d   g}
t               }|d   }d\  }}d} |	|      }|t        |      k  r|t        |      k  r||   ||   k  r-t        j                  ||dz
  f|||   df|||f<   ||   }|dz  }n||   ||   kD  r-t        j                  ||dz
  f|||   df|||f<   ||   }|dz  }nPt        j                  ||dz
  f|||   df|||f<   ||   }t        |      |dz   k(  s||   ||dz      k  r|dz  }|dz  }|
j                  |       |dz  }|t        |      k  r|t        |      k  r|d   |d   k  s|d   |d   k(  rct        |t        |            D ]J  }t        |      dz
  }t        j                  ||f|||   df|||f<   ||   }|
j                  |       |dz  }L nN|r8|t        |      k  r*t        j                  ||dz
  f||   ||   df|||f<   |dz  }|
j                  |d          |||dz
  f   d d dz   |||dz
  f<   d\  }} |	|
      }|t        |      k  rVg }|
|   ||   k  r$|j                  ||f       |dz  }|
|   ||   k  r$|rs|
|   |d   k(  rh|d   |d   k7  s|t        |      dz
  k(  rL||k  rG|j                  ||f       |dz  }|r-|
|   |d   k(  r"|d   |d   k7  s|t        |      dz
  k(  r||k  rGt        |      dk(  r&t        j                  |df|d   |d   df|||dz
  f<   ngt        |      dk(  r|d   |||dz
  f<   nK|s/t        dt        |      dt        |      dt        |
            t        j                  |f|||dz
  f<   |dz  }|t        |      k  rV|S )N-r<   repartition-split-   z+New division must be longer than 2 elementsr   zHleft side of the new division must be equal or smaller than old divisionzHright side of the new division must be equal or larger than old divisionz0left side of old and new divisions are differentz1right side of old and new divisions are differentc                8    t        |       dk\  xr | d   | d   k(  S )z0Whether last division only contains single labelr   r<   )rG   )r,   s    r%   _is_single_last_divz8RepartitionDivisions._layer.<locals>._is_single_last_div!  s!    q6Q;11R5AbE>1r&   )r9   r9   r9   Fr   )T)r   r9   z$check for duplicate partitions
old:
z

new:
z

combined:
)r\   splitr   rA   r   r   rG   rS   dictr   boundary_sliceappendr{   r   concat)r$   tokenabnameout1out2r   msgr   cdlowru   r   r   	last_elem_jmtmps                       r%   r   zRepartitionDivisions._layer   sF   

  %b)JJ  zz#e+zz

q6A:JKKtad{(  !o%uqu}(  !o%tqt|H o%u"~I o%	2 qTFFd1'*	 #a&jQQZtad{ !( 6 6q1usAaDRWX4)dQ1! ' 6 6q1usAaDRWX4)dQ ' 6 6q1usAaDRWX4)dq6QU?adQq1uXoFAQHHSMFA% #a&jQQZ* R51R5=AbEQrUNAs1v&  FQJ ' 6 6q	3"uU4)eQ QQZ**1q5MaDaD 4) QHHQrUO dAE]+CR07:4Q-1'*	#a&jCA$1+

D!9%Q A$1+ aDAbEMrUae^qCFQJE 

D!9%Q aDAbEMrUae^qCFQJE
 3x1} **1IaDaD$4Q-  SQ#&q64Q- $ #1:wqz71:? 
 %,NNC#84Q- FAE #a&jF r&   N)rc   rd   re   rf   r/   rg   r+   r   r   r&   r%   rP   rP      s    +5K% I"Hr&   rP   c                       e Zd ZddgZd Zd Zy)RepartitionFreqr   freqc                   t        | j                        }	 | j                  j                  d   j	                  |      }t        j                  t        j                  || j                  j                  d   |            }t        |      s4| j                  j                  d   | j                  j                  d   g}|S |j                  | j                  j                  d          |d   | j                  j                  d   k7  r| j                  j                  d   g|z   }|S # t
        $ r | j                  j                  d   }Y w xY w)Nr   r<   )r   r   r   )r   r   r   rA   ceilrS   r   rK   r?   
date_rangerG   r   )r$   r   r   rA   s       r%   r+   zRepartitionFreq._divisions  s   (3	,JJ((+006E NNMM4::+?+?+C$O
	 9~--a0$**2F2Fr2JKI
  TZZ11"56|tzz33A66!ZZ11!45	A	  	,JJ((+E	,s   (D& &"EEc                    t        | j                  j                  d   t        j                        st        d      t        | j                  | j                               S )Nr   z0Can only repartition on frequency for timeseries)r0   r   rA   r?   	Timestamp	TypeErrorrP   r+   r#   s    r%   rY   zRepartitionFreq._lower  sC    $**..q12<<@NOO#DJJ0ABBr&   N)rc   rd   re   r/   r+   rY   r   r&   r%   r   r     s    F#K$Cr&   r   c                       e Zd Zej                  d        Zej                  d        Zej                  d        Zej                  d        Zd Z	 fdZ
ddZ xZS )	rT   c                p    | j                  d      }t        |t              rt        |      }t	        |      S )Nr   )r)   r0   strr   r|   )r$   sizes     r%   _sizezRepartitionSize._size  s/    ||,-dC t$D4yr&   c                ,    t        | j                        S r!   )_get_mem_usagesr   r#   s    r%   
_mem_usagezRepartitionSize._mem_usage  s    tzz**r&   c                :    d| j                   | j                  z  z   S rz   )r   r   r#   s    r%   r   zRepartitionSize._nsplits  s    4??djj000r&   c           	        | j                   }| j                  }t        j                  |dkD        rCg }t	        ||      D ]  \  }}|j                  ||z  g|z          t        j                  |      }t        j                  || j                  k        sJ t        t        t        t        || j                                    }t        j                  |      }t        || j                   j"                        S rz   )r   r   rH   anyr   extendr?   r@   allr   rO   maprG   r   cumsumr}   r   r1   )r$   r   
mem_usagessplit_mem_usagesrX   usagenew_npartitionsr   s           r%   _partition_boundariesz%RepartitionSize._partition_boundaries  s    --__
66'A+!4 95 ''a89#34JvvjDJJ.///s3J

(KLM$&IIo$>!-%tzz'='=
 	
r&   c                     t        j                   j                  dkD        rdt         j                        z  S  fd j                  D        S )Nr9   r!   c              3  P   K   | ]  }j                   j                  |     y wr!   rr   rs   s     r%   rv   z-RepartitionSize._divisions.<locals>.<genexpr>  s      LA

$$Q'Lrw   )rH   r   r   rG   r   r#   s   `r%   r+   zRepartitionSize._divisions  s?    66$--!#$S!;!;<<<L1K1KLLr&   c                8    | j                    t        | 	         S r!   )r   r2   rY   )r$   r3   s    r%   rY   zRepartitionSize._lower  s    w~r&   c                &   | j                   }i }t        j                  | j                  dkD        rdt	        || j                         }d| j
                   dt	        |       }d}t        | j                        D ]b  \  }}|dk(  r|j                  |f|||f<   |dz  }$t        |j                  |f|f|||f<   t        |      D ]  }t        ||f|f|||f<   |dz  } d n| j                   j                  }|j                  t        t        | j                  | j                  dd              D 	
ci c]A  \  }\  }	}
| j                  |ft        j                  t        |	|
      D cg c]  }||f c}fC c}}
}	}       |S c c}w c c}}
}	}w )Nr9   r   r   r   r   )r   rH   r   r   r   r   r   r\   r   r{   r   updater   r   r   r   )r$   rW   r   r   r   r   ru   r   r   r   r   s              r%   r   zRepartitionSize._layer  s   ZZ "66$--!#$!(2t}}"=!>?J+DJJ<q"GHA!$--0 16(*!}C!$FA*61q)IC
A&#Ah ,3j!_b+IHaK(Q zz''H

 (122D4N4Nqr4RS( 
 $A|s	 QNN,1%,=>qh]>" 
	
 
 ?s   45F)F4FF)returnr   )rc   rd   re   rj   rk   r   r   r   r   r+   rY   r   rm   rn   s   @r%   rT   rT     sy      + + 1 1 
 
"M
 
r&   rT   c                    t        | t              st        |       } | d   dkD  r| j                  dd       | d   |k  r|| d<   | S )Nr   r<   )r0   rO   insert)r   frame_npartitionss     r%   r}   r}     sU    /6$()B$C! #a'!((A. $'88(9!"%$$r&   
   c                    | j                   t        v rt        | j                      S t        |       }|t        | j                   <   |S r!   )r\   mem_usages_lru_compute_mem_usages)r   results     r%   r   r      s:    {{n$ekk** 'F"(N5;;Mr&   c                P    ddl m}  |t        | d            j                         S )Nr   )new_collectionT)deep)$dask.dataframe.dask_expr._collectionr   r   compute)r   r   s     r%   r   r     s!    C/DABJJLLr&   )4
__future__r   rj   collections.abcr   operatorr   pprintr   typingr   numpyrH   pandasr?   pandas.api.typesr   r	   tlzr
   dask.dataframer   dask.dataframe.corer   r   r   dask.dataframe.dask_expr._exprr   r   r   r   $dask.dataframe.dask_expr._reductionsr   dask.dataframe.dask_expr._utilr   dask.dataframe.utilsr   dask.tokenizer   
dask.utilsr   r   r   r>   rQ   rP   r   rT   r}   r   r   r   r   r&   r%   <module>r      s    "  $      F  " P P  G . / " /I
$ I
X$
 $
N" "JQ; QhCk C6Lk L^% RMr&   