
    bi.7                    \   d dl mZ d dlZd dlmZ d dlZd dlZd dl	m
Z
 d dlmZ d dlmZ d dlmZmZmZ d dlmZmZmZmZmZmZmZmZmZ d d	lmZmZm Z  d d
l!m"Z" eZ#eZ$d7dZ%d7dZ& e"dd      d        Z'd7dZ(d8dZ)d Z*d Z+d Z,d Z-d Z.d Z/d Z0	 	 	 	 d9dZ1d Z2d Z3d Z4d Z5d Z6d Z7d Z8d7d Z9d:d!Z:	 d;d"Z;d# Z<d$ Z=d% Z>d& Z?d' Z@d<d(ZAd) ZBd* ZCd+ ZDd, ZEd- ZFd. ZGd/ ZHd0 ZId1 ZJd2 ZKd3 ZL eeJd45      ZM eeJd65      ZN eeKd45      ZO eeKd65      ZP eeLd45      ZQ eeLd65      ZRy)=    )annotationsN)partial)is_extension_array_dtype)PerformanceWarning)	partition)!check_apply_dataframe_deprecationcheck_convert_dtype_deprecationcheck_observed_deprecation)	concatconcat_dispatchgroup_split_dispatchhash_object_dispatchis_categorical_dtypeis_categorical_dtype_dispatchtolisttolist_dispatchunion_categoricals)is_dataframe_likeis_index_likeis_series_like)_deprecated_kwargc                F    || j                   |   S | j                   ||f   S )z"
    .loc for known divisions
    )locdfiindexercindexers      Q/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/dask/dataframe/methods.pyr   r   *   s-     vvhvvh())    c                (    | j                   d d |f   S N)iloc)r   r   s     r   r"   r"   4   s    771h;r   convert_dtypec                    t               5  t               5   | j                  |i |cd d d        cd d d        S # 1 sw Y   nxY w	 d d d        y # 1 sw Y   y xY wr!   )r	   r   apply)r   argskwargss      r   r%   r%   8   s_    	(	* -.0 	-288T,V,	- 	-- -	- 	- 	-- - -s   A;	AA	 AAc                    	 t        | ||      S # t        $ r% | j                  d      j                   dd|f   cY S w xY w)z$
    .loc for unknown divisions
    r   N)r   KeyErrorheadr   s      r   try_locr+   ?   s@    +2x** +wwqz~~ak**+s    +==c                   t        | j                        dk(  r| S | j                  j                  sU|'|r| | j                  |k\     } n| | j                  |kD     } |(|r| | j                  |k     } | S | | j                  |k     } | S | j                  || }|s-|+|j                  j	                  |d      }|j
                  d| }|s-|+|j                  j	                  |d      }|j
                  |d }|S )aY  Index slice start/stop. Can switch include/exclude boundaries.

    Examples
    --------
    >>> df = pd.DataFrame({'x': [10, 20, 30, 40, 50]}, index=[1, 2, 2, 3, 4])
    >>> boundary_slice(df, 2, None)
        x
    2  20
    2  30
    3  40
    4  50
    >>> boundary_slice(df, 1, 3)
        x
    1  10
    2  20
    2  30
    3  40
    >>> boundary_slice(df, 1, 3, right_boundary=False)
        x
    1  10
    2  20
    2  30

    Empty input DataFrames are returned

    >>> df_empty = pd.DataFrame()
    >>> boundary_slice(df_empty, 1, 3)
    Empty DataFrame
    Columns: []
    Index: []
    r   Nleftright)lenindexis_monotonic_increasingr   get_slice_boundr"   )r   startstopright_boundaryleft_boundaryresultright_index
left_indexs           r   boundary_slicer:   I   s   @ 288}	88++
 E)*5()D() 	 4(	VVE$Fd.ll224@\k*U.\\11%A
Z[)Mr   c                H    t        j                  |       j                         S r!   )pdnotnullsumxs    r   index_countrA      s    ::a=r   c                    	 t        j                  d      5  t        j                  d       | |z  cd d d        S # 1 sw Y   y xY w# t        $ r& t	        j
                  t        j                        cY S w xY w)NT)recordalways)warningscatch_warningssimplefilterZeroDivisionErrornpfloat64nan)sns     r   mean_aggregaterN      sb    "$$D1 	!!(+q5	 	 	  "zz"&&!!"s+   A <	A AA A ,A76A7c                t    t        | t        j                  t        f      rt	        j
                  | |      S | S Nr0   
isinstancerI   ndarraylistr<   Series)	array_varr0   s     r   wrap_var_reductionrX      s,    )bjj$/0yy%00r   c                t    t        | t        j                  t        f      rt	        j
                  | |      S | S rP   rR   )
array_skewr0   s     r   wrap_skew_reductionr[      s,    *rzz401yy511r   c                t    t        | t        j                  t        f      rt	        j
                  | |      S | S rP   rR   )array_kurtosisr0   s     r   wrap_kurtosis_reductionr^      s,    .2::t"45yyu55r   c                T    t        j                  | |g      }|j                  |      S rP   )r<   r   reindex)numeric_vartimedelta_varcolumnsvarss       r   var_mixed_concatre      s&    99k=12D<<g<&&r   c                    t        |       dkD  sJ g }t        d | D        t               }|D ]  }|D ]  }||vs|j                  |        ! t        j                  | dd      j                  |      S )Nr   c              3  4   K   | ]  }|j                     y wr!   rQ   ).0r@   s     r   	<genexpr>z%describe_aggregate.<locals>.<genexpr>   s     5QWW5s   )key   F)axissort)r/   sortedappendr<   r   r`   )valuesnamesvalues_indexesidxnamesnames        r   describe_aggregateru      s}    v;?? E5f53?N" # 	#D5 T"	##
 99V!%088??r   c                   xs dt        |       dk(  sJ | \  }}}}}	}
t        |      rt        |j                               }nt        |	      }|rt	        j
                  |      j                        }t	        j
                  |      j                        }t	        j
                  |      j                        }t	        j
                  |
      j                        }
|	j                  fd      }	|r`t	        j                  |      j                        }t	        j                  |
      j                        }
|	j                  fd      }	|r |||gddg      }n |||||gg d	      }t        |	j                        D cg c]  }|d
z  dd c}|	_
        t        |	      r|t        |	      k7  r|	j                         }	 ||
gdg      }t        ||	|gd      }t        |      r||_        |S c c}w )Nns   unitc                P    t        j                  |       j                        S Nry   )r<   to_timedeltaas_unitr@   rz   s    r   <lambda>z,describe_numeric_aggregate.<locals>.<lambda>   s    booad;CCDI r   c                P    t        j                  |       j                        S r|   )r<   to_datetimer~   r   s    r   r   z,describe_numeric_aggregate.<locals>.<lambda>   s    bnnQT:BB4H r   countminrQ   )r   meanstdr   d   g%maxF)rm   )r/   r   typeto_framer<   r}   r~   r%   r   r   r0   r   rt   )statsrt   is_timedelta_colis_datetime_colrz   r   r   r   r   qr   typpart1lpart3r7   s       `           r   describe_numeric_aggregater      s    <4Du:??$)!E4c1ce5>>#$1gt$/77=ooc-55d;ooc-55d;ooc-55d;GGIJnnSt,44T:nnSt,44T:GGHIUCL%(89UD#s+3RS(.qww81!c'!A8AGaSDG^JJLeW%EUAu%E2FfM 9s   H
c                H   t        |       }|dk(  }|dk(  }|s|sJ |r| \  }}}n| \  }}}}}	t        |      dk(  rmddg}
ddg}d }|
j                  t        j                  t        j                  g       |j                  ddg       t        }t        j                  |
|||      }|S |j                  d   }|j                  d   }g d	}||g}|r|j                  }t        j                  |      }|j                  ||j                  |      }n|j                  |      }t        j                  |
      }t        j                  	|
      }|j                  ddg       |j                  ||||g       n|j                  ||g       t        j                  |||      S )N      r   r   uniquetopfreq)r0   dtypert   )r   r   r   r   )tzfirstlast)r0   rt   )r/   extendrI   rK   objectr<   rV   r0   r"   r   	Timestamptzinfo
tz_converttz_localize)r   rt   args_lenis_datetime_columnis_categorical_columnnuniquer   top_freqmin_tsmax_tsdatar0   r   r7   r   r   rp   r   r   r   s                       r   describe_nonnumeric_aggregater      s   5zH!Q$M!666#( 380&& 8}1v(#RVVRVV$%eV_%4uEE
..
C==D.EuFVVll3::!bn..$C//"%CV+||Fr*gv&'sD%./sDk"99V5t44r   c                    ||S  | ||      S )zApply aggregation function within a cumulative aggregation

    Parameters
    ----------
    aggregate: function (a, a) -> a
        The aggregation function, like add, which is used to and subsequent
        results
    x:
    y:
     )	aggregater@   ys      r   _cum_aggregate_applyr     s     	yAr   c                    | |S || S | |z   S r!   r   r@   r   s     r   cumsum_aggregater   /      y	
1ur   c                    | |S || S | |z  S r!   r   r   s     r   cumprod_aggregater   8  r   r   c                    t        |       st        |       r5| j                  | |k  | j                         z  || j                  dz
        S | |k  r| S |S Nrk   rl   r   r   whereisnullndimr   s     r   cummin_aggregater   A  O    a-a0wwA+QQVVaZw@@Eq q r   c                    t        |       st        |       r5| j                  | |kD  | j                         z  || j                  dz
        S | |kD  r| S |S r   r   r   s     r   cummax_aggregater   H  r   r   c                   t        t        d|            }| j                  d      } t        j                         5  t        j
                  ddt               |j                         D ]
  \  }}|| |<    	 d d d        | S # 1 sw Y   | S xY w)N   F)deepignorez DataFrame is highly fragmented *)messagecategory)dictr   copyrE   rF   filterwarningsr   items)r   pairsrt   vals       r   assignr   O  s     1e$%E	e	B		 	 	" 6'	

  	ID#BtH	 I Is   :BBc                    | j                         }t        |      s"t        |      st        j                  ||      }|S )N)rt   )r   r   r   r<   rV   )r@   series_nameouts      r   r   r   `  s3    
((*C 3=#5ii+.Jr   c                    t               5   | j                  dddi|j                         cd d d        S # 1 sw Y   y xY w)Nlevelr   r   )r
   groupbyr>   )r@   rm   	ascendinggroupby_kwargss       r   value_counts_combiner   i  s<    	#	% :qyy3q3N3779: : :s	   "7A c                    t        | fi |}|r|||n|j                         z  }|r|j                  |      }|rd|_        |S )N)r   
proportion)r   r>   sort_valuesrt   )r@   total_lengthrm   r   	normalizer   r   s          r   value_counts_aggregater   o  sO     q
3N
3C|7|SWWYFoo	o2Jr   c                    | j                   S r!   )nbytesr?   s    r   r   r   |  s    88Or   c                    | j                   S r!   )sizer?   s    r   r   r     s    66Mr   c                ^    | j                   }t        |      r|j                  t              }|S r!   )rp   r   astyper   )r   rp   s     r   rp   rp     s(    YYF  'v&Mr   c                    t         j                  j                  |      }t        |       dkD  r| j	                  |||      S | S )Nr   )random_statefracreplace)rI   randomRandomStater/   sample)r   stater   r   rss        r   r   r     s;    			u	%BEHWq[299"49AXVXXr   c                l    | j                  |d      } | j                  j                  |      | _        | S r   )droprc   r   )r   rc   r   s      r   drop_columnsr     s/    	q	!B""5)BJIr   c                    |r t        | |             }n| j                         }|rC|j                         j                  j	                  d      j                         rt        d      |S )Nr   r   zAll NaN partition encountered in `fillna`. Try using ``df.repartition`` to increase the partition size, or specify `limit` in `fillna`.)getattrfillnar   rp   allany
ValueError)r   methodcheckr   s       r   fillna_checkr     sb    !gb&!#iik$$((a(04464
 	

 Jr   c                D    | j                  dd      j                         S Nr   F)r   observed)r   r>   r   s    r   	pivot_aggr     s    ::A:.2244r   c                D    | j                  dd      j                         S r   )r   r   r   s    r   pivot_agg_firstr    s    ::A:.4466r   c                D    | j                  dd      j                         S r   )r   r   r   s    r   pivot_agg_lastr    s    ::A:.3355r   c           	     :    t        j                  | |||ddd      S )Nr>   Fr0   rc   rp   aggfuncdropnar   r<   pivot_tabler   r0   rc   rp   s       r   	pivot_sumr    s(    >>
 r   c           	     t    t        j                  | |||ddd      j                  t        j                        S )Nr   Fr  )r<   r
  r   rI   rJ   r  s       r   pivot_countr    s:     >>
 fRZZr   c           	     :    t        j                  | |||ddd      S )Nr   Fr  r	  r  s       r   pivot_firstr    s(    >>
 r   c           	     :    t        j                  | |||ddd      S )Nr   Fr  r	  r  s       r   
pivot_lastr    s(    >>
 r   c                4    | j                         } || _        | S r!   )r   r0   )r   inds     r   assign_indexr    s    	BBHIr   c                    | j                   rd }n/t        |       r| n| j                  }t        | |      |d   |d   gg}t	        j
                  |g d      S )Nr   	monotonicr   r   )r   rc   )emptyr   r"   r   r<   	DataFrame)r@   propr   s      r   _monotonic_chunkr    sP    ww!!$q!&&D!47DH56<<T+IJJr   c                @   | j                   rd }nwt        j                  | ddg   j                         j	                               }| d   j                         xr t        ||      }||j                  d   |j                  d   gg}t        j                  |g d      S )Nr   r   r  r   r  r  )rc   )	r  r<   rV   to_numpyravelr   r   r"   r  )concatenatedr  r   rL   is_monotonics        r   _monotonic_combiner#    s    IIlGV#45>>@FFHI#K0446K71d;Kqvvay!&&*56<<&DEEr   c                    t        j                  | ddg   j                         j                               }| d   j	                         xr t        ||      S )Nr   r   r  )r<   rV   r  r   r   r   )r!  r  rL   s      r   _monotonic_aggregater%    sL    
		,01::<BBDEA$((*?wq$/??r   r1   )r  is_monotonic_decreasingr!   )TT)NFFrw   )TF)NTFF)T)S
__future__r   rE   	functoolsr   numpyrI   pandasr<   pandas.api.typesr   pandas.errorsr   tlzr   dask.dataframe._compatr   r	   r
   dask.dataframe.dispatchr   r   r   r   r   r   r   r   r   dask.dataframe.utilsr   r   r   
dask.utilsr   hash_dfgroup_splitr   r"   r%   r+   r:   rA   rN   rX   r[   r^   re   ru   r   r   r   r   r   r   r   r   r   r   r   r   r   rp   r   r   r   r   r  r  r  r  r  r  r  r  r#  r%  monotonic_increasing_chunkmonotonic_decreasing_chunkmonotonic_increasing_combinemonotonic_decreasing_combinemonotonic_increasing_aggregatemonotonic_decreasing_aggregater   r   r   <module>r:     s   "     5 ,  
 
 
 R Q ( "*  ?D)- *-+;|
"'@  
	,^.5b"!!": AF
Y
&576			KF@
 %%5<UV $%5<UV &6    '6   ")8"  ")8" r   