
    bi                   |   d dl mZ d dlZd dlZd dlZd dlmZ d dlZd dl	Z
d dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZmZmZmZ d d	lmZmZmZmZ d d
lmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+ d dl,m-Z-m.Z.m/Z/ d dl0m1Z1 d dl2m3Z3m4Z4m5Z5 d dl6m7Z7m8Z8m9Z9 d dl:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@mAZAmBZBmCZCmDZDmEZEmFZFmGZGmHZHmIZImJZJmKZKmLZLmMZMmNZNmOZOmPZPmQZQmRZRmSZSmTZTmUZUmVZVmWZWmXZXmYZYmZZZ d dl[m\Z\m]Z] d dl^m_Z_ d dl`maZambZbmcZc d Zdd Ze G d d      Zf G d d      Zg G d de.eg      Zh G d de-eg      Zi G d deieg      Zj G d d eieg      Zk G d! d"ek      Zl G d# d$ek      Zm G d% d&ek      Zn G d' d(ej      Zo G d) d*ej      Zp G d+ d,ej      Zq G d- d.ej      Zr G d/ d0ej      Zs G d1 d2ej      Zt G d3 d4ej      Zu G d5 d6ej      Zv G d7 d8ej      Zw G d9 d:ew      Zx G d; d<ej      Zy G d= d>ej      Zz G d? d@ej      Z{ G dA dBe{      Z| G dC dDe/eg      Z}ddEZ~ G dF dGe}      Z G dH dIe      ZdddJdKZddLZddMZ G dN dOe}      ZdP ZddQZ G dR dSej      Z G dT dUej      Z G dV dWe      Z G dX dYe!eg      Z G dZ d[e      Zd\ Z G d] d^e      Z G d_ d`e      Z G da dbe      Z G dc dde      ZdddedfZ G dg dhe eg      Z	 	 	 	 	 	 	 ddiZ G dj dke eg      Zdl Ze;ddfdmZe;ddfdnZe;ddfdoZdp Zdq Zdr Z G ds dte!eg      Z G du dve!eg      Z G dw dxe      Z G dy dze      Z G d{ d|e      Z G d} d~      Z G d de      Zy)    )annotationsN)Callable)reconstruct_funcvalidate_func_kwargs)is_dask_collection)Task)flatten)_concatapply_and_enforceis_dataframe_likeis_series_like)	FrameBaseIndexSeriesnew_collection)Assign	BlockwiseExprMapPartitions
ProjectionRenameFrameRenameSeriesToFrame	_DeepCopy_extract_metaare_co_aligneddetermine_column_projection
no_default)ApplyConcatApplyChunk	Reduction)RearrangeByColumn)PANDAS_GE_300_convert_to_listget_specified_shuffle)concat	make_metameta_nonempty) GROUP_KEYS_DEFAULT_agg_finalize_aggregate_docstring_apply_chunk_build_agg_args_cov_agg
_cov_chunk_cum_agg_aligned_cum_agg_filled_cumcount_aggregate_determine_levels_groupby_aggregate_groupby_aggregate_spec_groupby_apply_funcs_groupby_get_group_groupby_raise_unaligned_groupby_slice_apply_groupby_slice_shift_groupby_slice_transform_head_aggregate_head_chunk_non_agg_chunk_normalize_spec_nunique_df_chunk_nunique_df_combine_tail_aggregate_tail_chunk_unique_aggregate_value_counts_value_counts_aggregate_var_agg
_var_chunk)insert_meta_param_description	is_scalar)Key)Mderived_fromis_index_likec                    |i S | |iS N )keyvalues     \/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/dask/dataframe/dask_expr/_groupby.py_as_dictrU   Y   s     20S%L0    c                    t        |      dk(  rt        j                  | dz        S t        j                  | dt        |      dz
  z  z        S )N      
   )lenmathceil)npartitionsbys     rT    _adjust_split_out_for_group_keysr`   _   sB    
2w!|yyr)**99[B#b'A+$6788rV   c                      e Zd ZdZddZy)Aggregationa  User defined groupby-aggregation.

    This class allows users to define their own custom aggregation in terms of
    operations on Pandas dataframes in a map-reduce style. You need to specify
    what operation to do on each chunk of data, how to combine those chunks of
    data together, and then how to finalize the result.

    See :ref:`dataframe.groupby.aggregate` for more.

    Parameters
    ----------
    name : str
        the name of the aggregation. It should be unique, since intermediate
        result will be identified by this name.
    chunk : callable
        a function that will be called with the grouped column of each
        partition, takes a Pandas SeriesGroupBy in input.
        It can either return a single series or a tuple of series.
        The index has to be equal to the groups.
    agg : callable
        a function that will be called to aggregate the results of each chunk.
        Again the argument(s) will be a Pandas SeriesGroupBy. If ``chunk``
        returned a tuple, ``agg`` will be called with all of them as
        individual positional arguments.
    finalize : callable
        an optional finalizer that will be called with the results from the
        aggregation.

    Examples
    --------
    We could implement ``sum`` as follows:

    >>> custom_sum = dd.Aggregation(
    ...     name='custom_sum',
    ...     chunk=lambda s: s.sum(),
    ...     agg=lambda s0: s0.sum()
    ... )  # doctest: +SKIP
    >>> df.groupby('g').agg(custom_sum)  # doctest: +SKIP

    We can implement ``mean`` as follows:

    >>> custom_mean = dd.Aggregation(
    ...     name='custom_mean',
    ...     chunk=lambda s: (s.count(), s.sum()),
    ...     agg=lambda count, sum: (count.sum(), sum.sum()),
    ...     finalize=lambda count, sum: sum / count,
    ... )  # doctest: +SKIP
    >>> df.groupby('g').agg(custom_mean)  # doctest: +SKIP

    Though of course, both of these are built-in and so you don't need to
    implement them yourself.
    Nc                <    || _         || _        || _        || _        y rP   )chunkaggfinalize__name__)selfnamerd   re   rf   s        rT   __init__zAggregation.__init__   s    
 rV   rP   )rg   
__module____qualname____doc__rj   rQ   rV   rT   rb   rb   e   s    3jrV   rb   c                      e Zd Zej                  d        Zej                  d        Zed        Zej                  d        Z	ej                  d        Z
ed        Zy)GroupByBasec                    | j                   D cg c])  }t        |t              rt        |j                        n|+ c}S c c}w rP   )r_   
isinstancer   r(   _metarh   xs     rT   _by_metazGroupByBase._by_meta   s1    NRggV*Q*=agg&1DVVVs   .A c                b    | j                   D cg c]  }t        |t              r| c}S c c}w rP   )r_   rq   r   rs   s     rT   _by_columnszGroupByBase._by_columns   s#    77>a*Q*=>>>s   ,,c                    t        t        | j                  D cg c]!  }t        |t              s|gn|j
                  # c}t                     S c c}w )N)	container)listr	   r_   rq   r   columnsrs   s     rT   split_byzGroupByBase.split_by   sC    HLP1Jq$/!QYY>P
 	
Ps   &Ac                F    | j                   t        | j                        d  S rP   )operandsr[   _parametersrh   s    rT   r_   zGroupByBase.by   s    }}S!1!12455rV   c                ,    t        | j                        S rP   )r3   r_   r   s    rT   levelszGroupByBase.levels   s     ))rV   c                     yNTrQ   r   s    rT   shuffle_by_indexzGroupByBase.shuffle_by_index       rV   N)rg   rk   rl   	functoolscached_propertyru   rw   propertyr|   r_   r   r   rQ   rV   rT   ro   ro      s    W W ? ? 
 
 6 6 * *  rV   ro   c                  Z    e Zd ZdZej
                  dd       Zej
                  d        Zy)GroupByChunkTc                6    | j                   g| j                  z   S rP   framer_   r   s    rT   _argszGroupByChunk._args       

|dgg%%rV   c                    | j                   D cg c])  }t        |t              rt        |j                        n|+ }}t         | j                  |i | j                        S c c}w rP   )r   rq   r   r(   rr   r'   	operation_kwargs)rh   opargss      rT   rr   zGroupByChunk._meta   sa     OSjj
HJz"d';M"((#C
 
 >>??
s   .A$Nreturnrz   )rg   rk   rl   #_preserves_partitioning_informationr   r   r   rr   rQ   rV   rT   r   r      s<    *.'& & @ @rV   r   c                  v     e Zd ZeZej                  d        Zed        Z	e fd       Z
ed        Zd Z xZS )GroupByApplyConcatApplyc                    t        | j                  j                        } | j                  |g| j                  i | j
                  S rP   r(   r   rr   rd   ru   chunk_kwargsrh   metas     rT   _meta_chunkz#GroupByApplyConcatApply._meta_chunk   ;    TZZ--.tzz$DD$2C2CDDrV   c                    | j                   S rP   r_   r   s    rT   _chunk_cls_argsz'GroupByApplyConcatApply._chunk_cls_args       wwrV   c                <    | j                  d      yt        |   S N	split_outrX   operandsuperr   rh   	__class__s    rT   r   z!GroupByApplyConcatApply.split_out        <<$,w  rV   c                .    | j                   j                  S rP   )r   r{   r   s    rT   _projection_columnsz+GroupByApplyConcatApply._projection_columns   s    zz!!!rV   c                    | j                  d      6| j                  dt        j                  t        | j
                        i      S y Nr   r   r   substitute_parametersr   partialr`   r_   r   s    rT   
_tune_downz"GroupByApplyConcatApply._tune_down   G    <<$,--!2!28TWW"  -rV   )rg   rk   rl   r   
_chunk_clsr   r   r   r   r   r   r   r   __classcell__r   s   @rT   r   r      sc    JE E   ! !
 " "rV   r   c            
          e Zd ZU dZg dZdddddddddd	ZdZded<   dZded<   e	d	        Z
e	d
        Zedd       Zedd       Zd Zy)SingleAggregationa  Single groupby aggregation

    This is an abstract class. Sub-classes must implement
    the following methods:

    -   `groupby_chunk`: Applied to each group within
        the `chunk` method of `GroupByApplyConcatApply`
    -   `groupby_aggregate`: Applied to each group within
        the `aggregate` method of `GroupByApplyConcatApply`

    Parameters
    ----------
    frame: Expr
        Dataframe- or series-like expression to group.
    by: str, list or Series
        The key for grouping
    observed:
        Passed through to dataframe backend.
    dropna:
        Whether rows with NA values should be dropped.
    chunk_kwargs:
        Key-word arguments to pass to `groupby_chunk`.
    aggregate_kwargs:
        Key-word arguments to pass to `aggregate_chunk`.
    )
r   observeddropnar   aggregate_kwargs_slicesplit_everyr   sortshuffle_methodN   )	r   r   r   r   r   r   r   r   r   Callable | Nonegroupby_chunkgroupby_aggregatec                     t        |g|i |S rP   )r,   )clsdfr_   kwargss       rT   rd   zSingleAggregation.chunk,  s    B..v..rV   c                ,    t        t        |      fi |S rP   )r4   r
   r   inputsr   s      rT   	aggregatezSingleAggregation.aggregate0  s    !'&/<V<<rV   c                    | j                  d      xs i }| j                  }| j                  |dt        d| j                        t        d| j
                        |S )Nr   rd   r{   r   r   )r   r   r   rU   r   r   )rh   r   r{   s      rT   r   zSingleAggregation.chunk_kwargs4  sh    ||N39r++''
 z4==1
 x-	

 
 	
rV   c                    | j                  d      xs i }| j                  xs | j                  }|| j                  | j                  dt        d| j                        t        d| j                        |S )Nr   )aggfuncr   r   r   r   )r   r   r   r   r   rU   r   r   )rh   r   r   s      rT   r   z"SingleAggregation.aggregate_kwargs@  s}    <<(:;Ar 22Hd6H6H(kkII
 z4==1	

 x-
 
 	
rV   c                    t        | ||      S rP   groupby_projectionrh   parent
dependentss      rT   _simplify_upzSingleAggregation._simplify_upM      !$
;;rV   r   dict)rg   rk   rl   rm   r   	_defaultsr   __annotations__r   classmethodrd   r   r   r   r   r   rQ   rV   rT   r   r      s    4K  
I &*M?))--/ / = = 	
 	
 

 

<rV   r   c                  z    e Zd ZdZg dZddddddddZej                  d        Zej                  d        Z	d Z
y)	GroupbyAggregationBaseaW  Base class for groupby aggregation

    This class can be subclassed to perform a general
    groupby aggregation by passing in a `str`, `list` or
    `dict`-based specification using the `arg` operand.

    Parameters
    ----------
    frame: Expr
        Dataframe- or series-like expression to group.
    by: str, list or Series
        The key for grouping
    arg: str, list or dict
        Aggregation spec defining the specific aggregations
        to perform.
    observed:
        Passed through to dataframe backend.
    dropna:
        Whether rows with NA values should be dropped.
    )	r   argr   r   r   r   r   r   r   Nr   )r   r   r   r   r   r   r   c           
     "   t        | j                  j                        rs| j                  }| j                  r| j                  }t        |      r,|g}n(| j                  j                  D cg c]	  }||vs| }}t        | j                  |      }|S t        | j                  j                        rt        | j                  t        t        t        f      r9t        d | j                  ig       }|D cg c]  \  \  }}}}|||f }}}}}|S t        d | j                  ig       }|D cg c]"  \  }}}| j                  j                  d   ||f$ }}}}|S t        d| j                  j                         c c}w c c}}}}w c c}}}w )Nr   zaggregate on unknown object )r   r   rr   rw   r   rJ   r{   r?   r   r   rq   rz   tupler   
ValueError)	rh   group_columnsnon_group_columnscolspec_result_columnfuncinput_columns	            rT   r   zGroupbyAggregationBase.spec|  s    TZZ--. ,,M{{$(KK!./):(;% $(::#5#5%M9QC%! % #488->?D$ # DJJ,,-$((T5$$78&dhh'7< EI @+!]T< #D,7   'dhh'7< 48 /D, ZZ''*D,?   ;DJJ<L<L;MNOO'%s   +	E=5E=6F
.'F
c                Z    g d}t        t        |t        | j                                    S )N)chunk_funcsaggregate_funcs
finalizers)r   zipr-   r   )rh   keyss     rT   agg_argszGroupbyAggregationBase.agg_args  s"    ?Codii89::rV   c                   t        | j                  t              sy t        | j                        j                  | j                  j                               j                  | j                  j                        }| j                  j                  D cg c]	  }||v s| }}|| j                  j                  k7  r, t        |       | j                  |   g| j                  dd   S y c c}w )NrX   )rq   r   r   setrw   unionr   intersectionr   r{   typer~   )rh   required_columnscolumncolumn_projections       rT   _simplify_downz%GroupbyAggregationBase._simplify_down  s    $((D)   !U488==?#\$**,,- 	 "&!3!3
vAQ7QF
 
 

 2 224:djj):;PdmmAB>OPP 3
s   	C)C))rg   rk   rl   rm   r   r   r   r   r   r   r   rQ   rV   rT   r   r   Q  sg    *
K I  B ; ;QrV   r   c                  ^    e Zd ZdZej
                  d        Zej
                  d        Zd Zy)GroupbyAggregationzLogical groupby aggregation class

    This class lowers itself to concrete implementations for decomposable
    or holistic aggregations.
    c                6    | j                         j                  S rP   )_lowerrr   r   s    rT   rr   zGroupbyAggregation._meta  s    {{}"""rV   c                <    t        d | j                  D               S )Nc              3  J   K   | ]  }|d    dt         j                  fv   yw)rX   medianN)npr  ).0ss     rT   	<genexpr>z6GroupbyAggregation._is_decomposable.<locals>.<genexpr>  s"     Hqt"))44Hs   !#)anyr   r   s    rT   _is_decomposablez#GroupbyAggregation._is_decomposable  s    HdiiHHHHrV   c                   | j                   rt        nt        } || j                  | j                  | j
                  | j                  | j                  | j                  | j                  | j                  | j                  g	| j                   S rP   )r  DecomposableGroupbyAggregationHolisticGroupbyAggregationr   r   r   r   r   r   r   r   r   r_   )rh   r   s     rT   r  zGroupbyAggregation._lower  sz     $$ ++ 	
 JJHHMMKKNNIIKK
 WW
 	
rV   N)	rg   rk   rl   rm   r   r   rr   r  r  rQ   rV   rT   r   r     sB     # # I I
rV   r   c                      e Zd ZdZej
                  d        Z ee      Z	e
d        Zed        Ze
dd       Ze
dd       Zy)	r  a  Groupby aggregation for both decomposable and non-decomposable aggregates

    This class always calculates the aggregates by first collecting all the data for
    the groups and then aggregating at once.

    We are always shuffling, so we will never call combine
    c                ~    | j                   }| j                  xs d }| j                  } ||gfi |}t        |      S )Nc                    | S rP   rQ   )rt   s    rT   <lambda>z2HolisticGroupbyAggregation._meta.<locals>.<lambda>  s     rV   )r   r   r   r'   )rh   r   r   r   s       rT   rr   z HolisticGroupbyAggregation._meta  sC    NN3{	00$4#34rV   c                     yr   rQ   r   s    rT   should_shufflez)HolisticGroupbyAggregation.should_shuffle  r   rV   c                ,    t        t        |      fi |S rP   )r5   r
   r   s      rT   r   z$HolisticGroupbyAggregation.aggregate  s    &wvA&AArV   c                    | j                   | j                  j                  D cg c]  }|| j                   vs| c}dt        d| j                        t        d| j
                        S c c}w )N)r_   rR   r   r   )rw   r   r{   rU   r   r   )rh   r   s     rT   r   z'HolisticGroupbyAggregation.chunk_kwargs  si     ""#'::#5#5UCDDTDT9TCU
 z4==1
 x-	
 	
Us
   A-A-c                    | j                   t        | j                        dt        d| j                        t        d| j
                        S )N)r   r   r   r   )r   r3   r_   rU   r   r   r   s    rT   r   z+HolisticGroupbyAggregation.aggregate_kwargs  sK     HH'0
 z4==1
 x-	
 	
rV   Nr   )rg   rk   rl   rm   r   r   rr   staticmethodr>   rd   r   r  r   r   r   r   rQ   rV   rT   r  r    s}       (E  B B 
 
 
 
rV   r  c                  v    e Zd ZdZ ee      Zed        Zed        Z	e
dd       Ze
dd       Ze
dd       Zy)	r  zvGroupby aggregation for decomposable aggregates

    The results may be calculated via tree or shuffle reduction.
    c                ,    t        t        |      fi |S rP   )r6   r
   r   s      rT   combinez&DecomposableGroupbyAggregation.combine  s    #GFO>v>>rV   c                ,    t        t        |      fi |S rP   )r*   r
   r   s      rT   r   z(DecomposableGroupbyAggregation.aggregate  s    WV_777rV   c                    | j                   d   | j                  dt        d| j                        t        d| j                        S )Nr   )funcsr   r   r   )r   r   rU   r   r   r   s    rT   r   z+DecomposableGroupbyAggregation.chunk_kwargs  sK     ]]=1II
 z4==1
 x-	
 	
rV   c                    | j                   d   | j                  | j                  dt        d| j                        t        d| j
                        S )Nr   )r  levelr   r   r   )r   r   r   rU   r   r   r   s    rT   combine_kwargsz-DecomposableGroupbyAggregation.combine_kwargs  sS     ]]#45[[II
 z4==1	

 x-
 	
rV   c                4   | j                   d   | j                  | j                  | j                   d   | j                  j                  j
                  dk(  | j                  | j                  dt        d| j                        t        d| j                        S )Nr   r   rX   )r   r   r{   finalize_funcs	is_seriesr   r   r   r   )r   r   r   r   rr   ndimr   r   rU   r   r   r   s    rT   r   z/DecomposableGroupbyAggregation.aggregate_kwargs(  s      $}}->?88{{"mmL9))..!3[[II

 z4==1

 x-

 
	
rV   Nr   )rg   rk   rl   rm   r  r6   rd   r   r  r   r   r   r!  r   rQ   rV   rT   r  r    sv    
 -.E? ? 8 8 
 
 
 
 
 
rV   r  c                  $    e Zd Zej                  Zy)SumN)rg   rk   rl   rL   sumr   rQ   rV   rT   r'  r'  7      EEMrV   r'  c                  $    e Zd Zej                  Zy)ProdN)rg   rk   rl   rL   prodr   rQ   rV   rT   r+  r+  ;      FFMrV   r+  c                  $    e Zd Zej                  Zy)MinN)rg   rk   rl   rL   minr   rQ   rV   rT   r/  r/  ?  r)  rV   r/  c                  $    e Zd Zej                  Zy)MaxN)rg   rk   rl   rL   maxr   rQ   rV   rT   r2  r2  C  r)  rV   r2  c                  $    e Zd Zej                  Zy)FirstN)rg   rk   rl   rL   firstr   rQ   rV   rT   r5  r5  G  s    GGMrV   r5  c                  $    e Zd Zej                  Zy)LastN)rg   rk   rl   rL   lastr   rQ   rV   rT   r8  r8  K  r-  rV   r8  c                  <    e Zd Zej                  Zej                  Zy)CountN)rg   rk   rl   rL   countr   r(  r   rQ   rV   rT   r;  r;  O  s    GGMrV   r;  c                  B    e Zd Zej                  Zej                  Zd Zy)Sizec                .   | j                   t        | j                   t              r| j                  j                  dk(  ry | j
                  }|D cg c]  }|| j                  j                  v s| }}t        |      t        | j                  j                        k(  ry | j                  j                  d      }t        | j                        D cg c]  \  }}||k7  r|nd  }}} t        |       | j                  |   g|dd   S c c}w c c}}w )NrX   r   )r   rq   rz   r   r%  rw   r{   r   r   index	enumerater~   r   )rh   
by_columnsc	slice_idxir   opss          rT   r   zSize._simplify_downX  s    KK#t{{D1zz!#  %%
!+GAqDJJ4F4F/FaG
Gz?c$**"4"455$$**84	;DT]];ST%!RQ)^r-TTtDz$**Z0;3qr7;; H
 Us   D/DDN)	rg   rk   rl   rL   sizer   r(  r   r   rQ   rV   rT   r>  r>  T  s    FFM<rV   r>  c                  <    e Zd Zej                  Zej                  Zy)IdxMinN)rg   rk   rl   rL   idxminr   r6  r   rQ   rV   rT   rI  rI  m      HHMrV   rI  c                  <    e Zd Zej                  Zej                  Zy)IdxMaxN)rg   rk   rl   rL   idxmaxr   r6  r   rQ   rV   rT   rM  rM  r  rK  rV   rM  c                  ,    e Zd Z ee      Z ee      Zy)ValueCountsN)rg   rk   rl   r  rE   r   rF   r   rQ   rV   rT   rP  rP  w  s     /M$%<=rV   rP  c                  f     e Zd Zej                  Z ee      Ze	j                  d fd       Z xZS )Uniquec                    t         |   }| j                  j                  }|j                  dk(  r|j
                  }n|| j                     j
                  }i |d|iS )NrX   ri   )r   r   r   rr   r%  ri   r   )rh   r   r   ri   r   s       rT   r   zUnique.aggregate_kwargs  sV    )zz99>99D$))D'&'&$''rV   r   )rg   rk   rl   rL   uniquer   r  rD   r   r   r   r   r   r   s   @rT   rR  rR  |  s0    HHM$%67( (rV   rR  c                  v    e Zd Z ee      ZdZed        Zed        Z	e
dd       Ze
dd       Ze
dd       Zy)	CovFc                    t        |      S rP   r
   )r   gr   s      rT   r  zCov.combine  s    qzrV   c                ,    t        t        |      fi |S rP   )r.   r
   r   s      rT   r   zCov.aggregate  s    2622rV   c                $    | j                  d      S )Nr   r   r   s    rT   r   zCov.chunk_kwargs  s    ||N++rV   c                    | j                  d      j                         }| j                  |d<   | j                  |d<   | j                  |d<   |S )Nr   r   stdr   )r   copyr   r^  r   rh   r   s     rT   r   zCov.aggregate_kwargs  sG    01668vu;;xrV   c                    d| j                   iS Nr   r   r   s    rT   r!  zCov.combine_kwargs      $++&&rV   Nr   )rg   rk   rl   r  r/   rd   r^  r   r  r   r   r   r   r!  rQ   rV   rT   rV  rV    ss    $E
C  3 3 , ,   ' 'rV   rV  c                      e Zd ZdZy)CorrTN)rg   rk   rl   r^  rQ   rV   rT   rf  rf    s    
CrV   rf  c                       e Zd ZeZd Ze fd       Zed        Ze	j                  d        Zd Zd Ze	j                  d        Ze	j                  d        Ze	j                  d	        Z xZS )
GroupByReductionc                    | j                  d      6| j                  dt        j                  t        | j
                        i      S y r   r   r   s    rT   r   zGroupByReduction._tune_down  r   rV   c                <    | j                  d      yt        |   S r   r   r   s    rT   r   zGroupByReduction.split_out  r   rV   c                    | j                   S rP   r   r   s    rT   r   z GroupByReduction._chunk_cls_args  r   rV   c                    t        | j                  j                        } | j                  |g| j                  i | j
                  S rP   r   r   s     rT   r   zGroupByReduction._meta_chunk  r   rV   c                x    | j                   ry| j                  }|du r| j                  j                  }d|dz   z  S )NNNTrP   rX   )r   r   r   r^   )rh   r   s     rT   
_divisionszGroupByReduction._divisions  s;    99NN	

..I)a-((rV   c                    t        | ||      S rP   r   r   s      rT   r   zGroupByReduction._simplify_up  r   rV   c                J    | j                   | j                  | j                  dS )Nr   r   r   rr  r   s    rT   r!  zGroupByReduction.combine_kwargs  s    ++4==DKKXXrV   c                4    | j                   | j                  dS )Nr   r   rt  r   s    rT   r   zGroupByReduction.chunk_kwargs  s     MMT[[AArV   c                `    | j                   | j                  | j                  | j                  dS )Nr   r   r   r   rv  r   s    rT   r   z!GroupByReduction.aggregate_kwargs  s*     kkIIkk	
 	
rV   )rg   rk   rl   r   r   r   r   r   r   r   r   r   ro  r   r!  r   r   r   r   s   @rT   rh  rh    s    J ! !
   E E)< Y Y B B 
 
rV   rh  c                H    | j                  ||||      j                         S Nr   r   r   r   groupbyr(  rY  r   r   r   r   s        rT   _var_combiner}    "    996x9OSSUUrV   c                       e Zd Zg dZdddddddZ ee      Z ee      Z	 ee
      Zej                   fd       Zej                   fd       Z xZS )Var)	r   ddofnumeric_onlyr   r   r   r   r   r   rX   N)r   r   r   r   r   r   c                J    | j                   | j                  dt        |   S )N)r  r  )r  r  r   r   r   s    rT   r   zVar.aggregate_kwargs  s.     II --
 g&
 	
rV   c                4    d| j                   it        |   S Nr  )r  r   r   r   s    rT   r   zVar.chunk_kwargs	  s     1 1JUW5IJJrV   )rg   rk   rl   r   r   r  rG   reduction_aggregater}  reduction_combinerH   rd   r   r   r   r   r   r   s   @rT   r  r    s{    
K I 'x0$\2$E
 
 K KrV   r  c                      e Zd Zd Zy)Stdc                x    t        | j                   }t        |t        j                  |j
                  ddd      S )NT)r   r   enforce_metadatatransform_divisionsclear_divisions)r  r~   r   r  sqrtrr   )rh   vs     rT   r  z
Std._lower  s7    ! $ 
 	
rV   N)rg   rk   rl   r  rQ   rV   rT   r  r    s    	
rV   r  rt  c                   t        |       r| j                         } t        | |||      }|j                  d      }||j                     j                         j                  d       }t        ||gd      S )N)r_   r   r   Tr  c                    | dz   S )Nz-countrQ   )rC  s    rT   r  z_mean_chunk.<locals>.<lambda>!  s
    a(l rV   )r{   rX   )axis)r   to_framer8   r(  r{   r<  renamer&   )r   r   r   r_   rY  rt   ns          rT   _mean_chunkr    sk    b[[] XfMA	4 A	!))##,B#CA1a&q!!rV   c                H    | j                  ||||      j                         S rx  rz  r|  s        rT   _mean_combiner  %  r~  rV   c                   | j                  ||||      j                         }||j                  d t        |j                        dz      }||j                  t        |j                        dz  d     }|j                  |_        ||z  S )Nry     )r{  r(  r{   r[   )rY  r   r   r   r   resultr  rC  s           rT   	_mean_aggr  )  s}    YYV$&YQUUWFv~~8FNN 3q 89:Av~~c&..1Q689:A		AIq5LrV   c                  l    e Zd Zej                  Zej
                  Z ee      Z ee	      Z
 ee      Zy)MeanN)rg   rk   rl   r   r   r   r  r  r  r  r  r  rd   rQ   rV   rT   r  r  1  s8    #//K!++I&y1$]3%ErV   r  c                2    t        t        |       g|i |S rP   )rA   r&   )dfsr   r   s      rT   nunique_df_combiner  9  s    vc{<T<V<<rV   c                    t        |       }|j                  dk(  r"|j                  ||d      j                         S |j                  ||d      |   j                         S )NrX   T)r   r   r   )r&   r%  r{  nunique)r  r   ri   r   r   s        rT   nunique_df_aggregater  =  sW    	B	ww!|zzTDzAIIKKzzTDzA$GOOQQrV   c                       e Zd Z ee      Z ee      Zed        Ze	j                  d fd       Ze	j                  dd       Ze	j                  d        Z xZS )NUniquec                    | j                   dk(  r3| j                         } t        | j                  d   t	        |            }t        | g|i |S )NrX   r   )ri   r   )r%  r  r   r{   r3   r@   )r   r_   r   s      rT   rd   zNUnique.chunkJ  sG    77a<Brzz!}5Fr5JKF 3b3F33rV   c                :    t         |   }| j                  |d<   |S )Nri   )r   r   r   )rh   r   r   s     rT   r   zNUnique.chunk_kwargsQ  s    %vrV   c                4    | j                   | j                  dS )N)r   ri   )r   r   r   s    rT   r   zNUnique.aggregate_kwargsW  s    ++t{{;;rV   c                    d| j                   iS rb  rc  r   s    rT   r!  zNUnique.combine_kwargs[  rd  rV   r   )rg   rk   rl   r  r  r   r  r  rd   r   r   r   r   r!  r   r   s   @rT   r  r  F  sy    12I-.G4 4  
 < < ' 'rV   r  c                  <    e Zd Z ee      Z ee      Zed        Z	y)Headc                    t        |      S rP   rX  r   s      rT   r  zHead.combined  s    vrV   N)
rg   rk   rl   r  r=   r   r<   r   r   r  rQ   rV   rT   r  r  `  s)     -M$_5 rV   r  c                  ,    e Zd Z ee      Z ee      Zy)TailN)rg   rk   rl   r  rC   r   rB   r   rQ   rV   rT   r  r  i  s     -M$_5rV   r  c                      e Zd Zg dZddddddZej                  d        Zej                  d        Zd Z	ddZ
ej                  d	        Zej                  d
        Zd Zy)GroupByApply)
r   r   r   r   
group_keysr   r   r   r   r   NT)r   r   r   r  r   c                L    t        j                  t        | j                        S N)r   )r   r   groupby_slice_applyr   r   s    rT   grp_funczGroupByApply.grp_func  s      !4499EErV   c                    | j                  d      t        ur0t        | j                  d      | j                  j                        S t        | | j                        S Nr   )parent_meta)r   r   r'   r   rr   _meta_apply_transformr  r   s    rT   rr   zGroupByApply._meta  sD    <<z1T\\&1tzz?O?OPP$T4==99rV   c                ~    | j                   rd| j                  j                  dz   z  S | j                  j                  S )NrP   rX   )need_to_shuffler   r^   	divisionsr   s    rT   ro  zGroupByApply._divisions  s5    djj44q899zz###rV   c                    | j                   S rP   r  rh   shuffleds     rT   _shuffle_grp_funczGroupByApply._shuffle_grp_func      }}rV   c                    | j                   s| j                  j                  S t        d | j                  D              st        | j                        hS t               S )Nc              3  <   K   | ]  }t        |t                y wrP   rq   r   r  bs     rT   r	  zMGroupByApply.unique_partition_mapping_columns_from_shuffle.<locals>.<genexpr>  s     :QZ4(:   )r  r   -unique_partition_mapping_columns_from_shuffler
  r_   r   rw   r   r   s    rT   r  z:GroupByApply.unique_partition_mapping_columns_from_shuffle  sH    ##::KKK:$''::$**+,,5LrV   c                    t        d  j                  D              s)t         fd j                  j                  D              ryt        d  j                  j                  D              xs t         fd j                  D               S )Nc              3  <   K   | ]  }t        |t                y wrP   r  r  s     rT   r	  z/GroupByApply.need_to_shuffle.<locals>.<genexpr>       81:a&8r  c              3  `   K   | ]%  }t        j                        t        |      k\   ' y wrP   )r   rw   )r  colsrh   s     rT   r	  z/GroupByApply.need_to_shuffle.<locals>.<genexpr>  s-       D$$%T2s   +.Fc              3  $   K   | ]  }|d u  
 y wrP   rQ   )r  divs     rT   r	  z/GroupByApply.need_to_shuffle.<locals>.<genexpr>  s     ?33$;?s   c              3     K   | ]8  }t        j                  j                  j                  j                  |       : y wrP   )_contains_index_namer   rr   r@  ri   )r  r  rh   s     rT   r	  z/GroupByApply.need_to_shuffle.<locals>.<genexpr>  s6      K
EF !1!1!7!7!<!<a@K
s   >A)r
  r_   r   r  r  r   s   `rT   r  zGroupByApply.need_to_shuffle  s|    888  JJTT  ?$***>*>?? 
s K
JN''K
 H
 D
 	
rV   c                   | j                   }| j                  }| j                  rCd }t        d | j                  D              r|j                  dk(  }|rt        |      }g g }}t        | j                        D ]@  \  }}t        |t              s|j                  d| |g       |j                  d|        B t        |      rt        |g| } ||      \  }	}
|	rt        ||	      }t        ||D cg c]  }|	j                  ||       c}|j                   | j"                        }|
rt        ||
      }t        | j                        D cg c]T  \  }}t        |t              s|n<t%        t'        t)        |d|       | j                  |   j*                  d               V }}}|j*                  D cg c]	  }||vs| }}|r|d   }t)        ||      }nt ||      \  }	}
|	rt        ||	      }t        ||	j                  | j                  d   | j                  d         | j                   | j"                        }|
rt        ||
      }| j-                  d      }n| j-                  d	      }t/        || j0                  | j2                  | j4                  | j6                  | j9                  d
      | j9                  d      || j9                  d      g	| S c c}w c c}}w c c}w )Nc                    | j                   D ci c]  }|t        |      k7  s|t        |       }}|j                         D ci c]  \  }}||
 }}}||fS c c}w c c}}w rP   )r{   stritems)r   r   map_columnskr  unmap_columnss         rT   get_map_columnsz,GroupByApply._lower.<locals>.get_map_columns  sd    8:

VcSQTXosCH}VV2=2C2C2E F$!QA F F"M11 W Fs   AAAc              3  <   K   | ]  }t        |t                y wrP   r  r  s     rT   r	  z&GroupByApply._lower.<locals>.<genexpr>  r  r  rX   _by_methodr   )r@  TFr   r   r   )r   r_   r  r
  r%  r   rA  rq   r   extendappendr[   r   r   r"   getr^   r   r   r   r   r{   r  GroupByUDFBlockwiser   r  r   r   r   )rh   r   r_   r  r$  r  assign_exprsrE  r  r  r  rC  r   r  s                 rT   r  zGroupByApply._lower  s   ZZWW2 888GGqL	 B%'l%dgg. 0DAq!!T*$++tA3ZO<d1#J/0 |$2\2B-<R-@*]$R5B&489q[__Q*9NN..	 !$R7B& !*$'' 2 1  *!T2 &( *2aSz :$''!*BTBTUVBW  (*zzES_EE7DD)-<R-@*]$R5B&OODGGAJ
;$$..	 !$R7B--d3H--e4H"KKOOMMKKLL LL"LL 
 
 	
c : Fs   8K#
AK(?	K.	K.F)rg   rk   rl   r   r   r   r   r  rr   ro  r  r  r  r  rQ   rV   rT   r  r  n  s    K I F F : :
$
   

 

[
rV   r  c                  0    e Zd Zej                  d        Zy)GroupByTransformc                L    t        j                  t        | j                        S r  )r   r   groupby_slice_transformr   r   s    rT   r  zGroupByTransform.grp_func
  s      !8tyyIIrV   N)rg   rk   rl   r   r   r  rQ   rV   rT   r  r  	  s    J JrV   r  c               &     t        | |      di |S )NrQ   )getattr)groupwhatr   s      rT   _fillnar    s    75$)&))rV   c                  F    e Zd Z e ej
                  ed            Zd Zy)GroupByBFillbfillr  c                >    t        |t              rt        | ||      S y rP   rq   r   r   r   s      rT   r   zGroupByBFill._simplify_up       fj)%dFJ?? *rV   N)	rg   rk   rl   r  r   r   r  r   r   rQ   rV   rT   r  r    s"    )	))'@AD@rV   r  c                  @    e Zd Z e ej
                  ed            Zy)GroupByFFillffillr  N)rg   rk   rl   r  r   r   r  r   rQ   rV   rT   r  r    s    )	))'@ADrV   r  c                  H    e Zd ZddddddZej
                  d        ZddZy)GroupByShiftNT)r   r   r   r   r  c                8    t        j                  t        d      S )NFr  r   r   groupby_slice_shiftr   s    rT   r  zGroupByShift.grp_func(  s      !4uEErV   c                8    t        j                  t        |      S )Nr  r  r  s     rT   r  zGroupByShift._shuffle_grp_func,  s      !4xHHrV   r  )rg   rk   rl   r   r   r   r  r  rQ   rV   rT   r  r    s:    I F FIrV   r  c                      e Zd Zej                  dgz   Zi ej                  ddiZej                  d        Z
ddZd Zej                  d        Zy)Medianr   Nc                4    t        j                  t              S rP   )r   r   _median_groupby_aggregater   s    rT   r  zMedian.grp_func4  s      !:;;rV   c                    | j                   S rP   r  r  s     rT   r  zMedian._shuffle_grp_func8  r  rV   c                >    t        |t              rt        | ||      S y rP   r  r   s      rT   r   zMedian._simplify_up;  r  rV   c                h    | j                   j                  }| j                  || j                  z  }|S rP   )r   r^   r   )rh   r^   s     rT   r^   zMedian.npartitions?  s3    jj,,'%)9)99KrV   r  )rg   rk   rl   r  r   r  r   defaultr   r   r  r  r   r^   rQ   rV   rT   r  r  0  sh    **m_<K=''==G< <@  rV   r  get_keyr{   c               X    t         rt        |      r|f}t        | t        |      ||      S rP   )r#   rJ   r7   rz   )r   r
  r{   by_keys       rT   groupby_get_groupr  G  s(    7+*b$v,AArV   c                  P    e Zd Z ee      Zg dZddgZedd       Z	edd       Z
y)	GetGroup)r   r
  r{   r
  r{   c                6    | j                   g| j                  z   S rP   r   r   s    rT   r   zGetGroup._argsR  r   rV   c                v    | j                  d      }| j                  ||dS | j                  j                  dS )Nr{   r	  )r   r
  r   r{   rh   r  s     rT   r   zGetGroup._kwargsV  s@    ||I&||#/t
 	
59ZZ5G5G
 	
rV   Nr   r   )rg   rk   rl   r  r  r   r   _keyword_onlyr   r   r   rQ   rV   rT   r  r  M  sB    ./I1K	*M& & 
 
rV   r  c                    |d|ini }|d|ini } | j                   dd|i||}	||	|   }	|	j                  |      S )Nr   r   r_   r  rQ   )r{  r  )
r   r_   rR   r  r   r   r  r   r   rY  s
             rT   r  r  _  sb     $*#5h2F)1)=
H%2H

/b/H//A
cF888..rV   c                      e Zd Zg dZdddZg dZed
d       Zej                  d        Z
ddZedddddddd	       Zy)r  )	r   r   r  r   r   r   r   	dask_funcr   Nrt  )r   r  r   r   r   r   r  c                6    | j                   g| j                  z   S rP   r   r   s    rT   r   zGroupByUDFBlockwise._args  r   rV   c                    | j                  d      t        ur0t        | j                  d      | j                  j                        S t        | | j                        S r  )r   r   r'   r   rr   r  r  r   s    rT   rr   zGroupByUDFBlockwise._meta  sD    <<z1T\\&1tzz?O?OPP$T4>>::rV   c                
   | j                   D cg c]  }| j                  ||       }}| j                  j                         }|j	                  | j
                  | j                  d       t        |t        g|i |S c c}w )N)_funcrr   )	r   _blockwise_argr   r_  updater   rr   r   r   )rh   ri   r@  r   r   r   s         rT   _taskzGroupByUDFBlockwise._task  sw    9=D2##B.DD""$	
 D+=d=f== Es   B )r  r   r   r   r   r  c               p    |d}|i } || t        |      f|||dt        d|      t        d|      |S )NrQ   )rR   r  r   r   r   )rz   rU   )	r   r   r  r   r   r   r   r  r_   s	            rT   r   zGroupByUDFBlockwise.operation  sk     <D>FH	
 !	
 z8,	
 x(	
 	
 		
rV   r   )ri   rK   r@  intr   r   )rg   rk   rl   r   r   r  r   r   r   r   rr   r  r  r   rQ   rV   rT   r  r  s  sw    
K "T2IM & & ; ;
	> 
 
 
rV   r  c                L    | yt        |t              ryt        |      sy| |k(  S )NF)rq   r   rJ   )
index_namer_   s     rT   r  r    s-    "dR=rV   c                .    t        | |||g||||d|S N)r  r   r   )r9   	r   grouperrR   r   r   r  r   r   r   s	            rT   r  r    sA      
	

 

 
 
 
rV   c           	     &    t        | ||||||fi |S rP   )r:   )	r   r%  rR   r   r  r  r   r   r   s	            rT   r  r    s(      
GS(JDJ rV   c                .    t        | |||g||||d|S r#  )r;   r$  s	            rT   r  r    sA     $
	

 

 
 
 
rV   c           
        | j                  d      }| j                  }|D cg c]  }t        |      r|n
t        |       }}t	        | j                  d      |fd      \  }}t         |t        | j                  j                        |f| j                  |dt        d| j                        t        d| j                        t        d| j                        |      S c c}w )	Nr   r   T)nonempty)rR   r   r   r   r  )r   ru   rJ   r(   r   r'   r   rr   r   rU   r   r   r  )objr  r   by_metart   	meta_argsmeta_kwargss          rT   r  r    s    [["FllG@GH1IaLqmA&66HGH*CKK,?+HSWXI{#))//*		
 

			

 z3<<0		
 x,		
 |S^^4		
 		
  Is   C#c                   t        |t              rt        | ||| j                        }t	        |      }| j
                  j                  D cg c]	  }||v s| }}|| j
                  j                  k(  ry  t        |       t        |       | j
                  |   g| j                  dd   g|j                  dd   S y c c}w )N)additional_columnsrX   )	rq   r   r   rw   r$   r   r{   r   r~   )exprr   r   r{   r   s        rT   r   r     s    &*%-&*9I9I
 #7+"&**"4"4G3w3GGdjj(((tF|DJtzz'*?T]]12->?
__QR 
 	
  Hs   	CCc                R   t        |t              rY|j                  | j                  v rA| j                  dk(  r2|j
                  | |j                     j
                  k(  r|j                  S t        |t              r/|j
                  | j                  j
                  k(  r|j                  S t        |t        t        f      rht        | j                  |j                        st        d      t        |t              r!|j                         }| j                  |_        |j                  S |S )Nr  zCby must be in the DataFrames columns or aligned with the DataFrame.)rq   r   ri   r{   r%  _namer   r@  r0  r   NotImplementedError	to_series)r*  r_   s     rT   _clean_by_exprr5  0  s    2vGGs{{"HHMHHBGG***ww	B	288syy#>ww	B	(chh0%U  b% ByyBHww IrV   c                  x    e Zd ZU g dZddddZdZdZded<   dZe	j                  d        Zd Zed	        Zd
 Zy)GroupByCumulative)r   r   r   r  N)r  r   r   r   r   r   c                    | j                   d n| j                   }t        | j                  j                  g| j                  | j
                  |d| j                  S )Nr   )r   r,   r   rr   ru   rd   r  r  s     rT   rr   zGroupByCumulative._metaO  s]    {{*tJJ
]]
 **	

 
 	
rV   c                .    | j                   j                  S rP   r   r  r   s    rT   ro  zGroupByCumulative._divisionsZ      zz###rV   c                4    | j                  d      }|i S d|iS r  r\  )rh   nos     rT   r  zGroupByCumulative.numeric_only]  s$    \\.)Zr9nb%99rV   c                   | j                   }| j                  i nd| j                  i}t        |      r|j                  n|j                  }t        | j                  t        |ddddd d d | j                  |d|| j                  t        | j                        g| j                   }|}|j                  dk(  rt        |      }| j                  j                         }t        |      D ]x  \  }}t!        |t"              r|| j                  j                  v rt%        |d| | j                  |         }n$t%        |d| | j                  j&                        }d| ||<   z |dn|}t        |t        t(        ddddd d d t*        j,                  |d|t        |      g| }	t/        |||	|| j0                  | j2                  |g| S )Nr   TFr   rX   r  r   )rr   r   r   ri   r{   r   r   r,   rd   r  r[   r_   r%  r   r_  rA  rq   r   r   r@  r   rL   r9  GroupByCumulativeFinalizerr   initial)
rh   r   r   r{   r   cum_rawr_   rE  r  cum_lasts
             rT   r  zGroupByCumulative._lowerb  s   zz{{*4;;0G-d3$))JJjjWTT$BSBSTL
 WW
 ::?ENEWW\\^bM 	#DAqa&

***"5D*djjmDE"5D*djj6F6FGEqc
1	# !G ff;F;G
 
 *NNLL	
 	
 		
rV   )rg   rk   rl   r   r   rd   r   r   r@  r   r   rr   ro  r   r  r  rQ   rV   rT   r7  r7  H  s]    ?K!%FIE!%I%G
 
$ : :<
rV   r7  c                  F    e Zd Zg dZej
                  d        Zd ZddZy)r?  )r   rA  rB  r   r   r@  r{   c                    | j                   S rP   )r   r   s    rT   rr   z GroupByCumulativeFinalizer._meta  s    yyrV   c                .    | j                   j                  S rP   r:  r   s    rT   ro  z%GroupByCumulativeFinalizer._divisions  r;  rV   c                L   | j                   df| j                  j                   dfi}d| j                   z   }t        d| j                  j                        D ]  }|dk(  r!| j
                  j                   |dz
  f|||f<   nBt        ||dz
  f| j
                  j                   |dz
  f| j                  | j                  f|||f<   t        | j                  j                   |f||f| j                  | j                  d      | j                  | j                  f|| j                   |f<    |S )Nr   zcum-lastrX   r{   )r2  rA  ranger   r^   rB  r1   r   r@  r0   r_   r   )rh   dskname_cumrE  s       rT   _layerz!GroupByCumulativeFinalizer._layer  s   

A!3!3Q 78

*q$**001 	AAv&*mm&9&91q5%AXqM" $q1u%]]((!a%0NNLL&XqM" !!!1%1Y'$CQ 	, 
rV   Nr   )	rg   rk   rl   r   r   r   rr   ro  rJ  rQ   rV   rT   r?  r?    s-    K  $rV   r?  c                  @    e Zd Zej                  Zej                  ZdZy)GroupByCumsumr   N)	rg   rk   rl   rL   cumsumrd   addr   r@  rQ   rV   rT   rL  rL    s    HHEIGrV   rL  c                  @    e Zd Zej                  Zej                  ZdZy)GroupByCumprodrX   N)	rg   rk   rl   rL   cumprodrd   mulr   r@  rQ   rV   rT   rP  rP    s    IIEIGrV   rP  c                  8    e Zd Zej                  Z ee      ZdZ	y)GroupByCumcountN)
rg   rk   rl   rL   cumcountrd   r  r2   r   r@  rQ   rV   rT   rT  rT    s    JJE01IGrV   rT  c                  	   e Zd ZdZ	 	 	 	 	 d5dZd Z	 	 	 	 	 d6dZd Zd Zd Z	d	 Z
 eej                  j                  j                  d
      d        Z eej                  j                  j                        d        Z eej                  j                  j                        d7d       Z eej                  j                  j                        d7d       Zd8dZ eej                  j                  j                        d9d       Z eej                  j                  j                        d9d       Z eej                  j                  j                        d        Zd Z eej                  j                  j                        d7d       Zd Z eej                  j                  j                        d9d       Z eej                  j                  j                        d9d       Z eej                  j                  j                        d7d       Z eej<                        	 	 	 	 	 d:d       Z eej<                        	 d;d       Z  eej                  j                  j                        d7d       Z! eej                  j                  j                        d<d       Z" eej                  j                  j                        d<d       Z# eej                  j                  j                        d         Z$ eej<                        	 	 	 	 	 d=d!       Z% eej<                        	 	 	 	 	 d=d"       Z& eej                  j                  jN                        d>d#       Z( eej                  j                  jN                        d>d$       Z) eej                  j                  j                        	 	 	 	 	 d:d%       Z* eej                  j                  j                        	 	 	 	 	 d:d&       Z+ e,d'(      	 d?d)       Z-d* Z.d@d+Z/ e0d,-      e1dd.d/       Z2e1dfd0Z3 e0d,-      e1dfd1       Z4 e0d,-      de1dfd2       Z5 eej                  j                  j                        	 dAd3       Z6dBd4Z7y)CGroupByzCollection container for groupby aggregations

    The purpose of this class is to expose an API similar
    to Pandas' `Groupby` for dask-expr collections.

    See Also
    --------
    SingleAggregation
    Nc                J   t        |t        t        f      r|D cg c]  }t        ||       }}nt        ||      }t        |t        t        f      r|n|g}	t	        d |	D              rt        d      || _        d }
t        j                  |      s<t        |t        t        t        f      s!t        |      st        |      rkt        |      s`t        |	      j                  t        j                  |      st        |t              r|hn|      }
|j                  D cg c]	  }||
v s| }
}|
||
   n|| _        || _        ||n	t$        sdnd| _        || _        || _        t        j                  |      st        |t,        t.        f      r|gn
t        |      | _         | j                   j2                  j4                  |f||dt7        d|      t7        d|      | _        |0t        |t              rt        |      }| j2                  |   | _        y y c c}w c c}w )Nc              3  P   K   | ]  }t        |t        j                           y wrP   )rq   pdGrouper)r  rR   s     rT   r	  z#GroupBy.__init__.<locals>.<genexpr>  s     :sz#rzz*:s   $&z.pd.Grouper is currently not supported by Dask.FT)r  r   r   r   )rq   r   rz   r5  r
  r3  r   r  isscalarr  rN   r   r   r   r   r{   r*  r   r#   r   r   r  r   r   r_   rr   r{  rU   )rh   r*  r_   r  r   r   r   slicert   by_
projectionrC  s               rT   rj   zGroupBy.__init__  s    b5$-(245Q.a(5B5R(BrE4=1bt:c::%&VWW
KK%#tU!34u%)>*51 SKK.*UC2HuJ &)[[DAO!DJD&0&<3z?#	 ,H}%RV 	 $KKOz"tX6F'GRDTRTX 	 ,TXX^^++
!
 z8,	

 x(

 %'UE*DJ S 6* Es   H	H )H c                L    d|i}|j                         |j                         dS )Nr  r   r   )r_  )rh   r  r   s      rT   _numeric_only_kwargszGroupBy._numeric_only_kwargs&  s#     ,/ &6;;=QQrV   c                    |d}t         || j                  j                  | j                  | j                  ||| j
                  ||| j                  t        |      g
| j                         S )Nr   )	r   r*  r0  r   r   r   r   r%   r_   )rh   expr_clsr   r   r   r   r   s          rT   _single_aggzGroupBy._single_agg*  sm     K 		%n5 
 	
rV   c                H    	 | |   S # t         $ r}t        |      |d }~ww xY wrP   )KeyErrorAttributeError)rh   rR   es      rT   __getattr__zGroupBy.__getattr__E  s.    	+9 	+ #*	+s    	!!c                    t        t        t        t        |             t	        | j
                        z   t	        t        t        j                  | j                  j                              z               S rP   )sortedr   dirr   rz   __dict__filterrL   isidentifierr*  r{   r   s    rT   __dir__zGroupBy.__dir__K  sV    DJt}}%&vanndhh.>.>?@A
 	
rV   c                    t        d      )NzDataFrameGroupBy does not allow compute method.Please chain it with an aggregation method (like ``.mean()``) or get a specific group using ``.get_group()`` before calling ``compute()``r3  r`  s     rT   computezGroupBy.computeT  s    !Q
 	
rV   c           	     T   t        |      rNt        | j                  | j                  | j                  || j
                  | j                  | j                        S t        | j                  | j                  || j
                  | j                  | j                  | j                        }|S )N)r_   r  r^  r   r   r   )r_   r^  r   r   r   r  )	rJ   SeriesGroupByr*  r_   r  r   r   r   rX  )rh   rR   rY  s      rT   __getitem__zGroupBy.__getitem__[  s    S> 77??YY{{  HHww;;]]
 rV   zHIf the group is not present, Dask will return an empty Series/DataFrame.)inconsistenciesc                    t        t        | j                  j                  || j                  g| j
                         S rP   )r   r  r*  r0  r   r_   )rh   rR   s     rT   	get_groupzGroupBy.get_groupq  s-    
 htxx}}c4;;QQRRrV   c                0     | j                   t        fi |S rP   )rf  r;  r`  s     rT   r<  zGroupBy.countx  s    t000rV   c                    | j                  |      } | j                  t        fi ||}|r2|j                  | j	                         |k\  t
        j                        S |S N)other)rc  rf  r'  wherer<  r  nanrh   r  	min_countr   numeric_kwargsr  s         rT   r(  zGroupBy.sum|  sX    22<@!!!#BB>B<<

	 9<HHrV   c                    | j                  |      } | j                  t        fi ||}|r2|j                  | j	                         |k\  t
        j                        S |S r~  )rc  rf  r+  r  r<  r  r  r  s         rT   r,  zGroupBy.prod  sX    22<@!!!$C&CNC<<

	 9<HHrV   c                    t         || j                  j                  | j                  | j                  |g| j
                         S rP   )r   r*  r0  r   r   r_   )rh   r   r  s      rT   _cum_aggzGroupBy._cum_agg  sA    	
 
 	
rV   c                .    | j                  t        |      S rP   )r  rL  rh   r  s     rT   rM  zGroupBy.cumsum  s    }}]L99rV   c                .    | j                  t        |      S rP   )r  rP  r  s     rT   rQ  zGroupBy.cumprod  s    }}^\::rV   c                ,    | j                  t              S rP   )r  rT  r   s    rT   rV  zGroupBy.cumcount  s    }}_--rV   c                    | j                   j                  j                         }| j                  j                         j                  }t        t        |      t        |j                        z
        dk(  S )z3Are all columns that we're not grouping on numeric?r   )r*  rr   _get_numeric_datar<  r{   r[   r   )rh   numericspost_group_columnss      rT   _all_numericzGroupBy._all_numeric  sW    88>>335!ZZ--/773)*S1A1A-BBCqHHrV   c                    |s| j                         st        d      | j                  |      } | j                  t        fd|i||}| j                  |      S )N0'numeric_only=False' is not implemented in Dask.r   )r  r3  rc  rf  r  _postprocess_series_squeeze)rh   r  r   r   r  r  s         rT   meanzGroupBy.mean  sd    D$5$5$7%B  22<@!!!$X)XvXX//77rV   c                    t        | j                  t              s!t        | j                        rA| j                  5t        |j                        dk  rt        d      ||j                  d      }|S )NrX   zCannot call `SeriesGroupBy.var` or `SeriesGroupBy.mean` on the key column. Please use `aggregate` if you really need to do this.r   )rq   r*  r   rJ   r   r[   r{   r3  )rh   r  s     rT   r  z#GroupBy._postprocess_series_squeeze  sd    txx(%'6>>"Q&)T  FNN1-.FrV   c                V    | j                  |      } | j                  t        fi ||S rP   )rc  rf  r/  rh   r  r   r  s       rT   r0  zGroupBy.min  /    22<@t@v@@@rV   c                V    | j                  |      } | j                  t        fi ||S rP   )rc  rf  r2  r  s       rT   r3  zGroupBy.max  r  rV   c                n    |r
t               | j                  |      } | j                  t        fi ||S rP   )r3  rc  rf  r5  rh   r  r   r   r  s        rT   r6  zGroupBy.first  s;    %''22<@tBB>BBrV   rX   c                b    | j                  |      }| j                  t        |||d   d|i      S )Nr   r  rb  )rc  rf  rV  )rh   r  r   r   r  r   r  s          rT   covzGroupBy.cov  sD     22<@'7$d^   
 	
rV   c                b    | j                  |      }| j                  t        |||d   ddi      S )Nr   r  rX   rb  )rc  rf  rf  )rh   r   r   r  r   r  s         rT   corrzGroupBy.corr  sD     22<@'7$a[   
 	
rV   c                n    |r
t               | j                  |      } | j                  t        fi ||S rP   )r3  rc  rf  r8  r  s        rT   r9  zGroupBy.last  s;    %''22<@tAA.AArV   c                4    | j                  t        d ||      S N)limitr   )_transform_like_opr  rh   r  r   s      rT   r  zGroupBy.ffill  #    &&$eN ' 
 	
rV   c                4    | j                  t        d ||      S r  )r  r  r  s      rT   r  zGroupBy.bfill  r  rV   c                0     | j                   t        fi |S rP   )rf  r>  r`  s     rT   rG  zGroupBy.size  s    t///rV   c                j    | j                  |      }||d   d<    | j                  t        f|||d|S Nr   skipna)r   r   r   )rc  rf  rI  rh   r   r   r  r  r   r  s          rT   rJ  zGroupBy.idxmin	  S     22<@39~&x0t
#)	

 
 	
rV   c                j    | j                  |      }||d   d<    | j                  t        f|||d|S r  )rc  rf  rM  r  s          rT   rN  zGroupBy.idxmax  r  rV   c                    d|i}|t        | j                  t              st        | j                        ndd}| j	                  t
        ||||      S Nr  rX   )r  index_levels)r   r   r   r   )rq   r_   r   r[   rf  r  rh   r  r   r   r   r   s         rT   headzGroupBy.head/  Y    Qx0:477D0ICLq
 #%-   
 	
rV   c                    d|i}|t        | j                  t              st        | j                        ndd}| j	                  t
        ||||      S r  )rq   r_   r   r[   rf  r  r  s         rT   tailzGroupBy.tail>  r  rV   c                   |s| j                         st        d      t        t        | j                  j
                  ||||| j                  | j                  | j                  |g	| j                         }| j                  |      S Nr  )r  r3  r   r  r*  r0  r   r   r   r_   r  rh   r  r   r   r  r   r  s          rT   varzGroupBy.varM       D$5$5$7%B   		 
 //77rV   c                   |s| j                         st        d      t        t        | j                  j
                  ||||| j                  | j                  | j                  |g	| j                         }| j                  |      S r  )r  r3  r   r  r*  r0  r   r   r   r_   r  r  s          rT   r^  zGroupBy.stdj  r  rV   z$pd.core.groupby.DataFrameGroupBy.agg)based_onc                   d\  }}}|Bt        | t              st        |fi |\  }}}}n t        | t              rt        |      \  }}d}|dk(  r| j	                         S t        t        | j                  j                  || j                  | j                  ||| j                  || j                  g	| j                         }	|r|	||	j                  d d |f   }	||	_        |	S )N)NNNTrG  )rq   rw  r   r   rG  r   r   r*  r0  r   r   r   r   r_   ilocr{   )
rh   r   r   r   r   r   
relabelingorderr{   r  s
             rT   r   zGroupBy.aggregate  s     &6"
E7;dM22B32Q&2Q/
C%D-03F;!
&=99;		 
 &, QX.$FNrV   c                &     | j                   |i |S rP   )r   rh   r   r   s      rT   re   zGroupBy.agg  s    t~~t.v..rV   c                \    |t         u r$d| d| d| d}t        j                  |d       y y )Nzs`meta` is not specified, inferred from partial data.
Please provide `meta` if the result is unexpected.
  Before: .z(func)
  After:  .zD(func, meta={'x': 'f8', 'y': 'f8'}) for dataframe result
  or:     .z6(func, meta=('x', 'f8'))            for series result
   )
stacklevel)r   warningswarn)rh   r   r  msgs       rT   _warn_if_no_metazGroupBy._warn_if_no_meta  sI    :8 8 8 	C MM#!, rV      )pad)r   r   c                   | j                  |       t        t        | j                  j                  | j
                  | j                  | j                  | j                  ||||t        |      g
| j                         S )a  Parallel version of pandas GroupBy.apply

        This mimics the pandas version except for the following:

        1.  If the grouper does not align with the index then this causes a full
            shuffle.  The order of rows within each group may not be preserved.
        2.  Dask's GroupBy.apply is not appropriate for aggregations. For custom
            aggregations, use :class:`dask.dataframe.groupby.Aggregation`.

        .. warning::

           Pandas' groupby-apply can be used to to apply arbitrary functions,
           including aggregations that result in one row per group. Dask's
           groupby-apply will apply ``func`` once on each group, doing a shuffle
           if needed, such that each group is contained in one partition.
           When ``func`` is a reduction, e.g., you'll end up with one row
           per group. To apply a custom aggregation with Dask,
           use :class:`dask.dataframe.groupby.Aggregation`.

        Parameters
        ----------
        func: function
            Function to apply
        args, kwargs : Scalar, Delayed or object
            Arguments and keywords to pass to the function.
        $META

        Returns
        -------
        applied : Series or DataFrame depending on columns keyword
        )r  r   r  r*  r0  r   r   r   r  r%   r_   rh   r   r   r   r   r   s         rT   applyzGroupBy.apply  sq    B 	d#%n5 
 	
rV   c                    t         || j                  j                  | j                  | j                  | j
                  | j                  ||||t        |      g
| j                         S rP   )	r   r*  r0  r   r   r   r  r%   r_   )rh   re  r   r   r   r   r   s          rT   r  zGroupBy._transform_like_op  sb     %n5 
 	
rV   c                b    | j                  |d        | j                  t        |||g|i |S )a	  Parallel version of pandas GroupBy.transform

        This mimics the pandas version except for the following:

        1.  If the grouper does not align with the index then this causes a full
            shuffle.  The order of rows within each group may not be preserved.
        2.  Dask's GroupBy.transform is not appropriate for aggregations. For custom
            aggregations, use :class:`dask.dataframe.groupby.Aggregation`.

        .. warning::

           Pandas' groupby-transform can be used to apply arbitrary functions,
           including aggregations that result in one row per group. Dask's
           groupby-transform will apply ``func`` once on each group, doing a shuffle
           if needed, such that each group is contained in one partition.
           When ``func`` is a reduction, e.g., you'll end up with one row
           per group. To apply a custom aggregation with Dask,
           use :class:`dask.dataframe.groupby.Aggregation`.

        Parameters
        ----------
        func: function
            Function to apply
        args, kwargs : Scalar, Delayed or object
            Arguments and keywords to pass to the function.
        $META

        Returns
        -------
        applied : Series or DataFrame depending on columns keyword
        	transformr  )r  r  r  r  s         rT   r  zGroupBy.transform  sF    B 	d;7&t&&dD.
;?
CI
 	
rV   c                    d|v rt        d      | j                  |d       d|i|} | j                  t        d||g|i |S )a  Parallel version of pandas GroupBy.shift

        This mimics the pandas version except for the following:

        If the grouper does not align with the index then this causes a full
        shuffle.  The order of rows within each group may not be preserved.

        Parameters
        ----------
        periods : Delayed, Scalar or int, default 1
            Number of periods to shift.
        freq : Delayed, Scalar or str, optional
            Frequency string.
        fill_value : Scalar, Delayed or object, optional
            The scalar value to use for newly introduced missing values.
        $META

        Returns
        -------
        shifted : Series or DataFrame shifted within each group.

        Examples
        --------
        >>> import dask
        >>> ddf = dask.datasets.timeseries(freq="1h")
        >>> result = ddf.groupby("name").shift(1, meta={"id": int, "x": float, "y": float})
        r  zaxis is not supported in shift.shiftr  periodsN)	TypeErrorr  r  r  )rh   r  r   r   r   r   s         rT   r  zGroupBy.shift"  sh    : V=>>d73W//&t&&$n
7;
?E
 	
rV   c                   t        t        | j                  j                  | j                  | j
                  | j                  | j                  d t        dd|it        |      |g| j                         }|dur|j                  |      }|S )NrQ   r  T)r^   )r   r  r*  r0  r   r   r   r  r   r%   r_   repartition)rh   r   r   r   r  r  s         rT   r  zGroupBy.medianG  s      .%n5 
  D ''I'>FrV   c                    ddl m}  || j                  ||||| j                  | j                  | j
                  | j                  | j                  d| j                        S )a  Provides rolling transformations.

        .. note::

            Since MultiIndexes are not well supported in Dask, this method returns a
            dataframe with the same index as the original data. The groupby column is
            not added as the first level of the index like pandas does.

            This method works differently from other groupby methods. It does a groupby
            on each partition (plus some overlap). This means that the output has the
            same shape and number of partitions as the original.

        Parameters
        ----------
        window : str, offset
           Size of the moving window. This is the number of observations used
           for calculating the statistic. Data must have a ``DatetimeIndex``
        min_periods : int, default None
            Minimum number of observations in window required to have a value
            (otherwise result is NA).
        center : boolean, default False
            Set the labels at the center of the window.
        win_type : string, default None
            Provide a window type. The recognized window types are identical
            to pandas.
        axis : int, default 0

        Returns
        -------
        a Rolling object on which to call a method to compute a statistic

        Examples
        --------
        >>> import dask
        >>> ddf = dask.datasets.timeseries(freq="1h")
        >>> result = ddf.groupby("name").x.rolling('1D').max()
        r   )Rolling)r_   r   r   r   r  )min_periodscenterwin_typegroupby_kwargsgroupby_slice)	!dask.dataframe.dask_expr._rollingr  r*  r_   r   r   r   r  r   )rh   windowr  r  r  r  r  s          rT   rollingzGroupBy.rolling_  sY    L 	>HH#gg		 MM++"oo ++
 	
rV   TNNNN)NNNNN)FNrP   r  )rX   NNFN)NNFNrn  )NNTFN)   NN)Nr   NN)r  )NTNF)NFNr   )8rg   rk   rl   rm   rj   rc  rf  rk  rr  ru  rx  rM   r[  corer{  rX  r{  r<  r(  r,  r  rM  rQ  rV  r  r  r  r0  r3  r6  	DataFramer  r  r9  r  r  rG  rJ  rN  rw  r  r  r  r^  r+   r   re   r  rI   r   r  r  r  r  r  r  rQ   rV   rT   rX  rX    s    7+rR 
6+

, 
bS	S "''//))*1 +1 "''//))* + "''//))* +	
 "''//))*: +: "''//))*; +; "''//))*. +.I "''//))*8 +8 "''//))*A +A "''//))*A +A "''//))*C +C ",, 
  
" ",,SW

  

 "''//))*B +B "''//))*
 +

 "''//))*
 +

 "''//))*0 +0 ",, 
  
$ ",, 
  
$ "''////0
 1
 "''////0
 1
 "''//))* 8 +88 "''//))* 8 +88 #IJFJ  K D/- #r*&0 /
 +/
d $.d
& #r*#-d #
 +#
J #r*Jt "
 +"
H "''//))*RW +.6
rV   rX  c                  f    e Zd Z	 	 	 	 	 d
 fd	Z eej                  j                  j                        d        Z	 eej                  j                  j                        d        Z
	 	 	 	 ddZ	 	 	 	 ddZ eej                  j                  j                        dd       Zd Zd Zd	 Z xZS )rw  c           	        t        |t              rVt        |t              r3 |j                  j                  |j                  fi t        d|       nt        |t        t        f      ret        d |D              rS|D cg c]   }t        |t              r|j                  n|" }	} |j                  j                  |	fi t        d|       nt        |t              rat        |      dk(  rt        d      |D 
cg c]  }
t        |
t              r|
 }}
 |j                  j                  |fi t        d|       n' |j                  j                  |fi t        d|       t        | 1  |||||||       y c c}w c c}
w )Nr   c              3  <   K   | ]  }t        |t                y wrP   )rq   r   )r  rt   s     rT   r	  z)SeriesGroupBy.__init__.<locals>.<genexpr>  s      7-.
1i(7r  r   zNo group keys passed!)r_   r  r^  r   r   r   )rq   r   r   rr   r{  rU   rz   r   r
  r[   r   r   rj   )rh   r*  r_   r  r   r   r   r^  rt   metasitemnon_series_itemsr   s               rT   rj   zSeriesGroupBy.__init__  s[    c6""i(!		!!"((Mhz8.LMBu.3 7247 4 NPPJq)$<!CPP!		!!%J8J+IJB%r7a<$%<==57#XTz$PV?WD#X #X!		!!"2Uhz86TU!		!!"GX(FG! 	 	
 Q $Ys   %F >FFc                0     | j                   t        fi |S rP   )rf  rP  r`  s     rT   value_countszSeriesGroupBy.value_counts  s    t6v66rV   c                0     | j                   t        fi |S rP   )rf  rR  r`  s     rT   rT  zSeriesGroupBy.unique  s    t1&11rV   c                H    | j                  t        d |t        |            S N)r  )r   r   r   )rf  rI  r   rh   r   r   r  r  r   s         rT   rJ  zSeriesGroupBy.idxmin  s-     V,	   
 	
rV   c                H    | j                  t        ||t        |            S r  )rf  rM  r   r  s         rT   rN  zSeriesGroupBy.idxmax  s-     #V,	   
 	
rV   c                   | j                   xs | j                  j                  }t        t	        | j                  j
                  | j                  | j                  dd|||| j                  t        |      g
| j                         S )a9  
        Examples
        --------
        >>> import pandas as pd
        >>> import dask.dataframe as dd
        >>> d = {'col1': [1, 2, 3, 4], 'col2': [5, 6, 7, 8]}
        >>> df = pd.DataFrame(data=d)
        >>> ddf = dd.from_pandas(df, 2)
        >>> ddf.groupby(['col1']).col2.nunique().compute()
        N)r   r*  ri   r   r  r0  r   r   r   r%   r_   )rh   r   r   r   r^  s        rT   r  zSeriesGroupBy.nunique  st     ,txx}}		%n5 
 	
rV   c                    t        d      Nz1cov is not implemented for SeriesGroupBy objects.rt  r  s      rT   r  zSeriesGroupBy.cov	      !"UVVrV   c                    t        d      r  rt  r  s      rT   r  zSeriesGroupBy.corr	  r  rV   c                     yr   rQ   r   s    rT   r  zSeriesGroupBy._all_numeric
	  s    rV   r  )NNTF)NTN)rg   rk   rl   rj   rM   r[  r  r{  rw  r  rT  rJ  rN  r  r  r  r  r   r   s   @rT   rw  rw    s    
 $
L "''////07 17 "''////02 12
 
$ 
  "''////0
 1
8WWrV   rw  )FFT)FNNr  )NNTNNFN)
__future__r   r   r\   r  collections.abcr   numpyr  pandasr[  pandas.core.applyr   r   daskr   dask._task_specr   	dask.corer	   dask.dataframe.corer
   r   r   r   $dask.dataframe.dask_expr._collectionr   r   r   r   dask.dataframe.dask_expr._exprr   r   r   r   r   r   r   r   r   r   r   r   r   $dask.dataframe.dask_expr._reductionsr   r    r!   !dask.dataframe.dask_expr._shuffler"   dask.dataframe.dask_expr._utilr#   r$   r%   dask.dataframe.dispatchr&   r'   r(   dask.dataframe.groupbyr)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   rH   dask.dataframe.utilsrI   rJ   dask.typingrK   
dask.utilsrL   rM   rN   rU   r`   rb   ro   r   r   r   r   r   r  r  r'  r+  r/  r2  r5  r8  r;  r>  rI  rM  rP  rR  rV  rf  rh  r}  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r5  r7  r?  rL  rP  rT  rX  rw  rQ   rV   rT   <module>r     s   "    $   D #         T S ? 
 E D! ! ! ! ! ! ! ! !D J  5 519: :D >@5+ @. BX</ X<v`Q4k `QF 
/  
F+
!7 +
\/
%; /
d
  
 
    
< <2   
 V  
># >
( ('
 ':3 6
y+ 6
rV"K
 "KJ

# 

 #'t "V& &=R' '4 64 6
X
4 X
vJ| J*@# @B< BI< I"\ . ,0 B
y+ 
( 	/(E
)[ E
P
& "< ", "0&*0V
k V
r,{ ,^% & ' s

 s

lsG srV   