
    bi                       U d dl mZ d dlZd dlZd dlZd dlZd dlmZ d dl	Z
d dlZd dlmZ d dlmZ d dlmZmZmZmZ d dlmZ d dlmZ d d	lmZmZmZ d d
lmZ d dl m!Z!m"Z"m#Z# g dZ$dZ%de&d<   d Z'd Z(dHdZ)e%ddddZ*e%ddddZ+e%ddfdZ,d Z- G d d      Z.	 dIdZ/	 dJdZ0ddddZ1ddddZ2eddd d!Z3deddfd"Z4dKd#Z5d$ Z6ed%d&Z7dLd'Z8d( Z9dKd)Z:d* Z;d+ Z<d, Z=d- Z>d. Z?d/ Z@d0 ZAd1 ZBd2 ZCd3 ZDd4 ZEd5 ZF	 	 	 	 dMd6ZGd7 ZHd8 ZId9 ZJdKd:ZK	 dHd;ZLd< ZMd= ZNd> ZOdNd?ZPdNd@ZQdNdAZRdB ZSdC ZTdD ZUdE ZVdF ZWdG ZXy)O    )annotationsN)partialtokenize)flatten)PANDAS_GE_220PANDAS_GE_300check_groupby_axis_deprecationcheck_observed_deprecation)_convert_to_numeric)concat)get_numeric_only_kwargsis_dataframe_likeis_series_like)
no_default)Mfuncname
itemgetter)meanstdvarTzbool | NoneGROUP_KEYS_DEFAULTc                    t        | t        t        f      r+t        |       dkD  rt        t	        t        |                   S y)z1Determine the correct levels argument to groupby.   r   )
isinstancetuplelistlenrangebys    Q/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/dask/dataframe/groupby.py_determine_levelsr#   '   s0    "udm$R1E#b'N##    c                     t        |      st        |      r% j                  j                  |j                        S t	        |t
        t        f      rt         fd|D              S y)z/Check if ``df`` and ``by`` have aligned indicesc              3  6   K   | ]  }t        |        y wN)_is_aligned).0idfs     r"   	<genexpr>z_is_aligned.<locals>.<genexpr>4   s     2!;r1%2   T)r   r   indexequalsr   r   r   all)r+   r!   s   ` r"   r(   r(   /   sL    b.r2xxrxx((	Bu	&2r222r$   c                b   |j                  d      }|t        | |      sd}t        |      |Pt        |      rE|rCt	        |t
              r|g}t        |      }t        |      dk(  r|d   }|j                  |       t               5   | j                  di |cddd       S # 1 sw Y   yxY w)a  Groupby, but raise if df and `by` key are unaligned.

    Pandas supports grouping by a column that doesn't align with the input
    frame/series/index. However, the reindexing does not seem to be
    threadsafe, and can result in incorrect results. Since grouping by an
    unaligned key is generally a bad idea, we just error loudly in dask.

    For more information see pandas GH issue #15244 and Dask GH issue #1876.r!   Na  Grouping by an unaligned column is unsafe and unsupported.
This can be caused by filtering only one of the object or
grouping key. For example, the following works in pandas,
but not in dask:

df[df.foo < 0].groupby(df.bar)

This can be avoided by either filtering beforehand, or
passing in the name of the column instead:

df2 = df[df.foo < 0]
df2.groupby(df2.bar)
# or
df[df.foo < 0].groupby('bar')

For more information see dask GH issue #1876.r   r   r     )
getr(   
ValueErrorr   r   strr   updater   groupby)r+   convert_by_to_listkwargsr!   msgs        r"   _groupby_raise_unalignedr;   9   s     
D	B	~k"b1< 	$ o	CG(: b#B"Xr7a< AB	#	% $rzz#F#$ $ $s   	B%%B.)
group_keysdropnaobservedc                   |d|ini }|d|ini } | j                   |fd|i||}	|r|	|   }	 |	j                  |g|i |S )Nr=   r>   r<   )r7   apply
r+   grouperkeyfuncr<   r=   r>   argsr9   gs
             r"   _groupby_slice_applyrG   h   sl     $*#5h2F)1)=
H%2H

7HzHXHHA
cF1774)$)&))r$   c                   |d|ini }|d|ini } | j                   |fd|i||}	|r|	|   }	t        |       dk(  r |	j                  |g|i |S  |	j                  |g|i |S )Nr=   r>   r<   r   )r7   r   r@   	transformrA   s
             r"   _groupby_slice_transformrJ   }   s     $*#5h2F)1)=
H%2H

7HzHXHHA
cF 2w!|qwwt-d-f--1;;t-d-f--r$   c                    |d|ini }|d|ini }|r| j                         }  | j                  |fd|i||}|r||   }t               5   |j                  di |}	d d d        |	S # 1 sw Y   	S xY w)Nr=   r>   r<   r2   )
sort_indexr7   r
   shift)
r+   rB   rC   shuffledr<   r=   r>   r9   rF   results
             r"   _groupby_slice_shiftrP      s     $*#5h2F)1)=
H%2H]]_

7HzHXHHA
cF	'	) #"6"#M#Ms   A((A2c                    t        | |d      }	 t        |       r||   }|j                  |      S # t        $ r" t        |       r| |   } | j                  dd cY S w xY w)NF)r!   r8   r   )r;   r   	get_groupKeyErroriloc)r+   by_keyget_keycolumnsgroupeds        r"   _groupby_get_grouprY      sj    &rfOG
R g&G  ))  R GBwwq|s    1 (AAc                      e Zd ZdZddZy)Aggregationa  User defined groupby-aggregation.

    This class allows users to define their own custom aggregation in terms of
    operations on Pandas dataframes in a map-reduce style. You need to specify
    what operation to do on each chunk of data, how to combine those chunks of
    data together, and then how to finalize the result.

    See :ref:`dataframe.groupby.aggregate` for more.

    Parameters
    ----------
    name : str
        the name of the aggregation. It should be unique, since intermediate
        result will be identified by this name.
    chunk : callable
        a function that will be called with the grouped column of each
        partition, takes a Pandas SeriesGroupBy in input.
        It can either return a single series or a tuple of series.
        The index has to be equal to the groups.
    agg : callable
        a function that will be called to aggregate the results of each chunk.
        Again the argument(s) will be a Pandas SeriesGroupBy. If ``chunk``
        returned a tuple, ``agg`` will be called with all of them as
        individual positional arguments.
    finalize : callable
        an optional finalizer that will be called with the results from the
        aggregation.

    Examples
    --------
    We could implement ``sum`` as follows:

    >>> custom_sum = dd.Aggregation(
    ...     name='custom_sum',
    ...     chunk=lambda s: s.sum(),
    ...     agg=lambda s0: s0.sum()
    ... )  # doctest: +SKIP
    >>> df.groupby('g').agg(custom_sum)  # doctest: +SKIP

    We can implement ``mean`` as follows:

    >>> custom_mean = dd.Aggregation(
    ...     name='custom_mean',
    ...     chunk=lambda s: (s.count(), s.sum()),
    ...     agg=lambda count, sum: (count.sum(), sum.sum()),
    ...     finalize=lambda count, sum: sum / count,
    ... )  # doctest: +SKIP
    >>> df.groupby('g').agg(custom_mean)  # doctest: +SKIP

    Though of course, both of these are built-in and so you don't need to
    implement them yourself.
    Nc                <    || _         || _        || _        || _        y r'   )chunkaggfinalize__name__)selfnamer]   r^   r_   s        r"   __init__zAggregation.__init__   s    
 r$   r'   )r`   
__module____qualname____doc__rc   r2   r$   r"   r[   r[      s    3jr$   r[   Fc                    |d|ini }|d|ini }t               5   | j                  d||d||}d d d         |fi |S # 1 sw Y   xY w)Nr=   r>   levelsortr2   )r   r7   )r+   aggfunclevelsr=   rj   r>   r9   rX   s           r"   _groupby_aggregaterm     st     $*#5h2F)1)=
H%2H	#	% L"**K6KKFKL7%f%%L Ls   AAc                r    |d|ini }|d|ini }  | j                   d||d||j                  |fi |S )zq
    A simpler version of _groupby_aggregate that just calls ``aggregate`` using
    the user-provided spec.
    r=   r>   rh   r2   )r7   	aggregate)r+   specrl   r=   rj   r>   r9   s          r"   _groupby_aggregate_specrq     s`     $*#5h2F)1)=
H%2HN:2::DFDDVDNN r$   )r=   r>   c          	        t        |       rI| j                         j                  t        |      dk(  r|d   n
t	        |            | j
                     }nT| j                  t	        |            }t        |t        t        t        t        j                  f      rt	        |      }||   }|du rd}t        |j                  t        j                        rJd}|j                  j                  j                         j                  |j                  j
                        }nt        |j                  t        j                         rqt#        d |j                  j$                  D              rKd}t        j                   j'                  |j                  j$                  |j                  j(                        }|r|j+                  |j                            }	|j,                  D 
ci c]'  }
|
t        j.                  |	||
   j0                        ) }}
t        j2                  |      }t        j4                  ||g      }|S c c}
w )z
    A non-aggregation agg function. This simulates the behavior of an initial
    partitionwise aggregation, but doesn't actually aggregate or throw away
    any data.
    r   r   FTc              3  P   K   | ]  }t        |t        j                           y wr'   )r   pdCategoricalIndex)r)   ri   s     r"   r,   z!_non_agg_chunk.<locals>.<genexpr><  s"      =
7<Jub112=
s   $&namesr.   dtype)r   to_frame	set_indexr   r   rb   r   r   setrt   Indexr.   ru   
categoriescopyrename
MultiIndexanyrl   from_productrw   isinrW   Seriesry   	DataFramer   )r+   rC   r=   r>   r!   r9   rO   has_categoricals
full_indexnew_catsc
empty_dataemptys                r"   _non_agg_chunkr     s    b ((#b'Q,ADHMbggV d2h'cE4bhh78s)C 5  fllB$7$78#00557>>v||?P?PQJbmm4 =
@F@S@S=
 :
  $33##6<<+=+= 4 J  ":??6<<#@"@AH   29986!9??CCJ  LL,EYY/FMs   ",H>c               4   |j                  d      }|j                  d      }|d|ini }|d|ini }t        | fd|i||}t        |       s|	 ||fi |S t        |t        t
        t        t        j                  f      rt        |      } |||   fi |S )Nr]   rW   r=   r>   r!   )	popr;   r   r   r   r   r|   rt   r}   )r+   r=   r>   r!   r9   rD   rW   rF   s           r"   _apply_chunkr   Q  s    ::gDjj#G#)#5h2F)1)=
H%2H AAhA&AAbW_A   gtS"((;<7mGAgJ)&))r$   )numeric_onlyr>   r=   c                  t        |      }t        |       r| j                         } | j                         } t	        | |||      } |j
                  di |}||j                     j                         j                  d       }|j                  }	| |	   dz  | |	<   t	        | |||      }
 |
j
                  di |j                  d       }t        |||gd      S )	N)r!   r>   r=   c                
    | dfS Nz-countr2   r   s    r"   <lambda>z_var_chunk.<locals>.<lambda>j  s
    q(m r$   rW      c                
    | dfS )Nz-x2r2   r   s    r"   r   z_var_chunk.<locals>.<lambda>p  s
    E
 r$   r   axisr2   )
r   r   rz   r   r;   sumrW   countr   r   )r+   r   r>   r=   r!   numeric_only_kwargsrF   xncolsg2x2s               r"   
_var_chunkr   `  s    1,?b[[]	B XfMA$#$A	!))##,C#DA99D$x1}BtH	!"hv	NB		&%	&	-	-6J	-	KB1b!*1%%r$   c                   t        |      } | j                  ||||      j                  di |} t        | j                        }| | j                  d |dz      }	| | j                  |dz  d|z  dz      j                  d       }
| | j                  | dz  d     j                  d       }|
|	dz  |z  z
  }||z
  }d||dk  <   ||z  }t        j                  |||z
  dk(  <   t        |      sJ d||dk  <   |S )	N)ri   rj   r>   r=      r   c                    | d   S Nr   r2   r   s    r"   r   z_var_agg.<locals>.<lambda>  s
    ad r$   r   c                    | d   S r   r2   r   s    r"   r   z_var_agg.<locals>.<lambda>  s
    !A$ r$   r   r2   )	r   r7   r   r   rW   r   npnanr   )rF   rl   ddofrj   r   r>   r=   r   ncr   r   r   rO   divs                 r"   _var_aggr   u  s#    2,?P		THV	LPP 	
	A 
QYYB	!))IbAg
A	
199R1Wq2v{+	,	3	3N	3	KB	!))RC1HJ
 '''?A !Q$(]F
d(CCaL
cMF ffFAH?V$$$F6A:Mr$   c                   t        t        t        j                  |d                  }t        |      }t        t	        |            }t        t        |t	        |                  }t        j                  | |   j                  d      D ]:  \  }}||   }	||   }
|	||
z  z   }| | }| d|z     }| d|z     }t        j                  ||z        }|dz
  }d||dk  <   | |   | |   | |   z  |z  z
  j                  d   |j                  d   z  }|r| | }| | }| |   | |   dz  |z  z
  j                  d   |j                  d   z  }| |   | |   dz  |z  z
  j                  d   |j                  d   z  }t        j                  ||z        }|dk(  rt        j                  }n||z  }|||<   ||k7  s.||	z  |
z   }|||<   = |}t        j                  j                  ||g      }t        j                   ||      S )Nr   )repeatz%s-countr   r   )r.   )r   r   itproductr   dictzipcombinations_with_replacementrW   r   sqrtvaluesr   rt   r   r   r   )r+   r   r   num_elementsnum_colsvalscol_idx_mappingr*   jr   yidxmul_colninjr   r   valiijj	std_val_i	std_val_jsqrt_vallevel_1r.   s                            r"   _cov_finalizerr     s$   tBJJtA678L4yHl#$D3tU8_56O00D1A1A1E 1AA(Q,Cs)
Q
QGGBG!eC!G'{RURU]Q..66q9CJJqMI3qcB3qcBB2a5A:"33;;A>ANIB2a5A:"33;;A>ANIwwy945H1}ffHnS	6Q,"CDI58 GMM&&'9:E99T''r$   c                    | j                         }t        j                  |d      D ]  \  }}| | }| |   | |   z  ||<    t        j                  t        |      t              |_        |S )zInternal function to be used with apply to multiply
    each column in a dataframe by every other column

    a b c -> a*a, a*b, b*b, b*c, c*c
    r   ry   )	__class__r   r   r   zerosr   intr.   )r+   r   _dfr*   r   cols         r"   	_mul_colsr     sn     ,,.C00q9 !1A3ia52a5=C! S-CIJr$   )r   c               j   t        |      }t        |       r| j                         } | j                         } |du r7| j	                  ddg      }|j
                  D ]  }t        ||   d      | |<    t        j                         }t        | j
                        D ]  \  }}t        |      ||<    | j                  |      } | j                         j
                  }	t        d |D              }
|
s6|D cg c]  }||   	 }}|	j                  t        j                   |            }	t#        | |      } |j$                  di |}t&        rd	dini } |j(                  t*        fd
|	i|j-                  dd      }||j
                     j/                         j                  d       }||||fS c c}w )zCovariance Chunk Logic

    Parameters
    ----------
    df : Pandas.DataFrame
    std : bool, optional
        When std=True we are calculating with Correlation

    Returns
    -------
    tuple
        Processed X, Multiplied Cols,
    Fdatetime	timedelta)includeTr   c              3  2   K   | ]  }t        |        y wr'   )r   )r)   ss     r"   r,   z_cov_chunk.<locals>.<genexpr>  s     0.#0s   r    include_groupsr   )ri   dropc                    |  dS r   r2   r   s    r"   r   z_cov_chunk.<locals>.<lambda>  s    6l r$   r2   )r   r   rz   r   select_dtypesrW   r   collectionsOrderedDict	enumerater5   r   _get_numeric_datar   
differencert   r}   r;   r   r   r@   r   reset_indexr   )r+   r   r!   r   dt_dfr   col_mappingr*   r   r   is_maskkrF   r   r   mulr   s                    r"   
_cov_chunkr     s    2,?b[[]	Bu  *k)B C== 	<C)%*d;BsG	<
 ))+K"**%  1QA 	;	'B!))D 0R00G&()k!n))rxx|, +A$#$A2?&.RN
!'')
9$
9.
9
E
Et F C 	
!))##,B#CAsA{## *s   5F0c                `   t        |       }t        | \  }}}}	|d   j                  }
|	d   }t        |      j	                  ||      j                         }t        |      j	                  ||      j                         }t        |      j	                  |      j                         }t        |||gd      j	                  |      j                  t        |
|      }|j                         D ci c]  \  }}||
 }}}|j                  j                  }t               }t        |      dk(  rAt        d |D              r/t        |j                         t        |j                        z
        }|D ]f  }|j                  ||      }|j!                  |       t        |j                  j"                  d         t        |      k  sU	 |j%                  |       h t        |j                               }t)        t        |j                  j"                              D ]$  }|j                  j+                  ||      |_        & |j                  j-                  |d	
       t.        r|j1                         }n|j1                  d      }t3        |      sJ |S c c}}w # t&        $ r Y 8w xY w)Nr   r   rh   ri   r   r   )r   r   c              3  $   K   | ]  }|d u  
 y wr'   r2   )r)   r   s     r"   r,   z_cov_agg.<locals>.<genexpr>  s     !>!t)!>s   T)inplaceF)r=   )r   r   rW   r   r7   r   r@   r   itemsr.   rw   r   r0   keysr|   r3   appendrl   r   rS   r   
set_levels	set_namesr	   stackr   )_trl   r   r   rj   tsumsmulscountscol_mappingsr   r   
total_sums
total_mulstotal_countsrO   r   vinv_col_mappingidx_valsidx_mappingr   idx_namer   ri   s_results                             r"   _cov_aggr    s`   RA'*Aw$D$7??Dr"K%%F%>BBDJ%%F%>BBDJ&>)))7;;=L
J5A>	v		~Dc	2  )4(9(9(;<1q!t<O<||!!H&K 8}c!>X!>>,,.Z5G5G1HHI 
"&&sC08$v~~$$Q'(3{+;;)
   "#Ds6>>0012 F224u2EF LL;5 <<><<u<-X&&&OC ="  s   3J
J  	J-,J-c                   |j                  d      }	 | j                  t        |      |gz         j                  t        |            S # t        $ r Y nw xY wt        | |d      }t        |       dkD  r1||   j                         j                         j                         }|S ||g   j                         }|j                  | j                  |j                     j                               }|S )Nrb   )subsetT)r!   r<   r   )r   drop_duplicatesr   r{   	Exceptionr;   r   uniqueexploderz   nuniqueastypedtypesrW   to_dict)r+   r!   r9   rb   rF   rX   s         r"   _nunique_df_chunkr  2  s    ::fD !!bTF):!;EEd2hOO  	!t<A
2w{D'.."**,557 N TF)##%..7??!;!C!C!EFNs   6A
 
	AAc                    | j                  ||d      | j                  d      j                         j                         j	                         }|S )NT)ri   rj   r>   r   )r7   rW   r  r	  rz   )r+   rl   rj   rO   s       r"   _nunique_df_combiner  G  sB    


dT
:2::a=I				  Mr$   c                *    | d|dt        | |       S )N-r   rD   columns     r"   _make_agg_idr  ^  s!    XQvj(4"8!9::r$   c                  	 t        | t              s2t        j                  t	        |t        j                  |                   } g }t        | t              r| j                         D ]k  \  	}t        |t              r)|j                  	fd|j                         D               ?t        |t              s|g}|j                  	fd|D               m nt        dt        |              t        t        t        ft        fd| j                         D               }|r|D cg c]  \  }}}|||f }}}}|S c c}}}w )a  
    Return a list of ``(result_column, func, input_column)`` tuples.

    Spec can be

    - a function
    - a list of functions
    - a dictionary that maps input-columns to functions
    - a dictionary that maps input-columns to a lists of functions
    - a dictionary that maps input-columns to a dictionaries that map
      output-columns to functions.

    The non-group columns are a list of all column names that are not used in
    the groupby operation.

    Usually, the result columns are mutli-level names, returned as tuples.
    If only a single function is supplied or dictionary mapping columns
    to single functions, simple names are returned as strings (see the first
    two examples below).

    Examples
    --------
    >>> _normalize_spec('mean', ['a', 'b', 'c'])
    [('a', 'mean', 'a'), ('b', 'mean', 'b'), ('c', 'mean', 'c')]

    >>> spec = collections.OrderedDict([('a', 'mean'), ('b', 'count')])
    >>> _normalize_spec(spec, ['a', 'b', 'c'])
    [('a', 'mean', 'a'), ('b', 'count', 'b')]

    >>> _normalize_spec(['var', 'mean'], ['a', 'b', 'c'])
    ... # doctest: +NORMALIZE_WHITESPACE
    [(('a', 'var'), 'var', 'a'), (('a', 'mean'), 'mean', 'a'),      (('b', 'var'), 'var', 'b'), (('b', 'mean'), 'mean', 'b'),      (('c', 'var'), 'var', 'c'), (('c', 'mean'), 'mean', 'c')]

    >>> spec = collections.OrderedDict([('a', 'mean'), ('b', ['sum', 'count'])])
    >>> _normalize_spec(spec, ['a', 'b', 'c'])
    ... # doctest: +NORMALIZE_WHITESPACE
    [(('a', 'mean'), 'mean', 'a'), (('b', 'sum'), 'sum', 'b'),       (('b', 'count'), 'count', 'b')]

    >>> spec = collections.OrderedDict()
    >>> spec['a'] = ['mean', 'size']
    >>> spec['b'] = collections.OrderedDict([('e', 'count'), ('f', 'var')])
    >>> _normalize_spec(spec, ['a', 'b', 'c'])
    ... # doctest: +NORMALIZE_WHITESPACE
    [(('a', 'mean'), 'mean', 'a'), (('a', 'size'), 'size', 'a'),      (('b', 'e'), 'count', 'b'), (('b', 'f'), 'var', 'b')]
    c              3  2   K   | ]  \  }}|f|f  y wr'   r2   )r)   result_columnrD   input_columns      r"   r,   z"_normalize_spec.<locals>.<genexpr>  s)      +t #M2D,Gs   c              3  >   K   | ]  }t        |      f|f  y wr'   )r   )r)   rD   r  s     r"   r,   z"_normalize_spec.<locals>.<genexpr>  s)       #HTN3T<Hs   zunsupported agg spec of type c              3  6   K   | ]  }t        |        y wr'   )r   )r)   subspec	compoundss     r"   r,   z"_normalize_spec.<locals>.<genexpr>  s      +2
7I&r-   )r   r   r   r   r   r   r   r   extendr   r4   typer   r   r   )
rp   non_group_columnsresr  use_flat_columns_rD   	input_colr  r  s
           @@r"   _normalize_specr%  b  s*   d dD!&&s+<biio'NO
C$%)ZZ\ 	!L''4(

 /6}}  "'40&iG

  ' 	" 8dEFFud#I 6:kkm   HKLL0DD)	4+LLJ Ms   )D?c           
     n   t         j                  dt         j                  dt         j                  dt         j                  dt         j
                  di}i }| D ]F  \  }}}t        |j                  ||            |f}|j                  |g       j                  ||f       H |j                         D ]  }t        |      dk7  st        d|        i }i }	g }
| D ]  \  }}}d}i }t        |t              r|j                  |j                   }}t        |t"              st        |j                  ||            }t%        |||||      }|d	   D ]
  } | || d
   <    |d   D ]
  } | |	| d
   <    |
j                  |d           t'        |j                               }t'        |	j                               }	||	|
fS )aw  
    Create transformation functions for a normalized aggregate spec.

    Parameters
    ----------
    spec: a list of (result-column, aggregation-function, input-column) triples.
        To work with all argument forms understood by pandas use
        ``_normalize_spec`` to normalize the argument before passing it on to
        ``_build_agg_args``.

    Returns
    -------
    chunk_funcs: a list of (intermediate-column, function, keyword) triples
        that are applied on grouped chunks of the initial dataframe.

    agg_funcs: a list of (intermediate-column, functions, keyword) triples that
        are applied on the grouped concatenation of the preprocessed chunks.

    finalizers: a list of (result-column, function, keyword) triples that are
        applied after the ``agg_funcs``. They are used to create final results
        from intermediate representations.
    minmaxmedianr   r   r   z#conflicting aggregation functions: r2   chunk_funcsr   aggregate_funcs	finalizer)r   r'  r(  r)  r   r   r   r3   
setdefaultr   r   r   r4   r   r   rE   keywordsr[   _build_agg_args_singlesorted)rp   known_np_funcsby_namer#  rD   r  rC   funcschunksaggs
finalizersr  	func_argsfunc_kwargsimplss                  r"   _build_agg_argsr:    s   0 	

		8

N G!% A4~))$56D3#**D,+?@A ! Lu:?B5'JKKL FDJ .2 .)t\	dG$%)YY{I$,N..tT:;D&4K

 -( 	#D"F47O	#+, 	!D DaM	! 	%,-'.* FMMO$F$++- D4##r$   c           
     N   t         j                  t         j                  ft         j                  t         j                  ft         j                  t         j                  ft         j                  t         j                  ft         j
                  t         j                  ft         j                  t         j                  ft         j                  t         j                  ft         j                  t         j                  fd t         j                  fd	}||j                         v rt        | ||||         S |dk(  rt        | ||||      S |dk(  rt        | ||||      S |dk(  rt        | ||      S |dk(  rt        | ||      S t!        |t"              rt%        | ||      S t'        d|       )N)	r   r'  r(  r   sizefirstlastprodr)  r   r   r   r   zunknown aggregate )r   r   r'  r(  r   r<  r=  r>  r?  r)  r   _build_agg_args_simple_build_agg_args_var_build_agg_args_std_build_agg_args_mean_build_agg_args_listr   r[   _build_agg_args_customr4   )r  rD   r7  r8  r  simple_impls         r"   r/  r/    s`   quu~quu~quu~''155!''177#  HH
K {!!%4{4/@
 	
 
"4K
 	
 
"4K
 	
 
#M4FF	#M4FF	D+	&%mT<HH -dV455r$   c           	         t        ||      }|\  }}t        |t        t        ||      fg|t        t        ||      fg| t        |      t               f      S )Nr  rD   r*  r+  r,  r  r   _apply_func_to_columnr   )r  rD   r  	impl_pairintermediate
chunk_implagg_impls          r"   r@  r@  .  sp    l3L$J %Lz:
 %Lx8
 !*\":DFC r$   c                    t        d|      }t        d|      }t        d|      }|rt        d| d|       dh}|j                         |z
  }	|	rt        d| d| d|	       t        |t        t        |t
        j                  	      f|t        t        |t
        j                  	      f|t        t        |
      fg|||fD 
cg c]$  }
|
t        t        |
t
        j                  	      f& c}
| t        t        d|||d|f      S c c}
w )Nr   sum2r   zaggregate function 'z0' doesn't support positional arguments, but got r   z' supports z keyword arguments, but got rH  )r  )
sum_columncount_columnsum2_columnrI  r2   )
r  	TypeErrorr   r   rK  r   r   r   _compute_sum_of_squares_finalize_var)r  rD   r7  r8  r  int_sumint_sum2	int_countexpected_kwargsunexpected_kwargsr   s              r"   rA  rA  E  sF   5,/GFL1HWl3I "4&(XYbXcd
 	

 hO#((*_<"4&O3DD`ar`st
 	
 +TAEE-RS-t<agg/VW.L0IJ
  H5
 'Squu)EF

  "&$ 		
 
s   )D
c                N    t        | ||||      }|d   \  } }}| t        |f|d<   |S )Nr,  )rA  _finalize_std)r  rD   r7  r8  r  r9  r#  r9   s           r"   rB  rB  o  s@    tY\E  %[1M1f'?E+Lr$   c                `   t        d|      }t        d|      }t        |t        t        |t        j                        f|t        t        |t        j
                        fg||fD cg c]$  }|t        t        |t        j                        f& c}| t        t        ||      f      S c c}w )Nr   r   rH  )rR  rS  rI  )r  r   rK  r   r   r   _finalize_mean)r  rD   r  rX  rZ  r   s         r"   rC  rC  z  s    5,/GWl3I+TAEE-RS-t<agg/VW
  +
 'Squu)EF

 G)<
 

s   ')B+
c           	         t        d|      }t        |t        t        |d       fg|t        t        |d       fg| t        |      t               f      S )Nr   c                ,    | j                  t              S r'   )r@   r   )r   s    r"   r   z&_build_agg_args_list.<locals>.<lambda>  s     r$   rH  c                &    | j                  d       S )Nc                R    t        t        j                  j                  |             S r'   )r   r   chainfrom_iterable)r4  s    r"   r   z8_build_agg_args_list.<locals>.<lambda>.<locals>.<lambda>  s    tBHH,B,B6,J'K r$   )r@   )s0s    r"   r   z&_build_agg_args_list.<locals>.<lambda>  s    BHHK% r$   rI  rJ  )r  rD   r  rM  s       r"   rD  rD    sm    5L %L/FG
 %'	
 !*\":DFC) r$   c           	     R   t        t        |      |      }|j                  !| t        j                  |      t               f}n| t        t        |j                  |      f}t        |t        t        |j                  |      fg|t        t        |j                  |      fg|      S )N)rD   prefixr  rI  )
r  r   r_   operatorr   r   _apply_func_to_columnsrK  r]   r^   )r  rD   r  r   r,  s        r"   rE  rE    s    
x~|
4C}}"H$7$7$<dfE	 "dmmC0
	 '4::l)ST
 ($DHHS*IJ
  r$   c                   t        |      r|j                  t        |             |j                  d      }t	        | fi |}t        j                         }|D ]B  \  }}} ||fi |}	t        |	t              rt        |	      D ]  \  }
}||| d|
 <    >|	||<   D t        |       r| j                  |      S | j                  d      j                         j                  |      S )a  
    Group a dataframe and apply multiple aggregation functions.

    Parameters
    ----------
    df: pandas.DataFrame
        The dataframe to work on.
    by: list of groupers
        If given, they are added to the keyword arguments as the ``by``
        argument.
    funcs: list of result-colum, function, keywordargument triples
        The list of functions that are applied on the grouped data frame.
        Has to be passed as a keyword argument.
    kwargs:
        All keyword arguments, but ``funcs``, are passed verbatim to the groupby
        operation of the dataframe

    Returns
    -------
    aggregated:
        the aggregated dataframe.
    r    r3  r  r   )r   r6   r   r   r;   r   r   r   r   r   r   r   headrz   )r+   r!   r9   r3  rX   rO   r  rD   r8  rr   r   s               r"   _groupby_apply_funcsro    s    . 2w 	b"JJwE&r4V4G$$&F,1 &(t[(K(a#A, 5Q34-#/05 %&F=!& ||F## wwqz""$..v66r$   c                   t        j                         5  t        j                  ddt               t	        | d      r| j
                  }n/t	        | d      r| j                  }n| j                  j                  }d d d        |r| j                  |   j                  d      n| j                  j                  d      }|j                        j                         S # 1 sw Y   cxY w)NignorezYDataFrameGroupBy.grouper is deprecated and will be removed in a future version of pandas.rB   _grouperr   )warningscatch_warningsfilterwarningsFutureWarninghasattrrB   rr  groupingr   objpowr7   r   )rX   r  r   r+   s       r"   rV  rV    s     
	 	 	" )g	
 7I&??DWj)##D ##((D) (.V		 	 	#7;;??13EB::d!!) )s   A$CC$c                B   t        | f|||d|} t        j                         }	|D ]  \  }
}} || fi ||	|
<    | j                  |	      }	|	 |	|   }	|r9|7t        |t        t        f      s!|	j                  dk(  r|	|	j                  d      }	|	S # t        $ r Y Hw xY w)N)r3  ri   rj   r   r   )
ro  r   r   r   rS   r   r   r   ndimrW   )r+   r+  finalize_funcsri   rj   argrW   	is_seriesr9   rO   r  rD   finalize_kwargss                r"   _agg_finalizer  	  s     


!T
=C
B
 $$&F0> <,t_ $R ;? ;}< \\&!F	G_F 	O3t-KK1q)*M  		s   B 	BBc                ,    | ||       S  || |         S r'   r2   )df_liker  rD   s      r"   rK  rK  .  s     ~G}  r$   c                    t        |       r| j                  }n| j                  j                  }t        fd|D              }|D cg c]  }| |   	 }} || S c c}w )Nc              3  F   K   | ]  }|j                        s|  y wr'   )
startswith)r)   r   ri  s     r"   r,   z)_apply_func_to_columns.<locals>.<genexpr><  s     FSs~~f/ESFs   !!)r   rW   ry  r0  )r  ri  rD   rW   r   s    `   r"   rk  rk  5  s[    !// ++%%FGFFG'./ws|/G/> 0s   Ac                6    | |   | |   z  }t        || |         S r'   )_adjust_for_arrow_na)r+   rR  rS  rO   s       r"   r`  r`  B  s'    
^b..F<(899r$   c                    t        | j                  t        j                        r;|r#t        j                  | |j                         <   | S t        j                  | |dk(  <   | S r   )r   ry   rt   
ArrowDtypeNAisna)rO   r+   check_for_isnas      r"   r  r  G  sJ    &,,. "F2779 M !eeF27OMr$   c                    |j                  dd      }| |   }| |   }| |   }	|	|dz  |z  z
  }
||z
  }d||dk  <   |
|z  }
t        j                  |
||z
  dk(  <   |rt        |
|      S |
S )Nr   r   r   r   )r3   r   r   r  )r+   rS  rR  rT  adjust_arrowr9   r   r   r   r   rO   r   s               r"   rW  rW  Q  s    
 ::fa D
<A
:A	KB!Q$(]F
d(CCaL
cMF ffFAH?#FC00r$   c                    t        | |||fddi|}t        j                  |      }|j                  |j                  k7  r|j	                  |j                        }t        ||d      S )Nr  FT)r  )rW  r   r   ry   r  r  )r+   rS  rR  rT  r9   rO   r!  s          r"   r^  r^  f  se    
L*k@EIOF ''&/C
yyFLL jj&VDAAr$   c                    |j                  | j                  |      j                  |      }| j                  |_         || |   |      S N
fill_value)reindexr{   r.   )partcum_lastr.   rW   rD   initialaligns          r"   _cum_agg_alignedr  p  sB    T^^E288WME**EKWu%%r$   c                    | j                   j                  |j                         } || j                  ||      |j                  ||      |      S r  )r.   unionr  )abrD   r  r  s        r"   _cum_agg_filledr  v  sH    GGMM!''"E			%G	,			%G	, r$   c                .    | j                  ||      dz   S )Nr  r   )add)r  r  r  s      r"   _cumcount_aggregater    s    55z5*Q..r$   c                &    | dnd|  dfd}|S )N
z

Based on c                    d d| _         | S )Nz9Aggregate using one or more specified operations
        a  
        Parameters
        ----------
        arg : callable, str, list or dict, optional
            Aggregation spec. Accepted combinations are:

            - callable function
            - string function name
            - list of functions and/or function names, e.g. ``[np.sum, 'mean']``
            - dict of column names -> function, function name or list of such.
            - None only if named aggregation syntax is used
        split_every : int, optional
            Number of intermediate partitions that may be aggregated at once.
            This defaults to 8. If your intermediate partitions are likely to
            be small (either due to a small number of groups or a small initial
            partition size), consider increasing this number for better performance.
        split_out : int, optional
            Number of output partitions. Default is 1.
        shuffle : bool or str, optional
            Whether a shuffle-based algorithm should be used. A specific
            algorithm name may also be specified (e.g. ``"tasks"`` or ``"p2p"``).
            The shuffle-based algorithm is likely to be more efficient than
            ``shuffle=False`` when ``split_out>1`` and the number of unique
            groups is large (high cardinality). Default is ``False`` when
            ``split_out = 1``. When ``split_out > 1``, it chooses the algorithm
            set by the ``shuffle`` option in the dask config system, or ``"tasks"``
            if nothing is set.
        kwargs: tuple or pd.NamedAgg, optional
            Used for named aggregations where the keywords are the output column
            names and the values are tuples where the first element is the input
            column name and the second element is the aggregation function.
            ``pandas.NamedAgg`` can also be used as the value. To use the named
            aggregation syntax, arg must be set to None.
        )rf   )rD   based_on_strs    r"   wrapperz%_aggregate_docstring.<locals>.wrapper  s#    		 !	"F r$   r2   )based_onr  r  s     @r"   _aggregate_docstringr    s'     $+4;xj1KL$L Nr$   c                   | D ci c]$  \  }}||j                         j                         & }}} t        | j                        ||      }| j                  j                  j
                  |j                  _        |j                  j                  | j                  j                  j                  d      |_        |S c c}}w )N)rb   F)r   )r	  r  r  ry  r.   rw   r  ry   )	series_gbrb   r   r   datarets         r"   _unique_aggregater    s    09:1Aqyy{!!##:D:
$y}}
d
.Cmm))//CIIO		  !4!4!:!: GCIJ	 ;s   )C c                    | j                   r9t        d t        | j                   j                         t              D              rt        j                  t              S  | j                  di |S )Nc              3  F   K   | ]  }t        j                  |        y wr'   )rt   r  )r)   rC   s     r"   r,   z _value_counts.<locals>.<genexpr>  s      s   !)	containerr   r2   )	groupsr0   r   r   r   rt   r   r   value_counts)r   r9   s     r"   _value_countsr    sS    88s  '5 I  yys##q~~'''r$   c                   | D ci c]&  \  }}||j                  d      j                         ( }}}|s/t        j                  | j                  j
                  d d d      g}t        j                  || j                  j
                  j                        }t        t        |j
                  j                              D ci c]V  }||j
                  j                  |   j                  | j                  j
                  j                  |   j                        X }}|j
                  j                  |j                         |j                         d      |_        |S c c}}w c c}w )	Nr   r   r   float64rx   rv   F)ri   verify_integrity)r7   r   rt   r   ry  r.   r   rw   r   r   rl   r  ry   r   r   r   )r  r   r   r  r!  r*   typed_levelss          r"   _value_counts_aggregater    s.   5>?TQAqyyry"&&((?D?			 3 3BQ 7yIJ
))D	 3 3 9 9
:C s399++,- 	
399A%%imm&9&9&@&@&C&I&IJJL  		$$\%6%6%85 % CI J @s   +E"AE(c           
         t        |       rt        |  nd| ff\  }}t        j                  |D cg c]  } |j                  di | c}|      S c c}w NT)r   r2   )r   r   rt   r   tailr  r9   r   r  groups        r"   _tail_chunkr    K    &))n3	?7YL:QLD&99?ujejj*6*?dKK?   Ac                    |j                  d      } | j                  di |j                  t        t	        |                  S Nindex_levelsr2   )r   r  	droplevelr   r   r  r9   rl   s      r"   _tail_aggregater    8    ZZ'F9>>#F#--d5=.ABBr$   c           
         t        |       rt        |  nd| ff\  }}t        j                  |D cg c]  } |j                  di | c}|      S c c}w r  )r   r   rt   r   rm  r  s        r"   _head_chunkr    r  r  c                    |j                  d      } | j                  di |j                  t        t	        |                  S r  )r   rm  r  r   r   r  s      r"   _head_aggregater    r  r$   r  )NNNFN)NNFN)F)FF)FNNFr'   )Y
__future__r   r   	itertoolsr   rj  rs  	functoolsr   numpyr   pandasrt   	dask.baser   	dask.corer   dask.dataframe._compatr   r	   r
   r   dask.dataframe.corer   dask.dataframe.methodsr   dask.dataframe.utilsr   r   r   dask.typingr   
dask.utilsr   r   r   NUMERIC_ONLY_NOT_IMPLEMENTEDr   __annotations__r#   r(   r;   rG   rJ   rP   rY   r[   rm   rq   r   r   r   r   r   r   r   r  r  r  r  r%  r:  r/  r@  rA  rB  rC  rD  rE  ro  rV  r  rK  rk  r`  r  rW  r^  r  r  r  r  r  r  r  r  r  r  r  r2   r$   r"   <module>r     s   "           4 ) 
 # . .   #' K &,$j "*6 ".> "0,: :| FJ& >B )-t 3l "& * &0% &,  j5QU0#(L( &0 .$b2t*.;SlG$T)6X.'T*60/7d"4 
"J!
:
 =A*B&/,^(L
C
L
Cr$   