
    bi9                       U d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
mZmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ e
r;d dlmZmZmZmZmZ d dl Z!d dl"m#Z$ d dl%m&Z&m'Z' d dl(m)Z)m*Z* d dl+m,Z, d dl-m.Z. dZ/de0d<   dZ1de0d<   ed   Z2de0d<   eddddddd d!d"d#d$d%d&d'd(d)d*d+d,e2f   Z3de0d-<   	 d.Z4de0d/<   	 eZ5de0d0<   	 d d1d d2Z6d3e0d4<    ed56      d@d7       Z7 G d8 d9      Z8 G d: d;ed<d=e3f         Z9dAd>Z:dBd?Z;y)C    )annotationsN)	lru_cache)chain)methodcaller)TYPE_CHECKINGAnyClassVarLiteral)EagerGroupBy)issue_warning)!evaluate_output_names_and_aliases)make_group_by_kwargs)
zip_strict)is_pandas_like_dataframe)CallableIterableIteratorMappingSequence)DataFrameGroupBy)	TypeAliasUnpack)NarwhalsAggregationScalarKwargs)PandasLikeDataFrame)PandasLikeExprz._NativeGroupBy[tuple[str, ...], Literal[True]]r   NativeGroupByz(Callable[[pd.DataFrame], pd.Series[Any]]NativeApply)covskewInefficientNativeAggregationanyallcountidxmaxidxminmaxmeanmedianminmodenthnuniqueprodquantilesemsizestdsumvarNativeAggregationz.Callable[[Any], pd.DataFrame | pd.Series[Any]]
_NativeAggNonStrHashable)firstlast	any_valuez,Mapping[NarwhalsAggregation, Literal[0, -1]]_REMAP_ORDERED_INDEX    )maxsizec                   | dk(  rt        | d      S |r|j                  d      dk(  rt        |       S t        | fi |S )Nr-   F)dropnaddof   )r   get)namekwdss     Y/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/narwhals/_pandas_like/group_by.py_native_aggrG   E   sE    yD//488F#q(D!!%%%    c                      e Zd ZU dZded<   ded<   ded<   ddZddZ	 	 	 	 dd	Zdd
ZddZ	ddZ
ddZddZddZedd       ZddZy)AggExpraM  Wrapper storing the intermediate state per-`PandasLikeExpr`.

    There's a lot of edge cases to handle, so aim to evaluate as little
    as possible - and store anything that's needed twice.

    Warning:
        While a `PandasLikeExpr` can be reused - this wrapper is valid **only**
        in a single `.agg(...)` operation.
    r   exprzSequence[str]output_namesaliasesc                <    || _         d| _        d| _        d| _        y )N  )rK   rL   rM   
_leaf_name)selfrK   s     rF   __init__zAggExpr.__init__]   s    	57rH   c               ~    |j                   }|j                  }t        | j                  ||      \  | _        | _        | S )zd**Mutating operation**.

        Stores the results of `evaluate_output_names_and_aliases`.
        )	compliantexcluder   rK   rL   rM   )rR   group_bydfrV   s       rF   with_expand_nameszAggExpr.with_expand_namesc   s@    
 ""*KIIr7+
'4< rH   c               6   | j                   }| j                         r,| j                         r|j                  j	                         }n| j                         r}|j                  j	                         }|j
                  j                         }|j                  |D cg c],  }|j                  |      j                  |      j                  . c}      }n| j                         r-|j
                  }|j                  | j                        }|j                  d      x}	dk7  rd|	 d|j                   d}
t!        |
      t#        |      }|j                  }|j$                  |j&                  }}|j                         }|j                  |D cg c]w  }  |j(                  g ||fi |j	                         j+                  d      j-                  |      j(                  |fi ||   j/                  d      j1                         y c}      }n| j3                         s | j5                         s| j7                         rO | j9                         |j                  g |j$                  |         }|j;                  |j$                  d	
       nAt=        |      dk(  r|d   n
t#        |      } | j9                         |j                  |         }t?        |      rt#        | j@                        |_!        |S | j@                  d   |_"        |S c c}w c c}w )z8Evaluate the wrapped expression as a group_by operation.keepr"   z`Expr.mode(keep='z7')` is not implemented in group by context for backend z3

Hint: Use `nw.col(...).mode(keep='any')` instead.F)	ascendingrB   Tinplacer   )#rL   is_lenis_top_level_function_groupedr1   rU   __narwhals_namespace___concat_horizontalfrom_nativealiasnativeis_mode_kwargsrK   rC   _implementationNotImplementedErrorlist_keys_group_by_kwargsgroupbysort_valuesreset_indexhead
sort_indexis_lastis_firstis_any_value
native_agg	set_indexlenr   rM   columnsrD   )rR   rW   namesresultresult_singlensrD   rU   node_kwargsr[   msgcolsrf   keyskwargscolselects                    rF   _getitem_aggszAggExpr._getitem_aggso   s   
 !!;;=T779&&++-F[[]$--224M##::<B**NSTd.44T:AATF \\^ **I"**4995K#//E9'v .(889 :HH 
 *#..;D%%F#>>8+D+D&D 113B**  $	 NFNN<T<3<:6:TV [5[1 [%WT	- &,	- .1	2
 T!WZ\"	F \\^t}}$2C2C2E&T__&x'8'89R8>>9RE9R'STFX^^T:!$UqU1Xd5kF&T__&x'8'8'@AF#F+!$,,/FN  ,,q/FKU U*	s   1LA<Lc                     | j                   dk(  S )Nrx   	leaf_namerR   s    rF   r_   zAggExpr.is_len   s    ~~&&rH   c                     | j                   dk(  S )Nr:   r   r   s    rF   rs   zAggExpr.is_last       ~~''rH   c                     | j                   dk(  S )Nr9   r   r   s    rF   rt   zAggExpr.is_first   s    ~~((rH   c                     | j                   dk(  S )Nr+   r   r   s    rF   rg   zAggExpr.is_mode   r   rH   c                     | j                   dk(  S )Nr;   r   r   s    rF   ru   zAggExpr.is_any_value   s    ~~,,rH   c                t    t        t        | j                  j                  j	                                     dk(  S )NrB   )rx   rk   rK   	_metadataop_nodes_reversedr   s    rF   r`   zAggExpr.is_top_level_function   s*    4		++==?@AQFFrH   c                    | j                   x}r|S t        j                  | j                        | _         | j                   S N)rQ   PandasLikeGroupByrK   )rR   rD   s     rF   r   zAggExpr.leaf_name   s6    ??"4"K+66tyyArH   c                v   t         j                  | j                        }t        | j                  j
                  j                               }| j                  t        v rF|j                  j                  d      rd}t        |      t        dt        | j                           S t        |fi |j                  S )z@Return a partial `DataFrameGroupBy` method, missing only `self`.ignore_nullszd`Expr.any_value(ignore_nulls=True)` is not supported in a `group_by` context for pandas-like backendr,   )n)r   _remap_expr_namer   nextrK   r   r   r<   r   rC   rj   r   rG   )rR   native_name	last_noder   s       rF   rv   zAggExpr.native_agg   s    '88H,,>>@A	>>11##N36  *#..)=dnn)MNN;;)*:*:;;rH   N)rK   r   returnNone)rW   r   r   rJ   )rW   r   r   zpd.DataFrame | pd.Series[Any])r   bool)r   zNarwhalsAggregation | Any)r   r6   )__name__
__module____qualname____doc____annotations__rS   rY   r   r_   rs   rt   rg   ru   r`   propertyr   rv   rO   rH   rF   rJ   rJ   N   ss     8
6)6	&6p'()(-G  <rH   rJ   c                      e Zd ZU i ddddddddddddddddd	d
ddddddddddddddddZded<   ded<   	 ded<   	 ded<   	 ded<   	 ed'd       Z	 	 	 	 	 	 	 	 d(dZd)d Z	 	 	 	 	 	 d*d!Z		 	 	 	 d+d"Z
d,d#Zd-d$Zd.d%Zy&)/r   r3   r(   r)   r'   r*   r+   r2   r4   rx   r1   n_uniquer-   r$   r/   r#   r"   r9   r,   r:   r;   z9ClassVar[Mapping[NarwhalsAggregation, NativeAggregation]]_REMAP_AGGStuple[str, ...]_original_columnsz	list[str]rl   _output_key_nameszMapping[str, bool]rm   c                    | j                   S )z>Group keys to ignore when expanding multi-output aggregations.)_excluder   s    rF   rV   zPandasLikeGroupBy.exclude   s     }}rH   c              :   t        |j                        | _        || _        | j	                  ||      \  | _        | _        | _        g | j                  | j                  | _        | j                  j                  }t        |j                  j                        j                  | j                  j                        r|j                  d      }t!        |      | _         |j$                  | j                  j'                         fi | j"                  | _        y )NT)drop)drop_null_keys)tuplery   r   _drop_null_keys_parse_keys_compliant_framerl   r   r   rU   rf   setindexrz   intersectionrp   r   rm   rn   copyra   )rR   rX   r   r   rf   s        rF   rS   zPandasLikeGroupBy.__init__   s     "'rzz!2-DHDTDTE
Atz4+A *P4::)O8N8N)O &&v||!!"//0F0FG''T'2F 4N S'5v~~JJOO(
!%!6!6(
rH   c                t   d}g }|D ]?  }|j                  t        |      j                  |              | j                  |      r>d}A |r|r;| j                  j                         }|j                  | j                  |            }n| j                  j                         j                  t        | j                  j                        | j                        }n;| j                  j                  j                  r
t!               | j#                  |      }|j%                  d       | j'                  ||      S )NTF)ry   r]   )appendrJ   rY   
_is_simplerU   rb   rc   r   __native_namespace__	DataFramerk   ra   groupsrl   rf   emptyempty_results_error_apply_aggsrp   _select_results)rR   exprsall_aggs_are_simple	agg_exprsrK   r}   r{   s          rF   aggzPandasLikeGroupBy.agg  s   "#%	 	,DWT]<<TBC??4(&+#	,
 ^^::<..t/A/A)/LM<<>HH--.

 I  ^^""((%''%%e,F 	4(##FI66rH   c                  t        j                  d |D              } | j                  j                  |d      j                  g | j
                  | j                  t        t        | j
                  | j                                    S )zgResponsible for remapping temp column names back to original.

        See `ParseKeysGroupBy`.
        c              3  4   K   | ]  }|j                     y wr   )rM   ).0es     rF   	<genexpr>z4PandasLikeGroupBy._select_results.<locals>.<genexpr>1  s     'Ea		'Es   F)validate_column_names)
r   from_iterablerU   _with_nativesimple_selectrl   renamedictzipr   )rR   rX   r   	new_namess       rF   r   z!PandasLikeGroupBy._select_results*  sx     '''E9'EE	DNN''%'H]4 JJ4)24VDTZZ)?)?@AB	
rH   c               J    |D cg c]  }|j                  |        c}S c c}w r   )r   )rR   r   r   s      rF   r   zPandasLikeGroupBy._getitem_aggs8  s!     055!%555s    c                    t                | j                  j                  }| j                  |      }| j                  j
                  }|j                         r|j                         dk\  r
 ||d      S  ||      S )a"  Stub issue for `include_groups` [pandas-dev/pandas-stubs#1270].

        - [User guide] mentions `include_groups` 4 times without deprecation.
        - [`DataFrameGroupBy.apply`] doc says the default value of `True` is deprecated since `2.2.0`.
        - `False` is explicitly the only *non-deprecated* option, but entirely omitted since [pandas-dev/pandas-stubs#1268].

        [pandas-dev/pandas-stubs#1270]: https://github.com/pandas-dev/pandas-stubs/issues/1270
        [User guide]: https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html
        [`DataFrameGroupBy.apply`]: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.core.groupby.DataFrameGroupBy.apply.html
        [pandas-dev/pandas-stubs#1268]: https://github.com/pandas-dev/pandas-stubs/pull/1268
        )   r   F)include_groups)warn_complex_group_byrU   ri   _apply_exprs_functionra   apply	is_pandas_backend_version)rR   r   implfuncr   s        rF   r   zPandasLikeGroupBy._apply_aggs=  si     	~~--))%0##>> 5 5 76 Ae44T{rH   c                      j                   j                         j                  j                  d fd}|S )Nc                    j                   j                  |       fdD        }|rt        | ng g f\  }} ||      j                  S )Nc              3     K   | ]6  } |      D ])  }|j                   j                  d    |j                  f + 8 yw)r   N)rf   ilocrD   )r   rK   r   rU   s      rF   r   zFPandasLikeGroupBy._apply_exprs_function.<locals>.fn.<locals>.<genexpr>W  sJ       O  !!!$dii00s   <?)r   context)rU   r   r   rf   )	rX   results	out_group	out_namesrU   r   into_seriesr}   rR   s	       @rF   fnz3PandasLikeGroupBy._apply_exprs_function.<locals>.fnU  sU    33B7I!G
 <C:w#7R Iyy	2FMMMrH   )rX   pd.DataFramer   zpd.Series[Any])rU   rb   _seriesr   )rR   r   r   r   r}   s   `` @@rF   r   z'PandasLikeGroupBy._apply_exprs_functionQ  s7    ^^224jj..	N 	N 	rH   c              #  2  K   t        j                         5  t        j                  ddt               | j                  j
                  }| j                  D ](  \  }}|  ||      j                  | j                   f * 	 d d d        y # 1 sw Y   y xY ww)Nignorez#.*a length 1 tuple will be returned)messagecategory)	warningscatch_warningsfilterwarningsFutureWarningrU   r   ra   r   r   )rR   with_nativekeygroups       rF   __iter__zPandasLikeGroupBy.__iter__a  s     $$& 	W##=&
 ..55K"mm W
U<K.<<d>T>TUVVW	W 	W 	Ws   BA*B	BBBN)r   r   )rX   r   r   z(Sequence[PandasLikeExpr] | Sequence[str]r   r   r   r   )r   r   r   r   )r   zSequence[AggExpr]rX   r   r   r   )r   zIterable[AggExpr]r   z#list[pd.DataFrame | pd.Series[Any]])r   Iterable[PandasLikeExpr]r   r   )r   r   r   r   )r   z)Iterator[tuple[Any, PandasLikeDataFrame]])r   r   r   r   r   r   rV   rS   r   r   r   r   r   r   rO   rH   rF   r   r      s   NuNN 	(N 	u	N
 	uN 	N 	uN 	uN 	vN 	IN 	N 	JN 	uN 	uN 	N  	!N" 	U#NKJ & '&EO  8((K 

 7
 
 

274
.?

	
6&6	,6
( 	WrH   r   r   r   c                     d} t        |       S )zJDon't even attempt this, it's way too inconsistent across pandas versions.au  No results for group-by aggregation.

Hint: you were probably trying to apply a non-elementary aggregation with a pandas-like API.
Please rewrite your query such that group-by aggregations are elementary. For example, instead of:

    df.group_by('a').agg(nw.col('b').round(2).mean())

use:

    df.with_columns(nw.col('b').round(2)).group_by('a').agg(nw.col('b').mean())

)
ValueError)r   s    rF   r   r   m  s    	^  c?rH   c                 $    t        dt               y )Na)  Found complex group-by expression, which can't be expressed efficiently with the pandas API. If you can, please rewrite your query such that group-by aggregations are simple (e.g. mean, std, min, max, ...). 

Please see: https://narwhals-dev.github.io/narwhals/concepts/improve_group_by_operation/)r   UserWarningrO   rH   rF   r   r   |  s    	W
 	rH   )rD   r5   rE   zUnpack[ScalarKwargs]r   r6   )r   r   )r   r   )<
__future__r   r   	functoolsr   	itertoolsr   operatorr   typingr   r   r	   r
   narwhals._compliantr   narwhals._exceptionsr   narwhals._expression_parsingr   narwhals._pandas_like.utilsr   narwhals._utilsr   narwhals.dependenciesr   collections.abcr   r   r   r   r   pandaspdpandas.api.typingr   _NativeGroupBytyping_extensionsr   r   narwhals._compliant.typingr   r   narwhals._pandas_like.dataframer   narwhals._pandas_like.exprr   r   r   r   r!   r5   r6   r7   r<   rG   rJ   r   r   r   rO   rH   rF   <module>r     sM   "    ! 8 8 , . J < & :OOD3LC9OM9OCY C*1-*@ i @&			
	
	
	
			 '"  9 , hH
I H @  	  6 F B  2& &< <DZW&(8:KKLZWzrH   