
    bik                       d dl mZ d dlZd dlmZmZ d dlmZ d dlm	Z	m
Z
mZmZ d dlZd dlmZ d dlmZmZ d dlmZ d d	lmZmZ d d
lmZmZ d dlmZ  ede      Z ed      Z eej@                        Z!e!jD                  dk\  Z#d Z$dddZ%d dZ&d!d"dZ' G d deeef         Z( G d d      Z) ejT                  e)      d        Z+d Z,d Z-d Z.d Z/d Z0y)#    )annotationsN)OrderedDictUserDict)Hashable)AnyLiteralTypeVarcast)Version)configis_dask_collection)is_string_dtype)is_dataframe_likeis_series_like)_tokenize_deterministicnormalize_tokenget_default_shuffle_methodK)boundV   c                $   t        |t              r)t        j                  j                  j                  |      }t        |t        j                        }|r?t        |t        j                  j                  t        j                  j                  f      sy| j                  ret        j                  t        t        | j                              | j                        }|j                  ||      j                   }t#        |      S | j                  S )a  Maybe calculate new divisions by periods of size freq

    Used to shift the divisions for the `shift` method. If freq isn't a fixed
    size (not anchored or relative), then the divisions are shifted
    appropriately.

    Returning None, indicates divisions ought to be cleared.

    Parameters
    ----------
    df : dd.DataFrame, dd.Series, or dd.Index
    periods : int
        The number of periods to shift.
    freq : DateOffset, timedelta, or time rule string
        The frequency to shift by.
    N)index)freq)
isinstancestrpdtseriesfrequencies	to_offset
DateOffsetoffsetsTickDayknown_divisionsSeriesrangelen	divisionsshiftr   tuple)dfperiodsr   	is_offsetdivsr*   s         Y/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/dask/dataframe/dask_expr/_util.py_calc_maybe_new_divisionsr2      s    " $zz%%//54/I$"**.. AB 	yys2<<01FJJwTJ288	Y<<    c                b    | dvrt        d|        ddd}|rd|d <   |j                  | |       S )N)r      r   columnsNzNo axis named r   r5   )r   r6   )
ValueErrorget)axisnone_is_zeronumeric_axiss      r1   _validate_axisr<   <   sG    33>$011>?A4NLTD$''r3   c                    | t        | t              r	 | S t        | t              rt        |       } | S t        | d      r| j	                         } | S | g} | S )Ndtype)r   listr,   hasattrtolist)columns    r1   _convert_to_listrC   G   sb    ~FD1 M 
FE	"f
 M	 
	! M Mr3   c           
         |xs g }t        t        | j                        D cg c]0  \  }}|t        | j                        k\  s| j                  |   |vr|2 c}} S c c}}w N)r   	enumerateoperandsr)   _parameters)exprignoreiops       r1   _tokenize_partialrM   S   si     \rF" #4==1

2C(())T-=-=a-@-N 

 

s   5A
c                  <     e Zd ZdZd fdZd fdZd fdZ xZS )LRUzILimited size mapping, evicting the least recently looked-up key when fullc                N    t         |           t               | _        || _        y rE   )super__init__r   datamaxsize)selfrT   	__class__s     r1   rR   zLRU.__init__c   s    M	r3   c                x    t         |   |      }t        t        | j                        j                  |       |S rE   )rQ   __getitem__r
   r   rS   move_to_endrU   keyvaluerV   s      r1   rX   zLRU.__getitem__h   s0    #C([$))$005r3   c                    t        |       | j                  k\  r*t        t        | j                        j                  d       t        |   ||       y )NF)last)r)   rT   r
   r   rS   popitemrQ   __setitem__rZ   s      r1   r`   zLRU.__setitem__m   s>    t9$dii(00e0<C'r3   )rT   floatreturnNone)r[   r   rb   r   )r[   r   r\   r   rb   rc   )__name__
__module____qualname____doc__rR   rX   r`   __classcell__)rV   s   @r1   rO   rO   `   s    S

( (r3   rO   c                  V    e Zd ZdZd Zej                  d        Zd Zd	dZ	d Z
d
dZy)_BackendDatazHelper class to wrap backend data

    The primary purpose of this class is to provide
    caching outside the ``FromPandas`` class.
    c                2    || _         t        d      | _        y )N
   )_datarO   _division_info)rU   rS   s     r1   rR   z_BackendData.__init__z   s    
!"gr3   c                2    ddl m}  || j                        S )Nr   )r   )dask.tokenizer   rm   )rU   r   s     r1   _tokenz_BackendData._token~   s    9&tzz22r3   c                ,    t        | j                        S rE   )r)   rm   rU   s    r1   __len__z_BackendData.__len__   s    4::r3   c                z    	 t         j                  | |      S # t        $ r t        | j                  |      cY S w xY wrE   )object__getattribute__AttributeErrorgetattrrm   )rU   r[   s     r1   __getattr__z_BackendData.__getattr__   s:    	,**455 	,4::s++	,s    ::c                2    t        |       | j                  ffS rE   )typerm   rs   s    r1   
__reduce__z_BackendData.__reduce__   s    DzDJJ=((r3   Nc                T     t        |       | j                  j                               S rE   )r|   rm   copy)rU   memodicts     r1   __deepcopy__z_BackendData.__deepcopy__   s    tDz$**//+,,r3   )r[   r   rb   r   rE   )rd   re   rf   rg   rR   	functoolscached_propertyrq   rt   rz   r}   r    r3   r1   rj   rj   s   s:    & 3 3
,)-r3   rj   c                    | j                   S rE   )rq   )rS   s    r1   normalize_data_wrapperr      s    ;;r3   c                b    ddl m} d }| D cg c]  } ||      r	 ||d      n| } }| S c c}w )Nr   )from_pandasc                N    t        |        xr t        |       xs t        |       S rE   )r   r   r   )xs    r1   _pd_series_or_dataframez3_maybe_from_pandas.<locals>._pd_series_or_dataframe   s&    %a((XnQ.?.WCTUVCWXr3   r5   )dask.dataframe.dask_exprr   )dfsr   r   r-   s       r1   _maybe_from_pandasr      sA    4Y PS
S!8!<;r1"D
SC
SJ Ts   ,c                .    | | S t               } | dk(  ry| S )Ndisktasksr   )shuffles    r1   _get_shuffle_preferring_orderr      s'     )*G&Nr3   c                    | j                   dk(  rHt        | d      r;| j                  t        k(  rt	        d|z        t        |       rt	        d|z        yyy)zv
    Utility function to raise an error if an object column does not support
    a certain operation like `mean`.
    r5   r>   z%`%s` not supported with object seriesz%`%s` not supported with string seriesN)ndimr@   r>   rv   r7   r   )r   funcnames     r1   _raise_if_object_seriesr      sZ    
 	vv{wq'*77fDxOPPQDxOPP   +{r3   c                    	 ddl m}  ||       S # t        $ r+ ddl m}m}m}  ||       xr  ||        xr	  ||        cY S w xY w)Nr   )is_any_real_numeric_dtype)is_bool_dtypeis_complex_dtypeis_numeric_dtype)pandas.api.typesr   ImportErrorr   r   r   )arr_or_dtyper   r   r   r   s        r1   _is_any_real_numeric_dtyper      s\    
>(66 	
 	WV \* 0$\220!,//	
	
s    1AAc                6    | xs t        j                  dd       S )Nzdataframe.shuffle.method)r   r8   )shuffle_methods    r1   get_specified_shuffler      s    IVZZ(BDIIr3   )r   T)r:   boolrb   zNone | Literal[0, 1])rb   list | NonerE   )rJ   r   rb   r   )1
__future__r   r   collectionsr   r   collections.abcr   typingr   r   r	   r
   pandasr   packaging.versionr   daskr   r   dask.dataframe._compatr   dask.dataframe.corer   r   rp   r   r   
dask.utilsr   r   r   __version__PANDAS_VERSIONmajorPANDAS_GE_300r2   r<   rC   rM   rO   rj   registerr   r   r   r   r   r   r   r3   r1   <module>r      s    "  - $ . .  % + 2 A B 1Cx CL($$)!H(	
((1a4. (&- -D ,' (		Q
"Jr3   