
    bi}             	         d dl mZ d dlZd dlZd dlZd dlZd dlmZmZm	Z	m
Z
 d dlmZ d dlmZmZ d dlmZmZmZ d dlZd dlZd dlZd dlmZ d dlmZ d d	lmZ d d
lm Z m!Z!m"Z" d dlm#Z$ d dlm%Z% d dl&m'Z' d dlm(Z) d dl*m+Z+ d dl,m-Z. d dl/Z0d dl1m2c m3Z3 d dl0m4Z4m5Z5 d dl6m7Z7 d dl8m9Z9 d dl:m;Z;m<Z< d dl,m=Z= d dl>m?Z?m@Z@mAZA d dlBmCZC d dlDmEZEmFZFmGZG d dlHmIZI d dlJmKZKmLZLmMZMmNZNmOZOmPZPmQZQmRZRmSZS d dlTmUZV d dl/mWZW d dlXmYZYmZZZm[Z[ d dl\m]Z] d dl^m_Z_ d dl`maZambZb d d lcmdZdmeZemfZfmgZgmhZhmiZimjZjmkZkmlZlmmZmmnZnmoZo d d!lpmqZqmrZr d d"lsmtZt d d#lumvZv d d$lwmxZxmyZymzZzm{Z{m|Z|m}Z}m~Z~mZmZmZmZmZmZmZmZmZ d d%lmZmZ d d&lmZmZmZmZ d d'lmZ d d(lmZmZmZmZmZmZmZmZmZ d d)lmZmZ d d*lmZmZmZmZ d d+lmZ d d,lmZmZmZmZm#Z#mZmZmZ d d-lmZmZ d d.lmZ d d/lmZmZmZmZmZmZmZmZmZmZmZmZ d d0lmZ dd1d2Zdhd3Zd4 Zdhd5Zd6a G d7 d8e?      Zd9 Zd:D ]  Z eee ej                  eeɬ;             ! d<D ]  Z eee ej                  eeɬ;             !  G d= d>eǫ      Z G d? d@eǫ      ZdAD ]D  Z eeeΫ      rJ eΫ        eee eee̫              eeeΫ      rJ eΫ        eee eeeͫ             F  G dB dCeͫ      Z G dD dEeǫ      ZdidFZdjdGZdkdHZdI Z eWj                  dJ      dKddej                  fdL       ZdldmdMZ eWj                  dJ      	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dndO       Z	 	 	 	 	 dodPZ	 	 	 	 	 dpdQZ eej                        	 	 	 	 	 	 	 	 	 	 	 dqdR       Z eej                        	 	 	 	 	 	 	 	 	 	 	 	 drdS       ZdeodddNdTdUZdsdVZdtdWZ eedXgY      dudZ       Z eej                        dhd[       Z eej                        dvd\       Zd] Zeeod6d6dNdNddd^d_       Zeeod6d6dNdNd`da       Zdb Zeodd6dcddZde Z	 ds	 	 	 	 	 dwdfZ ej                  eǫ      dg        Zy)x    )annotationsN)CallableHashableIterableMapping)wraps)IntegralNumber)AnyClassVarLiteral)stringify_path)parse)CategoricalDtype)is_bool_dtypeis_datetime64_any_dtypeis_numeric_dtype)	is_scalar)is_timedelta64_dtype)is_extension_array_dtype)fsfirst)computeget_annotationsnew_collection)OptimizerStage)DictTaskRef)Array)DaskMethodsMixinis_dask_collectionnamed_schedulers)flatten)PANDAS_GE_210PANDAS_GE_220PANDAS_VERSION)CachedAccessor)	_concat_convert_to_numeric_repr_data_series_sqrt_and_convert_to_timedeltacheck_divisionshas_parallel_typeis_dataframe_likeis_series_likemeta_warning_exprdataframe_creation_dispatch)CategoricalAccessor
CategorizeGetCategories)Concat)DatetimeAccessor)DescribeNonNumericDescribeNumeric)BFillDiffEvalFFillFillnaCheckQueryShift
ToDatetime	ToNumericToTimedelta_DelayedExpr
no_default)JoinRecursiveMerge)SeriesQuantile)RepartitionQuantiles)CorrCovCustomReductionDropDuplicates
IndexCountIsMonotonicDecreasingIsMonotonicIncreasingLenMemoryUsageFrameMemoryUsageIndexMomentNLargest	NSmallest
PivotTableUniqueValueCounts)RepartitionRepartitionFreq)RearrangeByColumnSetIndexSetIndexBlockwise
SortValues)StringAccessor)	PANDAS_GE_300_BackendData_convert_to_list_get_shuffle_preferring_order_is_any_real_numeric_dtype_maybe_from_pandas_raise_if_object_series_validate_axisget_specified_shuffle)FromPandasDivisionsFromScalars)get_parallel_typeis_categorical_dtype	make_metameta_nonempty)warn_dtype_mismatch)AttributeNotImplementedErrorhas_known_categoriesindex_summaryinsert_meta_param_descriptionr   meta_frame_constructormeta_series_constructorpyarrow_strings_enabled)Delayeddelayed)_tokenize_deterministic)IndexCallableMderived_fromget_default_shuffle_methodget_meta_libraryis_arraylike	key_splitmaybe_pluralizememory_repr	put_linesrandom_state_datatypename)get_templatewrap_apic           	         | J  | |D cg c]   }t        |t              r|j                  n|" c}i |}t        |t        j                        rt	        |      S |S c c}w N)
isinstance	FrameBaseexprExprr   )r   argskwargsargresults        _/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/dask/dataframe/dask_expr/_collection.py_wrap_expr_apir      sf    EI	Jcji0#((c
9	J
F &$))$f%%M 
Ks   %Ac                   |J t        |t              r|j                  }nrt        |t        j                        rXt        || j                  | j                        }| j                  dk(  r't        | j                        r|| j                  d      }t        |t        j                        st        |      st        |      r| j                  |      j                  }t        |t        j                        s%t         t        | j                  |      |            S t        j                   | j                  |      r%t         t        | j                  |      |            S t        t        j"                  | ||            S )Nindexcolumns   r   )r   r   r   dar!   from_dask_arrayr   r   ndimlenr   r0   r1   _create_alignable_framer   getattrare_co_alignedOpAlignPartitions)selfotherops      r   _wrap_expr_opr      s   >>%#

	E288	$TZZN99>c$,,/$,,q/*E udii(e$% ,,U388eTYY'4gdii4U;<<			TYY	.4gdii4U;<<d44T5"EFF    c                r     |t         k(  rd fd	}n|t        k(  rd fd	}nt        d|       |_        |S )zz
    Add method operators to Series or DataFrame like DataFrame.add.
    _wrap_expr_method_operator("add", DataFrame)
    c           
        |t        d      t        |      }t        |      st        |      r |dv rt	        |      s| j                  |      }|dv r!t        |t              rd d}t        |      | }t        |t              rNt        j                  | j                  |j                        s$t        t        j                  |||||            S t        t        j                  |||||            S )Nlevel must be None)r   r   )r   r   z
Unable to z dd.Series with axis=1r   framer   axislevel
fill_value)nameleftrightr   r   r   )NotImplementedErrorrl   r0   r1   r#   r   r   Series
ValueErrorr   r   r   r   MethodOperatorAlignMethodOperator)r   r   r   r   r   msgr   r   s          r   methodz*_wrap_expr_method_operator.<locals>.method   s    )*>??!$'D "%(!%(L((/44U;~%eV,&tf,BCC$S/)E%+D4G4G		5::5 &,,##!##-	 	 "##)	 	r   c           
        |t        d      t        |      }t        |      rt        |      s| j	                  |      }| }t        |t              rNt        j                  | j                  |j                        s$t        t        j                  |||||            S t        t        j                  |||||            S )Nr   r   )r   r   r   r   r   r   )r   rl   r1   r#   r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   s         r   r   z*_wrap_expr_method_operator.<locals>.method   s     )*>??!$'De$-?-F44U;E%+D4G4G		5::5 &,,##!##-	 	 "##)	 	r   z)Cannot create method operator for class_=)r   NNNNr   )	DataFramer   r   __name__)r   class_r   s   `  r   _wrap_expr_method_operatorr      sD    
 *	X 
6	!	H "$Nvi"PQQFOMr   c                R    |J t         t        | j                  |                   S r   )r   r   r   )r   r   s     r   _wrap_unary_expr_opr     s(    >>0'$))R0233r   Tc            	          e Zd ZdZ e ej                  ded               Z ed       Zd Z	e
dd       Ze
d        Zej                  d        Ze
d	        Ze
d
        Ze
d        Ze
d        Ze
d        Zej*                  d        Zd Zd Ze
d        Zd Zd Zd Zd Zd ZddZddZdddZ dddZ!d Z"e
d        Z#d Z$d  Z%d! Z&d" Z'ddd#Z(d$ Z)ed%        Z*d& Z+d' Z,dd fd)Z-e
d*        Z.e
d+        Z/e/j*                  d,        Z/ddd-Z0ddd/Z1ddd0Z2ddd1Z3 e4e5jl                        d2        Z7d3 Z8e
d4        Z9d5 Z:e;d(ddd(d(f	 	 	 	 	 	 	 	 	 	 	 dd6Z< e4e5jl                        dd7       Z=d8 Z> e?d9:      e;ddd(d(ddd;d<       Z@ e?d9:      e;ddd(d(d=d>       ZA	 	 	 	 	 d	 	 	 	 	 	 	 dd?ZB	 d	 	 	 dd@ZCe
dA        ZDdB ZEdC ZFdD ZG e4e5jl                        	 	 	 	 	 ddF       ZHdG ZI e4e5jl                        	 	 	 	 	 ddH       ZJeJZK e4e5jl                        	 	 	 	 	 ddI       ZL e4e5jl                        	 	 	 	 	 ddJ       ZMdK ZN e4e5jl                        	 	 	 	 ddL       ZO e4e5jl                        	 	 	 	 	 ddM       ZPePZQ e4e5jl                        	 ddN       ZRdO ZS	 ddPZT	 	 	 	 	 ddQZU e4e5jl                        	 ddR       ZV e4e5jl                        ddS       ZW e4e5jl                        ddT       ZX e4e5jl                        ddU       ZY e4e5jl                        ddV       ZZ e4e5jl                        ddW       Z[ e4e5jl                        ddX       Z\ e4e5jl                        ddY       Z] e4e5jl                        dZ        Z^ e4e5jl                        d[        Z_ e4e5jl                        d\        Z` e4e5jl                        dd]       Za e4e5jl                        d^        Zbdd_Zc e4e5jl                        dd`       Zd e4e5jl                        eej                  fda       Zg e4e5jl                        eej                  fdb       Zh e4e5jl                        de;d(fdc       Zi e4e5jl                        ddd       Zj e4e5jl                        dde       Zk e4e5jl                        ddf       Zl e4e5jl                        ddg       Zm e4e5jl                        ddh       Zn e4e5jl                        e;e;e;dEfdi       ZoddjZp e4e5jl                        ddk       ZqddlZr e4e5jl                        ddm       Zs e4e5jl                        ddn       Zt e4e5jl                        ddo       Zu e4e5jl                        ddp       Zvdde;dddddfdqZwdddrZxe
ds        Zy e4e5jl                        dt        Zz e4e5jl                        du        Z{dddvZ|e}d.dwdddxdy       Z~dz Z	 	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 dd{Zd| Zd} Zdd~Zd ZddZddZddZdddZ e4e5j                        e;fd       Z e4e5jl                        d        ZddZd Ze
d        Z xZS )r   z&Base class for Expr-backed Collectionsthreadssyncc                    | S r    )dskkeysr   s      r   <lambda>zFrameBase.<lambda>0  s     r   c                l    t         r't               x}rda t        j                  d| d       || _        y )NFzDask annotations zA detected. Annotations will be ignored when using query-planning.)_WARN_ANNOTATIONSr   warningswarnr4   )r   r   annots      r   __init__zFrameBase.__init__2  s:    ?+<"<%"< %MM#E7*kl 
r   c                    | j                   S r   r3   r   s    r   r   zFrameBase.expr;  s    zzr   c                .    | j                   j                  S r   )r   _metar   s    r   r   zFrameBase._meta?  s    yyr   c                ,    t        | j                        S r   )rs   r   r   s    r   _meta_nonemptyzFrameBase._meta_nonemptyC  s    TZZ((r   c                .    | j                   j                  S )a  
        Tuple of ``npartitions + 1`` values, in ascending order, marking the
        lower/upper bounds of each partition's index. Divisions allow Dask
        to know which partition will contain a given value, significantly
        speeding up operations like `loc`, `merge`, and `groupby` by not
        having to search the full dataset.

        Example: for ``divisions = (0, 10, 50, 100)``, there are three partitions,
        where the index in each partition contains values [0, 10), [10, 50),
        and [50, 100], respectively. Dask therefore knows ``df.loc[45]``
        will be in the second partition.

        When every item in ``divisions`` is ``None``, the divisions are unknown.
        Most operations can still be performed, but some will be much slower,
        and a few may fail.

        It is not supported to set ``divisions`` directly. Instead, use ``set_index``,
        which sorts and splits the data as needed.
        See https://docs.dask.org/en/latest/dataframe-design.html#partitions.
        )r   	divisionsr   s    r   r   zFrameBase.divisionsG  s    , yy"""r   c                .    | j                   j                  S )zReturn number of partitions)r   npartitionsr   s    r   r   zFrameBase.npartitions_  s     yy$$$r   c                B    | j                   j                  j                  S )zReturn data types)r   r   dtypesr   s    r   r   zFrameBase.dtypesd  s     yy%%%r   c                @    t        | j                  j                        S )zSize of the Series or DataFrame as a Delayed object.

        Examples
        --------
        >>> series.size  # doctest: +SKIP
        <dask_expr.expr.Scalar: expr=df.size(), dtype=int64>
        )r   r   sizer   s    r   r   zFrameBase.sizei  s     diinn--r   c                .    | j                   j                  S r   )r   r   r   s    r   r   zFrameBase.columnst  s    zz!!!r   c                    t        |      t        | j                        k7  r|| j                  j                  _        t	        j
                  | |      | _        y r   )r   r   r4   r   r   ColumnsSetter)r   r   s     r   r   zFrameBase.columnsx  s=    w<3t||,,'.DJJ$''g6
r   c                >    t        t        j                  |             S )zhForget division information.

        This is useful if the divisions are no longer meaningful.
        )r   r   ClearDivisionsr   s    r   clear_divisionszFrameBase.clear_divisions  s    
 d11$788r   c                F    t        t        |             j                         S r   )r   rU   r   r   s    r   __len__zFrameBase.__len__  s    c$i(0022r   c                    t        d      )Nz&nbytes is not implemented on DataFramer   r   s    r   nbyteszFrameBase.nbytes  s    !"JKKr   c                (    t         | j                  ffS r   )r   r4   r   s    r   
__reduce__zFrameBase.__reduce__  s    

},,r   c                   t        |t              rwt        j                  | j                  |j                        st	        t        j
                  | |            S t	        | j                  j                  |j                              S t        |t              rddlm	} t        d |j                  |j                  |j                  fD              }| j                  dk(  r3|r1 || j                  j                         rt"        r| j$                  |   S | j&                  |   S t        |t(        j*                        st-        |      rt/        |      }nKt        |t.              r|j1                         }n*t        |t(        j2                        r|j5                         }t	        | j                  j                  |            S )Nr   )is_float_dtypec              3  <   K   | ]  }t        |t                y wr   )r   r	   ).0is     r   	<genexpr>z(FrameBase.__getitem__.<locals>.<genexpr>  s      #,-
1h'#      )r   r   r   r   r   FilterAlign__getitem__slicepandas.api.typesr   anystartstepstopr   r   dtypere   iloclocnpndarrayr1   listcopygenericitem)r   r   r   is_integer_slices       r   r   zFrameBase.__getitem__  s9   eY'&&tyy%**=%d&6&6tU&CDD!$))"7"7

"CDDu%7" #27++uzz5::1V#   		Q$'

(8(89]yy''xx&eRZZ(N5,AKEt$JJLErzz*JJLEdii33E:;;r   c                X    t        |       j                  | j                  j                  fS r   )typer   r4   _namer   s    r   __dask_tokenize__zFrameBase.__dask_tokenize__  s!    Dz""DJJ$4$444r   c           	        | j                         j                  d      }d}t        | t              s>t	        | j
                        s)|j                  d      d   j                  dd      }d| }t	        | j                  j                         D ch c]  }|j                   c}      }|j                  | j                  j                  |t        | j                        t        |d	      | j                  
      S c c}w )N   max_rowszFDask {klass} Structure:
{data}
Dask Name: {name}, {n_expr}
Expr={expr}
Index	DivisionsEmpty 
expression)klassdatar   n_exprr   )
_repr_data	to_stringr   r   r   r   	partitionreplacer   walkr  format	__class__r   r   r   )r   r  _str_fmter  s        r   __repr__zFrameBase.__repr__  s     **A*6 $'DLL0A>>$'+33G[IDz*Htyy~~'78!agg89..))4::&"6<8  
 	
 9s   C=c                H    t        d| j                  j                   d      )NzThe truth value of a z& is ambiguous. Use a.any() or a.all().)r   r   r   r   s    r   __bool__zFrameBase.__bool__  s+    #DNN$;$;#< =& &
 	
r   Nc                H    t        j                  | j                               S r   )r  arrayr   )r   r   r   s      r   	__array__zFrameBase.__array__  s    xx''r   Tc                R    | j                  |      }t        j                  |fi |S Nfuse)optimizer"   persist)r   r,  r   outs       r   r.  zFrameBase.persist  s(    mmm&''6v66r   c                    | }t        |t              s|j                  d      }|j                  j	                  ||      S )a  Outputs statistics about every node in the expression.

        analyze optimizes the expression and triggers a computation. It records statistics
        like memory usage per partition to analyze how data flow through the graph.

        .. warning::
            analyze adds plugins to the scheduler and the workers that have a non-trivial
            cost. This method should not be used in production workflows.

        Parameters
        ----------
        filename: str, None
            File to store the graph representation.
        format: str, default is png
            File format for the graph representation.

        Returns
        -------
            None, but writes a graph representation of the expression enriched with
            statistics to disk.
        r   r   )filenamer  )r   Scalarrepartitionr   analyze)r   r2  r  r/  s       r   r5  zFrameBase.analyze  s=    , #v&//a/0Cxx&AAr   c                    | }t        |t              s|j                  d      }|j                  j	                  ||      S )a7  Create a graph representation of the Expression.

        explain runs the optimizer and creates a graph of the optimized expression
        with graphviz. No computation is triggered.

        Parameters
        ----------
        stage: {"logical", "simplified-logical", "tuned-logical", "physical", "simplified-physical", "fused"}
            The optimizer stage that is returned. Default is "fused".

            - logical: outputs the expression as is
            - simplified-logical: simplifies the expression which includes predicate
              pushdown and column projection.
            - tuned-logical: applies additional optimizations like partition squashing
            - physical: outputs the physical expression; this expression can actually
              be computed
            - simplified-physical: runs another simplification after the physical
              plan is generated
            - fused: fuses the physical expression to reduce the nodes in thr graph.

            .. warning::
                The optimizer stages are subject to change.
        format: str, default None
            The format of the output. Default is "png".

        Returns
        -------
            None, but opens a new window with the graph visualization and outputs
            a file with the graph representation.
        r   r1  )r   r3  r4  r   explain)r   stager  r/  s       r   r7  zFrameBase.explain  s:    > #v&//a/0Cxxv..r   c                6    | j                   j                         S )zOutputs a string representation of the DataFrame.

        The expression is returned as is. Please run optimize manually if necessary.

        Returns
        -------
            None, the representation is put into stdout.
        )r   pprintr   s    r   r:  zFrameBase.pprint  s     yy!!r   c                "    | j                         S r   )__dask_graph__r   s    r   daskzFrameBase.dask  s    ""$$r   c                Z    | j                   }|j                         }|j                         S r   )r   lower_completelyr<  r   r/  s     r   r<  zFrameBase.__dask_graph__  s(    ii""$!!##r   c                Z    | j                   }|j                         }|j                         S r   )r   r?  __dask_keys__r@  s     r   rB  zFrameBase.__dask_keys__!  s(    ii""$  ""r   c                H    t        | j                  j                               S r   )r   r   simplifyr   s    r   rD  zFrameBase.simplify&  s    dii00233r   c                J    t        | j                  j                  i             S r   )r   r   
lower_oncer   s    r   rF  zFrameBase.lower_once)  s    dii222677r   c                L    t        | j                  j                  |            S )a2  Optimizes the DataFrame.

        Runs the optimizer with all steps over the DataFrame and wraps the result in a
        new DataFrame collection. Only use this method if you want to analyze the
        optimized expression.

        Parameters
        ----------
        fuse: bool, default True
            Whether to fuse the expression tree after running the optimizer.
            It is often easier to look at the non-fused expression when analyzing
            the result.

        Returns
        -------
            The optimized Dask Dataframe
        r+  r   r   r-  )r   r,  s     r   r-  zFrameBase.optimize,  s!    $ dii00d0;<<r   c                    t        | j                  j                               }t        |       t        |      k7  r|j	                         S t
        dfS Nr   )r   r   r?  r
  __dask_postcompute__r*   )r   states     r   rK  zFrameBase.__dask_postcompute__@  sB    tyy99;<:e$--//{r   c                2    t        | ||t        |       |      S r   )
from_graphsorted)futuresmetar   r   s       r   _postpersistzFrameBase._postpersistF  s"    7O
 	
r   c                z    t         j                  | j                  | j                  t	        | j
                        ffS r   )r   rR  r   r   r   r  r   s    r   __dask_postpersist__zFrameBase.__dask_postpersist__P  s6    %%JJNN djj!(
 
 	
r   c                   	 t         j                  | |      S # t        $ r]}	 t        | j                  |      }t        |      r t        j                  t        |      cY d }~S |cY d }~S # t        $ r |w xY wd }~ww xY w)Nr   )	object__getattribute__AttributeErrorr   r   callable	functoolspartialr   )r   keyerrvals       r   __getattr__zFrameBase.__getattr__Y  ss    	**455 
		 dii-C=$,,^cJJ
! 	
	s2    	A>;A*A>#A*$A>*A66A99A>Fc                ^    |rt        |   di |S  | j                  j                  di |S )zVisualize the expression or task graph

        Parameters
        ----------
        tasks:
            Whether to visualize the task graph. By default
            the expression graph will be visualized instead.
        r   )super	visualizer   )r   tasksr   r   s      r   rb  zFrameBase.visualizei  s5     7$.v.."tyy"",V,,r   c                .    | j                   j                  S )zWhether the divisions are known.

        This check can be expensive if the division calculation is expensive.
        DataFrame.set_index is a good example where the calculation needs an
        inspection of the data.
        )r   known_divisionsr   s    r   re  zFrameBase.known_divisionsv  s     yy(((r   c                @    t        | j                  j                        S )zReturn dask Index instance)r   r   r   r   s    r   r   zFrameBase.index  s     diioo..r   c                    t        j                  | j                   |j                         sJ d       t        j                  | |      }|| _        y )Nz(value needs to be aligned with the index)r   r   AssignIndexr4   )r   valuer4   s      r   r   zFrameBase.index  sJ    ""IIuzz
 	65	6 
   u-
r   c                @    t        t        j                  | |            S )a{  Reset the index to the default index.

        Note that unlike in ``pandas``, the reset index for a Dask DataFrame will
        not be monotonically increasing from 0. Instead, it will restart at 0
        for each partition (e.g. ``index1 = [0, ..., 10], index2 = [0, ...]``).
        This is due to the inability to statically know the full length of the
        index.

        For DataFrame with multi-level index, returns a new DataFrame with
        labeling information in the columns under the index names, defaulting
        to 'level_0', 'level_1', etc. if any are None. For a standard index,
        the index name will be used (if set), otherwise a default 'index' or
        'level_0' (if 'index' is already taken) will be used.

        Parameters
        ----------
        drop : boolean, default False
            Do not try to insert index into dataframe columns.
        )r   r   
ResetIndex)r   drops     r   reset_indexzFrameBase.reset_index  s    ( doodD9::r   r   c                l    t        t        j                  | ||            }|r|j                         }|S )a0  First n rows of the dataset

        Parameters
        ----------
        n : int, optional
            The number of rows to return. Default is 5.
        npartitions : int, optional
            Elements are only taken from the first ``npartitions``, with a
            default of 1. If there are fewer than ``n`` rows in the first
            ``npartitions`` a warning will be raised and any found rows
            returned. Pass -1 to use all partitions.
        compute : bool, optional
            Whether to compute the result, default is True.
        )nr   )r   r   Headr   )r   ro  r   r   r/  s        r   headzFrameBase.head  s.     TYYtqkJK++-C
r   c                j    t        t        j                  | |            }|r|j                         }|S )zkLast n rows of the dataset

        Caveat, the only checks the last n rows of the last partition.
        )ro  )r   r   Tailr   )r   ro  r   r/  s       r   tailzFrameBase.tail  s,    
 TYYtq12++-C
r   c                J    |durt        d      t        | j                        S )a  Make a copy of the dataframe

        This is strictly a shallow copy of the underlying computational graph.
        It does not affect the underlying data

        Parameters
        ----------
        deep : boolean, default False
            The deep value must be `False` and it is declared as a parameter just for
            compatibility with third-party libraries like cuDF and pandas
        FzfThe `deep` value must be False. This is strictly a shallow copy of the underlying computational graph.)r   r   r   r   deeps     r   r  zFrameBase.copy  s/     u9  dii((r   c                l   t        | t              r&t        t        j                  t        j                  f}nt        f}t        ||      rnt        |t              r>|j
                  dk(  r/|j                  dk(  r t        t        j                  | |            S t        dt        t        |            z        t        |t              r	 t        t        |            }t!        d |D              snt        j"                  j$                  j'                  |d      }h d}||v r&t)        j*                  |t,        t/        |            }nt)        j0                  |      }t        t        j                  | t        j2                  t5        |d	t7        |      z   
                        S # t        $ r Y w xY w)Nr   )valueszPassing a %r to `isin`c              3  2   K   | ]  }t        |        y wr   )r#   )r   vs     r   r   z!FrameBase.isin.<locals>.<genexpr>  s     =)!,=s   Fskipna>   mixed-integerunknown-arraytimemixedperioddecimalcategorical)r   countdelayed-r   )r   r   r   pdr   r   r   r   r   Isinr   r   r
  r  set	TypeErrorr   apitypesinfer_dtyper  fromiterrV  r   asarrayrH   r}   r~   )r   ry  	bad_typesinferred_typeobject_likes        r   isinzFrameBase.isin  s[   dI&"BIIr||<I"Ifi(69-KK1$&&!+ &diiV&DEE%&>$v,AW&WXX
 fd#
c&k* =f== " 8 8 8 N !K/[[vS[QFZZ/FII((F6Mf6U)UV
 	
'  s   	F' '	F32F3c                0   t        |t              r|g}t        j                  | j                  t
              |   j                         }t        |      j                  t        | j                              sJ t        t        j                  | |            S )N)r   )r   intr  aranger   rV  tolistr  issubsetranger   r   
Partitionsr   r   s     r   _partitionszFrameBase._partitions  sv     eS!GE		$**&9%@GGI 5z""5)9)9#:;;; doodE:;;r   c                ,    t        | j                        S )a  Slice dataframe by partitions

        This allows partitionwise slicing of a Dask Dataframe.  You can perform normal
        Numpy-style slicing, but now rather than slice elements of the array you
        slice along partitions so, for example, ``df.partitions[:5]`` produces a new
        Dask Dataframe of the first five partitions. Valid indexers are integers, sequences
        of integers, slices, or boolean masks.

        Examples
        --------
        >>> df.partitions[0]  # doctest: +SKIP
        >>> df.partitions[:3]  # doctest: +SKIP
        >>> df.partitions[::10]  # doctest: +SKIP

        Returns
        -------
        A Dask DataFrame
        )r   r  r   s    r   
partitionszFrameBase.partitions  s    ( T--..r   c                    d|cxk  r| j                   k  sn d| j                    }t        |      | j                  |   S )ap  
        Get a dask DataFrame/Series representing the `nth` partition.

        Parameters
        ----------
        n : int
            The 0-indexed partition number to select.

        Returns
        -------
        Dask DataFrame or Series
            The same type as the original object.

        See Also
        --------
        DataFrame.partitions
        r   zn must be 0 <= n < )r   r   r  )r   ro  r   s      r   get_partitionzFrameBase.get_partition/  sC    $ A(((('(8(8'9:CS/!q!!r   c                    |t         u r|st        d      |t         ur|rt        d      |xs | j                  }t        |t              r5t        j                  | j
                  |j
                        st        d      t        j                  j                  j                  |      rt        |      st        |      }nt        |t        t        f      r|g}n|rg }|D cg c]+  }|| j                  vr|| j                   j"                  k7  r|- }	}|	rt%        d|	 d      |xs
 t'               dk(  rddlm}
  |
| j,                         t/        d	 | j,                  j                  D              rL| j,                  j                  D ci c]  }t        |t              r|t1        |      ! }}t        d
|       t3        t5        | |||t7        |      ||            }|r|j9                  d |j,                        S |S c c}w c c}w )aC  Rearrange DataFrame into new partitions

        Uses hashing of `on` to map rows to output partitions. After this
        operation, rows with the same value of `on` will be in the same
        partition.

        Parameters
        ----------
        on : str, list of str, or Series, Index, or DataFrame
            Column names to shuffle by.
        ignore_index : optional
            Whether to ignore the index. Default is ``False``.
        npartitions : optional
            Number of output partitions. The partition count will
            be preserved by default.
        shuffle_method : optional
            Desired shuffle method. Default chosen at optimization time.
        on_index : bool, default False
            Whether to shuffle on the index. Mutually exclusive with 'on'.
            Set this to ``True`` if 'on' is not provided.
        force : bool, default False
            This forces the optimizer to keep the shuffle even if the final
            expression could be further simplified.
        **options : optional
            Algorithm-specific options.

        Notes
        -----
        This does not preserve a meaningful index/partitioning scheme. This
        is not deterministic if done in parallel.

        Examples
        --------
        >>> df = df.shuffle(df.columns[0])  # doctest: +SKIP
        z~Must shuffle on either columns or the index; currently shuffling on neither. Pass column(s) to 'on' or set 'on_index' to True.zgCannot shuffle on both columns and the index. Do not pass column(s) to 'on' or set 'on_index' to False.zAindex must be aligned with the DataFrame to use as shuffle index.zCannot shuffle on z', column(s) not in dataframe to shufflep2pr   )check_dtype_supportc              3  >   K   | ]  }t        |t                 y wr   r   strr   cs     r   r   z$FrameBase.shuffle.<locals>.<genexpr>  s     Faz!S))F   z0p2p requires all column names to be str, found: )index_shufflec                    | S r   r   xs    r   r   z#FrameBase.shuffle.<locals>.<lambda>  s     r   rQ  )rI   r  r   r   r   r   r   r  r  r  is_list_liker#   r  r  r  r   r   r   KeyErrorr   distributed.shuffle._arrowr  r   r   r
  r   r`   rm   map_partitions)r   onignore_indexr   shuffle_methodon_indexforceoptions	index_colbad_colsr  r  unsupportedress                 r   shufflezFrameBase.shuffleF  s   Z HM  z!h6  "5T%5%5b)$&&tyy"'':W  vv||((,5G5K"XBc
+T "$T\\1	TZZ__8T H 
 (
2YZ  :8:uDF

+F4::3E3EFF(,

(:(:#$*QPSBTAtAwJ   F{mT  %n5&

  %%k		%BB
O s   "0HH"Hc                &    ddl m}  || |||      S )Nr   )	Resampler)closedlabel)dask.dataframe.tseries.resampler  )r   ruler  r  r  s        r   resamplezFrameBase.resample  s    =tF%@@r   c                "    ddl m}  || |fi |S )a2  Provides rolling transformations.

        Parameters
        ----------
        window : int, str, offset
           Size of the moving window. This is the number of observations used
           for calculating the statistic. When not using a ``DatetimeIndex``,
           the window size must not be so large as to span more than one
           adjacent partition. If using an offset or offset alias like '5D',
           the data must have a ``DatetimeIndex``
        min_periods : int, default None
            Minimum number of observations in window required to have a value
            (otherwise result is NA).
        center : boolean, default False
            Set the labels at the center of the window.
        win_type : string, default None
            Provide a window type. The recognized window types are identical
            to pandas.
        axis : int, str, None, default 0
            This parameter is deprecated with ``pandas>=2.1``.

        Returns
        -------
        a Rolling object on which to call a method to compute a statistic
        r   )Rolling)!dask.dataframe.dask_expr._rollingr  )r   windowr   r  s       r   rollingzFrameBase.rolling  s    4 	>tV.v..r      padrQ  enforce_metadatatransform_divisionsr   align_dataframesparent_metarequired_columnsc               2    t        || g|	|||||||d|
S )a  Apply a Python function to each partition

        Parameters
        ----------
        func : function
            Function applied to each partition.
        args, kwargs :
            Arguments and keywords to pass to the function. Arguments and
            keywords may contain ``FrameBase`` or regular python objects.
            DataFrame-like args (both dask and pandas) must have the same
            number of partitions as ``self`` or comprise a single partition.
            Key-word arguments, Single-partition arguments, and general
            python-object arguments will be broadcasted to all partitions.
        enforce_metadata : bool, default True
            Whether to enforce at runtime that the structure of the DataFrame
            produced by ``func`` actually matches the structure of ``meta``.
            This will rename and reorder columns for each partition, and will
            raise an error if this doesn't work, but it won't raise if dtypes
            don't match.
        transform_divisions : bool, default True
            Whether to apply the function onto the divisions and apply those
            transformed divisions to the output.
        clear_divisions : bool, default False
            Whether divisions should be cleared. If True, `transform_divisions`
            will be ignored.
        required_columns : list or None, default None
            List of columns that ``func`` requires for execution. These columns
            must belong to the first DataFrame argument (in ``args``). If None
            is specified (the default), the query optimizer will assume that
            all input columns are required.
        $META

        Examples
        --------
        Given a DataFrame, Series, or Index, such as:

        >>> import pandas as pd
        >>> import dask.dataframe as dd
        >>> df = pd.DataFrame({'x': [1, 2, 3, 4, 5],
        ...                    'y': [1., 2., 3., 4., 5.]})
        >>> ddf = dd.from_pandas(df, npartitions=2)

        One can use ``map_partitions`` to apply a function on each partition.
        Extra arguments and keywords can optionally be provided, and will be
        passed to the function after the partition.

        Here we apply a function with arguments and keywords to a DataFrame,
        resulting in a Series:

        >>> def myadd(df, a, b=1):
        ...     return df.x + df.y + a + b
        >>> res = ddf.map_partitions(myadd, 1, b=2)
        >>> res.dtype
        dtype('float64')

        Here we apply a function to a Series resulting in a Series:

        >>> res = ddf.x.map_partitions(lambda x: len(x)) # ddf.x is a Dask Series Structure
        >>> res.dtype
        dtype('int64')

        By default, dask tries to infer the output metadata by running your
        provided function on some fake data. This works well in many cases, but
        can sometimes be expensive, or even fail. To avoid this, you can
        manually specify the output metadata with the ``meta`` keyword. This
        can be specified in many forms, for more information see
        ``dask.dataframe.utils.make_meta``.

        Here we specify the output is a Series with no name, and dtype
        ``float64``:

        >>> res = ddf.map_partitions(myadd, 1, b=2, meta=(None, 'f8'))

        Here we map a function that takes in a DataFrame, and returns a
        DataFrame with a new column:

        >>> res = ddf.map_partitions(lambda df: df.assign(z=df.x * df.y))
        >>> res.dtypes
        x      int64
        y    float64
        z    float64
        dtype: object

        As before, the output metadata can also be specified manually. This
        time we pass in a ``dict``, as the output is a DataFrame:

        >>> res = ddf.map_partitions(lambda df: df.assign(z=df.x * df.y),
        ...                          meta={'x': 'i8', 'y': 'f8', 'z': 'f8'})

        In the case where the metadata doesn't change, you can also pass in
        the object itself directly:

        >>> res = ddf.map_partitions(lambda df: df.head(), meta=ddf)

        Also note that the index and divisions are assumed to remain unchanged.
        If the function you're mapping changes the index/divisions, you'll need
        to pass ``clear_divisions=True``.

        >>> ddf.map_partitions(func, clear_divisions=True)  # doctest: +SKIP

        Your map function gets information about where it is in the dataframe by
        accepting a special ``partition_info`` keyword argument.

        >>> def func(partition, partition_info=None):
        ...     pass

        This will receive the following information:

        >>> partition_info  # doctest: +SKIP
        {'number': 1, 'division': 3}

        For each argument and keyword arguments that are dask dataframes you will
        receive the number (n) which represents the nth partition of the dataframe
        and the division (the first index value in the partition). If divisions
        are not known (for instance if the index is not sorted) then you will get
        None as the division.
        r  )r  )r   funcrQ  r  r  r   r  r  r  r   r   s              r   r  zFrameBase.map_partitions  sH    F 
 
 - 3+-#-
 
 	
r   rQ  r  r  r   r  c          	     2    t        || ||g|	|||||d|
S )a.  Apply a function to each partition, sharing rows with adjacent partitions.

        This can be useful for implementing windowing functions such as
        ``df.rolling(...).mean()`` or ``df.diff()``.

        Parameters
        ----------
        func : function
            Function applied to each partition.
        before : int, timedelta or string timedelta
            The rows to prepend to partition ``i`` from the end of
            partition ``i - 1``.
        after : int, timedelta or string timedelta
            The rows to append to partition ``i`` from the beginning
            of partition ``i + 1``.
        args, kwargs :
            Positional and keyword arguments to pass to the function.
            Positional arguments are computed on a per-partition basis, while
            keyword arguments are shared across all partitions. The partition
            itself will be the first positional argument, with all other
            arguments passed *after*. Arguments can be ``Scalar``, ``Delayed``,
            or regular Python objects. DataFrame-like args (both dask and
            pandas) will be repartitioned to align (if necessary) before
            applying the function; see ``align_dataframes`` to control this
            behavior.
        enforce_metadata : bool, default True
            Whether to enforce at runtime that the structure of the DataFrame
            produced by ``func`` actually matches the structure of ``meta``.
            This will rename and reorder columns for each partition,
            and will raise an error if this doesn't work,
            but it won't raise if dtypes don't match.
        transform_divisions : bool, default True
            Whether to apply the function onto the divisions and apply those
            transformed divisions to the output.
        align_dataframes : bool, default True
            Whether to repartition DataFrame- or Series-like args
            (both dask and pandas) so their divisions align before applying
            the function. This requires all inputs to have known divisions.
            Single-partition inputs will be split into multiple partitions.

            If False, all inputs must have either the same number of partitions
            or a single partition. Single-partition inputs will be broadcast to
            every partition of multi-partition inputs.
        $META

        Notes
        -----
        Given positive integers ``before`` and ``after``, and a function
        ``func``, ``map_overlap`` does the following:

        1. Prepend ``before`` rows to each partition ``i`` from the end of
           partition ``i - 1``. The first partition has no rows prepended.

        2. Append ``after`` rows to each partition ``i`` from the beginning of
           partition ``i + 1``. The last partition has no rows appended.

        3. Apply ``func`` to each partition, passing in any extra ``args`` and
           ``kwargs`` if provided.

        4. Trim ``before`` rows from the beginning of all but the first
           partition.

        5. Trim ``after`` rows from the end of all but the last partition.

        Examples
        --------
        Given a DataFrame, Series, or Index, such as:

        >>> import pandas as pd
        >>> import dask.dataframe as dd
        >>> df = pd.DataFrame({'x': [1, 2, 4, 7, 11],
        ...                    'y': [1., 2., 3., 4., 5.]})
        >>> ddf = dd.from_pandas(df, npartitions=2)

        A rolling sum with a trailing moving window of size 2 can be computed by
        overlapping 2 rows before each partition, and then mapping calls to
        ``df.rolling(2).sum()``:

        >>> ddf.compute()
            x    y
        0   1  1.0
        1   2  2.0
        2   4  3.0
        3   7  4.0
        4  11  5.0
        >>> ddf.map_overlap(lambda df: df.rolling(2).sum(), 2, 0).compute()
              x    y
        0   NaN  NaN
        1   3.0  3.0
        2   6.0  5.0
        3  11.0  7.0
        4  18.0  9.0

        The pandas ``diff`` method computes a discrete difference shifted by a
        number of periods (can be positive or negative). This can be
        implemented by mapping calls to ``df.diff`` to each partition after
        prepending/appending that many rows, depending on sign:

        >>> def diff(df, periods=1):
        ...     before, after = (periods, 0) if periods > 0 else (0, -periods)
        ...     return df.map_overlap(lambda df, periods=1: df.diff(periods),
        ...                           periods, 0, periods=periods)
        >>> diff(ddf, 1).compute()
             x    y
        0  NaN  NaN
        1  1.0  1.0
        2  2.0  1.0
        3  3.0  1.0
        4  4.0  1.0

        If you have a ``DatetimeIndex``, you can use a ``pd.Timedelta`` for time-
        based windows or any ``pd.Timedelta`` convertible string:

        >>> ts = pd.Series(range(10), index=pd.date_range('2017', periods=10))
        >>> dts = dd.from_pandas(ts, npartitions=2)
        >>> dts.map_overlap(lambda df: df.rolling('2D').sum(),
        ...                 pd.Timedelta('2D'), 0).compute()
        2017-01-01     0.0
        2017-01-02     1.0
        2017-01-03     3.0
        2017-01-04     5.0
        2017-01-05     7.0
        2017-01-06     9.0
        2017-01-07    11.0
        2017-01-08    13.0
        2017-01-09    15.0
        2017-01-10    17.0
        Freq: D, dtype: float64
        r  )map_overlap)r   r  beforeafterrQ  r  r  r   r  r   r   s              r   r  zFrameBase.map_overlapk  sH    ^ 	

 
 - 3+-
 
 	
r   c           
     *   t        |du|du|du|dug      dk7  rt        d      |t        |       |Gt        | j                  d   t
        j                        st        d      t        t        | |            S t        t        | |||||            S )a  Repartition a collection

        Exactly one of `divisions`, `npartitions` or `partition_size` should be
        specified. A ``ValueError`` will be raised when that is not the case.

        Parameters
        ----------
        divisions : list, optional
            The "dividing lines" used to split the dataframe into partitions.
            For ``divisions=[0, 10, 50, 100]``, there would be three output partitions,
            where the new index contained [0, 10), [10, 50), and [50, 100), respectively.
            See https://docs.dask.org/en/latest/dataframe-design.html#partitions.
        npartitions : int, Callable, optional
            Approximate number of partitions of output. The number of
            partitions used may be slightly lower than npartitions depending
            on data distribution, but will never be higher.
            The Callable gets the number of partitions of the input as an argument
            and should return an int.
        partition_size : str, optional
            Max number of bytes of memory for each partition. Use numbers or strings
            like 5MB. If specified npartitions and divisions will be ignored. Note that
            the size reflects the number of bytes used as computed by
            pandas.DataFrame.memory_usage, which will not necessarily match the size
            when storing to disk.

            .. warning::

               This keyword argument triggers computation to determine
               the memory size of each partition, which may be expensive.

        force : bool, default False
            Allows the expansion of the existing divisions.
            If False then the new divisions' lower and upper bounds must be
            the same as the old divisions'.
        freq : str, pd.Timedelta
            A period on which to partition timeseries data like ``'7D'`` or
            ``'12h'`` or ``pd.Timedelta(hours=12)``.  Assumes a datetime index.

        Notes
        -----
        Exactly one of `divisions`, `npartitions`, `partition_size`, or `freq`
        should be specified. A ``ValueError`` will be raised when that is
        not the case.

        Also note that ``len(divisions)`` is equal to ``npartitions + 1``. This is because ``divisions``
        represents the upper and lower bounds of each partition. The first item is the
        lower bound of the first partition, the second item is the lower bound of the
        second partition and the upper bound of the first partition, and so on.
        The second-to-last item is the lower bound of the last partition, and the last
        (extra) item is the upper bound of the last partition.

        Examples
        --------
        >>> df = df.repartition(npartitions=10)  # doctest: +SKIP
        >>> df = df.repartition(divisions=[0, 5, 10, 20])  # doctest: +SKIP
        >>> df = df.repartition(freq='7d')  # doctest: +SKIP

        See Also
        --------
        DataFrame.memory_usage_per_partition
        pandas.DataFrame.memory_usage
        Nr   zWPlease provide exactly one of the ``npartitions=`` or ``divisions=`` keyword arguments.r   z0Can only repartition on frequency for timeseries)sumr   r.   r   r   r  	Timestampr  r   r_   r^   )r   r   r   partition_sizefreqr  s         r   r4  zFrameBase.repartition  s    P T)t+"$.$	  4   I&dnnQ/> RSS!/$"=>>!D+y%QUV r   c                    |du r,t        | j                  t              j                               }| j                  }| j                  ||      }||_        |||_        |S )a  Convert a dask DataFrame to a dask array.

        Parameters
        ----------
        lengths : bool or Sequence of ints, optional
            How to determine the chunks sizes for the output array.
            By default, the output array will have unknown chunk lengths
            along the first axis, which can cause some later operations
            to fail.

            * True : immediately compute the length of each partition
            * Sequence : a sequence of integers to use for the chunk sizes
              on the first axis. These values are *not* validated for
              correctness, beyond ensuring that the number of items
              matches the number of partitions.
        meta : object, optional
            An optional `meta` parameter can be passed for dask to override the
            default metadata on the underlying dask array.
        optimize : bool
            Whether to optimize the expression before converting to an Array.

        Returns
        -------
        A Dask Array
        T)tupler  r   r   ry  _validate_chunks_chunksr   )r   lengthsrQ  r-  optimize_kwargsarrchunkss          r   to_dask_arrayzFrameBase.to_dask_arrayi  s]    8 d?D//4<<>?Gkk&&sG4CI
r   c                    t        | j                  j                        r<t        j                  d| j                  j                  j
                   dt               | j                  t        j                        S )zReturn a dask.array of the values of this dataframe

        Warning: This creates a dask.array without precise shape information.
        Operations that depend on shape information, like slicing or reshaping,
        will not work.
        z`Dask currently has limited support for converting pandas extension dtypes to arrays. Converting z to object dtype.)	r   r   ry  r   r   r   UserWarningr  methodsr   s    r   ry  zFrameBase.values  s`     $DJJ$5$56MM))-):):)@)@(AART
 ""7>>22r   c                p    | j                   j                  |      }t        |d         t        |d         fS Nr   r   )r   
__divmod__r   r   r   r   s      r   r  zFrameBase.__divmod__  s3    %%e,fQi(.*CCCr   c                p    | j                   j                  |      }t        |d         t        |d         fS r  )r   __rdivmod__r   r  s      r   r  zFrameBase.__rdivmod__  s3    &&u-fQi(.*CCCr   c                "    | j                         S r   )absr   s    r   __abs__zFrameBase.__abs__  s    xxzr   r   c                    | j                  |      }|dk(  r$| j                  t        j                  ||||      S t	        | j
                  j                  ||||            }| j                  ||      S Nr   )r}  numeric_onlyr   	min_count)rl   r  r   r  r   r   _apply_min_countr   r   r}  r  r  split_everyr   r   s           r   r  zFrameBase.sum  sy     ""4(19&&)# '    		flKQU VW$$VY77r   c                6   |r| j                         j                         |k\  }|j                  }t        |      s |j	                         }|j	                         }|j                  |t        j                        }t        |      s|j                         S |S |S )N)r   )	notnullr  r   r1   	to_serieswherer  nanmin)r   r   r  cond	cond_metas        r   r  zFrameBase._apply_min_count  s}    <<>%%'94D

I!),))+~~'\\$bff\5F!),zz|#Mr   c                    | j                  |      }|dk(  r$| j                  t        j                  ||||      S t	        | j
                  j                  ||||            }| j                  ||      S r  )rl   r  r   prodr   r   r  r  s           r   r  zFrameBase.prod  sy     ""4(19&&)# '    		v|[RV WX$$VY77r   c           	     `   t        | d       | j                  |      }| j                  j                  |||       | }t	        | j                        r4|r2|t        | j                  j                  d      j                           }t        |j                  j                  |||||            S )Nvar)r   r}  r  T)r  r  )	rk   rl   r   r  r0   r  r   r   r   )r   r   r}  ddofr  r  r   r   s           r   r  zFrameBase.var  s     	 e,""4(

DlKTZZ(\$tzz~~4~@FFGHEJJNN4|NU
 	
r   c           
        t        | d       | j                  |      }| }t        | j                        j	                  ||||      }d\  }	}
t        | j                        rY|dk(  r|t        |j                           }n|j                         }|du r,|j                  j                  t        j                  g      }n|j                  }|j                  ddg      j                  }
t        |
      dkD  r|d	k(  rtt        |
      t        | j                        k7  rSt         t        |       d
 t!        |       t        j"                  g      i| j                        | j$                        }nmd}	|d	k(  r)|j'                  d|j(                  j*                   d      }|
D ]  }t-        ||   |      ||<    n#t/        | j                        }	|	rt-        | |      }d }|	r3|
1|
D cg c]&  }t1        | j                  |   j(                  dd       ( }}|d	k(  rW|	si n"|j(                  j*                  |j2                  d} |j4                  |	st6        j                  nt8        f||||d|d|S |j;                  ||||      }|	r9t        | j                        |
|t1        |dd       t1        |dd       |d}t8        }ni }t        j<                  } |j4                  |f|d| j                  d|}|S c c}w )Nstdr   r}  r	  r  FNr   Tincludedatetime	timedeltar   _r   r1  zdatetime64[]unit)r  r   F)rQ  r   r}  r	  r  r  )r}  r	  r  r  r   )
is_df_like	time_colsr   r   r  unitsrQ  r  r  )rk   rl   rs   r   r  r0   r  r   r  select_dtypesr  numberr   r   from_pandasry   rz   r   r   astyper'  r  r+   r   r   r   r  r   r-   r  sqrt)r   r   r}  r	  r  r  r   
numeric_ddrQ  needs_time_conversionr  r   colr  r  _kwargsr   sqrt_func_kwargs	sqrt_funcs                      r   r  zFrameBase.std   s    	 e,""4(
TZZ(,,f4l - 
 ,7(yTZZ(qy'TZZ(89
'__.
t#"((66		{6K"((++Z4M+NVVI9~!19Y3t||3D!D!,4.t4 "?"9$"?"IJ"&** %)$4$4"J -1)qy%/%6%6TZZ__DUUV7W%X
( W*=jov*V
3W %<DJJ$G!$0v>
 Y%:IRSAWTZZ]00&$?SES19 - "jjoo

C 
 -:,,28V	!&)	 	 	 <[   
 !/

;& w5fd3  7I!I&&&
"

	

 
 Y Ts   )+K7c                l    | j                   st        d      t        t        j                  |             S )zEnforce the current divisions at runtime.

        Injects a layer into the Task Graph that checks that the current divisions
        match the expected divisions at runtime.
        zNo known divisions to enforce!)re  r   r   r   EnforceRuntimeDivisionsr   s    r   enforce_runtime_divisionsz#FrameBase.enforce_runtime_divisions`  s/     ##=>>d::4@AAr   c                |   t        | d       |t        d      | j                  |      }t        |       r| j                  j                  ||      }n| j                  j                         }|dk(  r#| j                  t        j
                  ||d      S |st        d      |dk7  rt        d	      | }|j                  dkD  r$|j                  d
dgt        j                  g      }t        t        |d            }t        t        |d            }||dz  z  }	|	j                  dk(  r|	j                  d      }	|	S )af  
        .. note::

           This implementation follows the dask.array.stats implementation
           of skewness and calculates skewness without taking into account
           a bias term for finite sample size, which corresponds to the
           default settings of the scipy.stats skewness calculation. However,
           Pandas corrects for this, so the values differ by a factor of
           (n * (n - 1)) ** 0.5 / (n - 2), where n is the number of samples.

           Further, this method currently does not support filtering out NaN
           values, which is again a difference to Pandas.
        skew0`axis=None` isn't currently supported for `skew`r   r  r   F)rQ  r   r  bias=False is not implemented.	propagate>`nan_policy` other than 'propagate' have not been implemented.r  boolr  excluder   order   g      ?        )rk   r   rl   r0   r   r)  r  r   r   r   r  r  timedelta64r   rX   fillna)
r   r   bias
nan_policyr  rQ  r   m2m3r   s
             r   r)  zFrameBase.skewj  sF   * 	 f-<OPP""4(T"&&++L+QD&&++-D19&&!&	 '   %&FGG$%P  ::>''!6*R^^4D ( E F523F523b#g;;!]]3'Fr   c                   t        | d       |t        d      | j                  |      }t        |       r| j                  j                  ||      }n| j                  j                         }|dk(  r,t        t        j
                  | || j                  dz   |d      S |st        d      |dk7  rt        d	      | }|j                  dkD  r$|j                  d
dgt        j                  g      }t        t        |d            }t        t        |d            }	|	|dz  z  }
|
j                  dk(  r|
j!                  d      }
|r|
dz
  S |
S )a  
        .. note::

           This implementation follows the dask.array.stats implementation
           of kurtosis and calculates kurtosis without taking into account
           a bias term for finite sample size, which corresponds to the
           default settings of the scipy.stats kurtosis calculation. This differs
           from pandas.

           Further, this method currently does not support filtering out NaN
           values, which is again a difference to Pandas.
        kurtosisr*  r+  r   F)rQ  tokenr   r  r,  r-  r.  r  r/  r0  r   r2     g       @r5  r4  )rk   r   rl   r0   r   r=  r  r   _token_prefixr   r   r  r  r6  r   rX   r7  )r   r   fisherr8  r9  r  rQ  r   r:  m4r   s              r   r=  zFrameBase.kurtosis  sY   * 	 j1<OPP""4(T"&&//T/UD&&//1D19!

((:5!&  %&FGG$%P  ::>''!6*R^^4D ( E F523F523b#g;;!]]3'FA:Mr   c                   | j                  |      }t        | d       |dk(  r$| j                  t        j                  ||||      S | j
                  j	                  |||      }| }| j                  dk(  r| t        |j                           }|j                  |||      }|j                  |      }	t        t        j                  ||	z  |d| j
                  	      }
|
S )
Nsemr   r  )r}  r	  r  r   )r}  r	  r  r  Fr  )rl   rk   r  r   rD  r   r   r  r   r  r  r  r  )r   r   r}  r	  r  r  rQ  r   r{  ro  r   s              r   rD  zFrameBase.sem  s     ""4(e,19&&) '   zz~~V$\~R99>djj)*EIIV$KIHKKKK0GGE"


 r   c                0   |d}n|dk  rt        d      | j                  j                  ||       | }|r]| j                  j                         }t	        |j
                        t	        | j
                        k7  r|t        |j
                           }||fS )Nr   zmin_periods must be >= 2)r  min_periods)r   r   cov_get_numeric_datar   r   r  )r   rF  r  r   numericss        r   _prepare_cov_corrzFrameBase._prepare_cov_corr  s    K1_788

LkJzz335H8##$DLL(99d8#3#345k!!r   c                Z    | j                  ||      \  }}t        t        ||||            S r   )rJ  r   rO   )r   rF  r  r  scalarr   s         r   _covzFrameBase._cov  s1     "33KN{c%k6JKKr   c                z    |dk7  rt        d      | j                  ||      \  }}t        t        ||||            S )Npearsonz-Only Pearson correlation has been implemented)r   rJ  r   rN   )r   r   rF  r  r  rL  r   s          r   _corrzFrameBase._corr  sE     Y%&UVV!33KN{d5+{FKLLr   c                    t        | d       | j                  |      }|dk(  r#| j                  t        j                  |||      S t        | j                  j	                  ||||            S )Nmeanr   r}  r  r   )r  r   )rk   rl   r  r   rR  r   r   r   r   r}  r  r  r   s         r   rR  zFrameBase.mean'  sq     	 f-""4(19&&vLt '   IINN6<[tNT
 	
r   c                    | j                  |      }|dk(  r#| j                  t        j                  |||      S t	        | j
                  j                  ||||            S Nr   rS  )rl   r  r   maxr   r   rT  s         r   rW  zFrameBase.max5  ^    ""4(19&&f<d '   diimmFL+tTUUr   c                    | j                  |      }|dk(  r"| j                  t        j                  ||      S t	        | j
                  j                  ||            S Nr   )r}  r   )rl   r  r   r   r   r   r   r   r}  r  r   s        r   r   zFrameBase.any>  O    ""4(19&&quuV$&GGdiimmFK@AAr   c                    | j                  |      }|dk(  r"| j                  t        j                  ||      S t	        | j
                  j                  ||            S rZ  )rl   r  r   allr   r   r[  s        r   r^  zFrameBase.allE  r\  r   c                    | j                  |      }|dk(  r#| j                  t        j                  |||      S t	        | j
                  j                  |||            S rV  )rl   r  r   idxminr   r   r   r   r}  r  r  s        r   r`  zFrameBase.idxminL  ^    ""4(19&&l '   dii..v|[QRRr   c                    | j                  |      }|dk(  r#| j                  t        j                  |||      S t	        | j
                  j                  |||            S rV  )rl   r  r   idxmaxr   r   ra  s        r   rd  zFrameBase.idxmaxU  rb  r   c                    | j                  |      }|dk(  r#| j                  t        j                  |||      S t	        | j
                  j                  ||||            S rV  )rl   r  r   r  r   r   rT  s         r   r  zFrameBase.min^  rX  r   c                    | j                  |      }|dk(  r"| j                  t        j                  ||      S t	        | j
                  j                  ||            S )Nr   )r  r   )rl   r  r   r  r   r   )r   r   r  r  s       r   r  zFrameBase.countg  sP    ""4(19&&qww\PT&UUdiioolKHIIr   c                    t        | d       t        | j                        j                          t	        | j
                  j                               S )Nr  )rk   rs   r   r  r   r   r   s    r   r  zFrameBase.absn  s8     	 e,djj!%%'diimmo..r   c                J    t        | j                  j                  |            S r   )r   r   r  )r   r   s     r   r  zFrameBase.astypeu  s    dii..v677r   c                    | j                  |d      j                  }t        | j                  j                  |            S Nouter)r   r   r   combine_first)r   r   s     r   rl  zFrameBase.combine_firsty  s4    ,,UG<AAdii55e<==r   c                L    t        | j                  j                  ||            S r   )r   r   to_timestamp)r   r  hows      r   rn  zFrameBase.to_timestamp~  s    dii44T3?@@r   c                H    t        | j                  j                               S r   )r   r   isnar   s    r   rq  zFrameBase.isna  s    diinn.//r   c           
     2   t        j                  t        |      d      st        d      t	        j
                  | |||      }g }t        t        |            D ];  }|j                  t        t	        j                  ||| j                                     = |S )a@  Pseudorandomly split dataframe into different pieces row-wise

        Parameters
        ----------
        frac : list
            List of floats that should sum to one.
        random_state : int or np.random.RandomState
            If int or None create a new RandomState with this as the seed.
            Otherwise draw from the passed RandomState.
        shuffle : bool, default False
            If set to True, the dataframe is shuffled (within partition)
            before the split.

        Examples
        --------

        50/50 split

        >>> a, b = df.random_split([0.5, 0.5])  # doctest: +SKIP

        80/10/10 split, consistent random_state

        >>> a, b, c = df.random_split([0.8, 0.1, 0.1], random_state=123)  # doctest: +SKIP

        See Also
        --------
        dask.DataFrame.sample
        r   zfrac should sum to 1)r  allcloser  r   r   Splitr  r   appendr   	SplitTaker   )r   fracrandom_stater  r   r/  r   s          r   random_splitzFrameBase.random_split  s|    : {{3t9a(344

4|W=s4y! 	LAJJ~dnnUAtyy&IJK	L
r   c                J    t        | j                  j                  |            S r   )r   r   round)r   decimalss     r   r{  zFrameBase.round  s    diiooh788r   c                   | j                  |      }| j                  |      }t        |t              r|j                  n|}t        |t              r|j                  n|}t	        | j                  j                  ||            S r   )r   r   r   r   r   r  r   r  r   s      r   r  zFrameBase.where  sd    ++D1,,U3&tY7tyyT(	:

diioodE:;;r   c                   | j                  |      }| j                  |      }t        |t              r|j                  n|}t        |t              r|j                  n|}t	        | j                  j                  ||            S r   )r   r   r   r   r   maskr~  s      r   r  zFrameBase.mask  sd    ++D1,,U3&tY7tyyT(	:

diinnT59::r   c                N    t        | j                  j                  |||            S r   )r   r   r  )r   
to_replaceri  regexs       r   r  zFrameBase.replace  s     dii//
E5IJJr   c                    t        |      }|dk(  r"| j                  t        j                  ||      S | }|t	        | dd       }t        t        ||            S )Nr   r   limitffillc                     yNr   r   r  s    r   r   z!FrameBase.ffill.<locals>.<lambda>  s    r   )rl   r  r   r  rB   r   rA   r   r   r  r   s       r   r  zFrameBase.ffill  sY    d#19&&qwwT&GG=g{;EeE5122r   c                    t        |      }|dk(  r"| j                  t        j                  ||      S | }|t	        | dd       }t        t        ||            S )Nr   r  bfillc                     | j                   dz
  S Nr   r1  r  s    r   r   z!FrameBase.bfill.<locals>.<lambda>  s    9J r   )rl   r  r   r  rB   r   r>   r  s       r   r  zFrameBase.bfill  sZ    d#19&&qwwT&GG=g/JKEeE5122r   c                    | j                  |      }|dk(  r"| j                  t        j                  ||      S t	        |t
              r|j                  }t        | j                  j                  |            S Nr   r   )rl   r  r   r7  r   r   r   r   )r   ri  r   s      r   r7  zFrameBase.fillna  sb    ""4(19&&qxxT&BBeY'JJEdii..u566r   c                    t        |t              st        d      t        |      }|dk(  rt	        t        | ||            S | j                  t
        j                  dd|||      S )Nzperiods must be an integerr   F)r  r  r  periodsr   r  )r   r	   r  rl   r   rD   r  r  )r   r  r  r   s       r   shiftzFrameBase.shift  sk    '8,899d#19!%gt"<==""" % # 
 	
r   c                    t        |      }|dk(  rt        t        | |            S | j                  t        j                  ddd||      S )a|  
        .. note::

           Pandas currently uses an ``object``-dtype column to represent
           boolean data with missing values. This can cause issues for
           boolean-specific operations, like ``|``. To enable boolean-
           specific operations, at the cost of metadata that doesn't match
           pandas, use ``.astype(bool)`` after the ``shift``.
        r   F)r  r  r  r   r  r   )rl   r   r?   r  r  )r   r  r   s      r   diffzFrameBase.diff  sU     d#19!$tW"566""" %! # 
 	
r   c                P    t        | j                  j                  ||||            S r   )r   r   rename_axis)r   mapperr   r   r   s        r   r  zFrameBase.rename_axis
  s$     dii33FE7DQRRr   c                t    t        |      s,t        |      st        |      r|dv rd}nd}t        ||      }|S )N)innerr   r   r   r1  )r#   r1   r0   r  )r   r   joinr   s       r   r   z!FrameBase._create_alignable_frame  s@    !%(5!%6u%=((  ;?Er   c                v    | j                  ||      }| j                  j                  |j                  |||      S r   )r   r   align)r   r   r  r   r   s        r   r  zFrameBase.align  s1    ,,UD9yyuzz4zBBr   c                L    t        | j                  j                  |            S )a9  Approximate number of unique rows.

        This method uses the HyperLogLog algorithm for cardinality
        estimation to compute the approximate number of unique rows.
        The approximate error is 0.406%.

        Parameters
        ----------
        split_every : int, optional
            Group partitions into groups of this size while performing a
            tree-reduction. If set to False, no tree-reduction will be used.
            Default is 8.

        Returns
        -------
        a float representing the approximate number of elements
        r  )r   r   nunique_approxr   r  s     r   r  zFrameBase.nunique_approx"  s!    $ dii66;6OPPr   c                    |dk(  r"| j                  t        j                  ||      S t        | j                  j                  |            S Nr   )r   r}  r|  )r  r   cumsumr   r   r   r   r}  r   s       r   r  zFrameBase.cumsum6  B    19&&qxxd6&JJdii..f.=>>r   c                    |dk(  r"| j                  t        j                  ||      S t        | j                  j                  |            S r  )r  r   cumprodr   r   r  s       r   r  zFrameBase.cumprod<  sB    19&&qyytF&KKdii//v/>??r   c                    |dk(  r"| j                  t        j                  ||      S t        | j                  j                  |            S r  )r  r   cummaxr   r   r   r   r}  s      r   r  zFrameBase.cummaxB  r  r   c                    |dk(  r"| j                  t        j                  ||      S t        | j                  j                  |            S r  )r  r   cumminr   r   r  s      r   r  zFrameBase.cumminH  r  r   c
                   ||dk  r|durt        d      ||	rt        d      |r|j                         ni }|j                  |
       ||d<   |	r|	j                         ni }	|	j                  |
       |xs |xs ||	d<   |r|j                         ni }|j                  |
       |xs ||d<   t        t	        | ||||	||            S )aP  Generic row-wise reductions.

        Parameters
        ----------
        chunk : callable
            Function to operate on each partition. Should return a
            ``pandas.DataFrame``, ``pandas.Series``, or a scalar.
        aggregate : callable, optional
            Function to operate on the concatenated result of ``chunk``. If not
            specified, defaults to ``chunk``. Used to do the final aggregation
            in a tree reduction.

            The input to ``aggregate`` depends on the output of ``chunk``.
            If the output of ``chunk`` is a:

            - scalar: Input is a Series, with one row per partition.
            - Series: Input is a DataFrame, with one row per partition. Columns
              are the rows in the output series.
            - DataFrame: Input is a DataFrame, with one row per partition.
              Columns are the columns in the output dataframes.

            Should return a ``pandas.DataFrame``, ``pandas.Series``, or a
            scalar.
        combine : callable, optional
            Function to operate on intermediate concatenated results of
            ``chunk`` in a tree-reduction. If not provided, defaults to
            ``aggregate``. The input/output requirements should match that of
            ``aggregate`` described above.
        $META
        token : str, optional
            The name to use for the output keys.
        split_every : int, optional
            Group partitions into groups of this size while performing a
            tree-reduction. If set to False, no tree-reduction will be used,
            and all intermediates will be concatenated and passed to
            ``aggregate``. Default is 8.
        chunk_kwargs : dict, optional
            Keyword arguments to pass on to ``chunk`` only.
        aggregate_kwargs : dict, optional
            Keyword arguments to pass on to ``aggregate`` only.
        combine_kwargs : dict, optional
            Keyword arguments to pass on to ``combine`` only.
        kwargs :
            All remaining keywords will be passed to ``chunk``, ``combine``,
            and ``aggregate``.

        Examples
        --------
        >>> import pandas as pd
        >>> import dask.dataframe as dd
        >>> df = pd.DataFrame({'x': range(50), 'y': range(50, 100)})
        >>> ddf = dd.from_pandas(df, npartitions=4)

        Count the number of rows in a DataFrame. To do this, count the number
        of rows in each partition, then sum the results:

        >>> res = ddf.reduction(lambda x: x.count(),
        ...                     aggregate=lambda x: x.sum())
        >>> res.compute()
        x    50
        y    50
        dtype: int64

        Count the number of rows in a Series with elements greater than or
        equal to a value (provided via a keyword).

        >>> def count_greater(x, value=0):
        ...     return (x >= value).sum()
        >>> res = ddf.x.reduction(count_greater, aggregate=lambda x: x.sum(),
        ...                       chunk_kwargs={'value': 25})
        >>> res.compute()
        np.int64(25)

        Aggregate both the sum and count of a Series at the same time:

        >>> def sum_and_count(x):
        ...     return pd.Series({'count': x.count(), 'sum': x.sum()},
        ...                      index=['count', 'sum'])
        >>> res = ddf.x.reduction(sum_and_count, aggregate=lambda x: x.sum())
        >>> res.compute()
        count      50
        sum      1225
        dtype: int64

        Doing the same, but for a DataFrame. Here ``chunk`` returns a
        DataFrame, meaning the input to ``aggregate`` is a DataFrame with an
        index with non-unique entries for both 'x' and 'y'. We groupby the
        index, and sum each group to get the final result.

        >>> def sum_and_count(x):
        ...     return pd.DataFrame({'count': x.count(), 'sum': x.sum()},
        ...                         columns=['count', 'sum'])
        >>> res = ddf.reduction(sum_and_count,
        ...                     aggregate=lambda x: x.groupby(level=0).sum())
        >>> res.compute()
           count   sum
        x     50  1225
        y     50  3725
        r   Fzsplit_every must be at least 2z+`combine_kwargs` provided with no `combine`r  )r   r  updater   rP   )r   chunk	aggregatecombinerQ  r>  r  chunk_kwargsaggregate_kwargscombine_kwargsr   s              r   	reductionzFrameBase.reductionN  s    ` "{Q;e;S=>>? !NOO.:|((*F#$V2@,,.bf%!(!>I!>v6F+002B'#,#5  

 
	
r   c                L    t        | j                  j                  ||            S )at  Return the memory usage of each partition

        Parameters
        ----------
        index : bool, default True
            Specifies whether to include the memory usage of the index in
            returned Series.
        deep : bool, default False
            If True, introspect the data deeply by interrogating
            ``object`` dtypes for system-level memory consumption, and include
            it in the returned values.

        Returns
        -------
        Series
            A Series whose index is the partition number and whose values
            are the memory usage of each partition in bytes.
        )r   r   memory_usage_per_partition)r   r   rw  s      r   r  z$FrameBase.memory_usage_per_partition  s     & diiBB5$OPPr   c                    ddl m}  ||       S )zPurely label-location based indexer for selection by label.

        >>> df.loc["b"]  # doctest: +SKIP
        >>> df.loc["b":"d"]  # doctest: +SKIP
        r   )
LocIndexer)"dask.dataframe.dask_expr._indexingr  )r   r  s     r   r  zFrameBase.loc  s     	B$r   c                >    t        t        j                  |             S r   )r   r   NotNullr   s    r   r  zFrameBase.notnull  s    dll4011r   c                $    | j                          S r   )r  r   s    r   isnullzFrameBase.isnull	  s    r   c           	        || j                   r|r| S | j                  S ||rt        d      || |   }n| j                  }t	        ||      \  }t              d   fz   }|s|S t              t        | j                        dz
  k(  rGt        fdt        dt                    D              st        t        j                  | |            S t        t        j                  | |            S )ae  Compute the current divisions of the DataFrame.

        This method triggers immediate computation. If you find yourself running this command
        repeatedly for the same dataframe, we recommend storing the result
        so you don't have to rerun it.

        If the column or index values overlap between partitions, raises ``ValueError``.
        To prevent this, make sure the data are sorted by the column or index.

        Parameters
        ----------
        col : string, optional
            Calculate the divisions for a non-index column by passing in the name of the column.
            If col is not specified, the index will be used to calculate divisions.
            In this case, if the divisions are already known, they will be returned
            immediately without computing.
        set_divisions : bool, default False
            Whether to set the computed divisions into the DataFrame. If False, the divisions
            of the DataFrame are unchanged.

        Examples
        --------
        >>> import dask
        >>> ddf = dask.datasets.timeseries(start="2021-01-01", end="2021-01-07", freq="1h").clear_divisions()
        >>> divisions = ddf.compute_current_divisions()
        >>> print(divisions)  # doctest: +NORMALIZE_WHITESPACE
        (Timestamp('2021-01-01 00:00:00'),
         Timestamp('2021-01-02 00:00:00'),
         Timestamp('2021-01-03 00:00:00'),
         Timestamp('2021-01-04 00:00:00'),
         Timestamp('2021-01-05 00:00:00'),
         Timestamp('2021-01-06 00:00:00'),
         Timestamp('2021-01-06 23:00:00'))

        >>> ddf = ddf.reset_index().clear_divisions()
        >>> divisions = ddf.compute_current_divisions("timestamp")
        >>> print(divisions)  # doctest: +NORMALIZE_WHITESPACE
        (Timestamp('2021-01-01 00:00:00'),
         Timestamp('2021-01-02 00:00:00'),
         Timestamp('2021-01-03 00:00:00'),
         Timestamp('2021-01-04 00:00:00'),
         Timestamp('2021-01-05 00:00:00'),
         Timestamp('2021-01-06 00:00:00'),
         Timestamp('2021-01-06 23:00:00'))

        >>> ddf = ddf.set_index("timestamp", divisions=divisions, sorted=True)
        z9Can't set divisions of non-index, call set_index instead.)allow_overlapr  r   c              3  :   K   | ]  }|   |d z
     k\    ywr   Nr   )r   r   maxesminss     r   r   z6FrameBase.compute_current_divisions.<locals>.<genexpr>I	  s"     L1tAw%A,.L   )re  r   r   r   _compute_partition_statsr  r   r   r  r   r   SetDivisionsResolveOverlappingDivisions)r   r!  set_divisionsr   lensr   r  r  s         @@r   compute_current_divisionsz#FrameBase.compute_current_divisions	  s    ` ;4//>>!?}%K  ?IEJJE4U-XeT$K59,.	t9DNN+a//LaT8KLL%d&7&7i&HIId>>tT5RVWXXr   r   )r   orientr   r   c               "    t        |||||      S )z
        Construct a Dask DataFrame from a Python Dictionary

        See Also
        --------
        dask.dataframe.from_dict
        )r   r   )	from_dict)clsr  r   r  r   r   s         r   r  zFrameBase.from_dictN	  s     {F%QQr   c                (    ddl m}  || |g|i |S )z-See dd.to_json docstring for more informationr   )to_json)dask.dataframe.ior  )r   r2  r   r   r  s        r   r  zFrameBase.to_json[	  s    -tX7777r   c                8    ddl m}  || |||||||||	|
||      S )Nr   )to_sql)
schema	if_existsr   index_label	chunksizer   r   r   parallelengine_kwargs)dask.dataframe.io.sqlr  )r   r   urir  r  r   r  r  r   r   r   r  r  r  s                 r   r  zFrameBase.to_sqla	  s;     	1#'
 	
r   c                (    ddl m}  || |g|i |S )z,See dd.to_orc docstring for more informationr   )to_orc)dask.dataframe.io.orcr  )r   pathr   r   r  s        r   r  zFrameBase.to_orc	  s    0dD242622r   c                "    ddl m}  || |fi |S )z,See dd.to_csv docstring for more informationr   )to_csv)dask.dataframe.io.csvr  )r   r2  r   r  s       r   r  zFrameBase.to_csv	  s    0dH///r   c                    ddl m} |du r,t        | j                  t              j                               } ||       }| j                  ||      }|d   f|_        |S )Nr   )
to_recordsT)#dask.dataframe.dask_expr.io.recordsr  r  r  r   r   r  r  )r   r   r  r  recordsr  s         r   r  zFrameBase.to_records	  sX    Bd?D//4<<>?GT"&&w8!!9,r   c                n   ddl m} ddlm} t	        ||      rt        |      }t        |      | j                  k7  r$t        dt        |       d| j                         | j                  dk(  r ||f      }|S  ||t        | j                        ff      }|S |t        d| d      |j                  S )	Nr   )Sequence)normalize_chunkszJThe number of items in 'lengths' does not match the number of partitions.  != r   z!Unexpected value for 'lengths': '')collections.abcr  dask.array.corer  r   r  r   r   r   r   r   r  )r   r  r  r  r  r  s         r   r  zFrameBase._validate_chunks	  s    ,4gx(GnG7|t/// ##&w<.T5E5E4FH 
 yyA~)7*5 M *7S5F4H*IJM @	KLL{{r   c                $    ddl m}  || ||      S )zCreate a Dask Bag from a Seriesr   )to_bag)r  )dask.dataframe.dask_expr.io.bagr  )r   r   r  r  s       r   r  zFrameBase.to_bag	  s    :dE&11r   c                (    ddl m}  || ||||fi |S )z,See dd.to_hdf docstring for more informationr   )to_hdf)dask.dataframe.io.hdfr  )r   path_or_bufr\  moderu  r   r  s          r   r  zFrameBase.to_hdf	  s    0dKdFEfEEr   c                    ddl m} |r| j                         }n| }|j                         }|j	                         }d|j
                  z   }|j                  ||d      }|D cg c]  }t        |||       c}S c c}w )a  Convert into a list of ``dask.delayed`` objects, one per partition.

        Parameters
        ----------
        optimize_graph : bool, optional
            If True [default], the graph is optimized before converting into
            ``dask.delayed`` objects.

        Examples
        --------
        >>> partitions = df.to_delayed()  # doctest: +SKIP

        See Also
        --------
        dask_expr.from_delayed
        r   )HighLevelGraphr  r   )dependencies)layer)dask.highlevelgraphr  r-  rB  r<  r  from_collectionsr|   )r   optimize_graphr  r   r   graphr  ks           r   
to_delayedzFrameBase.to_delayed	  sx    " 	7MMOEE""$$$&U[[(//u2/N8<=15.===s   #A;c                x    ddl m} |xs |j                  } |j                  |      } |j                  | fi |S )a1  Move to a new DataFrame backend

        Parameters
        ----------
        backend : str, Optional
            The name of the new backend to move to. The default
            is the current "dataframe.backend" configuration.

        Returns
        -------
        DataFrame, Series or Index
        r   r5   )"dask.dataframe.dask_expr._backendsr6   backenddispatch
to_backend)r   r  r   r6   backend_entrypoints        r   r  zFrameBase.to_backend	  sH     	S @8@@A8AA'J,!,,T<V<<r   c                \   t        |t              st        d      t        |t              rZ| j	                  t
        j                  ||      }|j                  |j                        j                  d |j                        S | j	                  t        ||      j                  d      S )Nz+The second operand must be a dask dataframer  )byc                &    | j                  d      S )NFr|  )r  r  s    r   r   zFrameBase.dot.<locals>.<lambda>	  s    !%%u%- r   Fr|  )r   r   r  r   r  r   dotgroupbyr   applyr   _dot_seriesr  )r   r   rQ  ss       r   r  zFrameBase.dot	  s    %+IJJeY'##AEE5t#<A999(..-A4D4D /   "";D"AEEUESSr   c                    t        |t              r$|\  }}||v rt        d|z        | ||<    ||i |S  || g|i |S )Nz1%s is both the pipe target and a keyword argument)r   r  r   )r   r  r   r   targets        r   pipezFrameBase.pipe	  sb    dE"LD& G&P  "F6N(((.t.v..r   c                j   |Md}t        |t              r0d|cxk  rdk  r%n t	        |      t        j                  |       |}nt	        |      |t	        d      |t
        j                  j                         }t        | j                  |      }t        t        j                  | |||            S )a  Random sample of items

        Parameters
        ----------
        n : int, optional
            Number of items to return is not supported by dask. Use frac
            instead.
        frac : float, optional
            Approximate fraction of items to return. This sampling fraction is
            applied to all partitions equally. Note that this is an
            **approximate fraction**. You should not expect exactly ``len(df) * frac``
            items to be returned, as the exact number of elements selected will
            depend on how your data is partitioned (but should be pretty close
            in practice).
        replace : boolean, optional
            Sample with or without replacement. Default = False.
        random_state : int or ``np.random.RandomState``
            If an int, we create a new RandomState with this as the seed;
            Otherwise we draw from the passed RandomState.

        See Also
        --------
        DataFrame.random_split
        pandas.DataFrame.sample
        zlsample does not support the number of sampled items parameter, 'n'. Please use the 'frac' parameter instead.r   r   zfrac must not be None)
state_datarw  r  )r   r
   r   r   r   r  randomRandomStater   r   r   r   Sample)r   ro  rw  r  rx  r   r  s          r   samplezFrameBase.sample
  s    4 =K  !V$a1 !o% c" o%<45599002L&t'7'7F
KK$P
 	
r   c                    t         r   r   r   s    r   r  zFrameBase._repr_data<
  s    !!r   c                    d| j                    }| j                  r#t        j                  | j                  |      }|S t        j                  dg| j                   dz   z  |      }|S )Nznpartitions=r   r   )r   re  r  r  r   )r   r   r   s      r   _repr_divisionszFrameBase._repr_divisions?
  sd    d../0d;I  ")9)9A)=!>TJIr   )returnz	expr.Exprr   TNN)r2  
str | Noner  r   r  None)fusedN)r8  r   r  r   )r,  r/  F)rc  r/  )rl  r/  )r  r   T)ro  r  r   r/  )r  T)rw  r/  )r  zstr | list | no_defaultr  r/  r   
int | Noner  r   r  r/  r  r/  )NNNNF)r   ztuple | Noner   r$  r  r   r  r/  )NNT)r-  r/  r  r!   )r   TFr   F)r   Tr   FF)r   Tr-  F)r   TTr-  F)NTr   FF)NFFF)rO  NFFF)r   TFFr   TF)r   FF)Nr   NFr   )r   N)r   Nr   )r   r   )rk  )rk  NNr   TTF)r   r/  rw  r/  )r  r/  )
NfailTNNNNTFN)r   r  r  r  r  r  r   r/  r  )Fr  )aF)r  r   )NNFN)r   
__module____qualname____doc__staticmethodr$   get__dask_scheduler____dask_optimize__r   propertyr   r   rZ  cached_propertyr   r   r   r   r   r   setterr   r   r   r   r   r  r#  r%  r(  r.  r5  r7  r:  r=  r<  rB  rD  rF  r-  rK  rR  rT  r_  rb  re  r   rm  rq  rt  r  r   r  r   r  r  r  r  rI   r  r  r  rx   r  r  r4  r  ry  r  r  r  r  r  r  productr  r  r'  r)  r=  kurtrD  rJ  rM  rP  rR  rW  r   r^  r`  rd  r  r  r  r  rl  rn  rq  ry  r{  r  r   r  r  r  r  r  r7  r  r  r  r   r  r  r  r  r  r  r  r  r  r  r  r  classmethodr  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  __classcell__r   s   @r   r   r   *  s	   0%Y(8(@A %%DE     ) ) # #. % % & & . . " " ^^7 793 L L-<65
&
(7B6"/H	" % %$
#
48=( 
 

 - ) ) / / \\ ;,()& ",,4
  4
l< / /*"2 '1""&%)n#n n  	n
 #n n n` ",,A  A
/< #r*
  N
 +N
` #r*  Z
 +Z
| #'"&%)__  _ #	_ _D 9='15'	'R 3 3DD ",, 8  8,  ",, 8  8* G",, 
  
& ",, ]  ]~B ",, 8  8t ",, =  =~ D",,NS  :"  OTL M ",,CH
  
 ",,V  V ",,B  B ",,B  B ",,S  S ",,S  S ",,V  V ",,J  J ",,/  / ",,8  8 ",,>  > ",,A  A ",,0  0$L ",,9  9 ",, " <  < ",,!vv ;  ; ",,!%Zu K  K ",,3  3 ",,3  3 ",,7  7 ",,
  
" ",,
  
, ",,z:AS  S
 ",,C  CQ( ",,?  ?
 ",,@  @
 ",,?  ?
 ",,?  ? M
^Q*     ",,2  2 ",,  GYR "#IT4
R 
R8 

 

 
 
B30
22F>:=, "))( 
T 
T ",,
/  
/.
`"  r   r   c                 P     t        | d         t        j                  | i |      S r  )rz   r   r  )r   r   s     r   r  r  J
  s)     ,"47+AEE4,B6,BCCr   )__add____radd____sub____rsub____mul____rmul____mod____rmod____floordiv____rfloordiv____truediv____rtruediv____pow____rpow____lt____rlt____gt____rgt____le____rle____ge____rge____eq____ne____and____rand____or____ror____xor____rxor__)r   )
__invert____neg____pos__c                  |    e Zd ZU dZ e       Zded<   ej                  Z	e
d        Ze
d        Ze
d        Z eej                        d        Ze
d        Zd	 Zd
 Zd Z eej                        d        Z eej                        dSd       Ze
d        Zd ZdTdZd Zd Z eej                        d        Z eej                        dUd       Z	 	 	 	 	 	 	 	 	 	 	 dVdZ eej                        	 	 	 	 	 	 dWd       Z eej                        	 dXd       Zd Z d Z!d Z" fdZ#d Z$dYdZ% eej                        dZd       Z& eej                        dZd        Z' eej                        d[d!       Z( eej                        d\d"       Z) eej                  d#$      	 	 	 	 	 	 d]d%       Z* e+d&'      e,d(d)d*       Z- eej                        e,de,fd+       Z.e/d^d_d,       Z0 eej                  d-g.      dYd/       Z1 eej                        dTd0       Z2 eej                        d1        Z3 eej                        d`d2       Z4d3 Z5 eej                        dYd4       Z6 eej                        d5        Z7	 	 	 	 	 	 	 	 	 da	 	 	 	 	 	 	 	 	 dbd6Z8	 	 	 	 	 	 	 	 	 dc	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddd7Z9d8 Z: eej                        ded9       Z; eej                        d:        Z< eej                        d;        Z=dfd<Z>e
d=        Z?d> Z@dgd?ZAdgd@ZBdgdAZCdgdBZDdgdCZEdgdDZFdUdEZG eej                        dhdF       ZHdidGZI eej                        djdH       ZJdkdIZK eej                        	 	 	 	 	 dldJ       ZL eej                        dK        ZMdmdLZN eej                        dmdM       ZO eej                        	 	 	 	 dndN       ZP eej                        dodO       ZQ eej                        dodP       ZR eej                        	 	 	 	 	 dpdQ       ZSdR ZT xZUS )qr   zDataFrame-like Expr Collection.

    The constructor takes the expression that represents the query as input. The class
    is not meant to be instantiated directly. Instead, use one of the IO connectors from
    Dask.
    ClassVar[set[str]]
_accessorsc                    | j                   t        t        | j                        d      z  t        | j                        fS r  )r   rW  r   r   r   s    r   shapezDataFrame.shape
  s/    yyCDLL 1155s4<<7HHHr   c                     y)Return dimensionalityr   r   r   s    r   r   zDataFrame.ndim
       r   c                    t        d      )NzChecking whether a Dask DataFrame has any rows may be expensive. However, checking the number of columns is fast. Depending on which of these results you need, use either `len(df.index) == 0` or `len(df.columns) == 0`)ru   r   s    r   emptyzDataFrame.empty
  s     +=
 	
r   c              #  t   K   t        | j                        D ]  \  }}|| j                  d d |f   f  y wr   )	enumerater   r   )r   r   r   s      r   itemszDataFrame.items
  s9      . 	*GAt1a4))	*s   68c                2    | j                   | j                  gS r   r   r   s    r   axeszDataFrame.axes
  s    

DLL))r   c                    || j                   v S r   )r   )r   r\  s     r   __contains__zDataFrame.__contains__
  s    djj  r   c                ,    t        | j                        S r   )iterr   r   s    r   __iter__zDataFrame.__iter__
  s    DJJr   c                    ddl m}  ||       S )Nr   )DaskDataFrameInterchange)%dask.dataframe.dask_expr._interchangerr  )r   r   r   rr  s       r   __dataframe__zDataFrame.__dataframe__
  s    R'--r   c              #     K   | j                         }t        | j                        D ]9  }|j                  |      j	                         }|j                         E d {    ; y 7 wr   )r-  r  r   r  r   iterrows)r   r   r   dfs       r   rv  zDataFrame.iterrows
  sV     t''( 	%A$$Q'//1B{{}$$	%$s   AA'A%A'c              #     K   | j                         }t        | j                        D ]<  }|j                  |      j	                         }|j                  ||      E d {    > y 7 wNr   r   )r-  r  r   r  r   
itertuples)r   r   r   r   r   rw  s         r   r{  zDataFrame.itertuples
  s]     t''( 	=A$$Q'//1B}}5t}<<<	=<s   AA* A(!A*c                    t         S r   elemwiser   s    r   	_elemwisezDataFrame._elemwise
      r   c                   |j                  dd      }||z   D ]  }t        |t        j                        r|j                  dk(  r-t        |t
        t        t        t        t        j                  t        j                  t        j                  f      r{t        c S  |dk(  r6|j                  t        S |j                  dkD  rt        S t!        |g|i |S t        S Nr/  r   __call__r   r0  r   r  r  ra  r
   r3  r   r!   r  r   r   r  NotImplemented	signaturenoutr~  r   numpy_ufuncr   inputsr   r/  r  s          r   __array_ufunc__zDataFrame.__array_ufunc__
      jj## 	&A !RZZ(QWW]FFIubllBIIrxxX &%	& Z$$0%%!# &%?f??? "!r   Nc                   t        |t              rYt        |      dkD  rKt        |d   d   t        j                        r|d   d   j
                  dk(  rd }nA|d   d   j                  }n.	 dd l}d |j                         d   d    d}t        | d       t        |       ||| j                        S # t        $ r d}Y 9w xY w)	Nr   r   r   `r4  This methodz3 is not implemented for `dask.dataframe.DataFrame`.r   )r   r  r   r  r  ra  r   inspectstack
IndexErrorr   ry   r   r   r'  contextr   r  method_names         r   __array_wrap__zDataFrame.__array_wrap__
  s    gu%#g,*:'!*Q-4A9L9LPR9R
1++, !-'--/!"4Q"7!8: &-RS  ,%d+EUU  ,+,   +B6 6CCc                @    t        j                  | j                        S r   )r  r  r   r   s    r   _ipython_key_completions_z#DataFrame._ipython_key_completions_
  s    ~~dll++r   c                "    | j                         S r   )to_htmlr   s    r   _repr_html_zDataFrame._repr_html_
  s    ||~r   c           
        | }g }|j                         D ]  \  }}t        |g      d   }t        |t              st	        dt        |             t        |      rHt        t        j                  |g|       }g }t        t        j                  || ||                  }t        |t        t        f      rt        |t              rJt        j                  |j                  |j                        st        |      dkD  rt        j                  |g| }g }t        t        j                  |||j                              }4t        |t              st        |t               rnt        |t"              rt        |j$                        dkD  rt'        d      |j(                  |j(                  k7  r&t'        d|j(                   d|j(                   d      t+        ||j,                  |j.                        }nt	        d	t        |             |j1                  ||g        t        |      dkD  rt        t        j                  |g|       }|S )
Nr   zColumn name cannot be type r   z)Array assignment only supports 1-D arraysz#Number of partitions do not match (r  ))r   rQ  z'Column assignment doesn't support type )ri  rj   r   r  r  r
  rY  r   r   Assignr3  r   r   r   AssignAlignr   r   r!   ra  r   r   r   r   r   extend)r   pairsr   r   r   r{  s         r   assignzDataFrame.assign
  s   KKM !	 DAq"A3'*Aa%"=d1gY GHH{'F(BT(BC'FAqy(IJA/0a(..v{{AFFCt9q=%)[[%?$%?F#%D!/0@0@AFF0S!T 9-*Q2IAu%qww<!#$%PQQ==F$6$66$MM?$v/A/A.B!E  $AV\\M"I$q' STTKKAC!	 F t9q=#DKK$>$>?Fr   c                    | j                  |      }|dk(  r#| j                  t        j                  |||      S t	        | j
                  j                  |||            S r  )rl   r  r   clipr   r   r   lowerupperr   r   s        r   r  zDataFrame.clip  sS    ""4(19&&qvvue$&GGdiinnUE4@AAr   c                2    t        | |||||||||	|
||      S )u7  Merge the DataFrame with another DataFrame

        This will merge the two datasets, either on the indices, a certain column
        in each dataset or the index in one dataset and the column in another.

        Parameters
        ----------
        right: dask.dataframe.DataFrame
        how : {'left', 'right', 'outer', 'inner', 'leftsemi'}, default: 'inner'
            How to handle the operation of the two objects:

            - left: use calling frame's index (or column if on is specified)
            - right: use other frame's index
            - outer: form union of calling frame's index (or column if on is
              specified) with other frame's index, and sort it
              lexicographically
            - inner: form intersection of calling frame's index (or column if
              on is specified) with other frame's index, preserving the order
              of the calling's one
            - leftsemi: Choose all rows in left where the join keys can be found
              in right. Won't duplicate rows if the keys are duplicated in right.
              Drops all columns from right.

        on : label or list
            Column or index level names to join on. These must be found in both
            DataFrames. If on is None and not merging on indexes then this
            defaults to the intersection of the columns in both DataFrames.
        left_on : label or list, or array-like
            Column to join on in the left DataFrame. Other than in pandas
            arrays and lists are only support if their length is 1.
        right_on : label or list, or array-like
            Column to join on in the right DataFrame. Other than in pandas
            arrays and lists are only support if their length is 1.
        left_index : boolean, default False
            Use the index from the left DataFrame as the join key.
        right_index : boolean, default False
            Use the index from the right DataFrame as the join key.
        suffixes : 2-length sequence (tuple, list, ...)
            Suffix to apply to overlapping column names in the left and
            right side, respectively
        indicator : boolean or string, default False
            If True, adds a column to output DataFrame called "_merge" with
            information on the source of each row. If string, column with
            information on source of each row will be added to output DataFrame,
            and column will be named value of string. Information column is
            Categorical-type and takes on a value of "left_only" for observations
            whose merge key only appears in `left` DataFrame, "right_only" for
            observations whose merge key only appears in `right` DataFrame,
            and "both" if the observation’s merge key is found in both.
        npartitions: int or None, optional
            The ideal number of output partitions. This is only utilised when
            performing a hash_join (merging on columns only). If ``None`` then
            ``npartitions = max(lhs.npartitions, rhs.npartitions)``.
            Default is ``None``.
        shuffle_method: {'disk', 'tasks', 'p2p'}, optional
            Either ``'disk'`` for single-node operation or ``'tasks'`` and
            ``'p2p'``` for distributed operation.  Will be inferred by your
            current scheduler.
        broadcast: boolean or float, optional
            Whether to use a broadcast-based join in lieu of a shuffle-based
            join for supported cases.  By default, a simple heuristic will be
            used to select the underlying algorithm. If a floating-point value
            is specified, that number will be used as the ``broadcast_bias``
            within the simple heuristic (a large number makes Dask more likely
            to choose the ``broacast_join`` code path). See ``broadcast_join``
            for more information.

        Notes
        -----

        There are three ways to join dataframes:

        1. Joining on indices. In this case the divisions are
           aligned using the function ``dask.dataframe.multi.align_partitions``.
           Afterwards, each partition is merged with the pandas merge function.

        2. Joining one on index and one on column. In this case the divisions of
           dataframe merged by index (:math:`d_i`) are used to divide the column
           merged dataframe (:math:`d_c`) one using
           ``dask.dataframe.multi.rearrange_by_divisions``. In this case the
           merged dataframe (:math:`d_m`) has the exact same divisions
           as (:math:`d_i`). This can lead to issues if you merge multiple rows from
           (:math:`d_c`) to one row in (:math:`d_i`).

        3. Joining both on columns. In this case a hash join is performed using
           ``dask.dataframe.multi.hash_join``.

        In some cases, you may see a ``MemoryError`` if the ``merge`` operation requires
        an internal ``shuffle``, because shuffling places all rows that have the same
        index in the same partition. To avoid this error, make sure all rows with the
        same ``on``-column value can fit on a single partition.
        )r   	broadcast)merge)r   r   ro  r  left_onright_on
left_indexright_indexsuffixes	indicatorr  r   r  s                r   r  zDataFrame.merge%  s9    V #
 	
r   c           
        t        |t              st        |      st        |d      }t        |t              sIt	        |j
                        s4t        |j
                  d      rt        t        j                  |            }t        |t              st        |t              rt        d |D              st        d      |dvrt        d      |D cg c]  }t        |      st        |d      n| }}t        t        | j                  g|D cg c]  }|j                   c}z   |            S | j                  ||d u d	||||f||
      S c c}w c c}w )Nr   r1  r   c              3  <   K   | ]  }t        |t                y wr   r   r   )r   os     r   r   z!DataFrame.join.<locals>.<genexpr>  s      6-.
1i(6r   z-other must be DataFrame or list of DataFrames)rk  r   z-merge_multi only supports left or outer joins)ro  T)r   r  r  r  ro  r  r  r   )r   r  r#   r  r0   r   hasattrr   r   ToFramer   r^  r   rJ   r  )	r   r   r  ro  lsuffixrsuffixr  r   r  s	            r   r  zDataFrame.join  sO    %&/A%/H15E5$'%ekk2V,"4<<#67E%+eT*# 6276 3 !!PQQ++ !PQQ  6H5JA1-PQQE 
 "tyykU,CQVV,CCM  zzTzw')#  	
 		
 -Ds   !EEc                    ddl m} t        |t              rt        |t              st        d| d       || |f||||d|S )Nr   )GroupByz3`by` must be a column name or list of columns, got .)
group_keyssortobserveddropna)!dask.dataframe.dask_expr._groupbyr  r   r   r   r   )r   r	  r  r  r  r  r   r  s           r   r  zDataFrame.groupby  se     	>b)$ZF-CEbTK  
 "
 
 	
r   c                   t        |t        t        f      rNt        |t              r> | j                  di t        ||j                        D ci c]  \  }}|||    c}}}nt        |t        j                        r@t        |t              s0 | j                  di t        j                  t        |      |      }n|t        |      s!t        |      st        |t        t        f      r| j                  | |      }n<t        |t              st!        dt#        |       d       | j                  di ||i}|j$                  | _        y c c}}w )NzItem assignment with z not supportedr   )r   r  r  r   r  zipr   r  r  dictfromkeysr0   r1   r   r  r  r   r
  r4   )r   r\  ri  r   r  r/  s         r   __setitem__zDataFrame.__setitem__  s   cE4=)j	.J$++RS%--9P QAE!H QRCRXX&z%/K$++@d3i ?@Cc"c"#	623**cT5)CC%%(=d3i[&WXX$++-e-CYY
 !Rs   Ec                x    | j                   D cg c]
  }||k7  s	| }}| |   }|j                  | _        y c c}w r   )r   r4   )r   r\  r  r   r/  s        r   __delitem__zDataFrame.__delitem__  s8    "ll7a3h1777mYY
 8s   
77c                    	 t         j                  | d      j                  }||v r
|dvr|| |<   y t         j	                  | ||       y # t        $ r d}Y 3w xY w)Nr4   r   )r   r=  r  r   r4   )rV  rW  r   rX  __setattr__)r   r\  ri  r   s       r   r  zDataFrame.__setattr__  sg    	--dG<DDG
 '>c *
 
 DItS%0  	G	s    A AAc                   	 t         j                  | |      S # t        $ rj}	 || j                  j                  j
                  v rt        | j                  |         cY d }~S |# t        $ r t        | !  |      cY cY d }~S w xY wd }~ww xY wr   )	rV  rW  rX  r   r   r   r   ra  r_  )r   r\  r]  r   s      r   r_  zDataFrame.__getattr__  s~    	0**455 		00 $))//111)$))C.99	! 0w*3//0		0s>    	B9A$B"A$$B<B=BBBBc                .   t        t        t        |                   }|j                  | j                         |j                  t        t        t
        j                                     |j                  d | j                  D               t        |      S )Nc              3  d   K   | ](  }t        |t              s|j                         s%| * y wr   )r   r  isidentifierr  s     r   r   z$DataFrame.__dir__.<locals>.<genexpr>!  s"     VqZ3-?ANNDTVs   000)	r  dirr
  r  __dict__r   r   r   r  r   r  s     r   __dir__zDataFrame.__dir__  s\    DJ 		S^$%	VDLLVVAwr   c                   t         st        dt         d      |Ht        j                  t
        j                  | ||d      }t        j                  t        |d             t        t        j                  | |||            S )Nz1DataFrame.map requires pandas>=2.1.0, but pandas=z is installed.T	na_actionudfmapr   r   r  rQ  )r&   r   r(   r   emulater   r  r   r   r2   r   Map)r   r  r  rQ  s       r   r  zDataFrame.map$  st    %CNCS T   <<<tTYDQDMM,tE:;dhhtQUVWWr   c                2    t        t        | |||            S N)ro  _columnsr  r   rY   r   ro  r   r  s       r   nlargestzDataFrame.nlargest/  s    TQkJ
 	
r   c                2    t        t        | |||            S r  r   rZ   r  s       r   	nsmallestzDataFrame.nsmallest5  s    da'{K
 	
r   c                0    t        t        | ||            S N)rw  _indexr   rV   r   rw  r   s      r   memory_usagezDataFrame.memory_usage;      .t$uMNNr   c           	        | j                  |d      }t        j                  | j                  |j                        s"t        t        j                  | ||||            S t        t        j
                  | ||||            S rj  )r   r   r   r   CombineFrameAlignCombineFrame)r   r   r  r   	overwrites        r   r  zDataFrame.combine?  st    ,,UG<""499ejj9!&&tUD*iP  dE4YG
 	
r   z/keep=False will raise a ``NotImplementedError``)inconsistenciesc                    t        |      }|du rt        d      t        |      }t        | j                        j                  ||       t        t        | ||||||            S )NFdrop_duplicates with keep=False)subsetkeep)r  r  	split_outr  r  r  )rh   r   rg   rs   r   drop_duplicatesr   rQ   )r   r  r  r  r  r  r  s          r   r  zDataFrame.drop_duplicatesJ  sq     7~F5=%&GHH!&)djj!11d1K)#'-

 
	
r   r  r  r   rQ  r   c               @   | j                  |      }|dk(  rd}t        |      |t        u rHt        j                  t
        j                  | |f|d|d|}t        j                  t        |             t         | j                  j                  |g|||d|      S )a  Parallel version of pandas.DataFrame.apply

        This mimics the pandas version except for the following:

        1.  Only ``axis=1`` is supported (and must be specified explicitly).
        2.  The user should provide output metadata via the `meta` keyword.

        Parameters
        ----------
        func : function
            Function to apply to each column/row
        axis : {0 or 'index', 1 or 'columns'}, default 0
            - 0 or 'index': apply function to each column (NOT SUPPORTED)
            - 1 or 'columns': apply function to each row
        $META
        args : tuple
            Positional arguments to pass to function in addition to the array/series

        Additional keyword arguments will be passed as keywords to the function

        Returns
        -------
        applied : Series or DataFrame

        Examples
        --------
        >>> import pandas as pd
        >>> import dask.dataframe as dd
        >>> df = pd.DataFrame({'x': [1, 2, 3, 4, 5],
        ...                    'y': [1., 2., 3., 4., 5.]})
        >>> ddf = dd.from_pandas(df, npartitions=2)

        Apply a function to row-wise passing in extra arguments in ``args`` and
        ``kwargs``:

        >>> def myadd(row, a, b=1):
        ...     return row.sum() + a + b
        >>> res = ddf.apply(myadd, axis=1, args=(2,), b=1.5)  # doctest: +SKIP

        By default, dask tries to infer the output metadata by running your
        provided function on some fake data. This works well in many cases, but
        can sometimes be expensive, or even fail. To avoid this, you can
        manually specify the output metadata with the ``meta`` keyword. This
        can be specified in many forms, for more information see
        ``dask.dataframe.utils.make_meta``.

        Here we specify the output is a Series with name ``'x'``, and dtype
        ``float64``:

        >>> res = ddf.apply(myadd, axis=1, args=(2,), b=1.5, meta=('x', 'f8'))

        In the case where the metadata doesn't change, you can also pass in
        the object itself directly:

        >>> res = ddf.apply(lambda row: row + 1, axis=1, meta=ddf)

        See Also
        --------
        DataFrame.map_partitions
        r   zGDask DataFrame.apply only supports axis=1
  Try: df.apply(func, axis=1)T)r   r  r   r  )rl   r   rI   r   r  r   r  r   r   r2   r   )r   functionrQ  r   r   r   r   s          r   r  zDataFrame.applyi  s    | ""4(190  &c**:<<x.24KQD MM,t,-DIIOOHLtL$TLVL
 	
r   c                    |t         ur|t         urt        d      t        |      }t        t	        j
                  | |||            S )NzBYou cannot set both the how and thresh arguments at the same time.)ro  r  thresh)rI   r  rg   r   r   DropnaFrame)r   ro  r  r  s       r   r  zDataFrame.dropna  sN    j V:%=T  "&)Ts6&I
 	
r   c                \    |dvrt        d|       |rddd}|j                  ||      S |S )N)r   r   r   r   NNo axis named r   r   r   r   r0  r  r   numeric_axisnum_axiss       r   rl   zDataFrame._validate_axis  sA    77~dV455BCPQ8RH<<d++Kr   r   ua_argsc                \    |t        d      t        t        j                  | |            S )NzCannot rename index.r   )r   r   r   RenameFrame)r   r   r   s      r   renamezDataFrame.rename  s,    344d..tWEFFr   c                    |dv r,t        | j                        dk(  r| | j                  d      S | S |dk(  rt        t        |        d      t	        d| dt        |              )Nr  r   r   z& does not support squeeze along axis 0zNo axis z for object type )r   r   r   r
  r   )r   r   s     r   squeezezDataFrame.squeeze  su    94<< A%DLLO,,QY%:,DE 
 xv->tDzlKLLr   c                X    t        |      }t        t        j                  | |            S )N)column)rg   r   r   ExplodeFrame)r   r  s     r   explodezDataFrame.explode  s$    !&)d//VDEEr   c                    ||t        d      t        |      }|dk(  r|xs |}n|dk(  r|t        d      t        t	        j
                  | ||            S )Nz)must either specify 'columns' or 'labels'r   r   z@Drop currently only works for axis=1 or when columns is not None)r   errors)r  rl   r   r   r   Drop)r   labelsr   r   r  s        r   rl  zDataFrame.drop  sg    ?v~GHHd#19'GQY7?%R  diigfMNNr   c                "    ddl m}  || |fi |S )Nr   )
to_parquet)#dask.dataframe.dask_expr.io.parquetr  )r   r  r   r  s       r   r  zDataFrame.to_parquet  s    B$///r   c                    t        | j                  j                  ||      j                        }t	        | j
                  |         S )Nr0  )r  r   r  r   r   r   )r   r  r1  r   s       r   r  zDataFrame.select_dtypes  s?    JJ$$Wg$FNN
 dii011r   c                N    d|v rt        d      t        t        | ||            S )Ninplacez!inplace is not supported for eval)r4   expr_kwargs)r   r   r@   r   r   r   s      r   evalzDataFrame.eval  s*    %&IJJd4tHIIr   c                2   t        |t              rt        |      dk(  r|d   }t        |t              r,t        d |D              rt	        d      t        d| d      t        |t              r"t        dt        |j                         d      t        |t              r%|j                  | j                  j                  k(  r| S || j                  j                  k(  r| S |t        |       |s|s|t        d	      |rU|(t        |      dz
  | j                  k7  rd
}t        |      t        t!        | ||||
            }|j#                  d      S |st        t!        | ||d|
            S t        t%        | ||||||	t'        |      |
|
            S )aB  Set the DataFrame index (row labels) using an existing column.

        If ``sort=False``, this function operates exactly like ``pandas.set_index``
        and sets the index on the DataFrame. If ``sort=True`` (default),
        this function also sorts the DataFrame by the new index. This can have a
        significant impact on performance, because joins, groupbys, lookups, etc.
        are all much faster on that column. However, this performance increase
        comes with a cost, sorting a parallel dataset requires expensive shuffles.
        Often we ``set_index`` once directly after data ingest and filtering and
        then perform many cheap computations off of the sorted dataset.

        With ``sort=True``, this function is much more expensive. Under normal
        operation this function does an initial pass over the index column to
        compute approximate quantiles to serve as future divisions. It then passes
        over the data a second time, splitting up each input partition into several
        pieces and sharing those pieces to all of the output partitions now in
        sorted order.

        In some cases we can alleviate those costs, for example if your dataset is
        sorted already then we can avoid making many small pieces or if you know
        good values to split the new index column then we can avoid the initial
        pass over the data. For example if your new index is a datetime index and
        your data is already sorted by day then this entire operation can be done
        for free. You can control these options with the following parameters.

        Parameters
        ----------
        other: string or Dask Series
            Column to use as index.
        drop: boolean, default True
            Delete column to be used as the new index.
        sorted: bool, optional
            If the index column is already sorted in increasing order.
            Defaults to False
        npartitions: int, None, or 'auto'
            The ideal number of output partitions. If None, use the same as
            the input. If 'auto' then decide by memory use.
            Only used when ``divisions`` is not given. If ``divisions`` is given,
            the number of output partitions will be ``len(divisions) - 1``.
        divisions: list, optional
            The "dividing lines" used to split the new index into partitions.
            For ``divisions=[0, 10, 50, 100]``, there would be three output partitions,
            where the new index contained [0, 10), [10, 50), and [50, 100), respectively.
            See https://docs.dask.org/en/latest/dataframe-design.html#partitions.
            If not given (default), good divisions are calculated by immediately computing
            the data and looking at the distribution of its values. For large datasets,
            this can be expensive.
            Note that if ``sorted=True``, specified divisions are assumed to match
            the existing partitions in the data; if this is untrue you should
            leave divisions empty and call ``repartition`` after ``set_index``.
        inplace: bool, optional
            Modifying the DataFrame in place is not supported by Dask.
            Defaults to False.
        sort: bool, optional
            If ``True``, sort the DataFrame by the new index. Otherwise
            set the index on the individual existing partitions.
            Defaults to ``True``.
        shuffle_method: {'disk', 'tasks', 'p2p'}, optional
            Either ``'disk'`` for single-node operation or ``'tasks'`` and
            ``'p2p'`` for distributed operation.  Will be inferred by your
            current scheduler.
        compute: bool, default False
            Whether or not to trigger an immediate computation. Defaults to False.
            Note, that even if you set ``compute=False``, an immediate computation
            will still be triggered if ``divisions`` is ``None``.
        partition_size: int, optional
            Desired size of each partitions in bytes.
            Only used when ``npartitions='auto'``

        Examples
        --------
        >>> import dask
        >>> ddf = dask.datasets.timeseries(start="2021-01-01", end="2021-01-07", freq="1h").reset_index()
        >>> ddf2 = ddf.set_index("x")
        >>> ddf2 = ddf.set_index(ddf.x)
        >>> ddf2 = ddf.set_index(ddf.timestamp, sorted=True)

        A common case is when we have a datetime column that we know to be
        sorted and is cleanly divided by day.  We can set this index for free
        by specifying both that the column is pre-sorted and the particular
        divisions along which is is separated

        >>> import pandas as pd
        >>> divisions = pd.date_range(start="2021-01-01", end="2021-01-07", freq='1D')
        >>> divisions
        DatetimeIndex(['2021-01-01', '2021-01-02', '2021-01-03', '2021-01-04',
                       '2021-01-05', '2021-01-06', '2021-01-07'],
                      dtype='datetime64[ns]', freq='D')

        Note that ``len(divisions)`` is equal to ``npartitions + 1``. This is because ``divisions``
        represents the upper and lower bounds of each partition. The first item is the
        lower bound of the first partition, the second item is the lower bound of the
        second partition and the upper bound of the first partition, and so on.
        The second-to-last item is the lower bound of the last partition, and the last
        (extra) item is the upper bound of the last partition.

        >>> ddf2 = ddf.set_index("timestamp", sorted=True, divisions=divisions.tolist())

        If you'll be running `set_index` on the same (or similar) datasets repeatedly,
        you could save time by letting Dask calculate good divisions once, then copy-pasting
        them to reuse. This is especially helpful running in a Jupyter notebook:

        >>> ddf2 = ddf.set_index("name")  # slow, calculates data distribution
        >>> ddf2.divisions  # doctest: +SKIP
        ["Alice", "Laura", "Ursula", "Zelda"]
        >>> # ^ Now copy-paste this and edit the line above to:
        >>> # ddf2 = ddf.set_index("name", divisions=["Alice", "Laura", "Ursula", "Zelda"])
        r   r   c              3  <   K   | ]  }t        |t                y wr   r  r  s     r   r   z&DataFrame.set_index.<locals>.<genexpr>  s     ;:a+;r   z*List[FrameBase] not supported by set_indexzWDask dataframe does not yet support multi-indexes.
You tried to index with this index: z%
Indexes must be single columns only.zgDask dataframe does not yet support multi-indexes.
You tried to index with a frame with these columns: NzfSpecifying npartitions with sort=False or sorted=True is not supported. Call `repartition` afterwards.ae  When doing `df.set_index(col, sorted=True, divisions=...)`, divisions indicates known splits in the index column. In this case divisions must be the same length as the existing divisions in `df`

If the intent is to repartition into new divisions after setting the index, you probably want:

`df.set_index(col, sorted=True).repartition(divisions=divisions)`)new_divisionsru  T)r  )ru  )user_divisionsr   upsampler  r  ru  r  )r   r  r   r   r  r   r   r   r   r  r   r   r.   r   r   r   rb   r  ra   rm   )r   r   rl  rO  r   r   r  r  r   r  ru  r  r   r   s                 r   	set_indexzDataFrame.set_index  s   t eT"s5zQ!HEeT";U;; LMM);;@' B;; 
 eY'%GGKEMMGZF[ \77 
 eV${{djj...djjoo%K I&$K$;< 
 $Y!);t?O?O)OX  !o%#!%YvF
 33$3GG!!$tT&I  ('!-4^D
 	
r   c                Z   |dvrt        d      t        |t              s|g}t        d |D              rt	        dt        |      z        t        |t              s(t        |      t        |      k(  st        d|d|      t        t        | |||||||||	t        |
      |            S )a  Sort the dataset by a single column.

        Sorting a parallel dataset requires expensive shuffles and is generally
        not recommended. See ``set_index`` for implementation details.

        Parameters
        ----------
        by: str or list[str]
            Column(s) to sort by.
        npartitions: int, None, or 'auto'
            The ideal number of output partitions. If None, use the same as
            the input. If 'auto' then decide by memory use.
        ascending: bool, optional
            Sort ascending vs. descending.
            Defaults to True.
        na_position: {'last', 'first'}, optional
            Puts NaNs at the beginning if 'first', puts NaN at the end if 'last'.
            Defaults to 'last'.
        sort_function: function, optional
            Sorting function to use when sorting underlying partitions.
            If None, defaults to ``M.sort_values`` (the partition library's
            implementation of ``sort_values``).
        sort_function_kwargs: dict, optional
            Additional keyword arguments to pass to the partition sorting function.
            By default, ``by``, ``ascending``, and ``na_position`` are provided.

        Examples
        --------
        >>> df2 = df.sort_values('x')  # doctest: +SKIP
        )r   lastz,na_position must be either 'first' or 'last'c              3  >   K   | ]  }t        |t                 y wr   r  )r   bs     r   r   z(DataFrame.sort_values.<locals>.<genexpr>  s     2!:a%%2r  zuDataframes only support sorting by named columns which must be passed as a string or a list of strings.
You passed %szLength of ascending=z != length of by=)r  )r   r   r  r   r   r  r/  r   r   rc   rm   )r   r	  r   	ascendingna_positionr  sort_functionsort_function_kwargsr   r  r  r  s               r   sort_valueszDataFrame.sort_values  s    X //KLL"d#B2r22% "%b'*  )T*3y>SW3L4)5GB5IJJ$%n5
 	
r   c                .    t        t        | ||            S )a  Filter dataframe with complex expression

        Blocked version of pd.DataFrame.query

        Parameters
        ----------
        expr: str
            The query string to evaluate.
            You can refer to column names that are not valid Python variable names
            by surrounding them in backticks.
            Dask does not fully support referring to variables using the '@' character,
            use f-strings or the ``local_dict`` keyword argument instead.

        Notes
        -----
        This is like the sequential version except that this will also happen
        in many threads.  This may conflict with ``numexpr`` which will use
        multiple threads itself.  We recommend that you set ``numexpr`` to use a
        single thread:

        .. code-block:: python

            import numexpr
            numexpr.set_num_threads(1)

        See also
        --------
        pandas.DataFrame.query
        pandas.eval

        Examples
        --------
        >>> import pandas as pd
        >>> import dask.dataframe as dd
        >>> df = pd.DataFrame({'x': [1, 2, 1, 2],
        ...                    'y': [1, 2, 3, 4],
        ...                    'z z': [4, 3, 2, 1]})
        >>> ddf = dd.from_pandas(df, npartitions=2)

        Refer to column names directly:

        >>> ddf.query('y > x').compute()
           x  y  z z
        2  1  3    2
        3  2  4    1

        Refer to column name using backticks:

        >>> ddf.query('`z z` > x').compute()
           x  y  z z
        0  1  1    4
        1  2  2    3
        2  1  3    2

        Refer to variable name using f-strings:

        >>> value = 1
        >>> ddf.query(f'x == {value}').compute()
           x  y  z z
        0  1  1    4
        2  1  3    2

        Refer to variable name using ``local_dict``:

        >>> ddf.query('x == @value', local_dict={"value": value}).compute()
           x  y  z z
        0  1  1    4
        2  1  3    2
        )r   rC   r  s      r   queryzDataFrame.query  s    L eD$788r   c                    g }| j                         D ]]  \  }}|r4t        j                  j                  j	                  |j
                        s<|j                  |j                  ||             _ t        |d      S )Nr  r  r   r  )	ri  r  r  r  r   r   ru  r  concat)r   r  r  r  modesr  r!  s          r   r  zDataFrame.mode[  si    jjl 	KFAsBFFLL$A$A#))$LLL[IJ	K e!$$r   c                @    t        t        j                  | |            S r   )r   r   	AddPrefixr   prefixs     r   
add_prefixzDataFrame.add_prefixd      dnnT6:;;r   c                @    t        t        j                  | |            S r   )r   r   	AddSuffixr   suffixs     r   
add_suffixzDataFrame.add_suffixh  r6  r   c                     t        | ||||      S )a/  
        Create a spreadsheet-style pivot table as a DataFrame. Target ``columns``
        must have category dtype to infer result's ``columns``.
        ``index``, ``columns``, ``values`` and ``aggfunc`` must be all scalar.

        Parameters
        ----------
        values : scalar
            column to aggregate
        index : scalar
            column to be index
        columns : scalar
            column to be columns
        aggfunc : {'mean', 'sum', 'count'}, default 'mean'

        Returns
        -------
        table : DataFrame
        )pivot_table)r   r   r   ry  aggfuncs        r   r=  zDataFrame.pivot_tablel  s    ( 4AAr   c                    ddl m}  ||       S )aL  Purely integer-location based indexing for selection by position.

        Only indexing the column positions is supported. Trying to select
        row positions will raise a ValueError.

        See :ref:`dataframe.indexing` for more.

        Examples
        --------
        >>> df.iloc[:, [2, 0, 1]]  # doctest: +SKIP
        r   )ILocIndexer)r  r@  )r   r@  s     r   r   zDataFrame.iloc  s     	C4  r   c                d    |t        d      | j                  |      }t         || ||            S )Nr   r   rl   r   )r   expr_clsr   r   r   s        r   _comparison_opzDataFrame._comparison_op  s8    %&:;;""4(htUD9::r   c                F    | j                  t        j                  |||      S r   )rD  r   LTFramer   r   r   r   s       r   ltzDataFrame.lt      ""4<<tDDr   c                F    | j                  t        j                  |||      S r   )rD  r   LEFramerG  s       r   lezDataFrame.le  rI  r   c                F    | j                  t        j                  |||      S r   )rD  r   GTFramerG  s       r   gtzDataFrame.gt  rI  r   c                F    | j                  t        j                  |||      S r   )rD  r   GEFramerG  s       r   gezDataFrame.ge  rI  r   c                F    | j                  t        j                  |||      S r   )rD  r   NEFramerG  s       r   nezDataFrame.ne  rI  r   c                F    | j                  t        j                  |||      S r   )rD  r   EQFramerG  s       r   eqzDataFrame.eq  rI  r   c                   | }|j                   }|'t        |j                  g d      j                        }nt	        |      r|g}|D cg c]   }t        ||         rt        ||         s|" }}|durOt        |j                        rt        |j                         }n#|!t        |j                  j                        dv }t        |      s|du r|S ddlm}  |t        | |||            j                         \  }	}|	j                         D 
ci c]  \  }
}|
|j!                          }	}
} |t#        | |	|            S c c}w c c}}
w )a  Convert columns of the DataFrame to category dtype.

        .. warning:: This method eagerly computes the categories of the chosen columns.

        Parameters
        ----------
        columns : list, optional
            A list of column names to convert to categoricals. By default any
            column with an object dtype is converted to a categorical, and any
            unknown categoricals are made known.
        index : bool, optional
            Whether to categorize the index. By default, object indices are
            converted to categorical, and unknown categorical indices are made
            known. Set True to always categorize the index, False to never.
        split_every : int, optional
            Group partitions into groups of this size while performing a
            tree-reduction. If set to False, no tree-reduction will be used.
        kwargs
            Keyword arguments are passed on to compute.
        )rV  stringcategoryF)rV  rZ  r   r   )r   r   r  )r   r  r  r   r   rq   rv   r   r  r   r   $dask.dataframe.dask_expr._collectionr   r9   r   ri  r*  r8   )r   r   r   r  r   rw  rQ  r  r   
categoriesr   r{  s               r   
categorizezDataFrame.categorize  sN   * xx?4--.NOWWXGwiG
 
(a16J4PQ76S 
 
 #DJJ/0<<DJJ,,-1EE 7|IG +$u+V

') 	
E
 6@5E5E5GHTQa(H
H jz5ABB7
0 Is   
%EE
c           
         |dk(  r!t        t        j                  | ||            S t        | j	                         D cg c]'  \  }}|j                  ||      j                  |      ) c}}      S c c}}w )Nr   )r   r  r.  )r   r   NUniqueColumnsr/  ri  nuniquer  )r   r   r  r  r   r!  s         r   ra  zDataFrame.nunique  sp    19!$"5"5df"UVV &*ZZ\!c KKv;KGQQRVW s   ,A3
c                   g d}||vrt        d      t        t        | j                        j	                  |||            }|dk(  r@t        |t              rt        d      | j                  t        j                  ||d||      S |r2| j                  dt        j                  t        j                  g	      }n| }g }|j                         D ]'  \  }	}
|j                  |
j	                  ||
             ) t        |      dkD  r4t        |d   t               r!t#        |||j$                  j&                        S t)        |d      S )a  Approximate row-wise and precise column-wise quantiles of DataFrame

        Parameters
        ----------
        q : list/array of floats, default 0.5 (50%)
            Iterable of numbers ranging from 0 to 1 for the desired quantiles
        axis : {0, 1, 'index', 'columns'} (default 0)
            0 or 'index' for row-wise, 1 or 'columns' for column-wise
        method : {'default', 'tdigest', 'dask'}, optional
            What method to use. By default will use dask's internal custom
            algorithm (``'dask'``).  If set to ``'tdigest'`` will use tdigest
            for floats and ints and fallback to the ``'dask'`` otherwise.
        defaultr=  tdigest1method can only be 'default', 'dask' or 'tdigest')qr  r   r   z+'q' must be scalar when axis=1 is specifiedF)r  rQ  r  r  )r1  )rg  r   r   r  )r   rr   rs   r   quantiler   r  r  r   r  r  r6  
datetime64ri  ru  r   r3  _from_scalarsr   r   r/  )r   rg  r   r  r   allowed_methodsrQ  r   collectionsr  r!  s              r   rh  zDataFrame.quantile  sG    9(PQQ$**%..,T / 
 19!T" !NOO&&

!&) '   &&2>>2=="A ' E Ekkm 	AFAss||a|?@	A {aJ{1~v$F dEJJ4F4FGGk**r   c                f    |dk(  s| j                   dk(  r| j                  ||      S t        d      )Nr   r+  Dask doesn't implement an exact median in all cases as this is hard to do in parallel. See the `median_approximate` method instead, which uses an approximate algorithm.r   median_approximater   )r   r   r  s      r   medianzDataFrame.median&  s?    19((A-**<*PP!`
 	
r   c                H    | j                  |||      j                  d      S )a  Return the approximate median of the values over the requested axis.

        Parameters
        ----------
        axis : {0, 1, "index", "columns"} (default 0)
            0 or ``"index"`` for row-wise, 1 or ``"columns"`` for column-wise
        method : {'default', 'tdigest', 'dask'}, optional
            What method to use. By default will use Dask's internal custom
            algorithm (``"dask"``).  If set to ``"tdigest"`` will use tdigest
            for floats and ints and fallback to the ``"dask"`` otherwise.
        )r   r   r  N)rh  r  )r   r   r   r  s       r   rp  zDataFrame.median_approximate/  s*     }}f<  

&,	r   c                
   |||zt         j                  t         j                  t         j                  g}| j                  j                  |      j                  }t        |      dk(  rg| j                  j                  }nP|dk(  r$|t        d      | j                  j                  }n'| j                  j                  ||      j                  }|D cg c]  }| |   j                  |||       }	}t        |	d      S c c}w )	Nr  r   r^  z*exclude must be None when include is 'all'r0  )r  percentilespercentiles_methodr   r  )r  r  r6  ri  r   r  r   r   r   describer/  )
r   r  rt  ru  r  r1  _includer   r!  statss
             r   rv  zDataFrame.describe?  s     ?w		2>>2==AHjj..x.@HHG7|q **,," !MNNjj((Gjj..w.PXXG 
  I''#5  
 
 e!$$
s   D c                J    | |   }t        j                  | |g      | _        |S )Nr  )r   r  r4   )r   r  r/  s      r   popzDataFrame.pop_  s$    4jYYtdV4

r   c                <   |ddl }|j                  }t        t        |             j	                  dd      g}t        | j                        dk(  ro|j                  t        | j                  j                        j                   d       |j                  dt        |       j                          t        ||       yi }|r,|j                  | j                  | j                         d       |r| j                  dd	      |d
<   t        t!        |j#                         t%        |j'                                      }|rpddl}|d   }|d   }	|j                  t+        |             |j                  dt        | j                         d       ddlm t1        fd| j                  D              dz   }
t1        |
d      }|j3                  d      j5                  |      j5                  dd      }|j3                  dj5                  |            }t7        t!        | j                  |	j&                  | j8                              D cg c]6  \  }\  }}}|j5                   |       |       |       |            8 }}}}}|j;                  |j=                  d             nt+        | j                  d      g}|j;                  |       t?        | j8                  jA                         jC                         t              D cg c]  }d|z  	 }}|j                  dj5                  d jE                  |                   |r1|d
   jG                         }|j                  d!tI        |       d       t        ||       yc c}}}}w c c}w )"z5
        Concise summary of a Dask DataFrame
        Nr   z._collectionr  z: 0 entriesr  )r   r  T)rw  r   r  r   r  zData columns (total z
 columns):)pprint_thingc              3  @   K   | ]  }t         |              y wr   )r   )r   r   r|  s     r   r   z!DataFrame.info.<locals>.<genexpr>  s     CLO,Cs   r      z             #   {{column:<{column_width}}} Non-Null Count  Dtype
            ---  {{underl:<{column_width}}} --------------  -----)column_widthColumnz------)r  underlzP            {{i:^3}}  {{name:<{column_width}}} {{count}} non-null      {{dtype}})r   r   r  r   r  Columnsr  )r\  z%s(%d)z
dtypes: {}, zmemory usage: )%sysstdoutr  r
  r  r   r   ru  r   r   r   r   r  r  r  r  r  r   r   ry  textwraprw   pandas.io.formats.printingr|  rW  dedentr  rh  r   r  splitrO  value_countsri  r  r  r   )r   bufverboser  r  linescomputationsr  r   countsspacer  headercolumn_templater   r   r  r   column_infor   dtype_counts
memory_intr|  s                         @r   infozDataFrame.infoe  s-    ;**CT$Z((<=t||!LLD!1!12;;<KHILL6$t*"5"5!678c5! $**tzz| LM+/+<+<$d+<+SL(C 1 1 3Wl>Q>Q>S5TUV )E!'*FLLu-.LL/DLL0A/B*MN?CdllCCaGEua=L E
 \2x9  'ooTTZTZ!- U[ UO 09fmmT[[A0  ,A+eU  &&"1o%d+&u-&u-	 ' K  LLd+,(IFGK[!"()A)A)C)I)I)KQT"U
HqL
 
 	\((<)@AB%n599;JLL>+j*A)B"EF#u5"
s   ;N
Nc                (    | j                  |||      S r   )rM  )r   rF  r  r  s       r   rG  zDataFrame.cov  s    yylK@@r   c                *    | j                  ||||      S r   )rP  )r   r   rF  r  r  s        r   corrzDataFrame.corr  s     zz&+|[IIr   c                D    | j                         j                  |d      S )NFr  show_dimensionsr  r  r   r  s     r   r  zDataFrame.to_string  s!      **He*TTr   c                   | j                         j                  |d      }t        | j                         D ch c]  }|j                   c}      }t        d      j                  || j                  t        |d            S c c}w )NFr  zdataframe.html.j2r  )r  r   layers)r  r  r   r  r  r   renderr   )r   r  r  r"  r  s        r   r  zDataFrame.to_html  sv      (((E(Rtyy{3!agg34/077"6<8 8 
 	
 4s   Bc                $    t        | |||||      S )N)id_vars
value_varsvar_name
value_name	col_level)melt)r   r  r  r  r  r  s         r   r  zDataFrame.melt  s#     !!
 	
r   c           
     J   | j                   }| j                  }|j                  }t        |      dk(  r't	        j
                  g gt        |      z  ||      }|S t	        j                  |j                         D cg c]  \  }}t        ||       c}}d      }|S c c}}w )Nr   )r   r   r  r   r  )	r   r  r   r   r  r   r/  ri  r,   )r   rQ  r   cols	series_dfr  r  s          r   r  zDataFrame._repr_data  s    zz$$||t9>bTCJ%6ERI
  		?Czz|Ltq!"1E2LSTI  Ms   <B
)TPandasr   NNNr  NNNFF_x_yFNNN)Nr   r  r  NN)TNNNr  )r  NNFT)NT)NNTNFr   r(  r  r/  r  zNone | Literal[0, 1])Nr   Nraise)	TFNNTN      ?    AF)
r   r$  r  r/  r   floatr  r  ru  r/  )	NTr#  r  NNr  FN)r	  zstr | list[str]r   r$  r&  zbool | list[bool]r'  zLiteral['first', 'last']r  r  r(  z-Callable[[pd.DataFrame], pd.DataFrame] | Noner)  zMapping[str, Any] | Noner   r  r  zbool | Noner  r   )TFFrR  r  r%  )      ?r   Frd  )r   F)r   rd  FFNrd  NN)NFF)rO  NFFr  NNNri  N)Vr   r,  r-  r.  r  r_  __annotations__r  r   _partition_typer3  ra  r   rf  r   ri  rk  rm  rp  rt  rv  r{  r  r  r  r  r  r  r  r  r  r  r  r  r  r_  r  r  r  r  r  r  r  rx   rI   r  r  r8  rl   r  r
  r  rl  r  r  r  r!  r*  r,  r  r5  r;  r=  r   rD  rH  rL  rO  rR  rU  rX  r^  ra  rh  rq  rp  rv  rz  r  rG  r  r  r  r  r  r9  r:  s   @r   r   r   {
  s    &)UJ"*llOI I   
 
 ",,*  * * *! .
 ",,%  % ",,=  =  "2V(, ",,)  )V ",,B  B y
v ",, -
  -
^ ",,DH
  
($
1$0	X ",,
  

 ",,
  

 ",,O  O ",,
  
 
I 
	
6 #r**41 K
 +K
Z ",,#D 
  
   ",,	2G 3G
 ",,M  M ",,F  F ",,O  O0
 ",,2  2 ",,J  J "& %~

  ~
 ~
 ~
 ~
 ~
F #''+06 %GK9=$)%)I
I
  I
 %	I

 .I
 I
 EI
 7I
 I
 "I
 #I
VF9P ",,%  % ",,<  < ",,<  <B, ! ! ;EEEEEE8Ct ",,	  	3+j ",,
  
  ",, $%  %> ",,  
Ob ",,A  A ",, J  J ",,U  U ",,
  
 ",, 
  
"
r   r   c                  P    e Zd ZU dZ e       Zded<   ej                  Z	e
d        Ze
d        Ze
d        Ze
d        Zd Zd	 Z eej                        d
        Z fdZe
d        Zej*                  d        Ze
d        Ze
d        Zd ZdFdZ eej                        dGd       Z eej                        dHd       Z eej                        efd       Zd ZdIdZdIdZ dIdZ!dIdZ"dIdZ#dIdZ$ eej                        dddddefd        Z% eej                        dJd!       Z& eej                        dKd"       Z' eej                        dKd#       Z( eej                        dLd$       Z)dMd%Z* eej                        dNd&       Z+	 	 	 	 	 dOd'Z, e-d()      edd*d+       Z.e/dPdQd,       Z0 eej                        d-        Z1 eej                        d.        Z2 eej                        dRd/       Z3 eej                        dFd0       Z4 eej                        d1        Z5 eej                        d2        Z6 eej                        d3        Z7 e8d4e9      Z: e8d5e;      Z< e8d6e=      Z>dSd7Z? eej                        d8        Z@dTd9ZAdUd:ZB eej                        d;        ZCdVd<ZD eej                        dWd=       ZE eej                        dXd>       ZF eej                        dYd?       ZG eej                        	 	 	 	 	 dZd@       ZHe
 eej                        dA               ZIe
 eej                        dB               ZJ eej                        d[dC       ZKdD ZLeMr eej                        dE        ZN xZOS  xZOS )\r   zSeries-like Expr Collection.

    The constructor takes the expression that represents the query as input. The class
    is not meant to be instantiated directly. Instead, use one of the IO connectors from
    Dask.
    r^  r_  c                    | j                   fS )z
        Return a tuple representing the dimensionality of the DataFrame.

        The number of rows is a Delayed result. The number of columns
        is a concrete integer.
        )r   r   s    r   ra  zSeries.shape  s     		|r   c                    | j                   gS r   r  r   s    r   rk  zSeries.axes  s    

|r   c                     y)rc  r   r   r   s    r   r   zSeries.ndim  rd  r   c                    t         S r   r}  r   s    r   r  zSeries._elemwise  r  r   c                H   t        t        t        |                   }|j                  | j                         |j                  t        t        t
        j                                     dD ]*  }t        | j                  |      r|j                  |       , t        |      S )N)catr  )r  r  r
  r  r  r   r   r  r   remover  )r   r  accessors      r   r  zSeries.__dir__  sq    DJ 		S^$%& 	#H4::x0"	# Awr   c                    t        d      )NzJUsing 'in' to test for membership is not supported. Use the values insteadr   )r   r  s     r   rm  zSeries.__contains__  s    !X
 	
r   c              #     K   | j                         }t        | j                        D ]+  }|j                  |      j	                         }|E d {    - y 7 wr   )r-  r  r   r  r   )r   r   r   r  s       r   rp  zSeries.__iter__#  sN     t''( 	A##A&..0ALL	s   AAAAc                ~    t        |t              s| j                  dk(  rt        |   |      S | j
                  |   S r  )r   r   r   ra  r   r  r   r\  r   s     r   r   zSeries.__getitem__*  s8    c6"d&6&6!&;7&s++xx}r   c                B    | j                   j                  j                  S r   )r   r   r   r   s    r   r   zSeries.name/  s    yy###r   c                F    | j                  |      j                  | _        y Nr  )r  r4   r   r   s     r   r   zSeries.name3  s    [[t[,22
r   c                B    | j                   j                  j                  S r   )r   r   r   r   s    r   r   zSeries.dtype7  s    yy$$$r   c                @    t        | j                  j                        S )zNumber of bytes)r   r   r   r   s    r   r   zSeries.nbytes;  s     dii..//r   c                   |j                  dd      }||z   D ]  }t        |t        j                        r|j                  dk(  r-t        |t
        t        t        t        t        j                  t        j                  t        j                  f      r{t        c S  |dk(  r6|j                  t        S |j                  dkD  rt        S t!        |g|i |S t        S r  r  r  s          r   r  zSeries.__array_ufunc__@  r  r   Nc                   t        |t              rYt        |      dkD  rKt        |d   d   t        j                        r|d   d   j
                  dk(  rd }nA|d   d   j                  }n.	 dd l}d |j                         d   d    d}t        | d       t        |       ||| j                        S # t        $ r d}Y 9w xY w)	Nr   r   r   r  r4  r  z0 is not implemented for `dask.dataframe.Series`.rz  )r   r  r   r  r  ra  r   r  r  r  r   rz   r   r  s         r   r  zSeries.__array_wrap__Y  s    gu%#g,*:'!*Q-4A9L9LPR9R
1++, !-'--/!"4Q"7!8: &-OP  -&t,U%diiPP  ,+,r  c           	        t        |t              rot        j                  | j                  |j                        sE| t	        j
                  t        |d             t        t        j                  | |d ||            S |Ht        j                  t        j                  | ||d      }t	        j
                  t        |d             t        t        j                  | |||            S )Nr  r  )r   r  rQ  Tr  r  )r   r   r   r   r   r   r2   r   MapAlignr  r   r  r  )r   r   r  rQ  s       r   r  z
Series.mapl  s    c6"&&tyy#((;<MM,tE"BC%MM$	PTU  <<<tSI4PDMM,tE:;dhht	PTUVVr   c                p    | j                  |      }t        | j                  j                  |||            S r   )rl   r   r   r  r  s        r   r  zSeries.clipz  s.    ""4(diinnUE4@AAr   c                B    t        t        j                  | |            S Nr  )r   r   r  r  s     r   to_framezSeries.to_frame  s    dll4d;<<r   c                f    |t        d      | j                  |       t         || ||            S )Nr   )r   rB  )r   rC  r   r   r   r   s         r   rD  zSeries._comparison_op  s6    %&:;;D!htUzJKKr   r   c                H    | j                  t        j                  ||||      S r   )rD  r   LTSeriesr   r   r   r   r   s        r   rH  z	Series.lt      ""4==%
DQQr   c                H    | j                  t        j                  ||||      S r   )rD  r   LESeriesr  s        r   rL  z	Series.le  r  r   c                H    | j                  t        j                  ||||      S r   )rD  r   GTSeriesr  s        r   rO  z	Series.gt  r  r   c                H    | j                  t        j                  ||||      S r   )rD  r   GESeriesr  s        r   rR  z	Series.ge  r  r   c                H    | j                  t        j                  ||||      S r   )rD  r   NESeriesr  s        r   rU  z	Series.ne  r  r   c                H    | j                  t        j                  ||||      S r   )rD  r   EQSeriesr  s        r   rX  z	Series.eq  r  r   FTc                p   |t         u r[t        | j                  t              r?| j                  j
                  r&dt        | j                  j                        dz  z   }nd}nd}|dk(  r|dur|d}d }|dkD  s|du r!|r|s| n| j                         }t        |      }t        t        | |||||||            S )Nr   i T)rI   r   r   r   r  knownr   r]  r  rU   r   r]   )	r   r  r&  r  	normalizer  r  lengthr   s	            r   r  zSeries.value_counts  s     
"$**&6788>> !C

(=(=$>'$I II $I 	>it3DMY$.I &DDKKMEZFdIvy+yRX
 	
r   c                L    t        | j                  j                  ||            S r   )r   r   r  )r   r  r  s      r   r  zSeries.mode  s    diinnV[ABBr   c                0    t        t        | ||            S N)ro  r  r  r   ro  r  s      r   r  zSeries.nlargest  s    htqkJKKr   c                0    t        t        | ||            S r  r  r  s      r   r  zSeries.nsmallest  s    i{KLLr   c                0    t        t        | ||            S r  r  r  s      r   r  zSeries.memory_usage  r  r   c                F    t        |      }t        t        | |||            S )z
        Return Series of unique values in the object. Includes NA values.

        Returns
        -------
        uniques : Series
        )rh   r   r\   )r   r  r  r  s       r   uniquezSeries.unique  s$     7~FfT;	>RSSr   c                    | j                  ||      }|r0t        |t              r|j                         }|j	                         S |j
                  S )N)r  r  )r  r   r  r  r  r   )r   r  r  r  uniqss        r   ra  zSeries.nunique  sF    $$	$R%');;= ::r   c           
     j    t        |      }|du rt        d      t        t        | |||||            S )NFr  )r  r  r  r  r  )rh   r   r   rQ   )r   r  r  r  r  r  s         r   r  zSeries.drop_duplicates  sH     7~F5=%&GHH)#'-	
 		
r   r  r  r  c                  | j                  |       |t        u rGt        j                  t        j
                  | |f|dd|}t        j                  t        |             t         | j                  j
                  |g|d|i|      S )a  Parallel version of pandas.Series.apply

        Parameters
        ----------
        func : function
            Function to apply
        $META
        args : tuple
            Positional arguments to pass to function in addition to the value.

        Additional keyword arguments will be passed as keywords to the function.

        Returns
        -------
        applied : Series or DataFrame if func returns a Series.

        Examples
        --------
        >>> import dask.dataframe as dd
        >>> s = pd.Series(range(5), name='x')
        >>> ds = dd.from_pandas(s, npartitions=2)

        Apply a function elementwise across the Series, passing in extra
        arguments in ``args`` and ``kwargs``:

        >>> def myadd(x, a, b=1):
        ...     return x + a + b
        >>> res = ds.apply(myadd, args=(2,), b=1.5)  # doctest: +SKIP

        By default, dask tries to infer the output metadata by running your
        provided function on some fake data. This works well in many cases, but
        can sometimes be expensive, or even fail. To avoid this, you can
        manually specify the output metadata with the ``meta`` keyword. This
        can be specified in many forms, for more information see
        ``dask.dataframe.utils.make_meta``.

        Here we specify the output is a Series with name ``'x'``, and dtype
        ``float64``:

        >>> res = ds.apply(myadd, args=(2,), b=1.5, meta=('x', 'f8'))

        In the case where the metadata doesn't change, you can also pass in
        the object itself directly:

        >>> res = ds.apply(lambda x: x + 1, meta=ds)

        See Also
        --------
        Series.map_partitions
        T)r   r  rQ  )
rl   rI   r   r  r   r  r   r   r2   r   )r   r  rQ  r   r   r   s         r   r  zSeries.apply  sx    h 	D!:<<xWdWPVWDMM,t,-odiioohSSDSFSTTr   c                \    |dvrt        d| d      |rddi}|j                  ||      S |S )N)r   r   Nr  z for Seriesr   r   r  r   s       r   rl   zSeries._validate_axis2  sA    ))~dV;?@@9@!H<<d++Kr   c                    | S r   r   r   s    r   r
  zSeries.squeeze<  s    r   c                >    t        t        j                  |             S r   )r   r   DropnaSeriesr   s    r   r  zSeries.dropna@  s    d//566r   c                F    t        t        j                  | |||            S )N)r   r   	inclusive)r   r   Between)r   r   r   r  s       r   betweenzSeries.betweenD  s!    LLDK
 	
r   c                    | j                  |d      }t        j                  | j                  |j                        s!t        t        j                  | |||            S t        t        j
                  | |||            S rj  )r   r   r   r   CombineSeriesAlignCombineSeries)r   r   r  r   s       r   r  zSeries.combineJ  sk    ,,UG<""499ejj9!''eT:F  d00udJOPPr   c                >    t        t        j                  |             S r   )r   r   ExplodeSeriesr   s    r   r  zSeries.explodeS  s    d00677r   c                @    t        t        j                  | |            S r   )r   r   AddPrefixSeriesr3  s     r   r5  zSeries.add_prefixW      d224@AAr   c                @    t        t        j                  | |            S r   )r   r   AddSuffixSeriesr9  s     r   r;  zSeries.add_suffix[  r  r   r  dtr  c                0    t        t        | |||            S r   )r   rM   )r   r   r   rx  s       r   _repartition_quantileszSeries._repartition_quantilesc  s     {HlK
 	
r   c                "    ddl m}  || |fi |S )Nr   )SeriesGroupBy)r  r  )r   r	  r   r  s       r   r  zSeries.groupbyh  s    CT2000r   c                B    t        t        j                  | ||            S )a  Alter Series index labels or name

        Function / dict values must be unique (1-to-1). Labels not contained in
        a dict / Series will be left as-is. Extra labels listed don't throw an
        error.

        Alternatively, change ``Series.name`` with a scalar value.

        Parameters
        ----------
        index : scalar, hashable sequence, dict-like or callable, optional
            If dict-like or callable, the transformation is applied to the
            index. Scalar or hashable sequence-like will alter the
            ``Series.name`` attribute.
        inplace : boolean, default False
            Whether to return a new Series or modify this one inplace.
        sorted_index : bool, default False
            If true, the output ``Series`` will have known divisions inferred
            from the input series and the transformation. Ignored for
            non-callable/dict-like ``index`` or when the input series has
            unknown divisions. Note that this may only be set to ``True`` if
            you know that the transformed index is monotonically increasing. Dask
            will check that transformed divisions are monotonic, but cannot
            check all the values between divisions, so incorrectly setting this
            can result in bugs.

        Returns
        -------
        renamed : Series

        See Also
        --------
        pandas.Series.rename
        )r   r   RenameSeries)r   r   sorted_indexs      r   r  zSeries.renamen  s    F d//e\JKKr   c                l    t        | d       g d}||vrt        d      t        t        | ||            S )a  Approximate quantiles of Series

        Parameters
        ----------
        q : list/array of floats, default 0.5 (50%)
            Iterable of numbers ranging from 0 to 1 for the desired quantiles
        method : {'default', 'tdigest', 'dask'}, optional
            What method to use. By default will use dask's internal custom
            algorithm (``'dask'``).  If set to ``'tdigest'`` will use tdigest
            for floats and ints and fallback to the ``'dask'`` otherwise.
        rh  rc  rf  )rk   r   r   rL   )r   rg  r   rk  s       r   rh  zSeries.quantile  s;     	 j18(PQQnT1f=>>r   c                V    | j                   dk(  r| j                         S t        d      )Nr   rn  ro  r   s    r   rq  zSeries.median  s2    q **,,!`
 	
r   c                &    | j                  |      S )a  Return the approximate median of the values over the requested axis.

        Parameters
        ----------
        method : {'default', 'tdigest', 'dask'}, optional
            What method to use. By default will use Dask's internal custom
            algorithm (``"dask"``).  If set to ``"tdigest"`` will use tdigest
            for floats and ints and fallback to the ``"dask"`` otherwise.
        r  )rh  )r   r   s     r   rp  zSeries.median_approximate  s     }}F}++r   c                    t        |t              st        d      t        | |gd      }|j	                  |d|d      S Nz%other must be a dask.dataframe.Seriesr   r  T)rL  )r   r   r  r/  rM  )r   r   rF  r  rw  s        r   rG  z
Series.cov  s?    %(CDDT5M*ww{D+dwCCr   c                    t        |t              st        d      t        | |gd      }|j	                  ||d|d      S r  )r   r   r  r/  rP  )r   r   r   rF  r  rw  s         r   r  zSeries.corr  sA    %(CDDT5M*xxT;txLLr   c                    t        |t              st        d      | j                  |dk(  r| |      S | j	                  |      |      S )Nzlag must be an integerr   r  )r   r	   r  r  r  )r   lagr  s      r   autocorrzSeries.autocorr  sE    #x(455yyKyXXtzz#KyXXr   c                   t        | j                        rt        | j                        r*t        | j                        st	        | j                        rt        t        | |||            S t        t        | |||            S r   )r   r   r   r   r   r   r=   r<   )r   r  rt  ru  r  r1  s         r   rv  zSeries.describe  sn     TZZ(!$**-#DJJ/&tzz2!k;@RS  ""4kCUV r   c                *    t        t        |             S r   )r   rT   r   s    r   is_monotonic_increasingzSeries.is_monotonic_increasing       3D9::r   c                *    t        t        |             S r   )r   rS   r   s    r   is_monotonic_decreasingzSeries.is_monotonic_decreasing  r  r   c                B    | j                         j                  |      S )Nr  r  r  s     r   r  zSeries.to_string  s      **H*==r   c                B    t        | j                  | j                        S r   )r,   r   r  r   s    r   r  zSeries._repr_data  s     T-A-ABBr   c           
         t        |t              st        d      t        t        |D cg c]	  \  }}||g c}}t                    }t	        t        j                  | g|       S c c}}w )Nz&The caselist argument should be a list	container)r   r  r  r%   r   r   CaseWhen)r   caselistr  r{  s       r   	case_whenzSeries.case_when  s[    h- HIIG$A1aV$ATRSH!$--"@x"@AA %Bs   A*r   r  r  r   r)  )r  Nr  NTN)TFT)FNTNr   r(  r  )both)r  Nr#  )r  rd  )rd  r&  )rO  NF)r   Fr  r  )Pr   r,  r-  r.  r  r_  r  r  r   r  r3  ra  rk  r   r  r  rm  r   rp  r   r   r5  r   r   r  r  r  r  rI   r  rD  rH  rL  rO  rR  rU  rX  r  r  r  r  r  r  ra  r  rx   r  r8  rl   r
  r  r   r  r  r5  r;  r)   r7   r  r;   r  rd   r  r  r  r  rh  rq  rp  rG  r  r  rv  r  r!  r  r  r'   r)  r9  r:  s   @r   r   r     s    &)UJ"*iiO       

 ")) 
 $ $ 
[[3 3 % % 0 0"2Q& "))W W "))B B "))& = =LRRRRRR ")) 
 
@ "))C C "))L L "))M M "))O O	T "))  
, #r**41 7U +7Ur   "))  "))7 7 "))
 

 "))Q Q "))8 8 "))B B "))B B  3
4C	.	/B

/C

 "))1 1
#LJ?$ "))
 

, "))D D "))M M "))Y Y
 ")) $ , "));  ; "));  ; "))> >C 	bii	 	B 
!	B r   r   )addsubmuldivdividetruedivfloordivmodpowraddrsubrmulrdivrtruediv	rfloordivrmodrpowc                      e Zd ZU dZ e       Zded<   ej                  Z	h dZ
h dZ fdZd Zdd	Z eej                        defd
       Z eej                  dg      defd       Z eej                        dd       ZddZ eej                        dd       Zd ZddZed        Zd Zd Zd Zd Zd Zd Zd Z  xZ!S ) r  zIndex-like Expr Collection.

    The constructor takes the expression that represents the query as input. The class
    is not meant to be instantiated directly. Instead, use one of the IO connectors from
    Dask.
    r^  r_  >   dayhourweekyearmonthminutesecondquarterweekday	dayofweek	dayofyear
nanosecond
weekofyearmicrosecondmillisecond>   codesr  orderedas_known
as_ordered
as_unknownr]  as_unorderedadd_categoriesset_categoriesremove_categoriesrename_categoriesreorder_categoriesremove_unused_categoriesc                ~   t        | j                  j                  t        j                        r$|| j
                  v rt        | j                  |      S || j                  v rt        | j                  |      S t        t               |      r$t        | j                  j                  d|      t        | =  |      S )Nz object has no attribute )r   r   r   r  r   _cat_attributesr   r  _dt_attributesr  r  ra  rX  r   r   r_  r  s     r   r_  zIndex.__getattr__I  s    tzz'')<)<=t+++488S))D'''477C((57C  !>>**--FsgN  w"3''r   c                "    d| j                    dS )Nz<dask_expr.expr.Index: expr=>)r   r   s    r   r#  zIndex.__repr__Z  s    -dii[::r   Nc                D    t        j                  || j                        S r  )r  r  r   )r   r'  r  s      r   r  zIndex.__array_wrap__]  s    xxDII..r   c                T    |t         t        t        j                  | ||            S ry  )r   r   r   ToSeriesIndexr   r   r   s      r   r  zIndex.to_series`  s(    %%d00UNOOr   r   r  Tc                T    |st         t        t        j                  | ||            S ry  )r   r   r   ToFrameIndexra  s      r   r  zIndex.to_framef  s%    %%d//EMNNr   c                .    t        t        | |            S )N)rw  )r   rW   rv  s     r   r  zIndex.memory_usagel  s    .t$?@@r   c                B    t        t        j                  | ||            S r   )r   r   
ShiftIndex)r   r  r  s      r   r  zIndex.shiftp  s    doodGTBCCr   c           	        t        |t              rnt        j                  | j                  |j                        sD| t	        j
                  t        |d             t        t        j                  | ||||            S |Ht        j                  t        j                  | ||d      }t	        j
                  t        |d             t        t        j                  | ||||            S )a	  
        Note that this method clears any known divisions.

        If your mapping function is monotonically increasing then use `is_monotonic`
        to apply the mapping function to the old divisions and assign the new
        divisions to the output.

        r  r  Tr  )r   r  rQ  is_monotonic)r   r   r   r   r   r   r2   r   MapIndexAlignr  r   r  r  )r   r   r  rQ  rh  s        r   r  z	Index.maps  s     c6"&&tyy#((;<MM,tE"BC%&&tS)T<P  <<<tSI4PDMM,tE:;HH#L
 	
r   c                   t        t        t        |                   }|j                  | j                         |j                  t        t        t
        j                                     |j                  | j                         t        | j                  t        j                        r|j                  | j                         t        |      S r   )r  r  r
  r  r  r   r   r[  r   r   r  r   rZ  r  r  s     r   r  zIndex.__dir__  s~    DJ 		S^$%	$$%djj""5"56HHT))*Awr   c                ,    t        t        | |            S r   )r   rR   r  s     r   r  zIndex.count  s    j{;<<r   c                    t        d      )Nz''Index' object has no attribute 'index'rX  r   s    r   r   zIndex.index  s    FGGr   c                    t        d      )Nz%'Index' object has no attribute 'sum'rm  r   r   r   s      r   r  z	Index.sum      DEEr   c                    t        d      )Nz&'Index' object has no attribute 'prod'rm  ro  s      r   r  z
Index.prod      EFFr   c                    t        d      )Nz&'Index' object has no attribute 'mean'rm  ro  s      r   rR  z
Index.mean  rr  r   c                    t        d      )Nz%'Index' object has no attribute 'std'rm  ro  s      r   r  z	Index.std  rp  r   c                    t        d      )Nz%'Index' object has no attribute 'var'rm  ro  s      r   r  z	Index.var  rp  r   c                    t        d      )Nz('Index' object has no attribute 'idxmax'rm  ro  s      r   rd  zIndex.idxmax      GHHr   c                    t        d      )Nz('Index' object has no attribute 'idxmin'rm  ro  s      r   r`  zIndex.idxmin  rw  r   r   r#  r  )NNF)"r   r,  r-  r.  r  r_  r  r  r  r  r[  rZ  r_  r#  r  r   rI   r  r  r  r  r  r  r  r3  r   r  r  rR  r  r  rd  r`  r9  r:  s   @r   r  r    s    &)UJ"*hhON$O"(";/ "((" P P
 "((WI.!
 O /O
 "((A AD "((
 
2= H HFGGFFIIr   r  c                  j     e Zd ZdZd Zd Zd Zd	d
dZd Ze	j                  d        Zd fd	Z xZS )r3  zScalar Expr Collectionc                <    d| j                    d| j                   dS )Nz<dask_expr.expr.Scalar: expr=z, dtype=r]  )r   r   r   s    r   r#  zScalar.__repr__  s    .tyyk$**QOOr   c                     t        d|  d      )NzTrying to convert a   to a boolean value. Because Dask objects are lazily evaluated, they cannot be converted to a boolean value or used in boolean conditions like if statements. Try calling .compute() to force computation prior to converting to a boolean value or using in a conditional statement.)r  r   s    r   r%  zScalar.__bool__  s       '' '
 	
r   c                    t         dfS rJ  r   r   s    r   rK  zScalar.__dask_postcompute__  s    byr   c                B    t        t        j                  | |            S r  )r   r   ScalarToSeriesr  s     r   r  zScalar.to_series  s    d11$eDEEr   c                H    t        j                  | j                               S r   )r  r  r   r   s    r   r(  zScalar.__array__  s     zz$,,.))r   c                T    t        j                  | j                        j                  S r   )r  r   r   r   r   s    r   r   zScalar.dtype  s    yy$***r   c                *    t         |   |      d   S )N)r  r   )ra  r  )r   r  r   s     r   r  zScalar.to_delayed  s    w!!@CCr   r'  )r  r   r  )r   r,  r-  r.  r#  r%  rK  r  r(  rZ  r4  r   r  r9  r:  s   @r   r3  r3    sG     P
F*
 + +D Dr   r3  c                V    t        t        j                  | j                  |            S r*  rH  )
collectionr,  s     r   r-  r-    s    $--
dCDDr   c           
        ||t        d      ||d}t        |       st        d      | j                  j	                         j                         r t        | j                        st        d      |t        |t              st        d      |t        |t              st        d      ddl
m} t         |t        | j                               |||t               	            S )
a
  
    Construct a Dask DataFrame from a Pandas DataFrame

    This splits an in-memory Pandas dataframe into several parts and constructs
    a dask.dataframe from those parts on which Dask.dataframe can operate in
    parallel.  By default, the input dataframe will be sorted by the index to
    produce cleanly-divided partitions (with known divisions).  To preserve the
    input ordering, make sure the input index is monotonically-increasing. The
    ``sort=False`` option will also avoid reordering, but will not result in
    known divisions.

    Parameters
    ----------
    data : pandas.DataFrame or pandas.Series
        The DataFrame/Series with which to construct a Dask DataFrame/Series
    npartitions : int, optional, default 1
        The number of partitions of the index to create. Note that if there
        are duplicate values or insufficient elements in ``data.index``, the
        output may have fewer partitions than requested.
    chunksize : int, optional
        The desired number of rows per index partition to use. Note that
        depending on the size and index of the dataframe, actual partition
        sizes may vary.
    sort: bool, default True
        Sort the input by index first to obtain cleanly divided partitions
        (with known divisions).  If False, the input will not be sorted, and
        all divisions will be set to None. Default is True.

    Returns
    -------
    dask.DataFrame or dask.Series
        A dask DataFrame/Series partitioned along the index

    Examples
    --------
    >>> from dask.dataframe import from_pandas
    >>> df = pd.DataFrame(dict(a=list('aabbcc'), b=list(range(6))),
    ...                   index=pd.date_range(start='20100101', periods=6))
    >>> ddf = from_pandas(df, npartitions=3)
    >>> ddf.divisions  # doctest: +NORMALIZE_WHITESPACE
    (Timestamp('2010-01-01 00:00:00'),
     Timestamp('2010-01-03 00:00:00'),
     Timestamp('2010-01-05 00:00:00'),
     Timestamp('2010-01-06 00:00:00'))
    >>> ddf = from_pandas(df.a, npartitions=3)  # Works with Series too!
    >>> ddf.divisions  # doctest: +NORMALIZE_WHITESPACE
    (Timestamp('2010-01-01 00:00:00'),
     Timestamp('2010-01-03 00:00:00'),
     Timestamp('2010-01-05 00:00:00'),
     Timestamp('2010-01-06 00:00:00'))

    Raises
    ------
    TypeError
        If something other than a ``pandas.DataFrame`` or ``pandas.Series`` is
        passed in.

    See Also
    --------
    from_array : Construct a dask.DataFrame from an array that has record dtype
    read_csv : Construct a dask.DataFrame from a CSV file
    z;Exactly one of npartitions and chunksize must be specified.r   z+Input must be a pandas DataFrame or Series.zIndex in passed data is non-numeric and contains nulls, which Dask does not entirely support.
Consider passing `data.loc[~data.isna()]` instead.zSPlease provide npartitions as an int, or possibly as None if you specify chunksize.zSPlease provide chunksize as an int, or possibly as None if you specify npartitions.r   )
FromPandas)r   r  r  r{   )r   r/   r  r   rq  r   ri   r   r   r  dask.dataframe.dask_expr.io.ior  r   rf   r  r{   )r  r   r  r  r  s        r   r  r    s    ~ !8VWW		{2T"EFFzz'A$**'M!A
 	

 z+s'Ca
 	
 
	z)S'Aa
 	
 :%#$;$=	
 r   c                    ddl m} t        | |j                        rt	        | ||      S ddlm}  || |||      }t               r-| j                  j                  dv rt        j                  |      }t        |      S )a|  Read any sliceable array into a Dask Dataframe

    Uses getitem syntax to pull slices out of the array.  The array need not be
    a NumPy array but must support slicing syntax

        x[50000:100000]

    and have 2 dimensions:

        x.ndim == 2

    or have a record dtype:

        x.dtype == [('name', 'O'), ('balance', 'i8')]

    Parameters
    ----------
    x : array_like
    chunksize : int, optional
        The number of rows per partition to use.
    columns : list or string, optional
        list of column names if DataFrame, single string if Series
    meta : object, optional
        An optional `meta` parameter can be passed for dask
        to specify the concrete dataframe type to use for partitions of
        the Dask dataframe. By default, pandas DataFrame is used.

    Returns
    -------
    dask.DataFrame or dask.Series
        A dask DataFrame/Series
    r   N)r   rQ  )	FromArray)r  original_columnsrQ  OU)
dask.arrayr'  r   r!   r   r  r  r{   r   kindr   ArrowStringConversionr   )r  r  r   rQ  r   r  r   s          r   
from_arrayr  >  sp    B #rxx sG$??8 	F  SYY^^t%;++F3&!!r   c           	     :    ddl m} t         || ||||            S )Nr   )	FromGraph)r  r   r   r   name_prefix)r  r  r   )r  r   r   r   r  r  s         r   rN  rN  q  s)    8#	
 r   pandasr   c                    | j                         D ch c]  }t        |      st        |       }}|rt        d| d      t	        |j                  | |||      |      S c c}w )a  
    Construct a Dask DataFrame from a Python Dictionary

    Parameters
    ----------
    data : dict
        Of the form {field : array-like} or {field : dict}.
    npartitions : int
        The number of partitions of the index to create. Note that depending on
        the size and index of the dataframe, the output may have fewer
        partitions than requested.
    orient : {'columns', 'index', 'tight'}, default 'columns'
        The "orientation" of the data. If the keys of the passed dict
        should be the columns of the resulting DataFrame, pass 'columns'
        (default). Otherwise if the keys should be rows, pass 'index'.
        If 'tight', assume a dict with keys
        ['index', 'columns', 'data', 'index_names', 'column_names'].
    dtype: bool
        Data type to force, otherwise infer.
    columns: string, optional
        Column labels to use when ``orient='index'``. Raises a ValueError
        if used with ``orient='columns'`` or ``orient='tight'``.
    constructor: class, default pd.DataFrame
        Class with which ``from_dict`` should be called with.

    Examples
    --------
    >>> import dask.dataframe as dd
    >>> ddf = dd.from_dict({"num1": [1, 2, 3, 4], "num2": [7, 8, 9, 10]}, npartitions=2)
    zPfrom_dict doesn't currently support Dask collections as inputs. Objects of type z were given in the input dict.)ry  r#   r
  r   r  r  )r  r   r  r   r   constructorr{  collection_typess           r   r  r    s{    P *.PA:LQ:OQPP!/00NP
 	

 dFE7;  Qs
   A"A"c                d    ddl m} |t        |t              rt	        |      sd} || |||      S )a  Create a Dask DataFrame from a Dask Array.

    Converts a 2d array into a DataFrame and a 1d array into a Series.

    Parameters
    ----------
    x : da.Array
    columns : list or string
        list of column names if DataFrame, single string if Series
    index : dask.dataframe.Index, optional
        An optional *dask* Index to use for the output Series or DataFrame.

        The default output index depends on whether `x` has any unknown
        chunks. If there are any unknown chunks, the output has ``None``
        for all the divisions (one per chunk). If all the chunks are known,
        a default index with known divisions is created.

        Specifying `index` can be useful if you're conforming a Dask Array
        to an existing dask Series or DataFrame, and you would like the
        indices to match.
    meta : object, optional
        An optional `meta` parameter can be passed for dask
        to specify the concrete dataframe type to be returned.
        By default, pandas DataFrame is used.

    Examples
    --------
    >>> import dask.array as da
    >>> import dask.dataframe as dd
    >>> x = da.ones((4, 2), chunks=(2, 2))
    >>> df = dd.io.from_dask_array(x, columns=['a', 'b'])
    >>> df.compute()
         a    b
    0  1.0  1.0
    1  1.0  1.0
    2  1.0  1.0
    3  1.0  1.0

    See Also
    --------
    dask.bag.to_dataframe: from dask.bag
    dask.dataframe.DataFrame.values: Reverse conversion
    dask.dataframe.DataFrame.to_records: Reverse conversion
    r   )r   N)r   r   rQ  )r  r   r   r  r   )r  r   r   rQ  r   s        r   r   r     s3    Z 2z'48W1gUFFr   Fc                   ddl m}m}m} t	        | t
              st        |       } ||d<   |r||d<   |Ht        |t              D ]4  }|\  }}}|dk(  st	        |t        t        t        f      r+t        d       t	        |t        j                        s"t	        |t
              r|j                         dv rt        t         j"                        t        d	      k  rt%        d
      |	t'        d      |
dk7  rt'        d      ||dk7  rt'        d      |t'        d      |dk7  rt'        d      |t'        d      t)         || t+        |      ||||||||t-               |t	        |t
                          S t)         || fi dt+        |      d|d|d|d|d|d|d|	d|
d|d|d |d!|d" ||      d#|d$t	        |t
                    S )%u,  
    Read a Parquet file into a Dask DataFrame

    This reads a directory of Parquet data into a Dask.dataframe, one file per
    partition.  It selects the index among the sorted columns if any exist.

    .. note::
        Dask automatically resizes partitions to ensure that each partition is of
        adequate size. The optimizer uses the ratio of selected columns to total
        columns to squash multiple files into one partition.

        Additionally, the Optimizer uses a minimum size per partition (default 75MB)
        to avoid too many small partitions. This configuration can be set with

        >>> dask.config.set({"dataframe.parquet.minimum-partition-size": "100MB"})  # doctest: +SKIP

    .. note::
        Specifying ``filesystem="arrow"`` leverages a complete reimplementation of
        the Parquet reader that is solely based on PyArrow. It is significantly faster
        than the legacy implementation, but doesn't yet support all features.

    Parameters
    ----------
    path : str or list
        Source directory for data, or path(s) to individual parquet files.
        Prefix with a protocol like ``s3://`` to read from alternative
        filesystems. To read from multiple files you can pass a globstring or a
        list of paths, with the caveat that they must all have the same
        protocol.
    columns : str or list, default None
        Field name(s) to read in as columns in the output. By default all
        non-index fields will be read (as determined by the pandas parquet
        metadata, if present). Provide a single field name instead of a list to
        read in the data as a Series.
    filters : Union[List[Tuple[str, str, Any]], List[List[Tuple[str, str, Any]]]], default None
        List of filters to apply, like ``[[('col1', '==', 0), ...], ...]``.
        Using this argument will result in row-wise filtering of the final partitions.

        Predicates can be expressed in disjunctive normal form (DNF). This means that
        the inner-most tuple describes a single column predicate. These inner predicates
        are combined with an AND conjunction into a larger predicate. The outer-most
        list then combines all of the combined filters with an OR disjunction.

        Predicates can also be expressed as a ``List[Tuple]``. These are evaluated
        as an AND conjunction. To express OR in predicates, one must use the
        (preferred for "pyarrow") ``List[List[Tuple]]`` notation.
    index : str, list or False, default None
        Field name(s) to use as the output frame index. By default will be
        inferred from the pandas parquet file metadata, if present. Use ``False``
        to read all fields as columns.
    categories : list or dict, default None
        For any fields listed here, if the parquet encoding is Dictionary,
        the column will be created with dtype category. Use only if it is
        guaranteed that the column is encoded as dictionary in all row-groups.
        If a list, assumes up to 2**16-1 labels; if a dict, specify the number
        of labels expected; if None, will load categories automatically for
        data written by dask, not otherwise.
    storage_options : dict, default None
        Key/value pairs to be passed on to the file-system backend, if any.
        Note that the default file-system backend can be configured with the
        ``filesystem`` argument, described below.
    open_file_options : dict, default None
        Key/value arguments to be passed along to ``AbstractFileSystem.open``
        when each parquet data file is open for reading. Experimental
        (optimized) "precaching" for remote file systems (e.g. S3, GCS) can
        be enabled by adding ``{"method": "parquet"}`` under the
        ``"precache_options"`` key. Also, a custom file-open function can be
        used (instead of ``AbstractFileSystem.open``), by specifying the
        desired function under the ``"open_file_func"`` key.
    dtype_backend : {'numpy_nullable', 'pyarrow'}, defaults to NumPy backed DataFrames
        Which dtype_backend to use, e.g. whether a DataFrame should have NumPy arrays,
        nullable dtypes are used for all dtypes that have a nullable implementation
        when 'numpy_nullable' is set, pyarrow is used for all dtypes if 'pyarrow'
        is set.
        ``dtype_backend="pyarrow"`` requires ``pandas`` 1.5+.
    calculate_divisions : bool, default False
        Whether to use min/max statistics from the footer metadata (or global
        ``_metadata`` file) to calculate divisions for the output DataFrame
        collection. Divisions will not be calculated if statistics are missing.
        This option will be ignored if ``index`` is not specified and there is
        no physical index column specified in the custom "pandas" Parquet
        metadata. Note that ``calculate_divisions=True`` may be extremely slow
        when no global ``_metadata`` file is present, especially when reading
        from remote storage. Set this to ``True`` only when known divisions
        are needed for your workload (see :ref:`dataframe-design-partitions`).
    ignore_metadata_file : bool, default False
        Whether to ignore the global ``_metadata`` file (when one is present).
        If ``True``, or if the global ``_metadata`` file is missing, the parquet
        metadata may be gathered and processed in parallel. Parallel metadata
        processing is currently supported for ``ArrowDatasetEngine`` only.
    metadata_task_size : int, default configurable
        If parquet metadata is processed in parallel (see ``ignore_metadata_file``
        description above), this argument can be used to specify the number of
        dataset files to be processed by each task in the Dask graph.  If this
        argument is set to ``0``, parallel metadata processing will be disabled.
        The default values for local and remote filesystems can be specified
        with the "metadata-task-size-local" and "metadata-task-size-remote"
        config fields, respectively (see "dataframe.parquet").
    split_row_groups : 'infer', 'adaptive', bool, or int, default 'infer'
        If True, then each output dataframe partition will correspond to a single
        parquet-file row-group. If False, each partition will correspond to a
        complete file.  If a positive integer value is given, each dataframe
        partition will correspond to that number of parquet row-groups (or fewer).
        If 'adaptive', the metadata of each file will be used to ensure that every
        partition satisfies ``blocksize``. If 'infer' (the default), the
        uncompressed storage-size metadata in the first file will be used to
        automatically set ``split_row_groups`` to either 'adaptive' or ``False``.
    blocksize : int or str, default 'default'
        The desired size of each output ``DataFrame`` partition in terms of total
        (uncompressed) parquet storage space. This argument is currently used to
        set the default value of ``split_row_groups`` (using row-group metadata
        from a single file), and will be ignored if ``split_row_groups`` is not
        set to 'infer' or 'adaptive'. Default is 256 MiB.
    aggregate_files : bool or str, default None
        WARNING: Passing a string argument to ``aggregate_files`` will result
        in experimental behavior. This behavior may change in the future.

        Whether distinct file paths may be aggregated into the same output
        partition. This parameter is only used when `split_row_groups` is set to
        'infer', 'adaptive' or to an integer >1. A setting of True means that any
        two file paths may be aggregated into the same output partition, while
        False means that inter-file aggregation is prohibited.

        For "hive-partitioned" datasets, a "partition"-column name can also be
        specified. In this case, we allow the aggregation of any two files
        sharing a file path up to, and including, the corresponding directory name.
        For example, if ``aggregate_files`` is set to ``"section"`` for the
        directory structure below, ``03.parquet`` and ``04.parquet`` may be
        aggregated together, but ``01.parquet`` and ``02.parquet`` cannot be.
        If, however, ``aggregate_files`` is set to ``"region"``, ``01.parquet``
        may be aggregated with ``02.parquet``, and ``03.parquet`` may be aggregated
        with ``04.parquet``::

            dataset-path/
            ├── region=1/
            │   ├── section=a/
            │   │   └── 01.parquet
            │   ├── section=b/
            │   └── └── 02.parquet
            └── region=2/
                ├── section=a/
                │   ├── 03.parquet
                └── └── 04.parquet

        Note that the default behavior of ``aggregate_files`` is ``False``.
    parquet_file_extension: str, tuple[str], or None, default (".parq", ".parquet", ".pq")
        A file extension or an iterable of extensions to use when discovering
        parquet files in a directory. Files that don't match these extensions
        will be ignored. This argument only applies when ``paths`` corresponds
        to a directory and no ``_metadata`` file is present (or
        ``ignore_metadata_file=True``). Passing in ``parquet_file_extension=None``
        will treat all files in the directory as parquet files.

        The purpose of this argument is to ensure that the engine will ignore
        unsupported metadata files (like Spark's '_SUCCESS' and 'crc' files).
        It may be necessary to change this argument if the data files in your
        parquet dataset do not end in ".parq", ".parquet", or ".pq".
    filesystem: "fsspec", "arrow", or fsspec.AbstractFileSystem backend to use.
    dataset: dict, default None
        Dictionary of options to use when creating a ``pyarrow.dataset.Dataset`` object.
        These options may include a "filesystem" key to configure the desired
        file-system backend. However, the top-level ``filesystem`` argument will always
        take precedence.

        **Note**: The ``dataset`` options may include a "partitioning" key.
        However, since ``pyarrow.dataset.Partitioning``
        objects cannot be serialized, the value can be a dict of key-word
        arguments for the ``pyarrow.dataset.partitioning`` API
        (e.g. ``dataset={"partitioning": {"flavor": "hive", "schema": ...}}``).
        Note that partitioned columns will not be converted to categorical
        dtypes when a custom partitioning schema is specified in this way.
    read: dict, default None
        Dictionary of options to pass through to ``engine.read_partitions``
        using the ``read`` key-word argument.
    arrow_to_pandas: dict, default None
        Dictionary of options to use when converting from ``pyarrow.Table`` to
        a pandas ``DataFrame`` object. Only used by the "arrow" engine.
    **kwargs: dict (of dicts)
        Options to pass through to ``engine.read_partitions`` as stand-alone
        key-word arguments. Note that these options will be ignored by the
        engines defined in ``dask.dataframe``, but may be used by other custom
        implementations.

    Examples
    --------
    >>> df = dd.read_parquet('s3://bucket/my-parquet-data')  # doctest: +SKIP

    See Also
    --------
    to_parquet
    pyarrow.parquet.ParquetDataset
    r   )ReadParquetFSSpecReadParquetPyarrowFS_set_parquet_enginedtype_backendarrow_to_pandasr%  inz2Value of 'in' filter must be a list, set or tuple.)arrowpyarrowz15.0.0z:pyarrow>=15.0.0 is required to use the pyarrow filesystem.zFmetadata_task_size is not supported when using the pyarrow filesystem.inferzDsplit_row_groups is not supported when using the pyarrow filesystem.rd  z=blocksize is not supported when using the pyarrow filesystem.zCaggregate_files is not supported when using the pyarrow filesystem.z.parqz.parquetz.pqzJparquet_file_extension is not supported when using the pyarrow filesystem.z:engine is not supported when using the pyarrow filesystem.)r   filtersr]  r   calculate_divisionsstorage_options
filesystemignore_metadata_filer  r{   r   _seriesr   r  r]  r   r  r  r  metadata_task_sizesplit_row_groups	blocksizeaggregate_filesparquet_file_extensionr  enginer   r  )r  r  r  r  r   r  r   r%   r  r  r  r  pa_fs
FileSystemr  parse_versionpa__version__r   r   r   rg   r{   )r  r   r  r]  r   r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  filterr!  r   r^  s                            r   read_parquetr    s   j  dC d#+F?$3 !g6 	VF!LCSTz*S3e2D"E TUU	V 	:u//0j#&"66(=+BBL  )%X  w&%V   Y)%;%O  &%U  "%AA%\  %L   (1%$7 /%%9 /(?(A"7C0
 	
$ 	
$W-	
 	
 "		

 	
 ,	
 !4	
 "6	
  2	
 .	
  	
 ,	
 $:	
 "	
 'v.	
  !	
" w,#	
 r   c           
        t        | t              st        d      t        |       dk(  rt	        d      t        |       dk(  r0|dk(  r&t        | d   t
              r| d   j                         S | d   S |dvrt	        d      | D cg c]  }t        |t              st        |      n|! } }|dk(  r8| D cg c]-  }t        |j                        dkD  st        |t
              s,|/ } }t        t        ||||||g|        S c c}w c c}w )a@  Concatenate DataFrames along rows.

    - When axis=0 (default), concatenate DataFrames row-wise:

      - If all divisions are known and ordered, concatenate DataFrames keeping
        divisions. When divisions are not ordered, specifying
        interleave_partition=True allows concatenate divisions each by each.

      - If any of division is unknown, concatenate DataFrames resetting its
        division to unknown (None)

    - When axis=1, concatenate DataFrames column-wise:

      - Allowed if all divisions are known.

      - If any of division is unknown, it raises ValueError.

    Parameters
    ----------
    dfs : list
        List of dask.DataFrames to be concatenated
    axis : {0, 1, 'index', 'columns'}, default 0
        The axis to concatenate along
    join : {'inner', 'outer'}, default 'outer'
        How to handle indexes on other axis
    interleave_partitions : bool, default False
        Whether to concatenate DataFrames ignoring its order. If True, every
        divisions are concatenated each by each.
    ignore_unknown_divisions : bool, default False
        By default a warning is raised if any input has unknown divisions.
        Set to True to disable this warning.
    ignore_order : bool, default False
        Whether to ignore order when doing the union of categoricals.

    Notes
    -----
    This differs in from ``pd.concat`` in the when concatenating Categoricals
    with different categories. Pandas currently coerces those to objects
    before concatenating. Coercing to objects is very expensive for large
    arrays, so dask preserves the Categoricals by taking the union of
    the categories.

    Examples
    --------
    If all divisions are known and ordered, divisions are kept.

    >>> import dask.dataframe as dd
    >>> a                                               # doctest: +SKIP
    dd.DataFrame<x, divisions=(1, 3, 5)>
    >>> b                                               # doctest: +SKIP
    dd.DataFrame<y, divisions=(6, 8, 10)>
    >>> dd.concat([a, b])                               # doctest: +SKIP
    dd.DataFrame<concat-..., divisions=(1, 3, 6, 8, 10)>

    Unable to concatenate if divisions are not ordered.

    >>> a                                               # doctest: +SKIP
    dd.DataFrame<x, divisions=(1, 3, 5)>
    >>> b                                               # doctest: +SKIP
    dd.DataFrame<y, divisions=(2, 3, 6)>
    >>> dd.concat([a, b])                               # doctest: +SKIP
    ValueError: All inputs have known divisions which cannot be concatenated
    in order. Specify interleave_partitions=True to ignore order

    Specify interleave_partitions=True to ignore the division order.

    >>> dd.concat([a, b], interleave_partitions=True)   # doctest: +SKIP
    dd.DataFrame<concat-..., divisions=(1, 2, 3, 5, 6)>

    If any of division is unknown, the result division will be unknown

    >>> a                                               # doctest: +SKIP
    dd.DataFrame<x, divisions=(None, None)>
    >>> b                                               # doctest: +SKIP
    dd.DataFrame<y, divisions=(1, 4, 10)>
    >>> dd.concat([a, b])                               # doctest: +SKIP
    dd.DataFrame<concat-..., divisions=(None, None, None, None)>

    By default concatenating with unknown divisions will raise a warning.
    Set ``ignore_unknown_divisions=True`` to disable this:

    >>> dd.concat([a, b], ignore_unknown_divisions=True)# doctest: +SKIP
    dd.DataFrame<concat-..., divisions=(None, None, None, None)>

    Different categoricals are unioned

    >>> dd.concat([
    ...     dd.from_pandas(pd.Series(['a', 'b'], dtype='category'), 1),
    ...     dd.from_pandas(pd.Series(['a', 'c'], dtype='category'), 1),
    ... ], interleave_partitions=True).dtype
    CategoricalDtype(categories=['a', 'b', 'c'], ordered=False, categories_dtype=object)
    z/dfs must be a list of DataFrames/Series objectsr   zNo objects to concatenater   )r  rk  z!'join' must be 'inner' or 'outer')r   r  r  r   r   r   r  r   r  r   r   r:   )dfsr   r  ignore_unknown_divisionsignore_orderinterleave_partitionsr   rw  s           r   r/  r/    s   J c4 IJJ
3x1}455
3x1}19CFF3q6??$$1v%%<==NQ
R*R";;r?C
RC
RqyQb3rzz?Q#6*R:PrQQ$!	
 	

 
 S Rs   $D5-D#Dc           
     D    t        t        j                  | d|||||      S )NT)r   r  r  r  r  r  )r  r   r  )r   r  r  r  r  r  s         r   r  r    s-     		 	r   c                \   |||fD ]  }t        |t              st                |s6|s4|s2|s0|s.| j                  D cg c]  }|j                  v s| }}|sdx}}|r|s|s|x}}t        j
                  j                  j                  |      rt        |t              st        |      }t        j
                  j                  j                  |      rt        |t              st        |      }d}||vrt        d| d| d      |dk(  ro|st        fdt        |      D              rt        d      t        |         j                  t        t        t        |      t        |                  	      |}t        |       s)|r|r| j!                  | |         } d }d}t#        | d
      } t              s)|r|rj!                  |         d }d}t#        d
      t%        j&                        sJ |r|rt)        | ||       t+        t-        | |||||||	t/        |
      ||            }| j&                  j0                  j2                  j&                  j0                  j2                  k7  r0|j5                  |j&                  j0                  j2                        S |S c c}w )NT)r   r   rk  r  leftsemiz+dask.dataframe.merge does not support how='z'.Options are: r  r  c              3  :   K   | ]  }|j                   v  y wr   r  )r   r  r   s     r   r   zmerge.<locals>.<genexpr>  s      
'(AU]]"
r  zMhow='leftsemi' does not support right_index=True or on columns from the indexr  r   r1  )
ro  r  r  r  r  r  r  r  _npartitionsr  r  )r   r   r   r   r  r  r  r  r  r   r   rg   r  r  r  r#   r!  r  r0   r   rt   r   rK   rm   r   r   r  )r   r   ro  r  r  r  r  r  r  r  r  r   r  r  r  supported_howr   s    `               r   r  r    s     '8$ (a#%''( ghz+<Aemm);a<<'++J	'((	vv||  )*Wi2Pw-	vv||  *:h	3R>CM
-9# ?)?!-
 	

 j# 
,<X,F
 
 &_  *845<<S!1(!;=Mg=VWX = E H d#7>>$w-0DGJ4Q/e$(OOE(O4EHKEq1U[[)))8D%(;!#0@$	
F  zz 1 1 6 66!!(:(:(?(?!@@M =s   J)J)c                   |dvrt        d      ||||||||	|
|||d}| |t        d      t        | t        j                        r1t        |t        j                        rt        j                  | |fi |S |||t        d      |x}}||d<   ||d<   |d= ||fD ]  }t        |t
              st        d	       t        |       st        | d
      } t        |      st        |d
      }|||	t        d      |x|d<   |d<   |d= ||	t        d      ||	t        d      ddl	m
} t         || |fi |      S )N)backwardforwardnearestzLInvalid merge_asof direction. Choose from 'backward' 'forward', or 'nearest')r  r  r  r  r  r	  left_byright_byr  	toleranceallow_exact_matches	directionzCannot merge_asof on NonezSCan only pass argument 'on' OR 'left_on' and 'right_on', not a combination of both.r  r  r  z7Dask collections not currently allowed in merge columnsr   r1  zSCan only pass argument 'by' OR 'left_by' and 'right_by', not a combination of both.r  r  r	  z;Must specify both left_on and right_on if one is specified.r   )	MergeAsof)r   r   r  r   
merge_asofr   r   r#   r  $dask.dataframe.dask_expr._merge_asofr  r   )r   r   r  r  r  r  r  r	  r  r  r  r  r  r  r   r  r  s                    r   r  r    s   " ::'
 	
  "2F |u}455 $%*UBLL*I}}T53F33	~("6'   (#y%ztx  a#%I  d#4Q/e$Eq1	~("6e  243yF:.t8/VWWx/VWW>)D%:6:;;r   )r   rQ  r   r  r  c               "   ddl m}m}	 ddlm}
 d|v rt        d      t               }t        |      }t        |      D ]G  \  }}t        |t              st        dt        |             	 |j                  t        |             I t        |      dk(  rt        d      t        |      dkD  rt        d	      |dhk(  rt        d
      d}d}t#        j$                  |       j&                  j)                  dd      x}r/d}|j*                  |j,                  u }|t.        u r1|r/t!        d      t        | |
      rt1        j2                  d       nd}|g n|}|i n|}|r-|j5                  dd      }t7         |	| |||||||||
            }nt7         || |||||||            }t9               rt7        t;        j<                  |            S |S # t        t         f$ r/ t        |      ||<   |j                  t        ||                Y w xY w)a  Create a DataFrame collection from a custom function map.

    ``from_map`` is the preferred option when reading from data sources
    that are not natively supported by Dask or if the data source
    requires custom handling before handing things of to Dask DataFrames.
    Examples are things like binary files or other unstructured data that
    doesn't have an IO connector.

    ``from_map`` supports column projection by the optimizer. The optimizer
    tries to push column selections into the from_map call if the function
    supports a ``columns`` argument.

    Parameters
    ----------
    func : callable
        Function used to create each partition. Column projection will be
        enabled if the function has a ``columns`` keyword argument.
    *iterables : Iterable objects
        Iterable objects to map to each output partition. All iterables must
        be the same length. This length determines the number of partitions
        in the output collection (only one element of each iterable will
        be passed to ``func`` for each partition).
    args : list or tuple, optional
        Positional arguments to broadcast to each output partition. Note
        that these arguments will always be passed to ``func`` after the
        ``iterables`` positional arguments.
    $META
    divisions : tuple, str, optional
        Partition boundaries along the index.
        For tuple, see https://docs.dask.org/en/latest/dataframe-design.html#partitions
        For string 'sorted' will compute the delayed values to find index
        values.  Assumes that the indexes are mutually sorted.
        If None, then won't use index information
    label : str, optional
        String to use as the function-name label in the output
        collection-key names.
    token : str, optional
        String to use as the "token" in the output collection-key names.
    enforce_metadata : bool, default True
        Whether to enforce at runtime that the structure of the DataFrame
        produced by ``func`` actually matches the structure of ``meta``.
        This will rename and reorder columns for each partition,
        and will raise an error if this doesn't work,
        but it won't raise if dtypes don't match.
    **kwargs:
        Key-word arguments to broadcast to each output partition. These
        same arguments will be passed to ``func`` for every output partition.

    Examples
    --------
    >>> import pandas as pd
    >>> import dask.dataframe as dd
    >>> func = lambda x, size=0: pd.Series([x] * size)
    >>> inputs = ["A", "B"]
    >>> dd.from_map(func, inputs, size=2).compute()
    0    A
    1    A
    0    B
    1    B
    dtype: string

    The optimizer will identify a column selection that happens after from_map
    and push the columns argument into the actual map call to drop unnecessary
    columns as early as possible.

    >>> def map_function(x, columns=None):
    ...     df = pd.DataFrame({"a": [1, 2], "b": x})
    ...     if columns is not None:
    ...         df = df[columns]
    ...     return df
    >>> dd.from_map(map_function, [1, 2])["b"].compute()
    0    1
    1    1
    0    2
    1    2
    Name: b, dtype: int64

    This API can also be used as an alternative to other file-based
    IO functions, like ``read_csv`` (which are already just
    ``from_map`` wrapper functions):

    >>> import pandas as pd
    >>> import dask.dataframe as dd
    >>> paths = ["0.csv", "1.csv", "2.csv"]
    >>> dd.from_map(pd.read_csv, paths).head()  # doctest: +SKIP
                        name
    timestamp
    2000-01-01 00:00:00   Laura
    2000-01-01 00:00:01  Oliver
    2000-01-01 00:00:02   Alice
    2000-01-01 00:00:03  Victor
    2000-01-01 00:00:04     Bob

    Since ``from_map`` allows you to map an arbitrary function
    to any number of iterable objects, it can be a very convenient
    means of implementing functionality that may be missing
    from other DataFrame-creation methods. For example, if you
    happen to have apriori knowledge about the number of rows
    in each of the files in a dataset, you can generate a
    DataFrame collection with a global RangeIndex:

    >>> import pandas as pd
    >>> import numpy as np
    >>> import dask.dataframe as dd
    >>> paths = ["0.csv", "1.csv", "2.csv"]
    >>> file_sizes = [86400, 86400, 86400]
    >>> def func(path, row_offset):
    ...     # Read parquet file and set RangeIndex offset
    ...     df = pd.read_csv(path)
    ...     return df.set_index(
    ...         pd.RangeIndex(row_offset, row_offset+len(df))
    ...     )
    >>> def get_ddf(paths, file_sizes):
    ...     offsets = [0] + list(np.cumsum(file_sizes))
    ...     return dd.from_map(
    ...         func, paths, offsets[:-1], divisions=offsets
    ...     )
    >>> ddf = get_ddf(paths, file_sizes)  # doctest: +SKIP
    >>> ddf.index  # doctest: +SKIP
    Dask Index Structure:
    npartitions=3
    0         int64
    86400       ...
    172800      ...
    259200      ...
    dtype: int64
    Dask Name: myfunc, 6 tasks
    r   )FromMapFromMapProjectable)DataFrameIOFunctionr>  z,dask_expr does not support a token argument.z2All elements of `iterables` must be Iterable, got z/`from_map` requires at least one Iterable inputr   z)All `iterables` must have the same lengthz+All `iterables` must have a non-zero lengthFr   NTzArgument `func` of `from_map` has a required `columns`  parameter and not `meta` provided.Either provide `meta` yourself or make `columns` an optional argument.zdask_expr does not support the DataFrameIOFunction protocol for column projection. To enable column projection, please ensure that the signature of `func` includes a `columns=` keyword argument instead.)dask.dataframe.dask_expr.ior  r  dask.dataframe.io.utilsr  r   r  r  rh  r   r   r   r
  r,  r   rX  r  r  r  
parametersr0  rd  rf  rI   r   r   rz  r   r{   r   r  )r  r   rQ  r   r  r  	iterablesr   r  r  r  r  r   iterableallow_projectioncolumns_arg_requiredparamr   r   s                      r   from_mapr  e  s?   T H;&!"PQQeGYI + 	+8(H-DT(^DTU 	+KKH&	+ 7|qJKK	W	DEE1#~FGG  !!$'2266y$GGuG$}};:"6Y 
 
D-	.>	
 !24D>RvF**Y-$ 
   	
  d88@AAMC 	* 	+>IaLKKIaL)*	+s   ,G:HHc                    t        | t              r| j                  ||      S t        |       st	        |       r(t        t        t        |       |t                           S t        dt        |        d      )a  Repartition dataframe along new divisions

    Dask.DataFrame objects are partitioned along their index.  Often when
    multiple dataframes interact we need to align these partitionings.  The
    ``repartition`` function constructs a new DataFrame object holding the same
    data but partitioned on different values.  It does this by performing a
    sequence of ``loc`` and ``concat`` calls to split and merge the previous
    generation of partitions.

    Parameters
    ----------

    divisions : list
        List of partitions to be used
    force : bool, default False
        Allows the expansion of the existing divisions.
        If False then the new divisions lower and upper bounds must be
        the same as the old divisions.

    Examples
    --------

    >>> df = df.repartition([0, 5, 10, 20])  # doctest: +SKIP

    Also works on Pandas objects

    >>> ddf = dd.repartition(df, [0, 5, 10, 20])  # doctest: +SKIP
    )r   r  )r   r{   z#repartition is not implemented for r  )r   r   r4  r0   r1   r   rn   rf   r{   r   r
  )rw  r   r  s      r   r4  r4  C  sr    : "i ~~	~??	2	."4R #(?(A
 	
 "$GRzQR"STTr   c           	         t        |      r| j                  j                  vrt        d      t        |      r| j                  j                  vrt        d      t	        j
                   j                  |         st        d      t         j                  |         st        d      t        |      r| j                  j                  v s*t        |      st         fd|D              st        d      g d}t        |      r||vr$t        dd	j                  d
 |D              z         t        t         ||||            S )a  
    Create a spreadsheet-style pivot table as a DataFrame. Target ``columns``
    must have category dtype to infer result's ``columns``.
    ``index``, ``columns``, and ``aggfunc`` must be all scalar.
    ``values`` can be scalar or list-like.

    Parameters
    ----------
    df : DataFrame
    index : scalar
        column to be index
    columns : scalar
        column to be columns
    values : scalar or list(scalar)
        column(s) to aggregate
    aggfunc : {'mean', 'sum', 'count', 'first', 'last'}, default 'mean'

    Returns
    -------
    table : DataFrame

    See Also
    --------
    pandas.DataFrame.pivot_table
    z.'index' must be the name of an existing columnz0'columns' must be the name of an existing columnz 'columns' must be category dtypez$'columns' must have known categoriesc              3  h   K   | ])  }t        |      xr |j                  j                  v  + y wr   )r   r   r   )r   r  rw  s     r   r   zpivot_table.<locals>.<genexpr>  s,     G1	!6bhh&6&6!66Gs   /2z4'values' must refer to an existing column or columns)rR  r  r  r   r#  zaggfunc must be either r  c              3  (   K   | ]
  }d | d   yw)r  Nr   )r   r  s     r   r   zpivot_table.<locals>.<genexpr>  s     1WqAaS(1Ws   )r   r   ry  r>  )r   r   r   r   r  rq   rv   r^  r  r   r[   )rw  r   r   ry  r>  available_aggfuncss   `     r   r=  r=  n  s'   4 UuBHH,<,<<IJJW0@0@!@KLL''(9:;<< 12?@@ 	&bhh&&& GGGOPPBW0B!B%		1WDV1W(WW
 	
 2UGFGT r   downcastr  c                   |dvrt        d      t        |       r1|t        d       t        t        j
                  d      | ||      S t        |       rIt        t        t        |       j                  | j                        ||            j                  |      S t        |       rt        t        | |||            S t        d	      )
z
    Return type depends on input. Delayed if scalar, otherwise same as input.
    For errors, only "raise" and "coerce" are allowed.
    )r  coercezinvalid error value specifiedz/``meta`` is not allowed when input is a scalar.T)pure)r  r  r  )r   r  r  rQ  zEarg must be a list, tuple, dask.array.Array, or dask.dataframe.Series)r   pd_is_scalarr  r}   r  
to_numericr   r   rF   r  r  r   r  r1   r  )r   r  r  rQ  s       r   r  r    s     ((899CLMM0wr}}40VhWWC3&&syy1&8
 -T-
"		# cCM
 	
 O r   c                   |j                  d      rddini }t        | g      \  } |*t        | t              rz t	        |       j
                  g fi |}| j                  |_        t        r|j                  t	        |       j                  t        | j                              j                        }nt        |       st        |       st        d       t!        |       t#        j$                  di |g      }|j&                  j)                  | j&                  j*                        |_        | j&                  j                  |j&                  _        nt-        |      }|j/                  dd        t1        t3        | ||            S )NutctzzSdask.dataframe.to_datetime does not support non-index-able arguments (like scalars)infer_datetime_format)r   r   rQ  )2000)r0  rj   r   r  r   DatetimeIndexr   re   as_unitto_datetimers   r   r  r0   r1   r   rz   r  r  r   r  r   rr   rz  r   rE   )r   rQ  r   tz_kwargs       r   r  r    s3    &

5 1e}rH&FS|c5!6#C(66rFXFDDI||$S)55mCII6NOTT $C(N3,?%: 
 0*3/1Q1Q0RSD**399??;DJ!iinnDJJO
JJ&-*3vDIJJr   c                f    t        | t              st        d      t        t	        | ||            S )Nzarg must be a Series)r   r  r  )r   r   r  r   rG   )r   r  r  s      r   to_timedeltar    s,    c6".//+Cd6JKKr   c                .    t        t        ||g|        S r   )r   ro   )scalarsrQ  namess      r   rj  rj    s    +dE<G<==r   r  c                  |r
t               |D 
cg c]  }
t        |
t              rt        |
      n|
! }}
i }g }|	j	                         D ]V  \  }}t        |t              r<t        |      }|j                  |       t        |j                         d         ||<   R|||<   X ~	t        j                  |d   | ||||||||j                  dd      t        |      t        |      dz
  g|dd | }t        |      S c c}
w )a  Apply Python function on each DataFrame partition.

    Parameters
    ----------
    func : function
        Function applied to each partition.
    args, kwargs :
        Arguments and keywords to pass to the function.  At least one of the
        args should be a Dask.dataframe. Arguments and keywords may contain
        ``Scalar``, ``Delayed`` or regular python objects. DataFrame-like args
        (both dask and pandas) will be repartitioned to align (if necessary)
        before applying the function (see ``align_dataframes`` to control).
    enforce_metadata : bool, default True
        Whether to enforce at runtime that the structure of the DataFrame
        produced by ``func`` actually matches the structure of ``meta``.
        This will rename and reorder columns for each partition,
        and will raise an error if this doesn't work,
        but it won't raise if dtypes don't match.
    transform_divisions : bool, default True
        Whether to apply the function onto the divisions and apply those
        transformed divisions to the output.
    align_dataframes : bool, default True
        Whether to repartition DataFrame- or Series-like args
        (both dask and pandas) so their divisions align before applying
        the function. This requires all inputs to have known divisions.
        Single-partition inputs will be split into multiple partitions.

        If False, all inputs must have either the same number of partitions
        or a single partition. Single-partition inputs will be broadcast to
        every partition of multi-partition inputs.
    required_columns : list or None, default None
        List of columns that ``func`` requires for execution. These columns
        must belong to the first DataFrame argument (in ``args``). If None
        is specified (the default), the query optimizer will assume that
        all input columns are required.
    $META
    r   r>  Nr   )r   r   r|   rH   ri  ru  r    rB  r   MapPartitionsrz  r   r   r   )r  rQ  r  r  r   r  r  r  r   r   r+  	newkwargsdelayed_kwargsr   r{  dexprnew_exprs                    r   r  r    s"   d  "##FJKz!W5LO1<KDKIN 1a! OE!!%("5#6#6#8#;<IaLIaL 	!!Qgt$YD	A 
ab 
H  (##9 Ls   $C>r  c                  t        |t              rt        j                  |      }t        |t              rt        j                  |      }t        |t        j
                        st        |t        j
                        r]t        |t              r!|j                  j                  j                  }n|j                  j                  }t        |      s@t        d      t        |t              r|dk\  rt        |t              r|dk\  st        d      t        |g      d   }t        |	      }	|r|g|	z   }|D cg c]  }t        |t              s| }}t        |      dkD  rKt!        j"                  | s9t%        t!        j&                  | ||||||||
j)                  dd      |
g|	       S t!        j*                  | ||||||||
j)                  dd      |
g|	 }t%        |      S c c}w )a]	  Apply a function to each partition, sharing rows with adjacent partitions.

    Parameters
    ----------
    func : function
        The function applied to each partition. If this function accepts
        the special ``partition_info`` keyword argument, it will receive
        information on the partition's relative location within the
        dataframe.
    df: dd.DataFrame, dd.Series
    args, kwargs :
        Positional and keyword arguments to pass to the function.
        Positional arguments are computed on a per-partition basis, while
        keyword arguments are shared across all partitions. The partition
        itself will be the first positional argument, with all other
        arguments passed *after*. Arguments can be ``Scalar``, ``Delayed``,
        or regular Python objects. DataFrame-like args (both dask and
        pandas) will be repartitioned to align (if necessary) before
        applying the function; see ``align_dataframes`` to control this
        behavior.
    enforce_metadata : bool, default True
        Whether to enforce at runtime that the structure of the DataFrame
        produced by ``func`` actually matches the structure of ``meta``.
        This will rename and reorder columns for each partition,
        and will raise an error if this doesn't work,
        but it won't raise if dtypes don't match.
    before : int, timedelta or string timedelta
        The rows to prepend to partition ``i`` from the end of
        partition ``i - 1``.
    after : int, timedelta or string timedelta
        The rows to append to partition ``i`` from the beginning
        of partition ``i + 1``.
    transform_divisions : bool, default True
        Whether to apply the function onto the divisions and apply those
        transformed divisions to the output.
    align_dataframes : bool, default True
        Whether to repartition DataFrame- or Series-like args
        (both dask and pandas) so their divisions align before applying
        the function. This requires all inputs to have known divisions.
        Single-partition inputs will be split into multiple partitions.

        If False, all inputs must have either the same number of partitions
        or a single partition. Single-partition inputs will be broadcast to
        every partition of multi-partition inputs.
    $META

    See Also
    --------
    dd.DataFrame.map_overlap
    zMMust have a `DatetimeIndex` when using string offset for `before` and `after`r   z*before and after must be positive integersr   r>  N)r   r  r  r  r  r  r   r   r   r  r   r  r	   r   rj   r   r   r   r   MapOverlapAlignrz  
MapOverlap)r  rw  r  r  rQ  r  r  r   r  r   r   r  r  r  s                 r   r  r  G  s   @ &#(%&&(,,-E8CUCU1Vb)$HH33AAMHH22M&}5+  	68$aKuh'QJEFF	RD	!!	$Bd#DdTk=b:b)#<r==s8a< 3 3S 9!$$$'#$JJw-  " 


7D! 
H (##C >s   7G'G'c                t    t        | t              r| j                         S t        |       j                         S r   )r   r   rq  r  )r   s    r   rq  rq    s,    #y!xxz3$$&&r   )rQ  r/  r  c          	     R   t        |      }|D cg c]  }t        |t              s| }}t        |      dk  st	        j
                  | r't        t	        j                  |d   | |||g|       }n%t        t	        j                  |d   | ||g|       }t        ||      S c c}w )a  Elementwise operation for Dask dataframes

    Parameters
    ----------
    op: callable
        Function to apply across input dataframes
    *args: DataFrames, Series, Scalars, Arrays,
        The arguments of the operation
    meta: pd.DataFrame, pd.Series (optional)
        Valid metadata for the operation.  Will evaluate on a small piece of
        data if not provided.
    transform_divisions: boolean
        If the input is a ``dask.dataframe.Index`` we normally will also apply
        the function onto the divisions and apply those transformed divisions
        to the output.  You can pass ``transform_divisions=False`` to override
        this behavior
    out : ``dask.array`` or ``None``
        If out is a dask.DataFrame, dask.Series or dask.Scalar then
        this overwrites the contents of it with the result
    **kwargs: scalars

    Examples
    --------
    >>> elemwise(operator.add, df.x, df.y)  # doctest: +SKIP
    r   r   )
rj   r   r   r   r   r   r   UFuncElemwise
UFuncAlign
handle_out)	r   rQ  r/  r  r   r   rw  r  r   s	            r   r~  r~    s    6 d#D
:"
2y 92
:C
:
3x1}++S1s1vr41DfTtT
  AD& P4 PQc6"" ;s
   B$B$c           
        t        | t              r/t        |       dk(  r| d   } nt        |       dkD  rt        d      d} | N| j                  |j                  k7  r5t        dt        t        |             dt        t        |                  t        | t              rtt        | j                        t        |j                        k7  rIt        dt        t        | j                              dt        t        |j                                    t        | t        t        t        f      r|j                  | _        y| 8dt        t        |             d	t        t        |            d
}t        |      |S )a1  Handle out parameters

    If out is a dask.DataFrame, dask.Series or dask.Scalar then
    this overwrites the contents of it with the result. The method
    replaces the expression of the out parameter with the result
    from this operation to perform something akin to an inplace
    modification.
    r   r   z4The `out` parameter with length > 1 is not supportedNz7Mismatched types between result and out parameter. out=z	, result=z?Mismatched columns count between result and out parameter. out=z8The out parameter is not fully supported. Received type z, expected  )r   r  r   r   r   r  r  r
  r   r   r   r   r3  r4   r   )r/  r   r   s      r   r  r    s6    #us8q=a&CX\%F  C
3==F,<,<<#&tCy>3tF|3DF
 	

 #y!s{{s6>>22'*3s{{+;'<c#fnnBU>VX 
 #	623LL		
 c#f&	 	 "#&&r   c                   | j                  t        j                  |       }| j                  t        j                  |       }| j                  t        |       }t        |||      \  }}}|j                         j                         }|j                         j                         }t        ||      D cg c]  \  }}|dk7  s| }}}t        ||      D cg c]  \  }}|dk7  s| }}}t        |      |k7  st        |      |k7  rFt        d| j                  xs d dd| j                  xs d dt        t        |||                   |sdt        d t        |d	d
 |d
d       D              rBt        j                  d| j                  xs d dt        t        |||             t                t#        j                  |      }|s|||fS |||fS c c}}w c c}}w )zFor a given column, compute the min, max, and len of each partition.

    And make sure that the partitions are sorted relative to each other.
    NOTE: this does not guarantee that every partition is internally sorted.
    r  r   z'Partitions are not sorted ascending by z	the indexz. z.In your dataset the (min, max, len) values of z for each partition are: c              3  ,   K   | ]  \  }}||k    y wr   r   )r   r+  r%  s      r   r   z+_compute_partition_stats.<locals>.<genexpr>?  s      !1aQ!s   r   Nr  zPartitions have overlapping values, so divisions are non-unique. Use `set_index(sorted=True)` with no `divisions` to allow dask to fix the overlap. In your dataset the (min, max, len) values of z for each partition are : )r  r   r  rW  r   r   r  r  r  rO  r   r   r  r   r   r   r  r  )	r  r  r  r  r  mr  non_empty_minsnon_empty_maxess	            r   r  r  &  s      V 4D!!!%%f!5E  6 2DeT2D%::< DKKM  "E),T4HIAvFaKaHNH*-eT*:JYQfkqJOJ~.0/"o55fkk6P[5QQST<V[[=WK<X Y''+CeT,B'C&DF
 	

 S !~ab1?3B3GH!  	==C[[=WK<X Y((,Sud-C(D'EG 	
 >>$DeT""663 IJs   6GGG(Gc                ,    t        | j                        S r   )rp   r   )r  s    r   get_parallel_type_framer  P  s    QWW%%r   r   r  r*  )iP  NNr  )r  r   )NNNNNNNFFNr  rd  Nr  fsspecNN)r   rk  FFFr  r  )NNNFFNNNr  NTr  r#  r  )r  NN)Nr  )r  r   r  r/  r  ztuple[list, list, list[int]])
__future__r   r  rZ  r  r   r  r   r   r   r   r   numbersr	   r
   typingr   r   r   numpyr  r  r  r  r  fsspec.utilsr   packaging.versionr   r  r   r   r   r   r   r   r  r   pandas.core.dtypes.commonr   r   r  tlzr   r  r'  r   r  r=  dask.dataframe.methods	dataframer  r   r   dask._collectionsr   
dask._exprr   dask._task_specr   r    r!   	dask.baser"   r#   r$   	dask.corer%   dask.dataframe._compatr&   r'   r(   dask.dataframe.accessorr)   dask.dataframe.corer*   r+   r,   r-   r.   r/   r0   r1   r2   dask.dataframe.dask_exprr4   r   r6   %dask.dataframe.dask_expr._categoricalr7   r8   r9    dask.dataframe.dask_expr._concatr:   "dask.dataframe.dask_expr._datetimer;   "dask.dataframe.dask_expr._describer<   r=   dask.dataframe.dask_expr._exprr>   r?   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   dask.dataframe.dask_expr._mergerJ   rK   "dask.dataframe.dask_expr._quantilerL   #dask.dataframe.dask_expr._quantilesrM   $dask.dataframe.dask_expr._reductionsrN   rO   rP   rQ   rR   rS   rT   rU   rV   rW   rX   rY   rZ   r[   r\   r]   %dask.dataframe.dask_expr._repartitionr^   r_   !dask.dataframe.dask_expr._shuffler`   ra   rb   rc   &dask.dataframe.dask_expr._str_accessorrd   dask.dataframe.dask_expr._utilre   rf   rg   rh   ri   rj   rk   rl   rm   r  rn   ro   dask.dataframe.dispatchrp   rq   rr   rs   dask.dataframe.multirt   dask.dataframe.utilsru   rv   rw   rx   ry   rz   r{   dask.delayedr|   r}   dask.tokenizer~   
dask.utilsr   r   r   r   r   r   r   r   r   r   r   r   dask.widgetsr   r   r   r   r   r   r   r  r   setattrpartialmethodr   r   r   r  r  r3  r-  r  r  rN  register_inplacer  r   r  r/  r  r  r  r  r4  r=  r  r  r  rj  r  r  rq  r~  r  r  registerr  r   r   r   <module>r5     s-   "     A A  $ ) )    ' 4 # U U 6 1 >    ) ( ( ) , % )  L L  O O 2
 
 
 3 J 
 4 ? R    A = D    $ O  B
 
 
 I  5	 	 	 * 1    & $( 	G2\~4  ]$  ]$@ID  JB@ Ir2922=RHIA JD PB
 Ir29223F2NOPu	 up+KBY KB\ DD& y$'--'It7iHIvt$*d*$FD4T6BC/D4WIF WItDY DDE`F0"f .--h7 
1 81h1Gh .--h7	
7#o 8oh	 
	"@J ( rxx 	Y Yx r}} R< R<p 
	
[|(UV4n b:,' (< r~~K K< rL L>  
U$ U$p  
~$ ~$B' (Tt ##L-b +0'7'7#''7!'7T I&& '&r   