
    bi,=                       d dl mZ d dlZd dlmZ d dlZd dlZd dl	m
Z
mZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZmZmZmZmZm Z m!Z!m"Z" d dl#m$Z$ d dl%m&Z&  ejN                  ded         Z( ejR                  dd       d(dZ*d Z+d Z,d Z-d Z.d(dZ/d(dZ0d)dZ1d Z2d Z3d*dZ4d+dZ5d,dZ6d,dZ7ddde$fd Z8d! Z9d" Z:d# Z;d$ Z<d-d%Z=d& Z>d.d'Z?y)/    )annotationsN)Iterator)firstunique)core)named_schedulers)methods)PANDAS_GE_300)get_parallel_type)	check_matching_columnshas_known_categoriesis_dataframe_likeis_index_like	is_scalaris_series_likemeta_frame_constructormeta_series_constructorvalid_divisions)
no_default)Mthreadssynczcompute.use_numexprFc                   | s| S t        t        t        j                  |             t        j
                        rt        j                  j                  |       S t        | d         s	 t        j                  |       S | D cg c]  }t        |      s| }}|s| d   S t        j                  |d|      S # t        $ r | cY S w xY wc c}w )Nr   T)uniformignore_index)
isinstancer   r   flattennpndarraydaconcatenate3has_parallel_typepdSeries	Exceptionlenr	   concat)argsr   iargs2s       N/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/dask/dataframe/core.py_concatr,   #   s    %T*+RZZ8ww##D))T!W%	99T?" '1AQ'E'  	Q ^^E4lK  	K	 (s   )B8 C	C	8CCc           	         t        j                  dt        |       |dz         j                  t              }t        |      D ci c]  }|| j                  ||   ||dz        c}S c c}w )z*Split dataframe into k roughly equal partsr      )r   linspacer&   astypeintrangeiloc)dfk	divisionsr)   s       r+   split_evenlyr7   9   s\    As2wA.55c:IAFqJAArwwy|iA&677JJJs    A'c                   ddl m} | r|d   j                  }nt        d |D        t              }|rt        |d   |      rt	        |      dk(  r	  ||D cg c]*  }||d   u rt        j                  |j                        n|, c}i |}t        |t        j                        rt        j                  |      }t        |      sdg|d   j                  dz   z  }|S |S c c}w # t        $ r Y |S w xY w)zL
    Helper to get divisions for map_partitions and map_overlap output.
    r   )Indexc              3  4   K   | ]  }|j                     y wN)r6   ).0ds     r+   	<genexpr>z0_get_divisions_map_partitions.<locals>.<genexpr>L   s     22s   )keyr.   N)dask.dataframer9   r6   maxr&   r   r#   r	   tolistr   npartitionsr%   )	align_dataframestransform_divisionsdfsfuncr(   kwargsr9   r6   as	            r+   _get_divisions_map_partitionsrJ   ?   s     %F$$	 2c2<	z#a&%8SX]
	>GKL!1A;"((1;;'A=LPVI )RXX.#NN95	 #9-!Fc!f&8&81&<=	9 M  	 	s$   C& /C!6C& !C& &	C32C3c                 T   |j                  d      }|j                  d      } || i |}t        d ||fD              rit        |      rt        j                  |      }t        |      s|S t        |      rt        ||       |j                  }n|j                  }t        ||      S |S )zsApply a function, and enforce the output to match meta

    Ensures the output has the same columns, even if empty._func_metac              3  x   K   | ]2  }t        t        |      xs t        |      xs t        |             4 y wr;   )boolr   r   r   )r<   objs     r+   r>   z$apply_and_enforce.<locals>.<genexpr>d   s7       	s#P~c':PmC>PQs   8:)popanyr   r#   r$   r&   r   r   columnsname_rename)r(   rH   rG   metar4   cs         r+   apply_and_enforcerX   \   s     ::gD::gD	t	v	B
 :  R=2B2wKR "4,A		Aq"~I    c                   | t         u r|S t        | t              rt        |       } t	        |      rt	        |       r| j
                  } t        | t        j                        st        j                  |       } t        |       t        |j
                        k(  r`t        |       t        |j
                        u r@| j                  |j
                  j                  k(  r| j                  |j
                        r|S |j                  d      }| |_        |S t        |      st        |      rDt        |       st        |       r| j                  } |j                  | k(  r|S |j!                  |       S |S )au  
    Rename columns of pd.DataFrame or name of pd.Series.
    Not for dd.DataFrame or dd.Series.

    Parameters
    ----------
    columns : tuple, string, pd.DataFrame or pd.Series
        Column names, Series name or pandas instance which has the
        target column names / name.
    df : pd.DataFrame or pd.Series
        target DataFrame / Series to be renamed
    F)deep)r   r   r   listr   rS   r#   r9   r&   typedtypeequalscopyr   r   rT   rename)rS   r4   s     r+   rU   rU   u   s    *	'8$w-W%ooG'288,hhw'GLC

O+Wbjj!11!1!11rzz* IWW%W 
			}R0'"mG&<llG77gIyy!!IrY   c                   | j                   d   | j                   d   f}t        ri nddi} | j                  di |} t        j                  | j
                  |      }t        j                  | j
                  |      }t        t        | j                              D ]c  }| j                  dd|f   j                         }| |   j                         j
                  ||<   | |   j                         j
                  ||<   e | j                   d   dk(  r%t        j                  |t        j                        }n| j                         j
                  }d|j                   fd|j                   fd	|j                   fg}	|r<t#        j$                  d
      5  t#        j&                  d       ||z  j(                  }
ddd       t        j                  | j
                  |      }| j+                         j
                  }t        t        | j                              D ]t  }t        j,                  | j                  dd|f   j
                  dddf   
|   dddf         dz  }t        j                  ||<   t        j.                  |d      ||<   v |j(                  }|	j1                  d|j                   f       ||||dz
  z  d}|r|d<   |S # 1 sw Y   xY w)z5Chunk part of a covariance or correlation computationr.   r`   F)shapeNr   sumcountcovT)recordalways   axismrd   re   rf   )float64)rc   r
   r0   r   
zeros_likevaluesr2   r&   rS   r3   notnullrd   re   	full_likenanrf   r^   warningscatch_warningssimplefilterTisnullsubtractnansumappend)r4   corrrc   rH   sumscountsidxmaskrf   r^   murl   mu_discrepancyouts                 r+   _cov_corr_chunkr      s[   XXa["((1+&E RvuoF		'	'B==%0D]]299E2FS_% .wwq#v&&(tHLLN))S	hnn&--s.
 
xx{all4(ffhooTZZ 7FLL"9E399;MNE$$D1 	#!!(+-""B	# MM"))51yy{!!RZZ) 	7C BGGAsFO221d7;RWT1W=MNRSS  $&66N4 YY~A6AcF	7 CCc177^$vz0B
CCCJ%	# 	#s   %KKc                   d d d d}|rd |d<   |j                         D ]b  }| D cg c]  }||   	 c}||<   t        j                  ||         j                  t	        ||         f||   d   j
                  z         ||<   d t        j                  |d         }|d   }t        j                  |d      }t        j                  |d      }|d d }	|dd  }
|d d }|dd  }t        j                  d	      5  |
|z  |	|z  z
  }t        j                  ||z  ||z   z  ||j                  d
      z  z  d      t        j                  |d   d      z   }d d d        |d   |d   d}|rt        j                  |d   |d   t        j                        }|d   |z  }t        j                  ||t        j                        }t        j                  |d   |||z  |z
  dz  z  z   d      }||d<   |S c c}w # 1 sw Y   xY w)Nrm   rl   r   rd   re   r.   ignore)invalid)r   ri   r.   rf   ri   rj   )keysr   concatenatereshaper&   rc   
nan_to_numcumsumerrstaterz   	transposewherers   )data_inr|   datar5   r=   r}   r~   cum_sums
cum_countss1s2n1n2Cr   nobsr   	counts_narl   s                      r+   _cov_corr_combiner      s   $t4DS	YY[ V!()A1Q4)Q..a)113tAw</DGAJDTDT2TUQV ==e%D']Fyyq!H61%J	#2B	abB	CRB	B	X	& &"Wb!II"Wb!QY)?%?@!
IId5k1%&& 2,B
BCxx
2
2?b\D HHVVRVV4	IId3i&D9,<r,Aa+G"GGaPCJ7 *& &s   G.6AG33G<c                   t        | |      }|d   }|d   }t        j                  |||k  <   |r(|d   }	t        j                  |	|	j                  z        }
n(t        j
                  ||t        j                        dz
  }
t        j                  dd      5  ||
z  }d d d        |rt        d         S  |t        j                  n
t        |      ||      S # 1 sw Y   ?xY w)	Nre   rf   rl   r.   r   )r   divide)r   r.   )rS   index)r   r   rs   sqrtrw   r   r   floatr#   	DataFramer   )r   colsmin_periodsr|   scalarlike_dfr   r~   r   m2denmats               r+   _cov_corr_aggr      s    
D$
'C\FE
A ffAf{Xggb244i hhvvrvv.2	Xh	7 #gSYPGOBLL1G1PT 	 s   CC#c           
     8   t        | t        t        f      st        d      t        |       } t	        |       dk(  rt        d      | t        |       k7  rt        d      t	        | d d       t	        t        t        | d d                   k7  rd}t        |      y )Nz"New division must be list or tupler   zNew division must not be emptyzNew division must be sortedr   z8New division must be unique, except for the last element)r   r\   tuple
ValueErrorr&   sortedr   )r6   msgs     r+   check_divisionsr     s    i$/=>>YI
9~9::F9%%677
9Sb>c$vin'=">??Ho @rY   c                   t        | t              s| S t        j                  j                  j                  |       }t        |      j                  }|j                  d      s| S |dt        d        dz   }	 t        t        j                  j                  |      }d| v r| j                  d      \  }}d|z   }nd}|j                  dk7  rt        |j                        nd}| |j                   | S # t        $ r | cY S w xY w)a  Ensure that the frequency pertains to the **start** of a period.

    If e.g. `freq='M'`, then the divisions are:
        - 2021-31-1 00:00:00 (start of February partition)
        - 2021-2-28 00:00:00 (start of March partition)
        - ...

    but this **should** be:
        - 2021-2-1 00:00:00 (start of February partition)
        - 2021-3-1 00:00:00 (start of March partition)
        - ...

    Therefore, we map `freq='M'` to `freq='MS'` (same for quarter and year).
    EndNBegin- r.   )r   strr#   tseriesfrequencies	to_offsetr]   __name__endswithr&   getattroffsetssplitn_prefixAttributeError)freqoffsetoffset_type_name
new_offsetnew_offset_type_anchorr   s           r+   _map_freq_to_period_startr     s      dC ZZ##--d3FF|,,$$U+!-SZK07:J
!"**"4"4jA$;

3IAv6\FF#XX]CM_,,-fX66 s   7A;C3 3D DTc                b    | j                  ||      }t        |      r|j                         }|S )N)r   r[   )memory_usager   rd   )r4   r   r[   	mem_usages       r+   total_mem_usager   5  s-    e$7Ii MMO	rY   c                8   t        |       ri nd|i}|dk(  rdnd}t        |       dkD  r) t        | |      d
d|i|} t        | |      d
d|i|}n t        |       g d      x}}t        |      r t	        |       ||d	      S  t	        |       |g|gd	      S )Nnumeric_onlyidxmaxrA   minr   skipnai8r^   r   value )r   r&   r   r   r   )xfnr   r   numeric_only_kwargsminmaxr   r   s           r+   idxmaxmin_chunkr   <  s     .q 1"7UHnU%F
1vzganBFB.AB"6"H&H4GH0-a04@@ec(%a(u)EFF$!!$cUeW%EFFrY   c                   |dk(  rdnd}t        |       dkD  rT| j                  d      } | j                  j                         } t	        ||      |      g} t	        ||      |      g}n t        |       g d      x}} t        |        t        |       || j                  j                         t        |       || j                  j                  d         d	      S )
Nr   rA   r   r   r   r   r   r   r   )r&   	set_indexr   infer_objectsr   r   r   r   r^   dtypesr3   )r   r   r   r   r   r   s         r+   idxmaxmin_rowr   I  s    HnU%F
1vzKK%%'!wub!01''v670-a04@@e$!!$-*1-cG/,Q/QXX]]1=MN	
 rY   c                    t        |       dk  r| S | j                  d      j                  t        ||      j	                  dd      S )Nr.   r   )level)r   r   T)r   drop)r&   groupbyapplyr   reset_index)r   r   r   s      r+   idxmaxmin_combiner   [  sC    
1v{					}F	3	14	(rY   c                v    t        | ||      d   }t        |      dk(  rt        d      |r|d   S d |_        |S )Nr   r   r   z*attempt to get argmax of an empty sequence)r   r&   r   rT   )r   r   r   r   r   ress         r+   idxmaxmin_aggr   e  sD    
Ar&
1%
8C
3x1}EFF1vCHJrY   c                    | j                         }|j                  |      }|||k(     j                  j                         j	                         j                  d      }|S )Nr   T)r   )rd   rA   r   	to_seriessort_valuesr   )r4   dropnavalue_count_seriesmax_valmode_seriess        r+   _mode_aggregater   o  sZ     $$F$3G-89	yy{		$		  rY   c                    t        j                  | |      }t        |      |k7  r%t        j                  d| dt        |       d       |S )Nz"Insufficient elements for `head`. z elements requested, only z@ elements available. Try passing larger `npartitions` to `head`.)r   headr&   rt   warn)r4   r   rs      r+   	safe_headr   {  sO    	r1A
1v{03McRSfX VN N	
 HrY   c                   t        |      dz
  }t        | j                  t        j                        rt        |       rd}nd}nt        | j                        }t        j                  |gdg|z  z   || j                        S )z1A helper for creating the ``_repr_data`` propertyr.   zcategory[known]zcategory[unknown]z...)r   rT   )	r&   r   r^   r#   CategoricalDtyper   r   r$   rT   )sr   rC   r^   s       r+   _repr_data_seriesr     sg    e*q.K!''2../"%E'EAGG99eWw44EOOrY   c                (    ddl m} t        |       |uS )z2Does this object have a dask dataframe equivalent?r   )Scalar)$dask.dataframe.dask_expr._collectionr   r   )r   r   s     r+   r"   r"     s    ;Qv--rY   c                P   t        |       rE| j                  j                         j                         D ci c]  \  }}|t	        |       }}}n/t        |       r"| j                  t	        | j                        f}nd}d| d}|r|d| d| dt	        |      z  z  }|S c c}}w )zS
    Provide an informative message when the user is asked to provide metadata
    Na  
You did not provide metadata, so Dask is running your function on a small dataset to guess output types. It is possible that Dask will guess incorrectly.
To provide an explicit output types or to silence this message, please provide the `meta=` keyword, as described in the z function that you are using.z
  Before: .z(func)
  After:  .z(func, meta=%s)
)r   r   to_dictitemsr   r   rT   r^   )r4   methodr5   vmeta_strr   s         r+   meta_warningr    s     *,))*;*;*=*C*C*EF$!QAs1vIFF		GGS]+	C DJ( K		    " !2469(mD	

 J' Gs   B"c                    |r| j                         j                  d      S | j                  d      j                  | j                         t        j
                        S )Nr   )r   r0   r   rx   r   rs   )seriesr   s     r+   _convert_to_numericr
    sD    }}%%d++ ==##FMMORVV<<rY   c                   |dk(  rt        j                         5  t        j                  dt        d       |j	                  dd       }t        j                  t        j                  | g|d|i||      }|||j                  |      }|cd d d        S |d   |d	   }}t        j                  |       }	|sMt        j                  |	|j                  d            }|j                  d      |j                  |d         }|S |	j                  j                  |      }
|	|
   }t!        |      d
kD  r|	j                  t"              }	|j                  d      }|d gt!        |      z  }t%        |||      D ]5  \  }}}t        j                  ||      }||j                  |      }||	|<   7 ||	j                  |      }	|	S # 1 sw Y   7xY w)Nr.   r   z!invalid value encountered in cast)categorymessageunitrk   )r  
is_df_like	time_colsr   units)rt   ru   filterwarningsRuntimeWarningrQ   r#   to_timedeltar   stdr0   r   r   getas_unitr   isinr&   objectzip)	partitionrk   r^   r(   rH   r  resultr  r  r   time_col_maskmatching_valsr  time_colmatching_vals                  r+   _sqrt_and_convert_to_timedeltar!    s   qy$$& 	##';
 ::fd+D__i<$<T<V<4F E$5u-	 	 #<0&2E	J779DFJJv,>?::f)^^F6N3FJJOOI.M'M
9~{{6"JJwE}Y'(+I}e(L  $,D9^^D)FX	  {{5!KQ	 	s   A5GG)F)ri   FFN)TF)NTF)NT)r   r;   )@
__future__r   rt   collections.abcr   numpyr   pandasr#   tlzr   r   
dask.arrayarrayr    daskr   	dask.baser   r@   r	   dask.dataframe._compatr
   dask.dataframe.dispatchr   dask.dataframe.utilsr   r   r   r   r   r   r   r   r   dask.typingr   
dask.utilsr   r  DEFAULT_GET
set_optionr,   r7   rJ   rX   rU   r   r   r   r   r   r   r   r   r   r   r   r   r   r"   r  r
  r!  r   rY   r+   <module>r2     s    "  $      & " 0 5
 
 
 # """9.>v.FG #U +,K:2+\$N!H&
$N
G$ T%j 	
P.6=*rY   