
    bikD                    T   U d dl mZ d dlZd dlZd dlZd dlmZ d dlmZm	Z	m
Z
 d dlmZmZ d dlZd dlZd dlmZmZ d dlmZ d dlmZ d d	lmZ g d
Zdddifdddifdddifdi fdddifdZded<   e	 G d d             Ze	 G d d             Ze	 G d d             Z e	 G d d             Z!d?dZ"de#ddf	 	 	 	 	 	 	 	 	 	 	 d@d Z$g d!Z%dAdBd"Z&dCd#Z'dDd$Z(e)e"e#e$e*e'e+e'd%e'd&e'd'e(d(e$d)e$d*e$d+e$d,e"d-e"d.e"d/e"iZ,d0ed1<    G d2 d3e      Z-d4 Z.dEd5Z/dFd6Z0erd7nd8Z1d9d:dd;d<e1 dfd=Z2dGdHd>Z3y)I    )annotationsN)Callable)asdict	dataclassfield)Anycast)PANDAS_GE_220PANDAS_GE_300)is_object_string_dtype)DataFrameIOFunction)random_state_data)make_timeseries	with_spec
ColumnSpecRangeIndexSpecDatetimeIndexSpecDatasetSpec lam  scalehigh)r   g      ?)r   )poissonnormaluniformbinomialrandomz1dict[str, tuple[tuple[Any, ...], dict[str, Any]]]default_int_argsc                      e Zd ZU dZdZded<   	 dZded<   	 dZded	<   	 dZd
ed<   	  e	e
      Zded<   	 dZd
ed<   	 dZd
ed<   	 dZd
ed<   	 dZded<   	 dZded<   	  e	e      Zded<   	  e	e      Zded<   y)r   a  Encapsulates properties of a family of columns with the same dtype.
    Different method can be specified for integer dtype ("poisson", "uniform",
    "binomial", etc.)

    Notes
    -----
    This API is still experimental, and will likely change in the futureN
str | Noneprefixzstr | type | Nonedtype   intnumber
int | Nonenuniquedefault_factorylistchoiceslowr   lengthFboolr   methodtuple[Any, ...]argszdict[str, Any]kwargs)__name__
__module____qualname____doc__r"   __annotations__r#   r&   r(   r   r+   r,   r-   r   r.   r   r0   tupler2   dictr3   r       Q/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/dask/dataframe/io/demo.pyr   r   $   s    L FJE#E#6FCO[ GZI$/GT/AC D*.FJYFD FJJ "%8D/8&"48FN82r;   r   c                  0    e Zd ZU dZeZded<   	 dZded<   y)r   zProperties of the dataframe RangeIndex

    Notes
    -----
    This API is still experimental, and will likely change in the future
str | typer#   r$   r%   stepN)r4   r5   r6   r7   r%   r#   r8   r?   r   r;   r<   r   r   W   s#    L E:D#Mr;   r   c                  P    e Zd ZU dZeZded<   	 dZded<   	 dZded	<   	 dZ	ded
<   y)r   zProperties of the dataframe DatetimeIndex

    Notes
    -----
    This API is still experimental, and will likely change in the futurer>   r#   Nr!   start1Hstrfreqpartition_freq)
r4   r5   r6   r7   r%   r#   r8   rA   rD   rE   r   r;   r<   r   r   f   s>    L E:E:"D#4!%NJ%0r;   r   c                  l    e Zd ZU dZdZded<   	 dZded<   	  ee      Z	ded	<   	  ee
      Zd
ed<   y)r   zDefines a dataset with random data, such as which columns and data types to generate

    Notes
    -----
    This API is still experimental, and will likely change in the futurer$   r%   npartitionsr   nrecordsr)   z"RangeIndexSpec | DatetimeIndexSpec
index_speczlist[ColumnSpec]column_specsN)r4   r5   r6   r7   rG   r8   rH   r   r   rI   r+   rJ   r   r;   r<   r   r   {   sU    L K& Hc-5:&6J2  "%*4%@L"@$r;   r   Fc                    |j                  dd        |j                  dd        |r |j                  dd| i|S |j                  |       dz  dz
  S )Nr#   r2   size   r$   r   )popr   rand)nrstater   r3   s       r<   
make_floatrR      sR    
JJw
JJvtv}}.!.v..;;q>A!!r;   r   c                    fd}|r' |d      \  }}	d|	v rd} |j                   |d| i|	}
|
S t        |t              r# ||      \  }}	t        ||      } ||d| i|	}
|
S  ||| d}
|
S )Nc                    t         j                  | di f      \  }}|j                         } |j                  di  rn|}||fS )Nr   )r   getcopyupdate)_methodhandler_argshandler_kwargsr2   r3   s      r<   _with_defaultsz make_int.<locals>._with_defaults   sS    '7';';Gb"X'N$n',,.''#t^++r;   r   r-   r   rL   )staterL   )randint
isinstancerC   getattr)rP   rQ   r   r#   r0   r2   r3   r[   rY   rZ   datahandlers        ``     r<   make_intrb      s    , '5h'?$nN"Lv~~|F!F~F K fc"+9&+A(L.ff-GLCqCNCD K 4vA@@DKr;   )AliceBobCharlieDanEdithFrankGeorgeHannahIngridJerryKevinLauraMichaelNorbertOliverPatriciaQuinnRaySarahTimUrsulaVictorWendyXavierYvonneZeldac           	         t        t        j                  t        j                  z   t        j                  z   dz         }t        |       D cg c]$  }dj                  |j                  ||            & c}S c c}w )N  rL   )r+   stringascii_lettersdigitspunctuationrangejoinchoice)rP   rQ   r.   r,   _s        r<   make_random_stringr      sY    6''&--7&:L:LLsRSGBG(KQBGGFMM'M78KKKs   )A8c                    |j                  dd        |rt        | ||      S |xs t        }|j                  ||       S )Nr2   )r.   r   )rN   r   namesr   )rP   rQ   r,   r   r.   r3   s         r<   make_stringr      s?    
JJvt!!VF;;G==q=))r;   c                N   |j                  dd        |Ht        t        |            }t        |      D cg c]  }t        |dz         j	                  |      ! }}n
|xs t
        }t        j                  j                  |j                  dt        |      |       |      S c c}w )Nr2   r$   r   r   )
rN   lenrC   r   zfillr   pdCategorical
from_codesr]   )rP   rQ   r,   r(   r3   cat_lenxs          r<   make_categoricalr      s    
JJvtc'l#6;GnE3q1u:##G,EE"U>>$$V^^As7|!^%LgVV Fs   $B"zstring[python]zstring[pyarrow]categoryint8int16int32int64float8float16float32float64zdict[type | str, Callable]makec                  4    e Zd ZdZddZed        Zd Zd Zy)MakeDataframePartzU
    Wrapper Class for ``make_dataframe_part``
    Makes a timeseries partition.
    Nc                r    || _         |xs t        |j                               | _        || _        || _        y N)index_dtyper+   keys_columnsdtypesr3   )selfr   r   r3   columnss        r<   __init__zMakeDataframePart.__init__
  s/    &64#6r;   c                    | j                   S r   )r   )r   s    r<   r   zMakeDataframePart.columns  s    }}r;   c                ~    || j                   k(  r| S t        | j                  | j                  | j                  |      S )zUReturn a new MakeTimeseriesPart object with
        a sub-column projection.
        r   )r   r   r   r   r3   )r   r   s     r<   project_columnsz!MakeDataframePart.project_columns  s<     dll"K KKKK	
 	
r;   c           	         |\  }}t        | j                  |d   |d   | j                  | j                  || j                        S )Nr   r$   )make_dataframe_partr   r   r   r3   )r   part	divisions
state_datas       r<   __call__zMakeDataframePart.__call__!  sH     $	:"aLaLKKLLKK
 	
r;   r   )	r4   r5   r6   r7   r   propertyr   r   r   r   r;   r<   r   r     s*    
  


r;   r   c                T   t         j                  j                  |      }t        j                  j
                  j                  |       r)t        j                  |||j                  d      d      }nst        j                  j
                  j                  |       r<|j                  d      }	t        j                  |||	z   |	      j                  |       }nt        d|        t        |||||      }
|
j                  d   |k\  r"|
j                  d d }
|
j                  d   |k\  r"|
S )NrD   	timestamp)rA   endrD   name)rA   stopr?   zUnhandled index dtype: )npr   RandomStater   apitypesis_datetime64_any_dtype
date_rangerU   is_integer_dtype
RangeIndexastype	TypeErrormake_partitionindexiloc)r   rA   r   r   r   r   r3   r\   r   r?   dfs              r<   r   r   .  s    II!!*-E	vv||++K8Svzz&'9
 
	&	&{	3zz&!Ed
FMM
 1+?@@		>B
((2,#
WWSb\ ((2,#
Ir;   c                    t         j                  j                  j                  | |      xs t	        |       xr t	        |      S )zOSame as pandas.api.types.is_dtype_equal, but also returns True for str / object)r   r   r   is_dtype_equalr   )abs     r<   same_astyper   B  s7    66<<&&q!, q!?&<Q&?r;   c           	     &   i }|j                         D ]v  \  }}|j                         D 	ci c]3  \  }}	|j                  dd      d   |k(  r|j                  dd      d   |	5 }
}}	t        |   t        |      |fi |
}|| v sr|||<   x t	        j
                  |||       }|j                         D 	ci c]%  \  }}	|| v rt        |	||   j                        s||	' }}}	|rt        ri nddi} |j                  |fi |}|S c c}	}w c c}	}w )Nr   r$   r   )r   r   rV   F)
itemsrsplitr   r   r   	DataFramer   r#   r   r   )r   r   r   r3   r\   r`   kdtkkvkwsresultr   update_dtypess                 r<   r   r   I  s0   D 2  
Ayya #q( IIc1a !#
 
 b#e*e3s3<DG 
d%	9B LLNAq<Ar!u{{ ; 	
1M 
 $65/RYY}//I+
s   8D7*DMEMz
2000-01-01z
2000-12-3110s1c           
     ,    ddl m}  |d| |||||d|S )aS  Create timeseries dataframe with random data

    Parameters
    ----------
    start: datetime (or datetime-like string)
        Start of time series
    end: datetime (or datetime-like string)
        End of time series
    dtypes: dict (optional)
        Mapping of column names to types.
        Valid types include {float, int, str, 'category'}
    freq: string
        String like '2s' or '1H' or '12W' for the time series frequency
    partition_freq: string
        String like '1M' or '2Y' to divide the dataframe into partitions
    seed: int (optional)
        Randomstate seed
    kwargs:
        Keywords to pass down to individual column creation functions.
        Keywords should be prefixed by the column name and then an underscore.

    Examples
    --------
    >>> import dask.dataframe as dd
    >>> df = dd.demo.make_timeseries('2000', '2010',
    ...                              {'value': float, 'name': str, 'id': int},
    ...                              freq='2h', partition_freq='1D', seed=1)
    >>> df.head()  # doctest: +SKIP
                           id      name     value
    2000-01-01 00:00:00   969     Jerry -0.309014
    2000-01-01 02:00:00  1010       Ray -0.760675
    2000-01-01 04:00:00  1016  Patricia -0.063261
    2000-01-01 06:00:00   960   Charlie  0.788245
    2000-01-01 08:00:00  1031     Kevin  0.466002
    r   )
timeseries)rA   r   r   rD   rE   seedr   )!dask.dataframe.dask_expr.datasetsr   )rA   r   r   rD   rE   r   r3   r   s           r<   r   r   g  s6    X = %  r;   c                	   t        | j                        dk(  rFt        ddddd      t        dt        d      t        d	d
g d      t        dt              g| _        g }i }t        | j                  t              rt        j                  | j                  j                        }| j                  j                  }| j                  j                  }t        j                  | j                  j                        | j                  t        j                  |      z  z   }t        t        j                   |||            }|d   |k  r|j#                  |       ||t        j                  |      z   }
}	nt        | j                  t$              r| j                  j&                  }| j                  |z  | j(                  z  }| j                  |z  dz
  }t        t        j*                  d||            }|d   |dz   k  r|j#                  |dz          d|}
}	nt-        d| j                         d|i}| j                  D ]{  }|j.                  r|j.                  }nt        |j0                  t              r2t3        j4                  dd|j0                         j7                  d      }nCt9        |j0                  d      r|j0                  j:                  }n|j0                  j<                  }t?        |j@                        D ]  }|dz   }| | x}|v r|dz   }| | x}|v r|j#                  |       |j0                  ||<   |jC                  tE        |      jG                         D ci c]  \  }}|dvr|dg fvr| d| | c}}       |jH                  jG                         D ]  \  }}||| d| <     ~ t        |      dz
  }|@tK        t        tL           tN        jP                  jS                  tU        d      |            }ntW        ||      }t?        |      D cg c]  }|||dz    ||   f }}ddl,m-} i }t]        j^                         5  t]        j`                  ddtb                 |te        | j                  j0                  |||!      |ftg        | j                  j0                  |	|
|||d   |      |d"d#d$|cddd       S c c}}w c c}w # 1 sw Y   yxY w)%aL  Generate a random dataset according to provided spec

    Parameters
    ----------
    spec : DatasetSpec
        Specify all the parameters of the dataset
    seed: int (optional)
        Randomstate seed

    Notes
    -----
    This API is still experimental, and will likely change in the future

    Examples
    --------
    >>> from dask.dataframe.io.demo import ColumnSpec, DatasetSpec, with_spec
    >>> ddf = with_spec(
    ...     DatasetSpec(
    ...         npartitions=10,
    ...         nrecords=10_000,
    ...         column_specs=[
    ...             ColumnSpec(dtype=int, number=2, prefix="p"),
    ...             ColumnSpec(dtype=int, number=2, prefix="n", method="normal"),
    ...             ColumnSpec(dtype=float, number=2, prefix="f"),
    ...             ColumnSpec(dtype=str, prefix="s", number=2, random=True, length=10),
    ...             ColumnSpec(dtype="category", prefix="c", choices=["Y", "N"]),
    ...         ],
    ...     ), seed=42)
    >>> ddf.head(10)  # doctest: +SKIP
         p1    p2    n1    n2        f1        f2          s1          s2 c1
    0  1002   972  -811    20  0.640846 -0.176875  L#h98#}J`?  _8C607/:6e  N
    1   985   982 -1663  -777  0.790257  0.792796  u:XI3,omoZ  w~@ /d)'-@  N
    2   947   970   799  -269  0.740869 -0.118413  O$dnwCuq\  !WtSe+(;#9  Y
    3  1003   983  1133   521 -0.987459  0.278154  j+Qr_2{XG&  &XV7cy$y1T  Y
    4  1017  1049   826     5 -0.875667 -0.744359  bJ3E-{:o  {+jC).?vK+  Y
    5   984  1017  -492  -399  0.748181  0.293761  ~zUNHNgD"!  yuEkXeVot|  Y
    6   992  1027  -856    67 -0.125132 -0.234529  j.7z;o]Gc9  g|Fi5*}Y92  Y
    7  1011   974   762 -1223  0.471696  0.937935  yT?j~N/-u]  JhEB[W-}^$  N
    8   984   974   856    74  0.109963  0.367864  _j"&@ i&;/  OYXQ)w{hoH  N
    9  1030  1001  -792  -262  0.435587 -0.647970  Pmrwl{{|.K  3UTqM$86Sg  N
    r   ir   i@B T)r"   r#   r-   r   r   f)r"   r#   r   cr   )r   r   r   d)r"   r#   r,   s)r"   r#   )rA   r   rD   r   r$   )r   r?   zUnhandled index: rD   z[^a-zA-Z0-9]r   r   >   r3   r&   r"   Ng    eAr   rM   )from_mapignorezdask_expr does not)messager   r   zmake-randomF)metar   labelenforce_metadata)4r   rJ   r   floatrC   r^   rI   r   r   	TimestamprA   rD   rE   rH   	Timedeltar+   r   appendr   r?   rG   r   
ValueErrorr"   r#   resubrstriphasattrr   r4   r   r&   rW   r   r   r3   r	   r   r   r   r]   r%   r   dask.dataframer   warningscatch_warningsfilterwarningsUserWarningr   r   )specr   r   r   rA   r?   rE   r   r   
meta_startmeta_endr3   colr"   r   col_ncol_namer   r   kw_namekw_valrG   r   partsr   s                            r<   r   r     s   T 4"caiPTUct<c=QRc-	
 GF $//#45T__223##77ll4??001DMMBLLQUDV4VVU.QR	R=3S!$ebll4.@&@H
	DOO^	4##-1A1AAmmd"Q&qsHI	R=C!G$S1W% $H
,T__,=>??$d^F   9::ZZF		3'VVOSSYYKAHHMFSYY'YY^^FYY''Fszz" 	9AEE"(%118f<	 #)%118f<NN8$"yyF8MM !'s 1 1 31 >>1TSUJCV  j!%q( $'::#3#3#5 928(1WI./9	994 i.1$K|$s)RYY%6%6s3xk%6%RS
&{D9
<A+<NOqiAE"JqM2OEO'
A		 	 	" 
2[	
 doo33VVWU
 %%%1  "
 
	
 
-  P
 
s   (S&SA2SS()F)rP   r%   rQ   r   r   r/   r#   r>   r0   zstr | Callabler2   r1   )   )r.   r%   returnz	list[str])NFN)NN)r   r>   r   r>   )r   r+   r   zdict[str, type | str]r   )r   r   r   r'   )4
__future__r   r   r   r   collections.abcr   dataclassesr   r   r   typingr   r	   numpyr   pandasr   dask.dataframe._compatr
   r   dask.dataframe._pyarrowr   dask.dataframe.io.utilsr   
dask.utilsr   __all__r   r8   r   r   r   r   rR   r%   rb   r   r   r   r   r   rC   objectr   r   r   r   r   _MEr   r   r   r;   r<   <module>r     s   " 	   $ 0 0    ? : 7 ( UDM"GT?#VTN#b!fd^$G C  /3 /3 /3d       1 1 1( % % %." &
  	
  B	<L
*W 
:
Kk{ 
HXXXjzzz$  &'
+ '
T(6 d 	se9	6rE
r;   