
    bi                        d dl mZ d dlZd dlZd dlZd dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZmZ d dlmZ d dlmZ d	gZ G d
 dee      Zg dZd Zd Zd ZddZeeeeeeeededeiZ  G d d      Z!	 	 	 	 	 	 ddZ"y)    )annotationsN)Task)new_collection)ArrowStringConversion)BlockwiseIOPartitionsFiltered)pyarrow_strings_enabled)Key
timeseriesc            
          e Zd ZdZg dZdddeeedddd	i d	d
d	Zej                  d        Z
d Zed        Zej                  d        Zej                  d        ZddZy	)
TimeseriesT)
startenddtypesfreqpartition_freqseedkwargscolumns_partitions_series
2000-01-01
2000-01-31stringnameidxy1s1dNF)	r   r   r   r   r   r   r   r   r   c                    | j                  ddd      j                  d d }| j                  r||j                  d      S |S )N2000r   )_make_timeseries_partilocr   r   )selfresults     \/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/dask/dataframe/dask_expr/datasets.py_metazTimeseries._meta.   sC    ++FFA>CCBQG<<&..+,,    c                n    t        j                  | j                  | j                  | j                        S )N)r   r   r   )pd
date_ranger   r   r   r&   s    r(   
_divisionszTimeseries._divisions5   s#    }}4::488$BUBUVVr*   c                t    | j                  d      }| j                  d      D ci c]  }|||   
 c}S c c}w )Nr   r   )operand)r&   r   cols      r(   _dtypeszTimeseries._dtypes8   s6    h',0LL,CDSVC[ DDDs   5c                   t        | j                               dz
  }t        t        | j                  d            d      }t        j
                  j                  | j                        }||z  }|j                  |dz        }t        t	        j                  |t        j                        j                  |f            }t        |      |k(  sJ |S )N   r      )dtype)lenr/   maxr1   nprandomRandomStater   byteslist
frombufferuint32reshape)r&   npartitionsndtypesrandom_statenrandom_datals          r(   rD   zTimeseries.random_state=   s    $//+,q0c$,,x0115yy,,TYY7'!"((Q/{"))<DDaTJK1v{{r*   c                    t        | j                  d      t        | j                  j	                               | j
                  | j                        S )Nr   )MakeTimeseriesPartr1   r>   r3   keysr   r   r.   s    r(   r$   z Timeseries._make_timeseries_partH   s>    !LL"""$%IIKK	
 	
r*   c           	     F   | j                         }t        t        | j                  d            d      }t	        || j
                  ||   ||dz      | j                  ||z           }| j                  r.t	        |t        j                  || j                  d      d         S |S )Nr   r5   r   r   )
r/   r9   r8   r1   r   r$   rD   r   operatorgetitem)r&   r   indexfull_divisionsrC   tasks         r(   _filtered_taskzTimeseries._filtered_taskQ   s    *c$,,x0115&&5!519%ego.
 <<h..dll96Ma6PQQr*   )r   r
   rN   intreturnr   )__name__
__module____qualname___absorb_projections_parametersrR   float	_defaults	functoolscached_propertyr)   r/   propertyr3   rD   r$   rQ    r*   r(   r   r      s    K #3UG
I  W E E   
 
r*   r   )AliceBobCharlieDanEdithFrankGeorgeHannahIngridJerryKevinLauraMichaelNorbertOliverPatriciaQuinnRaySarahTimUrsulaVictorWendyXavierYvonneZeldac                0    |j                  t        |       S Nsize)choicenamesrE   rstates     r(   make_stringr   ~   s    ==Q=''r*   c                    t         j                  j                  |j                  dt	        t
              |       t
              S )Nr   r{   )r,   Categorical
from_codesrandintr8   r~   r   s     r(   make_categoricalr      s,    >>$$V^^As5z^%JERRr*   c                0    |j                  |       dz  dz
  S )N   r5   )randr   s     r(   
make_floatr      s    ;;q>A!!r*   c                (    |j                  ||       S rz   )poisson)rE   r   lams      r(   make_intr      s    >>#A>&&r*   r   categoryc                      e Zd Zd Zd Zy)rI   c                <    || _         || _        || _        || _        y )N)r   r   r   r   )r&   r   r   r   r   s        r(   __init__zMakeTimeseriesPart.__init__   s    	r*   c           	     j   | j                   }| j                  }| j                  }| j                  }t        j
                  j                  |      }t        j                  |||d      }	i }
|j                         D ]v  \  }}|j                         D ci c]3  \  }}|j                  dd      d   |k(  r|j                  dd      d   |5 }}}t        |   t        |	      |fi |}||v sr||
|<   x t        j                  |
|	|      }|j                  d   |k(  r|j                  d d }|S c c}}w )N	timestamp)r   r   r   r   _r5   r   )rN   r   )r   r   r   r   r:   r;   r<   r,   r-   itemsrsplitmaker8   	DataFramerN   r%   )r&   r   r   
state_datar   r   r   r   staterN   datakdtkkvkwsr'   dfs                     r(   __call__zMakeTimeseriesPart.__call__   s*   ,,yy		%%j1EsKP\\^ 	!EAr $\\^B99S!$Q'1, 		#q!!$a'C  "Xc%j%737FG| Q	! \\$eW=88B<3"B	s   8D/N)rT   rU   rV   r   r   r^   r*   r(   rI   rI      s    r*   rI   c                   |dt         t        t        d}|t        j                  j	                  d      }t        | ||||||t        |j                                     }t               rt        t        |            S t        |      S )aX  Create timeseries dataframe with random data

    Parameters
    ----------
    start: datetime (or datetime-like string)
        Start of time series
    end: datetime (or datetime-like string)
        End of time series
    dtypes: dict (optional)
        Mapping of column names to types.
        Valid types include {float, int, str, 'category'}
    freq: string
        String like '2s' or '1H' or '12W' for the time series frequency
    partition_freq: string
        String like '1M' or '2Y' to divide the dataframe into partitions
    seed: int (optional)
        Randomstate seed
    kwargs:
        Keywords to pass down to individual column creation functions.
        Keywords should be prefixed by the column name and then an underscore.

    Examples
    --------
    >>> from dask.dataframe.dask_expr.datasets import timeseries
    >>> df = timeseries(
    ...     start='2000', end='2010',
    ...     dtypes={'value': float, 'name': str, 'id': int},
    ...     freq='2h', partition_freq='1D', seed=1
    ... )
    >>> df.head()  # doctest: +SKIP
                           id      name     value
    2000-01-01 00:00:00   969     Jerry -0.309014
    2000-01-01 02:00:00  1010       Ray -0.760675
    2000-01-01 04:00:00  1016  Patricia -0.063261
    2000-01-01 06:00:00   960   Charlie  0.788245
    2000-01-01 08:00:00  1031     Kevin  0.466002
    r   r   g    eA)r   )rR   rY   r:   r;   r   r   r>   rJ   r	   r   r   )r   r   r   r   r   r   r   exprs           r(   r   r      s    \ ~"#EF|yy  %V[[]#	D  3D9::$r*   )i  )r   r   r    1DNN)#
__future__r   r[   rL   numpyr:   pandasr,   dask._task_specr   $dask.dataframe.dask_expr._collectionr   dask.dataframe.dask_expr._exprr   dask.dataframe.dask_expr.ior   r   dask.dataframe.utilsr	   dask.typingr
   __all__r   r~   r   r   r   r   rY   rR   strobjectr   rI   r   r^   r*   r(   <module>r      s    "       ? @ G 8 .J#[ JZ	<(S"'
 
:
Kk  F 		@ r*   