
    bi3                        d dl mZ d dlZd dlmZ d dlmZ d dlZd dl	Z
d dlmZmZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ er	  e       ZddZddZddZd ZddZddZ y)    )annotationsN)Lock)TYPE_CHECKING)BlockwiseDepDict	blockwise)meta_lib_from_arraytolist)pyarrow_strings_enabled)HighLevelGraph)tokenize)ensure_dictis_series_likec           	          j                   dkD  rt        d j                        |)ddlm} t        ||      st        d      |j                  }|t               j                         }t         j                  dd      | t         j                  j                        }nt        j                  |      rt        d      t         fd	|D              sRt!        t#        |      j%                   j                  j                              }t        d
 j                   d|        j                  j&                  }|D cg c]  }||v r||   d   nd }}nP j                   dk(  rt        j                  |      s||j)                  g | j                  |      S t+        |      dk(  r3|j-                  t        j.                  g  j                        ||      S t        d      t        j0                   j                  d         rt        d      |4 j                   dk(  r!t        t3         j                  d               ndg}nCt+        |       j                  d   k7  r(t        dt+        |       d j                  d    d       j                  gt+        |      z  }t5        ||      D 	
ci c]  \  }	}
|	t        j.                  g |
       }}	}
|j-                  |||      S c c}w c c}
}	w )z8Create empty DataFrame or Series which has correct dtype   zCfrom_array does not input more than 2D array, got array with shape Nr   )Indexz3'index' must be an instance of dask.dataframe.Indexnamesz+For a struct dtype, columns must be a list.c              3  N   K   | ]  }|j                   j                  v   y wN)dtyper   ).0ixs     O/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/dask/dataframe/io/io.py	<genexpr>z#_meta_from_array.<locals>.<genexpr>1   s     9AQ!''--'9s   "%zdtype z doesn't have fields f8   )namer   index)r   )columnsr   z?For a 1d array, columns must be a scalar or single element listz Shape along axis 1 must be knownz:Number of column names must match width of the array. Got z names for z columns)ndim
ValueErrorshapedask.dataframer   
isinstance_metar   	DataFramegetattrr   listr   npisscalarallsortedset
differencefields_constructor_slicedlen_constructorarrayisnanrangezip)r   r   r   metar   extrar/   ndtypescdtdatas   `           r   _meta_from_arrayr>      s    	vvz&'gg0
 	

 (%'RSS|"1%//1qww&2?177==)G[[!JKK9993w<22177==ABEvaggY.CE7KLLAHIA!v+&)A,47II	
1;;w7?++u ,   \Q$$177+WE %   M
 	
 88AGGAJ?@@?121d5,-1#G\QWWQZ'7|nK
|8E  ''S\)58&5IJ'1bArxx"%%JDJT7%@@5 J2 Ks   K9 "K>c                \    t        |t              rt        j                  | } || fd|i|S )a  Create a Dask partition for either a DataFrame or Series.

    Designed to be used with :func:`dask.blockwise.blockwise`. ``data`` is the array
    from which the partition will be created. ``index`` can be:

    1. ``None``, in which case each partition has an independent RangeIndex
    2. a `tuple` with two elements, the start and stop values for a RangeIndex for
       this partition, which gives a continuously varying RangeIndex over the
       whole Dask DataFrame
    3. an instance of a ``pandas.Index`` or a subclass thereof

    The ``kwargs`` _must_ contain an ``initializer`` key which is set by calling
    ``type(meta)``.
    r   )r$   tuplepd
RangeIndex)r=   r   initializerkwargss       r   _partition_from_arrayrE   R   s1     %u%t353F33    c           	        t        | |||      }dt        | |      z   }| g}| j                  | j                  dk(  rdndg}| j                  | j                  i}||j
                  | j                  d   k7  r4dj                  |j
                  | j                  d         }t        |      |j                  }	|j                  |       |j                  |j                  dg       |j
                  f||j                  <   nt        j                  t        | j                              r dgt!        | j"                  d         d	z   z  }	nt        | j"                  d         }
dg}	d}i }t%        | j"                  d         D ]0  \  }}||z  }|	|   |f||f<   ||
k(  r|d	z  }|	j                  |       2 |j                  t'        |
      dg       t)        |      r$| j*                  |j                  t-        |      d}n|j.                  t-        |      d}t1        t2        |dg||dd|}t5        j6                  |||      }ddlm} ddlm}m } ddl!m"} ddl#m$} tK        t!        |	      d	z
        D cg c]  }||f }} | |tM        |      |      ||	| ||            }tO               r | ||jP                              S |S c c}w )au  Create a Dask DataFrame from a Dask Array.

    Converts a 2d array into a DataFrame and a 1d array into a Series.

    Parameters
    ----------
    x : da.Array
    columns : list or string
        list of column names if DataFrame, single string if Series
    index : dask.dataframe.Index, optional
        An optional *dask* Index to use for the output Series or DataFrame.

        The default output index depends on whether `x` has any unknown
        chunks. If there are any unknown chunks, the output has ``None``
        for all the divisions (one per chunk). If all the chunks are known,
        a default index with known divisions is created.

        Specifying `index` can be useful if you're conforming a Dask Array
        to an existing dask Series or DataFrame, and you would like the
        indices to match.
    meta : object, optional
        An optional `meta` parameter can be passed for dask
        to specify the concrete dataframe type to be returned.
        By default, pandas DataFrame is used.

    Examples
    --------
    >>> import dask.array as da
    >>> import dask.dataframe as dd
    >>> x = da.ones((4, 2), chunks=(2, 2))
    >>> df = dd.io.from_dask_array(x, columns=['a', 'b'])
    >>> df.compute()
         a    b
    0  1.0  1.0
    1  1.0  1.0
    2  1.0  1.0
    3  1.0  1.0

    See Also
    --------
    dask.bag.to_dataframe: from dask.bag
    )r7   zfrom-dask-array-r   ijr   Nr   z@The index and array have different numbers of blocks. ({} != {})r   )mapping)r   r   rC   )r   rC   T)	numblocksconcatenate)dependencies)optimize)
from_graphnew_collection)ArrowStringConversion)	key_split))r>   r   r   r    rJ   npartitionsformatr!   	divisionsappendextend_namer)   r4   sumr"   r1   chunks	enumerater   r   r   typer   r   rE   r   from_collectionsdask.array.optimizationrM   $dask.dataframe.dask_expr._collectionrN   rO   dask.dataframe.dask_expr._exprrP   
dask.utilsrQ   r5   r   r
   expr)r   r   r   r7   r   graph_dependenciesarrays_and_indicesrJ   msgrT   
n_elementsstopindex_mappingr   	incrementrD   blkgraphrM   rN   rO   rP   rQ   keysresults                            r   from_dask_arrayrm   f   s   V AwD9DG 44D&&!&&A+$3?%I A.#VE$5$5q{{1~F  S/!OO	!!%(!!5;;"45"'"3"3!5	%++	#agg,	 Fc!((1+.23	 !%
C	%ahhqk2 	#LAyID#,Q<"6M1$ z!	T"	# 	!!#3M#JC"PQd77DIId4jQ!\\$t*E
	 
		
 	 	C ++D#DVWE0OD$$S^a%789!T1I9D9U#T*$F  3FKK@AAM :s   9Kc                     y)zA dummy function to link results together in a graph

    We use this to enforce an artificial sequential ordering on tasks that
    don't explicitly pass around a shared resource
    N )tokenrl   s     r   _linkrq      s     rF   c                    t        | t        j                        r|dk(  r(t        t	        t
        | j                  |                  S |dk(  rP|r<t        | j                  d      | j                        D cg c]  \  }}d|i| c}}S | j                  d      S y t        | t        j                        rQ|dk(  r&|rt        | j                               S t        |       S |dk(  r | j                         j                  d      S y y c c}}w )Nr@   dictrecordsr   )orient)r$   rA   r&   r(   mapr@   
itertuplesr6   to_dictr   Seriesitemsto_frame)dfr   rS   valuesidxs        r   
_df_to_bagr      s    "bll#WE2==#7899v (+2::i+@"(('K# c,V, 
 zzz33  
B			"W',4
#:$r(:v;;=((	(::  
#s   7D
c           	     n   |du du k(  rt        d      t        |       } t        t        |             }t	        |      t	        |       k  }d}|r6|D cg c]  }t        j                  | |       }}|xr t	        |      |k\  }ndx}}dd}|rt	        |       |z  t	        |       |z  d}fd}	| d   g}
dg} |	d      }d}d}|r|t	        |
      z
  nd}|t	        |       k  r| |   }|rQ|t        j                  || |         }|r%t	        |      |z
  }||kD  r|||z
  z  }||   }| |   }t        ||         }n|}||
d   k  r5|r-|dz  }|t	        |      k  rt        ||         n
t	        |       }nt|dz  }nn|r |||d   z
   |	t	        |
      dz
        z
  z   }|r|dz  }|t        d |	t	        |
            |z
        z   }|
j                  |       |j                  |       d}|t	        |       k  r|
j                  | d          |j                  t	        |              |
|fS c c}w )	ai  Find division locations and values in sorted list

    Examples
    --------

    >>> L = ['A', 'B', 'C', 'D', 'E', 'F']
    >>> sorted_division_locations(L, chunksize=2)
    (['A', 'C', 'E', 'F'], [0, 2, 4, 6])

    >>> sorted_division_locations(L, chunksize=3)
    (['A', 'D', 'F'], [0, 3, 6])

    >>> L = ['A', 'A', 'A', 'A', 'B', 'B', 'B', 'C']
    >>> sorted_division_locations(L, chunksize=3)
    (['A', 'B', 'C', 'C'], [0, 4, 7, 8])

    >>> sorted_division_locations(L, chunksize=2)
    (['A', 'B', 'C', 'C'], [0, 4, 7, 8])

    >>> sorted_division_locations(['A'], chunksize=2)
    (['A', 'A'], [0, 1])
    Nz;Exactly one of npartitions and chunksize must be specified.Fr   Tc                &    t        | k        z   S r   )int)ind	chunksizeresiduals    r   
chunksizesz-sorted_division_locations.<locals>.chunksizes2  s    3sX~...rF   r   )
r!   r	   r,   r-   r1   bisectbisect_leftr   maxrU   )seqrR   r   
seq_unique
duplicatesenforce_exactr   offsetssubtract_driftr   rT   	locationsr   r   driftdivs_remaindivoffs_remainposr   s     `                @r   sorted_division_locationsr      s   . 	td!23VWW +CC!JZ3s8+JM7AB!6%%c1-BB#CG(C##*
 HNH+	s8k)/ QII1A
CE2?+I.TK
c#h, !f {((SV< "'lS0,;44CAa&Cgcl#CC)B- q),s7|);C%SQ
 #	""5C	NUVDV9W!WXq c!ZI7%?@@AS!S!CW c#h,^ SWSXi] Cs   H2)NNN)NN)Fr@   )!
__future__r   r   	threadingr   typingr   numpyr)   pandasrA   dask.blockwiser   r   dask.dataframe.dispatchr   r	   dask.dataframe.utilsr
   dask.highlevelgraphr   dask.tokenizer   r`   r   r   lockr>   rE   rm   rq   r   r   ro   rF   r   <module>r      s[    "       6 ? 8 . " 2 v7At4({|;&q rF   