
from __future__ import annotations

import warnings

import numpy as np
import pandas as pd

import dask.dataframe as dd
from dask.base import compute as dask_compute
from dask.dataframe import methods
from dask.dataframe._compat import PANDAS_GE_300
from dask.dataframe.utils import pyarrow_strings_enabled
from dask.delayed import delayed, tokenize
from dask.utils import parse_bytes


def read_sql_query(
    sql,
    con,
    index_col,
    divisions=None,
    npartitions=None,
    limits=None,
    bytes_per_chunk="256 MiB",
    head_rows=5,
    meta=None,
    engine_kwargs=None,
    **kwargs,
):
    """
    Read SQL query into a DataFrame.

    If neither ``divisions`` nor ``npartitions`` is given, the memory footprint of the
    first few rows will be determined, and partitions of size ~256MB will
    be used.

    Parameters
    ----------
    sql : SQLAlchemy Selectable
        SQL query to be executed. TextClause is not supported.
    con : str
        Full sqlalchemy URI for the database connection
    index_col : str
        Column which becomes the index, and defines the partitioning. Should
        be an indexed column in the SQL server, of any orderable type. If the
        type is number or time, then partition boundaries can be inferred from
        ``npartitions`` or ``bytes_per_chunk``; otherwise must supply explicit
        ``divisions``.
    divisions : sequence
        Values of the index column to split the table by. If given, this will
        override ``npartitions`` and ``bytes_per_chunk``. The divisions are the value
        boundaries of the index column used to define the partitions. For
        example, ``divisions=list('acegikmoqsuwz')`` could be used to partition
        a string column lexicographically into 12 partitions, with the implicit
        assumption that each partition contains similar numbers of records.
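        For example, with an integer index, ``divisions=[0, 500000, 1000000]``
        (hypothetical values) would produce two partitions, one for ids 0
        through 499999 and one for ids 500000 through 1000000; only the final
        division includes its upper bound.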
    npartitions : int
        Number of partitions, if ``divisions`` is not given. Will split the values
        of the index column linearly between ``limits``, if given, or the column
        max/min. The index column must be numeric or time for this to work.
    limits : 2-tuple or None
        Manually give upper and lower range of values for use with ``npartitions``;
        if None, first fetches max/min from the DB. Upper limit, if
        given, is inclusive.
    bytes_per_chunk : str or int
        If both ``divisions`` and ``npartitions`` are None, this is the target size of
        each partition, in bytes
    head_rows : int
        How many rows to load for inferring the data-types, and memory per row
    meta : empty DataFrame or None
        If provided, do not attempt to infer dtypes, but use these, coercing
        all chunks on load
    engine_kwargs : dict or None
        Specific db engine parameters for sqlalchemy
    kwargs : dict
        Additional parameters to pass to `pd.read_sql()`

    Returns
    -------
    dask.dataframe
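
    Examples
    --------
    A minimal sketch; the query, URI, and column names below are placeholders
    for objects in your own database.

    >>> import sqlalchemy as sa  # doctest: +SKIP
    >>> query = sa.select(sa.table('accounts', sa.column('id'),
    ...                            sa.column('balance')))  # doctest: +SKIP
    >>> df = dd.read_sql_query(query, 'sqlite:///path/to/bank.db',
    ...                        index_col='id', npartitions=8)  # doctest: +SKIP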

    See Also
    --------
    read_sql_table : Read SQL database table into a DataFrame.
    """
    import sqlalchemy as sa

    if not isinstance(con, str):
        raise TypeError(
            "'con' must be of type str, not "
            + str(type(con))
            + ". Note: Dask does not support SQLAlchemy connectables here"
        )
    if index_col is None:
        raise ValueError("Must specify index column to partition on")
    if not isinstance(index_col, (str, sa.Column, sa.sql.elements.ColumnClause)):
        raise ValueError(
            "'index_col' must be of type str or sa.Column, not " + str(type(index_col))
        )
    if not head_rows > 0:
        if meta is None:
            raise ValueError("Must provide 'meta' if 'head_rows' is 0")
        if divisions is None and npartitions is None:
            raise ValueError(
                "Must provide 'divisions' or 'npartitions' if 'head_rows' is 0"
            )
    if divisions and npartitions:
        raise TypeError("Must supply either 'divisions' or 'npartitions', not both")

    engine_kwargs = {} if engine_kwargs is None else engine_kwargs
    engine = sa.create_engine(con, **engine_kwargs)

    index = (
        sa.Column(index_col)
        if isinstance(index_col, str)
        else sa.Column(index_col.name, index_col.type)
    )
    kwargs["index_col"] = index.name

    if head_rows > 0:
        # Sample the first few rows to infer dtypes and the memory footprint
        # per row.
        q = sql.limit(head_rows)
        head = pd.read_sql(q, engine, **kwargs)

        if len(head) == 0:
            # The query returned no rows: a single empty partition.
            return dd.from_pandas(head, npartitions=1)

        if pyarrow_strings_enabled():
            from dask.dataframe._pyarrow import to_pyarrow_string

            head = to_pyarrow_string(head)

        bytes_per_row = head.memory_usage(deep=True, index=True).sum() / head_rows
        if meta is None:
            meta = head.iloc[:0]

    if divisions is None:
        if limits is None:
            # Fetch min and max of the index column from the database.
            q = sa.sql.select(
                sa.sql.func.max(index), sa.sql.func.min(index)
            ).select_from(sql.subquery())
            minmax = pd.read_sql(q, engine)
            maxi, mini = minmax.iloc[0]
            dtype = minmax.dtypes["max_1"]
        else:
            mini, maxi = limits
            dtype = pd.Series(limits).dtype

        if npartitions is None:
            # Target partitions of roughly ``bytes_per_chunk`` each, e.g.
            # 10 million rows at ~100 bytes/row is about 1 GB, which at the
            # default "256 MiB" gives round(1e9 / 268435456) = 4 partitions.
            q = sa.sql.select(sa.sql.func.count(index)).select_from(sql.subquery())
            count = pd.read_sql(q, engine)["count_1"][0]
            npartitions = (
                int(round(count * bytes_per_row / parse_bytes(bytes_per_chunk))) or 1
            )
        if dtype.kind == "M":
            # Datetime index: cut the range into equal-length time intervals.
            divisions = methods.tolist(
                pd.date_range(
                    start=mini,
                    end=maxi,
                    freq="%is" % ((maxi - mini).total_seconds() / npartitions),
                )
            )
            divisions[0] = mini
            divisions[-1] = maxi
        elif dtype.kind in ["i", "u", "f"]:
            divisions = np.linspace(mini, maxi, npartitions + 1, dtype=dtype).tolist()
        else:
            raise TypeError(
                'Provided index column is of type "{}".  If divisions is not provided the '
                "index column type must be numeric or datetime.".format(dtype)
            )

    parts = []
    lowers, uppers = divisions[:-1], divisions[1:]
    for i, (lower, upper) in enumerate(zip(lowers, uppers)):
        # Only the last partition includes its upper bound.
        cond = index <= upper if i == len(lowers) - 1 else index < upper
        q = sql.where(sa.sql.and_(index >= lower, cond))
        parts.append(
            delayed(_read_sql_chunk)(
                q, con, meta, engine_kwargs=engine_kwargs, **kwargs
            )
        )

    engine.dispose()

    return dd.from_delayed(parts, meta, divisions=divisions)


def read_sql_table(
    table_name,
    con,
    index_col,
    divisions=None,
    npartitions=None,
    limits=None,
    columns=None,
    bytes_per_chunk="256 MiB",
    head_rows=5,
    schema=None,
    meta=None,
    engine_kwargs=None,
    **kwargs,
):
    """
    Read SQL database table into a DataFrame.

    If neither ``divisions`` nor ``npartitions`` is given, the memory footprint of the
    first few rows will be determined, and partitions of size ~256MB will
    be used.

    Parameters
    ----------
    table_name : str
        Name of SQL table in database.
    con : str
        Full sqlalchemy URI for the database connection
    index_col : str
        Column which becomes the index, and defines the partitioning. Should
        be an indexed column in the SQL server, of any orderable type. If the
        type is number or time, then partition boundaries can be inferred from
        ``npartitions`` or ``bytes_per_chunk``; otherwise must supply explicit
        ``divisions``.
    columns : sequence of str or SQLAlchemy column or None
        Which columns to select; if None, gets all. Note that this can be a mix of str and SQLAlchemy columns.
    schema : str or None
        Pass this to sqlalchemy to select which DB schema to use within the
        URI connection
    divisions : sequence
        Values of the index column to split the table by. If given, this will
        override ``npartitions`` and ``bytes_per_chunk``. The divisions are the value
        boundaries of the index column used to define the partitions. For
        example, ``divisions=list('acegikmoqsuwz')`` could be used to partition
        a string column lexicographically into 12 partitions, with the implicit
        assumption that each partition contains similar numbers of records.
    npartitions : int
        Number of partitions, if ``divisions`` is not given. Will split the values
        of the index column linearly between ``limits``, if given, or the column
        max/min. The index column must be numeric or time for this to work.
    limits : 2-tuple or None
        Manually give upper and lower range of values for use with ``npartitions``;
        if None, first fetches max/min from the DB. Upper limit, if
        given, is inclusive.
    bytes_per_chunk : str or int
        If both ``divisions`` and ``npartitions`` are None, this is the target size of
        each partition, in bytes
    head_rows : int
        How many rows to load for inferring the data-types, and memory per row
    meta : empty DataFrame or None
        If provided, do not attempt to infer dtypes, but use these, coercing
        all chunks on load
    engine_kwargs : dict or None
        Specific db engine parameters for sqlalchemy
    kwargs : dict
        Additional parameters to pass to `pd.read_sql()`

    Returns
    -------
    dask.dataframe

    See Also
    --------
    read_sql_query : Read SQL query into a DataFrame.

    Examples
    --------
    >>> df = dd.read_sql_table('accounts', 'sqlite:///path/to/bank.db',
    ...                  npartitions=10, index_col='id')  # doctest: +SKIP
    """
    import sqlalchemy as sa

    if "table" in kwargs:
        warnings.warn(
            "The `table` keyword has been replaced by `table_name`. "
            "Please use `table_name` instead.",
            DeprecationWarning,
        )
        table_name = kwargs.pop("table")
    if "uri" in kwargs:
        warnings.warn(
            "The `uri` keyword has been replaced by `con`. Please use `con` instead.",
            DeprecationWarning,
        )
        con = kwargs.pop("uri")
    if not isinstance(table_name, str):
        raise TypeError(
            "`table_name` must be of type str, not " + str(type(table_name))
        )
    if columns is not None:
        for col in columns:
            if not isinstance(col, (sa.Column, str)):
                raise TypeError(
                    "`columns` must be of type List[str], and cannot contain "
                    + str(type(col))
                )
    if not isinstance(con, str):
        raise TypeError(
            "`con` must be of type str, not "
            + str(type(con))
            + ". Note: Dask does not support SQLAlchemy connectables here"
        )

    engine_kwargs = {} if engine_kwargs is None else engine_kwargs
    engine = sa.create_engine(con, **engine_kwargs)

    # Reflect the table definition from the database.
    m = sa.MetaData()
    if isinstance(table_name, str):
        table_name = sa.Table(table_name, m, autoload_with=engine, schema=schema)
    else:
        raise TypeError(
            "`table_name` must be of type str, not " + str(type(table_name))
        )
    engine.dispose()

    columns = (
        [
            (
                sa.Column(c, table_name.columns[c].type)
                if isinstance(c, str)
                else sa.Column(c.name, c.type)
            )
            for c in columns
        ]
        if columns
        else [sa.Column(c.name, c.type) for c in table_name.columns]
    )
    index = (
        sa.Column(index_col, table_name.columns[index_col].type)
        if isinstance(index_col, str)
        else sa.Column(index_col.name, index_col.type)
    )

    if index.name not in [c.name for c in columns]:
        columns.append(index)

    query = sa.sql.select(*columns).select_from(table_name)

    return read_sql_query(
        sql=query,
        con=con,
        index_col=index,
        divisions=divisions,
        npartitions=npartitions,
        limits=limits,
        bytes_per_chunk=bytes_per_chunk,
        head_rows=head_rows,
        meta=meta,
        engine_kwargs=engine_kwargs,
        **kwargs,
    )


def read_sql(sql, con, index_col, **kwargs):
    """
    Read SQL query or database table into a DataFrame.

    This function is a convenience wrapper around ``read_sql_table`` and
    ``read_sql_query``. It will delegate to the specific function depending
    on the provided input. A SQL query will be routed to ``read_sql_query``,
    while a database table name will be routed to ``read_sql_table``.
    Note that the delegated function might have more specific notes about
    their functionality not listed here.

    Parameters
    ----------
    sql : str or SQLAlchemy Selectable
        Name of SQL table in database or SQL query to be executed. TextClause is not supported.
    con : str
        Full sqlalchemy URI for the database connection
    index_col : str
        Column which becomes the index, and defines the partitioning. Should
        be an indexed column in the SQL server, of any orderable type. If the
        type is number or time, then partition boundaries can be inferred from
        ``npartitions`` or ``bytes_per_chunk``; otherwise must supply explicit
        ``divisions``.

    Returns
    -------
    dask.dataframe

    See Also
    --------
    read_sql_table : Read SQL database table into a DataFrame.
    read_sql_query : Read SQL query into a DataFrame.
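
    Examples
    --------
    A minimal sketch; the URI, table name, and column are placeholders. A
    plain string is dispatched to ``read_sql_table``:

    >>> df = dd.read_sql('accounts', 'sqlite:///path/to/bank.db',
    ...                  npartitions=10, index_col='id')  # doctest: +SKIP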
    """
    if isinstance(sql, str):
        return read_sql_table(sql, con, index_col, **kwargs)
    else:
        return read_sql_query(sql, con, index_col, **kwargs)


def _read_sql_chunk(q, uri, meta, engine_kwargs=None, **kwargs):
    import sqlalchemy as sa

    engine_kwargs = engine_kwargs or {}
    engine = sa.create_engine(uri, **engine_kwargs)
    df = pd.read_sql(q, engine, **kwargs)
    engine.dispose()
    if len(df) == 0:
        return meta
    elif len(meta.dtypes.to_dict()) == 0:
        # No columns beyond the index: nothing to coerce.
        return df
    else:
        # Coerce the chunk to the dtypes recorded in ``meta`` so all
        # partitions agree even where a chunk alone would infer differently.
        # pandas 3.0 removed the ``copy`` keyword of ``astype``, hence the
        # PANDAS_GE_300 switch.
        astype_kwargs = {} if PANDAS_GE_300 else {"copy": False}
        return df.astype(meta.dtypes.to_dict(), **astype_kwargs)


def _to_sql_chunk(d, uri, engine_kwargs=None, **kwargs):
    import sqlalchemy as sa

    engine_kwargs = engine_kwargs or {}
    engine = sa.create_engine(uri, **engine_kwargs)
    q = d.to_sql(con=engine, **kwargs)
    engine.dispose()
    return q


def to_sql(
    df,
    name: str,
    uri: str,
    schema=None,
    if_exists: str = "fail",
    index: bool = True,
    index_label=None,
    chunksize=None,
    dtype=None,
    method=None,
    compute=True,
    parallel=False,
    engine_kwargs=None,
):
    """Store Dask Dataframe to a SQL table

    An empty table is created based on the "meta" DataFrame (and conforming to the caller's "if_exists" preference), and
    then each block calls pd.DataFrame.to_sql (with `if_exists="append"`).

    Databases supported by SQLAlchemy [1]_ are supported. Tables can be
    newly created, appended to, or overwritten.

    Parameters
    ----------
    name : str
        Name of SQL table.
    uri : str
        Full sqlalchemy URI for the database connection
    schema : str, optional
        Specify the schema (if database flavor supports this). If None, use
        default schema.
    if_exists : {'fail', 'replace', 'append'}, default 'fail'
        How to behave if the table already exists.

        * fail: Raise a ValueError.
        * replace: Drop the table before inserting new values.
        * append: Insert new values to the existing table.

    index : bool, default True
        Write DataFrame index as a column. Uses `index_label` as the column
        name in the table.
    index_label : str or sequence, default None
        Column label for index column(s). If None is given (default) and
        `index` is True, then the index names are used.
        A sequence should be given if the DataFrame uses MultiIndex.
    chunksize : int, optional
        Specify the number of rows in each batch to be written at a time.
        By default, all rows will be written at once.
    dtype : dict or scalar, optional
        Specifying the datatype for columns. If a dictionary is used, the
        keys should be the column names and the values should be the
        SQLAlchemy types or strings for the sqlite3 legacy mode. If a
        scalar is provided, it will be applied to all columns.
    method : {None, 'multi', callable}, optional
        Controls the SQL insertion clause used:

        * None : Uses standard SQL ``INSERT`` clause (one per row).
        * 'multi': Pass multiple values in a single ``INSERT`` clause.
        * callable with signature ``(pd_table, conn, keys, data_iter)``.

        Details and a sample callable implementation can be found in the
        section :ref:`insert method <io.sql.method>`.
    compute : bool, default True
        When true, call dask.compute and perform the load into SQL; otherwise, return a Dask object (or array of
        per-block objects when parallel=True).
    parallel : bool, default False
        When true, have each block append itself to the DB table concurrently. This can result in DB rows being in a
        different order than the source DataFrame's corresponding rows. When false, load each block into the SQL DB in
        sequence.
    engine_kwargs : dict or None
        Specific db engine parameters for sqlalchemy

    Raises
    ------
    ValueError
        When the table already exists and `if_exists` is 'fail' (the
        default).

    See Also
    --------
    read_sql : Read a DataFrame from a table.

    Notes
    -----
    Timezone aware datetime columns will be written as
    ``Timestamp with timezone`` type with SQLAlchemy if supported by the
    database. Otherwise, the datetimes will be stored as timezone unaware
    timestamps local to the original timezone.

    .. versionadded:: 0.24.0

    References
    ----------
    .. [1] https://docs.sqlalchemy.org
    .. [2] https://www.python.org/dev/peps/pep-0249/

    Examples
    --------
    Create a table from scratch with 4 rows.

    >>> import pandas as pd
    >>> import dask.dataframe as dd
    >>> df = pd.DataFrame([ {'i':i, 's':str(i)*2 } for i in range(4) ])
    >>> ddf = dd.from_pandas(df, npartitions=2)
    >>> ddf  # doctest: +SKIP
    Dask DataFrame Structure:
                       i       s
    npartitions=2
    0              int64  object
    2                ...     ...
    3                ...     ...
    Dask Name: from_pandas, 2 tasks
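
    Multi-row inserts and concurrent loading are opt-in (a sketch; the URI is
    a placeholder):

    >>> ddf.to_sql('test', 'sqlite:///path/to/bank.db',
    ...            method='multi', parallel=True)  # doctest: +SKIP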

    >>> from dask.utils import tmpfile
    >>> from sqlalchemy import create_engine, text
    >>> with tmpfile() as f:
    ...     db = 'sqlite:///%s' %f
    ...     ddf.to_sql('test', db)
    ...     engine = create_engine(db, echo=False)
    ...     with engine.connect() as conn:
    ...         result = conn.execute(text("SELECT * FROM test")).fetchall()
    >>> result
    [(0, 0, '00'), (1, 1, '11'), (2, 2, '22'), (3, 3, '33')]
    """
    if not isinstance(uri, str):
        raise ValueError(f"Expected URI to be a string, got {type(uri)}.")

    # ``engine_kwargs`` is the only argument added on top of what pandas'
    # ``to_sql`` supports.
    kwargs = dict(
        name=name,
        uri=uri,
        engine_kwargs=engine_kwargs,
        schema=schema,
        if_exists=if_exists,
        index=index,
        index_label=index_label,
        chunksize=chunksize,
        dtype=dtype,
        method=method,
    )

    # Create the (possibly empty) table from ``meta`` first, honouring
    # ``if_exists``; every partition afterwards appends to it.
    meta_task = delayed(_to_sql_chunk)(df._meta, **kwargs)
    worker_kwargs = dict(kwargs, if_exists="append")

    if parallel:
        # One append task per partition, all depending only on the meta
        # insert, so they may run concurrently (row order is not preserved).
        result = [
            _extra_deps(
                _to_sql_chunk,
                d,
                extras=meta_task,
                **worker_kwargs,
                dask_key_name="to_sql-%s" % tokenize(d, **worker_kwargs),
            )
            for d in df.to_delayed()
        ]
    else:
        # Chain the inserts so each partition waits for the previous one,
        # preserving the order of the source DataFrame's rows.
        result = []
        last = meta_task
        for d in df.to_delayed():
            result.append(
                _extra_deps(
                    _to_sql_chunk,
                    d,
                    extras=last,
                    **worker_kwargs,
                    dask_key_name="to_sql-%s" % tokenize(d, **worker_kwargs),
                )
            )
            last = result[-1]
    result = delayed(result)

    if compute:
        dask_compute(result)
    else:
        return result


@delayed
def _extra_deps(func, *args, extras=None, **kwargs):
    return func(*args, **kwargs)