
    bim              	          d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d d	lmZmZ  ed
      Zddeddd
dddf	dZddZd Zd Zy)    )annotationsN)partial)
open_files)concat)from_delayed)
read_bytes)delayed)parse_bytessystem_encodingT)pureinferstrictFc
                   ||t        d      t        |t              rt        |      }||dv r|}
d}nd}
t	        | fd||||
d|xs i }|C|D cg c]6  } t        t               t        t        t        |	|            |            8 }}nBg }t        dt        |      |      D ]T  }||||z    } t        t               t        t              t        t        |	|      |            }|j                  |       V nt        | f||j                         nd	|d
||	d|xs i }|d   }t        |      D cg c]  } t        t               ||||       }}|	rut        t        t#        |d         D cg c]  \  }}|gt        ||         z   c}}            }t%        ||      D cg c]  \  }} t        t&              ||       }}}|st        d|       |rt)        |      }|S c c}w c c}w c c}}w c c}}w )a\	  Read lines from text files

    Parameters
    ----------
    urlpath : string or list
        Absolute or relative filepath(s). Prefix with a protocol like ``s3://``
        to read from alternative filesystems. To read from multiple files you
        can pass a globstring or a list of paths, with the caveat that they
        must all have the same protocol.
    blocksize: None, int, or str
        Size (in bytes) to cut up larger files.  Streams by default.
        Can be ``None`` for streaming, an integer number of bytes, or a string
        like "128MiB"
    compression: string
        Compression format like 'gzip' or 'xz'.  Defaults to 'infer'
    encoding: string
    errors: string
    linedelimiter: string or None
    collection: bool, optional
        Return dask.bag if True, or list of delayed values if false
    storage_options: dict
        Extra options that make sense to a particular storage connection, e.g.
        host, port, username, password, etc.
    files_per_partition: None or int
        If set, group input files into partitions of the requested size,
        instead of one partition per file. Mutually exclusive with blocksize.
    include_path: bool
        Whether or not to include the path in the bag.
        If true, elements are tuples of (line, path).
        Default is False.

    Examples
    --------
    >>> b = read_text('myfiles.1.txt')  # doctest: +SKIP
    >>> b = read_text('myfiles.*.txt')  # doctest: +SKIP
    >>> b = read_text('myfiles.*.txt.gz')  # doctest: +SKIP
    >>> b = read_text('s3://bucket/myfiles.*.txt')  # doctest: +SKIP
    >>> b = read_text('s3://key:secret@bucket/myfiles.*.txt')  # doctest: +SKIP
    >>> b = read_text('hdfs://namenode.example.com/myfiles.*.txt')  # doctest: +SKIP

    Parallelize a large file by providing the number of uncompressed bytes to
    load into each partition.

    >>> b = read_text('largefile.txt', blocksize='10MB')  # doctest: +SKIP

    Get file paths of the bag by setting include_path=True

    >>> b = read_text('myfiles.*.txt', include_path=True) # doctest: +SKIP
    >>> b.take(1) # doctest: +SKIP
    (('first line of the first file', '/home/dask/myfiles.0.txt'),)

    Returns
    -------
    dask.bag.Bag or list
        dask.bag.Bag if collection is True or list of Delayed lists otherwise.

    See Also
    --------
    from_sequence: Build bag from Python sequence
    Nz7Only one of blocksize or files_per_partition can be setN 
z
r   rt)modeencodingerrorscompressionnewline)	delimiterr      
F)r   	blocksizesampler   include_path      zNo files found)
ValueError
isinstancestrr
   r   r	   listr   file_to_blocksrangelenr   mapappendr   encodedecode	enumeratezipattach_pathr   )urlpathr   r   r   r   linedelimiter
collectionstorage_optionsfiles_per_partitionr   r   filesfilblocksstartblock_filesblock_lineso
raw_blocksbipathpathsentrys                           H/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/dask/bag/text.py	read_textrB      sy   P !4!@RSS)S!	*	::#G MG
#
 $"
 & !  GVF  Fq#e*.AB +#EU5H-HJ-gfo GCLV# k*+ 
0=0Im**,u#%
 $"
 qT
 J'
 GFOAx?
 
 9QqT?S4Z]!33STE FIQVEW6AeT$$UD1F  )733f%MgB
 Ts   ;G2=G7;G<2 Hc              #  @   K   5 }|]|j                         }|sg cd d d        S |j                  |      } fd|d d D cg c]  }||z   	 c}|dd  z   D        E d {    n|D ]  } r|j                  fn|  d d d        y c c}w 7 .# 1 sw Y   y xY ww)Nc              3  B   K   | ]  }r|j                   fn|  y wN)r>   ).0liner   	lazy_files     rA   	<genexpr>z!file_to_blocks.<locals>.<genexpr>   s)       +7y~~&D@s   )readsplitr>   )r   rH   r   ftextpartsrG   s   ``     rA   r%   r%      s     	 Ga 668D	G G
 JJy)E:?*E$TI-Ebc
R  
  G0<tY^^,$FGG G FG GsD   BB
BBBB!B" B	BBBBc              #  &   K   | D ]  }||f 
 y wrE    )blockr>   ps      rA   r.   r.      s      $is   c                   | j                  ||      }|dv r"t        j                  ||      }t        |      S |sg S |j	                  |      }|d d D cg c]  }||z   	 c}|j                  |      s	|dd  z   }|S g z   }|S c c}w )Nr   )r   rJ   )r+   ioStringIOr$   rL   endswith)	rR   r   r   line_delimiterrN   linesrO   touts	            rA   r+   r+      s    <<&)D77D.9E{I

>*+0":6aq>!6"mmN;E"#J
 
 BD
 
 7s   BrE   )
__future__r   rU   	functoolsr   fsspec.corer   tlzr   dask.bag.corer   
dask.bytesr   dask.delayedr	   
dask.utilsr
   r   rB   r%   r.   r+   rQ       rA   <module>re      s^    " 	  "  & !   3
t

 PfG"
rd   