
    bi+                     :	   d Z ddlZddlZddlZddlZddlZddlZddlZddlZddl	Z	ddl
Z
ddlZddlZddlZddlmZ ddlmZ ddlmZ ddlmZmZ ddlmZmZmZmZ ddlmZ dd	lmZ dd
l m!Z" ddl#Z#ddl$Z$ddl%Z$ddl&Z&ddl'm(Z(m)Z) ddl*m+Z+ ddl,m-Z-m.Z.m/Z/ ddl0m1Z1 ddl2m3Z3m4Z4 ddl5m6Z6 ddl7m8Z8 ddl2m9Z9m:Z: ddl;m<Z< ddl=m>Z> ddl?m@Z@ 	 ddlAmBZC  e:j                  eG      ZHdZI edeJe      ZKdeJdeLfdZMdeJdeLfd ZNdeJdeLfd!ZOd"eKdeKfd#ZPd$eJd%eJdeJfd&ZQd'eJdeJfd(ZRd|d)ZS	 d|deJfd*ZTd|d+eeeJeUf      deJfd,ZVd|d-eJd.eeeJeLf      deUfd/ZWd|d0eeJ   fd1ZXd|d2ZYd3 ZZ G d4 d5e#j                  j                        Z\d}d7Z]	 	 	 	 	 	 	 	 d~deJfd8Z^d9 Z_d: Z`d; Zad<ej                  fd=Zcg d>Zdi e8D  ci c]/  } | j                  D ]  }|j                  d?      | j                    1 c}} d@d@iZhe8D  ci c]/  } | j                  D ]  }|j                  d?      | j                    1 c}} Zie8D  ch c]  } | j                   c} Zj ej                  dA      Zlemj                  dB      d@emj                  dC      d@emj                  dD      d@emj                  dE      dFemj                  dG      dHemj                  dI      dJemj                  dK      dLemj                  dM      dNiZodOdPiZp eqdQ  eeoep      D              Zr G dR dSeE      Zsd"eJdeJfdTZtdeeJ   fdUZud|dVeJdWee6   deeJ   fdXZvdY ZwdZ Zxd|dVeJdWee6   fd[Zyd\ Zzd] Z{d^ Z|d|dWee6   deLfd_Z}d|dWee6   de~fd`Zd|dWee6   deLfdaZd|dbZdc Z	 d|dVeJdWee6   deeJeUeJeUeJef   f   f   fddZ	 d|dVeJdWee6   deeJeUeJeUeJef   f   f   fdeZdddfdgeJdWee6   fdhZd|d"eJdWee6   deeJ   fdiZd6ddjdWee6   fdkZd|dWee6   fdlZ G dm dn e e                   Zd"eeJeef   fdoZddfdWee6   fdpZddfdWee6   fdqZd|dWee6   fdrZd|dWee6   fdsZd|dWee6   fdtZd|dWee6   fduZddWee6   fdvZd|dWee6   fdwZ G dx dye@      Z G dz d{e@      Zy# eD$ r  G d deE      ZCY w xY wc c}} w c c}} w c c} w )z
Utilities for working with the local dataset cache.
This file is adapted from the AllenNLP library at https://github.com/allenai/allennlp
Copyright by the AllenNLP authors.
    N)	Generator)BytesIO)chain)PathPurePosixPath)AnyOptionalTypeVarUnion)patch)urlparse)ElementTree)strip_protocol	url_to_fs)can_be_local)EntryNotFoundErrorget_sessioninsecure_hashlib)version   )__version__config)DownloadConfig)COMPRESSION_FILESYSTEMS   )_tqdmlogging)FileLock)ExtractManager)TrackedIterableFromGenerator)ClientErrorc                       e Zd Zy)_AiohttpClientErrorN__name__
__module____qualname__     T/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/datasets/utils/file_utils.pyr#   r#   5   s    r)   r#   .incompleteTurl_or_filenamereturnc                     t        |       j                  dk7  xr6 t        j                  j	                  t        |       j                  dz          S N z:/r   schemeospathismountr-   s    r*   is_remote_urlr8   @   s?    O$++r1r"''//(SbJcJjJjmqJq:r6rrr)   c                     t        |       j                  dk(  xs5 t        j                  j	                  t        |       j                  dz         S r0   r2   r7   s    r*   is_local_pathr:   D   s=     O$++r1mRWW__XoE^EeEehlEl5mmr)   c                 v    t        |       j                  dk(  xr  t        j                  j	                  |        S )Nr1   )r   r3   r4   r5   isabsr7   s    r*   is_relative_pathr=   K   s-    O$++r1X"''--:X6XXr)   r5   c           	          t         j                  j                  t         j                  j                  t         j                  j	                  t        |                         }t        | t              rt        |      S |S )z'Convert relative path to absolute path.)r4   r5   abspath
expanduser
expandvarsstr
isinstancer   )r5   abs_path_strs     r*   relative_to_absolute_pathrE   O   sO    77??277#5#5bgg6H6HT6S#TUL!+D$!74I\Ir)   	base_name	pathnamesc                     t        |       rt        j                  | gd |D         S t        | g| j	                         S )Nc              3      K   | ]<  }t        |      j                  t        j                  d       j	                  d        > yw/N)rB   replacer4   seplstrip).0pathnames     r*   	<genexpr>z#url_or_path_join.<locals>.<genexpr>W   s2     *t^f3x=+@+@+M+T+TUX+Y*ts   AA)r8   	posixpathjoinr   as_posix)rF   rG   s     r*   url_or_path_joinrU   U   s>    Y~~iu*tjs*tuuI*	*3355r)   url_or_pathc                 ~    t        |       r| d | j                  d       S t        j                  j	                  |       S )NrK   )r8   rindexr4   r5   dirname)rV   s    r*   url_or_path_parentrZ   \   s6    [!4[//455ww{++r)   c                    | j                  d      }t        j                  |      }|j                         }|r<|j                  d      }t        j                  |      }|d|j                         z   z  }| j	                  d      r|dz  }|S )a  
    Convert `url` into a hashed filename in a repeatable way.
    If `etag` is specified, append its hash to the url's, delimited
    by a period.
    If the url ends with .h5 (Keras HDF5 weights) adds '.h5' to the name
    so that TF 2.0 can identify it as a HDF5 file
    (see https://github.com/tensorflow/tensorflow/blob/00fad90125b18b80fe054de1055770cfb8fe4ba3/tensorflow/python/keras/engine/network.py#L1380)
    utf-8.z.py)encoder   sha256	hexdigestendswith)urletag	url_bytesurl_hashfilename
etag_bytes	etag_hashs          r*   hash_url_to_filenameri   c   s     

7#I&&y1H!!#H[[)
$++J7	C)--///
||EEOr)   c                 Z   |t        di |}|j                  xs t        j                  }t	        |t
              rt        |      }t	        | t
              rt        |       } t        |       rt        |       } t        |       rLt        | |      \  } }| j                  d      rt        j                  t        j                  |j                        j!                  |       }	 t        j"                  t        j                  |j                  dt$        t'        |j(                              j+                  |j,                  |j.                  |j0                  |j2                  |j4                  |j6                        }ntE        | ||j4                  |j(                  |jF                  |j                  ||jH                  |jJ                  	      }nKtL        jN                  jQ                  |       r| }n)tS        |       rtC        d	|  d
      tU        d|  d      ||S |jV                  r|jX                  rt[        ||      }t]        | j_                  d      d         }	|r|	dvr| j_                  d      d   ja                  d      satc        |      }|td        v rEtL        jN                  jg                  |      }
d|
v r|
d|
ji                  d       n|
}
| d|
 d| }|S | d| }|S tk        |j                        jm                  ||jn                        }tc        |      S # t        j8                  j:                  t        j8                  j<                  t        j8                  j>                  t        j8                  j@                  f$ r}tC        t        |            |d}~ww xY w)a  
    Given something that might be a URL (or might be a local path),
    determine which. If it's a URL, download the file and cache it, and
    return the path to the cached file. If it's already a local path,
    make sure the file exists and then return the path.

    Return:
        Local path (string)

    Raises:
        FileNotFoundError: in case of non-recoverable file
            (non-existent or no cache on disk)
        ConnectionError: in case of unreachable url
            and no cache on disk
        ValueError: if it couldn't parse the url or filename correctly
        requests.exceptions.ConnectionError: in case of internet connection issue
    Ndownload_confighf://)endpointtokendatasets)rn   ro   library_namelibrary_version
user_agent)repo_id	repo_typerevisionrf   force_downloadproxies)	cache_dirrw   rs   use_etagro   storage_optionsdownload_descdisable_tqdmzLocal file z doesn't existzunable to parse z as a URL or as a local path::r   tgztarz.tar.gzz.tar.bz2z.tar.xzr]   ://z://::)ry   )force_extractr(   )8r   ry   r   DOWNLOADED_DATASETS_PATHrC   r   rB   r   r   r8   !_prepare_path_and_storage_options
startswithhuggingface_hubHfFileSystemHF_ENDPOINTro   resolve_pathHfApir   get_datasets_user_agentrs   hf_hub_downloadrt   ru   rv   path_in_reporw   rx   utilsRepositoryNotFoundErrorr   RevisionNotFoundErrorGatedRepoErrorFileNotFoundErrorget_from_cacherz   r|   r}   r4   r5   existsr:   
ValueErrorextract_compressed_fileextract_on_the_fly_get_extraction_protocol_get_path_extensionsplitra   rE   !SINGLE_FILE_COMPRESSION_PROTOCOLSbasenamerX   r   extractr   )r-   rl   download_kwargsry   r{   resolved_pathoutput_patheprotocol	extension
inner_files              r*   cached_pathr   {   sn   , (;?;))LV-L-LI)T"	N	/4(o. O$(9_%+L_,
( %%g.+88++?3H3Hl?+ 7-33#//)//!+$/67Q7QR "/)11+55*33*77#2#A#A+33 "  . )#.==*55(11%++ /-;;,99
K 
	(%		'+o->n MNN +O+<<XYZZ..-- 0_]H+O,A,A$,G,JKI^3'--d3A6??@bc7D@@!#!1!1+!>JILPZIZ,Dj.?.?.D!E`jJ%-Jc*R}"MK #" &.JeK="AK"" %/H/HIQQ'D'D R 
 %[11s  %%==%%88%%;;%%44	 7 (A/Q67s   $BL' 'A)N*N%%N*rs   c                 ,   dt          }|dt        j                   z  }|dt        j                    z  }|dt        j                   z  }t        j
                  r|dt        j                   z  }t        j                  r|dt        j                   z  }t        j                  r|dt        j                   z  }t        | t              r.|ddj                  d	 | j                         D               z  }|S t        | t              r|d| z   z  }|S )
Nz	datasets/z	; python/z; huggingface_hub/z
; pyarrow/z; torch/z; tensorflow/z; jax/z; c              3   0   K   | ]  \  }}| d |   ywrJ   r(   )rO   kvs      r*   rQ   z*get_datasets_user_agent.<locals>.<genexpr>  s     GDAqs!A3ZGs   )r   r   
PY_VERSIONr   PYARROW_VERSIONTORCH_AVAILABLETORCH_VERSIONTF_AVAILABLE
TF_VERSIONJAX_AVAILABLEJAX_VERSIONrC   dictrS   itemsrB   )rs   uas     r*   r   r      s   [M	"BIf''(
))B::;
<<BJv--.
//B
--.//
f//011
v))*++*d#
499GJ4D4D4FGGHII I 
J	$
dZIr)   rb   ro   c                     | j                  t        j                        r&t        j                  j                  |dt              S i S )zHandle the HF authenticationrp   )ro   rq   rr   )r   r   r   r   r   build_hf_headersr   )rb   ro   s     r*   "get_authentication_headers_for_urlr     s@    
~~f(()$$55j+ 6 
 	
 	r)   msgc                     t         j                  r3t        j                  j	                  | d      dt        |       z         y)z\Raise an OfflineModeIsEnabled error (subclass of ConnectionError) if HF_HUB_OFFLINE is True.NzOffline mode is enabled.zOffline mode is enabled. )r   HF_HUB_OFFLINEr   errorsOfflineModeIsEnabledrB   )r   s    r*   !_raise_if_offline_mode_is_enabledr     sJ    $$99*-+&
 	
;VY\]`Ya;a
 	
 r)   c                 f    t        d|         t        | fi |xs i \  }}|j                  |      S )NTried to reach )r   r   info)rb   r{   fsr5   s       r*   fsspec_headr     s7    %u&=>8!6B8HB774=r)   c                  N    t        j                  t        j                  ddi      S )N8HF_DATASETS_STACK_MULTIPROCESSING_DOWNLOAD_PROGRESS_BARS1)r   r   r4   environr(   r)   r*   ,stack_multiprocessing_download_progress_barsr      s      ::bjj#]_b"cddr)   c                         e Zd Zd fd	Z xZS )TqdmCallbackc                     t         j                  t        j                  d      k  rt	        |   |g|i | t        | _        y t        j                  |d<   t	        |   |g|i | y )Nz2024.2.0tqdm_cls)r   FSSPEC_VERSIONr   parsesuper__init__r   tqdm)selftqdm_kwargsargskwargs	__class__s       r*   r   zTqdmCallback.__init__'  s]      7==#<<G[:4:6:DJ!&F:G[:4:6:r)   N)r%   r&   r'   r   __classcell__r   s   @r*   r   r   &  s    ; ;r)   r   Fc                 p   t        d|         t        | fi |xs i \  }}t        |xs dddt        j                  j                  d      dk(  r?t        j                         j                  r!t        j                         j                  d   nd |d	      }|j                  ||j                  |
       y )Nr   DownloadingBTr   r   )descunit
unit_scalepositiondisable)r   )callback)r   r   r   r4   r   getmultiprocessingcurrent_process	_identityget_filename)rb   	temp_filer{   r   r}   r   r5   r   s           r*   
fsspec_getr   0  s    %u&=>8!6B8HB)Mzz~~XY]``//1;; (779CCBG #	
H KKinnxK8r)   c	           	         |i }|t         j                  }t        |t              rt	        |      }t        j                  |d       d}	d}
t        | d      }t
        j                  j                  ||      }t
        j                  j                  |      r|s|s|S t        | |      }|||d<   t        | |      }	|r&|	j                  dd      xs |	j                  d	d      nd}
t        | |
      }t
        j                  j                  ||      }t
        j                  j                  |      r|s|S |d
z   }t        |      5  t
        j                  j                  |      r|s|cddd       S |dz   }t        |d      5 }t         j#                  |  d|j$                          t'        | ||||       ddd       t         j#                  d|  d|        t)        j*                  j$                  |       t         j#                  d|        | |
d}|dz   }t        |dd      5 }t-        j.                  ||       ddd       ddd       |S # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   |S xY w)a  
    Given a URL, look for the corresponding file in the local cache.
    If it's not there, download it. Then return the path to the cached file.

    Return:
        Local path (string)

    Raises:
        FileNotFoundError: in case of non-recoverable file
            (non-existent or no cache on disk)
        ConnectionError: in case of unreachable url
            and no cache on disk
    NT)exist_ok)rc   )ro   z
user-agent)r{   ETagrc   z.lockr+   zw+bzB not found in cache or force_download set to True, downloading to )r{   r   r}   zstoring z in cache at zcreating metadata file for )rb   rc   z.jsonwr\   )encoding)r   HF_DATASETS_CACHErC   r   rB   r4   makedirsri   r5   rS   r   r   r   r   r   openloggerr   r   r   shutilmovejsondump)rb   ry   rw   rs   rz   ro   r{   r|   r}   responserc   rf   
cache_pathheaders	lock_pathincomplete_pathr   meta	meta_path	meta_files                       r*   r   r   B  sM   0 ,,	)T"	N	KK	D)HD $Cd3Hi2J	ww~~j!. 1EBG *3@HIQHLL&D(,,vt*DW[D $C.Hi2J	ww~~j!. W$I	)	 '77>>*%n' '
 %}4 /5) 	wYKK3%abkbpbpaqrssI]iuv	w
 	hse==>INNJ/1*>?D)(	)S73 	'yIIdI&	')'. 	w 	w	' 	')'. sC   	#I06I05I<A3I0/I$I0I!	I0$I-	)I00I:c                        fd}|S )Nc                 p    dj                        dz   | j                  | j                  ndz   | _        | S Nr1   z

)rS   __doc__fndocstrs    r*   docstring_decoratorz1add_start_docstrings.<locals>.docstring_decorator  s1    WWV_v-rzz?U[]^
	r)   r(   r  r	  s   ` r*   add_start_docstringsr         r)   c                        fd}|S )Nc                 p    | j                   | j                   nddz   dj                        z   | _         | S r  )r  rS   r  s    r*   r	  z/add_end_docstrings.<locals>.docstring_decorator  s1    $&JJ$:bjjfLrwwW]^
	r)   r(   r
  s   ` r*   add_end_docstringsr    r  r)   c                 &    t        d | D              S )Nc              3   P   K   | ]  }|j                         j                     y wr   )statst_size)rO   r5   s     r*   rQ   z(estimate_dataset_size.<locals>.<genexpr>  s     5ttyy{""5s   $&)sum)pathss    r*   estimate_dataset_sizer    s    5u555r)   fc                     t               }	 | j                  d      }|s	 t        |      S ||z  }|j                  d      r	 t        |      S B)Nr      
)	bytearrayreadra   bytes)r  resbs      r*   readliner    sT    
+C
FF1I : 	q<<: r)   )txtcsvr   jsonltsvconllconlluorigparquetpklpicklerelxmlarrowr]   zipz(?<!:):/504B0304504B0506504B0708425A68bz21F8BgzipFD377A585A00xz04224D18lz428B52FFDzstds   Rar!rarc              #   2   K   | ]  }t        |        y wr   )len)rO   magic_numbers     r*   rQ   rQ     s       s   c                       e Zd Zy)NonStreamableDatasetErrorNr$   r(   r)   r*   r@  r@    s    r)   r@  c                 d    | j                  d      d   }dD ]  }|j                  |      d   } |S )Nr]   r   z?-_r   )r   )r5   r   symbs      r*   r   r     s>    

3#I  -OOD)!,	-r)   c                    	 | j                  d       | j	                  t
              }| j                  d       t        t
              D ]W  }t        j                  |dt
        |z
         }||c S t        j                  |dt
        |z
         }|Jt        d| d       y# t        t        j                  f$ r Y yw xY w)zQread the magic number from a file-like object and return the compression protocolr   NzCompression protocol 'z' not implemented.)seekAttributeErrorioUnsupportedOperationr  MAGIC_NUMBER_MAX_LENGTHrange$MAGIC_NUMBER_TO_COMPRESSION_PROTOCOLr   0MAGIC_NUMBER_TO_UNSUPPORTED_COMPRESSION_PROTOCOLNotImplementedError)r  r>  icompressions       r*   *_get_extraction_protocol_with_magic_numberrO    s    	q	 6612LFF1I*+ `:>>|LiNehiNi?jk"FJJ<XuZqtuZuKvw"%(>{mK]&^__`	 B334 s   B# #B?>B?urlpathrl   c                    t        |       } | j                  d      d   }t        |      }|t        v s|dv s|j	                  d      ry |t
        v r	t
        |   S t        | |      \  } }	 t        j                  | fi |xs i 5 }t        |      cd d d        S # 1 sw Y   y xY w# t        $ r0 | j                  t        j                        rt        | dz         d  w xY w)Nr~   r   r   r   rk   S
If the repo is private or gated, make sure to log in with `huggingface-cli login`.)rB   r   r   BASE_KNOWN_EXTENSIONSra   !COMPRESSION_EXTENSION_TO_PROTOCOLr   fsspecr   rO  r   r   r   r   )rP  rl   r5   r   r{   r  s         r*   r   r     s    'lG==q!D#D)I**&==;<	7	70;;@ZijG_	[[<_%:< 	A=a@	A 	A 	A f001#pp s*   +B& B	B& B#B& #B& &9Cc                     t        |       j                  d      ^} }t        |       r t        j                  j
                  | g| S t        j
                  | g| } dj                  | g|z         S )u#  
    This function extends os.path.join to support the "::" hop separator. It supports both paths and urls.

    A shorthand, particularly useful where you have multiple hops, is to “chain” the URLs with the special separator "::".
    This is used to access files inside a zip file over http for example.

    Let's say you have a zip file at https://host.com/archive.zip, and you want to access the file inside the zip file at /folder1/file.txt.
    Then you can just chain the url this way:

        zip://folder1/file.txt::https://host.com/archive.zip

    The xjoin function allows you to apply the join on the first path of the chain.

    Example::

        >>> xjoin("zip://folder1::https://host.com/archive.zip", "file.txt")
        zip://folder1/file.txt::https://host.com/archive.zip
    r~   )rB   r   r:   r4   r5   rS   rR   )apr  s      r*   xjoinrY  .  sb    & FLLEAQww||A"""NN1!q!yy!q!!r)   c                 >   t        |       j                  d      ^} }t        |       r7t        j                  j                  t        |       j                               } nt        j
                  |       } | j                  d      r| dz  } dj                  | g|z         S )u#  
    This function extends os.path.dirname to support the "::" hop separator. It supports both paths and urls.

    A shorthand, particularly useful where you have multiple hops, is to “chain” the URLs with the special separator "::".
    This is used to access files inside a zip file over http for example.

    Let's say you have a zip file at https://host.com/archive.zip, and you want to access the file inside the zip file at /folder1/file.txt.
    Then you can just chain the url this way:

        zip://folder1/file.txt::https://host.com/archive.zip

    The xdirname function allows you to apply the dirname on the first path of the chain.

    Example::

        >>> xdirname("zip://folder1/file.txt::https://host.com/archive.zip")
        zip://folder1::https://host.com/archive.zip
    r~   ://)rB   r   r:   r4   r5   rY   r   rT   rR   ra   rS   rW  r  s     r*   xdirnamer^  I  s|    & FLLEAQGGOODG,,./a  	zz#	T	99aS1Wr)   c                    t        |       j                  d      ^}}t        |      rt        j                  j                  |      S t        | |      \  } }| j                  d      ^}}t        | fi |^}}|j                  |      S )a  Extend `os.path.exists` function to support both local and remote files.

    Args:
        urlpath (`str`): URL path.
        download_config : mainly use token or storage_options to support different platforms and auth types.

    Returns:
        `bool`
    r~   rk   )_as_strr   r:   r4   r5   r   r   r   )rP  rl   main_hop	rest_hopsr{   r   _s          r*   xexistsrd  h  s     #7+11$7HyXww~~h''#DW^m#n &}}T2976o6Qyy""r)   c                     t        |       j                  d      ^} }t        |       r6t        j                  j                  t        |       j                               S t        j
                  |       S )u  
    This function extends os.path.basename to support the "::" hop separator. It supports both paths and urls.

    A shorthand, particularly useful where you have multiple hops, is to “chain” the URLs with the special separator "::".
    This is used to access files inside a zip file over http for example.

    Let's say you have a zip file at https://host.com/archive.zip, and you want to access the file inside the zip file at /folder1/file.txt.
    Then you can just chain the url this way:

        zip://folder1/file.txt::https://host.com/archive.zip

    The xbasename function allows you to apply the basename on the first path of the chain.

    Example::

        >>> xbasename("zip://folder1/file.txt::https://host.com/archive.zip")
        file.txt
    r~   )	rB   r   r:   r4   r5   r   r   rT   rR   r]  s     r*   	xbasenamerf  }  sU    & FLLEAQwwQ 0 0 233!!!$$r)   c                 F   t        |       j                  d      ^} }t        |       r6t        j                  j                  t        |       j                               S t        j                  |       \  } }dj                  | j                  d      r| dz   n| g|z         |fS )u,  
    This function extends os.path.split to support the "::" hop separator. It supports both paths and urls.

    A shorthand, particularly useful where you have multiple hops, is to “chain” the URLs with the special separator "::".
    This is used to access files inside a zip file over http for example.

    Let's say you have a zip file at https://host.com/archive.zip, and you want to access the file inside the zip file at /folder1/file.txt.
    Then you can just chain the url this way:

        zip://folder1/file.txt::https://host.com/archive.zip

    The xsplit function allows you to apply the xsplit on the first path of the chain.

    Example::

        >>> xsplit("zip://folder1/file.txt::https://host.com/archive.zip")
        ('zip://folder1::https://host.com/archive.zip', 'file.txt')
    r~   r[  r\  )
rB   r   r:   r4   r5   r   rT   rR   rS   ra   )rW  r  tails      r*   xsplitri    s    & FLLEAQww}}T!W--/00//!$4yyajjo!d(1=ABDHHr)   c                    t        |       j                  d      ^} }t        |       r6t        j                  j                  t        |       j                               S t        j
                  |       \  } }dj                  | g|z         |fS )u8  
    This function extends os.path.splitext to support the "::" hop separator. It supports both paths and urls.

    A shorthand, particularly useful where you have multiple hops, is to “chain” the URLs with the special separator "::".
    This is used to access files inside a zip file over http for example.

    Let's say you have a zip file at https://host.com/archive.zip, and you want to access the file inside the zip file at /folder1/file.txt.
    Then you can just chain the url this way:

        zip://folder1/file.txt::https://host.com/archive.zip

    The xsplitext function allows you to apply the splitext on the first path of the chain.

    Example::

        >>> xsplitext("zip://folder1/file.txt::https://host.com/archive.zip")
        ('zip://folder1/file::https://host.com/archive.zip', '.txt')
    r~   )
rB   r   r:   r4   r5   splitextr   rT   rR   rS   )rW  r  exts      r*   	xsplitextrm    sr    & FLLEAQwwQ 0 0 233##A&3yy!q!3&&r)   c                    t        |       j                  d      ^}}t        |      rt        j                  j                  |       S t        | |      \  } }| j                  d      ^}}t        | fi |^}}|j                  |      S )zExtend `os.path.isfile` function to support remote files.

    Args:
        path (`str`): URL path.
        download_config : mainly use token or storage_options to support different platforms and auth types.

    Returns:
        `bool`
    r~   rk   )rB   r   r:   r4   r5   isfiler   r   )r5   rl   ra  rb  r{   r   rc  s          r*   xisfilerp    s}     t9??40HyXww~~d## A$Xg ho#zz$/943?3Qyy""r)   c                    t        |       j                  d      ^}}t        |      rt        j                  j                  |       S t        | |      \  } }| j                  d      ^}}t        | fi |x^}}^}}	 |j                  |      }|1t        | |      5 }t        |j                               }ddd       |S |S # t        $ r t        d|        w xY w# 1 sw Y   |S xY w)zExtend `os.path.getsize` function to support remote files.

    Args:
        path (`str`): URL path.
        download_config : mainly use token or storage_options to support different platforms and auth types.

    Returns:
        `int`: optional
    r~   rk   zNo such file: N)rB   r   r:   r4   r5   getsizer   r   sizer   r   xopenr=  r  )	r5   rl   ra  rb  r{   r   rc  rs  r  s	            r*   xgetsizeru    s     t9??40HyXwwt$$ A$Xg ho#zz$/9#D<O<<Qa	=778$D <t_= %1668}%t " 	=#nTF$;<<	=%s   =C CCC(c                 f   t        |       j                  d      ^}}t        |      rt        j                  j                  |       S t        | |      \  } }| j                  d      ^}}t        | fi |x^}}^}}|j                  d      d   }|j                  d      sy|j                  |      S )zExtend `os.path.isdir` function to support remote files.

    Args:
        path (`str`): URL path.
        download_config : mainly use token or storage_options to support different platforms and auth types.

    Returns:
        `bool`
    r~   rk   r   r   rK   T)	rB   r   r:   r4   r5   isdirr   r   strip)r5   rl   ra  rb  r{   r   rc  
inner_paths           r*   xisdirrz    s     t9??40HyXww}}T"" A$Xg ho#zz$/9#D<O<<Qa^^E*2.
$xx
##r)   c                 z   t        |       j                  d      ^}}t        |      rB|r!t        j                  j                  ||      S t        j                  j                  |      S |r2t        j
                  |t        |      j                  d      d         S t        j                  j                  |      S )zExtend `os.path.relpath` function to support remote files.

    Args:
        path (`str`): URL path.
        start (`str`): Start URL directory path.

    Returns:
        `str`
    r~   )startr   )rB   r   r:   r4   r5   relpathrR   )r5   r|  ra  rb  s       r*   xrelpathr~    s     t9??40HyX9>rwwxu5]BGGOOT\D]]OTy  U1A1A$1G1JKsZ\ZaZaZiZijrZssr)   c                     | j                   t        j                  fd}	 || _         | S # t        $ r+ | t	        j
                         } || _         fd| _        Y | S w xY w)Nc                     d }t        ddz         D ]  }	  | i |} |S  t        d      |# t        t        j                  t        j
                  j                  t        j
                  j                  f$ r\}|}t        j                  dt        j                   d| d d       t        j                  t        j                         Y d }~d }~ww xY w)Nr   z4Got disconnected from remote data host. Retrying in zsec [rK   ]zServer Disconnected)rI  r#   asyncioTimeoutErrorrequests
exceptionsConnectionErrorTimeoutr   warningr   STREAMING_READ_RETRY_INTERVALtimesleep)r   r   disconnect_errretryouterrmax_retriesr  s         r*   read_with_retriesz?_add_retries_to_file_obj_read_method.<locals>.read_with_retries)  s    1kAo. 	MEAD+F+ 
#	M  ""78nL $$$##33##++	 
A "%J6KoKoJppuv{u||}  J  ~K  KL  M 

6??@@
As   /AC:ACCc                     t        |      S r   )getattr)rc  attrorig_file_objs     r*   <lambda>z6_add_retries_to_file_obj_read_method.<locals>.<lambda>D  s    w}d/K r)   )r  r   STREAMING_READ_MAX_RETRIESrE  rF  	RawIOBase__getattr__)file_objr  r  r  r  s     @@@r*   $_add_retries_to_file_obj_read_methodr  %  sg    ==D33K*L) O  L <<>)KOLs   0 0A$#A$c                     g }i }| j                  d      D ]4  }t        ||      \  }}|j                  |       |j                  |       6 dj	                  |      fS )Nr~   rk   )r   ,_prepare_single_hop_path_and_storage_optionsappendupdaterS   )rP  rl   prepared_urlpathprepared_storage_optionshopr{   s         r*   r   r   H  sm     !}}T" 9KCapq_$ ''89 99%&77r)   c                 8   |dn|j                   }| j                  t        j                        r7d| v r3d| t	        t        j                        dz   d j                  ddd      z   } d| v r| j                  d      d   nd}|,||j                  v r|j                  |   j                         }nV|R||j                  vrD|j                  j                         D ci c]  \  }}|t        j                         vr||  }}}ni }|d	v r|j                  d
i       }ddi||d
<   d| v rrt               j                  | d      }|j                  j                         D ]1  \  }	}
|	j                  d      s| d|
z   z  } |j                  }d|i|}3 d| vr| dz  } | j                  d      ra|j                  di       }ddi||d<   nE|dk(  r@|t        j                  d|}t        j                   t#        j$                  d      k  rd|d<   |r||i}| |fS c c}}w )aT  
    Prepare the URL and the kwargs that must be passed to the HttpFileSystem or HfFileSystem

    In particular it resolves google drive URLs
    It also adds the authentication headers for the Hugging Face Hub, for both https:// and hf:// paths.

    Storage options are formatted in the form {protocol: storage_options_for_protocol}
    Nz	/resolve/rm   r   @r   r   file>   httphttpsclient_kwargs	trust_envTzdrive.google.com
   )timeoutdownload_warningz	&confirm=cookieszconfirm=z
&confirm=tz"https://raw.githubusercontent.com/r   zAccept-Encodingidentityhf)ro   rn   z0.21.0default
block_size)ro   r   r   r   r=  rL   r   r{   copyr   rU  available_protocolspopr   headr  HF_HUB_VERSIONr   r   )rP  rl   ro   r   r{   option_nameoption_valuer  r   r   r   r  r   s                r*   r  r  T  sd    $+D1F1FE&,,-+2HGC(:(:$;a$?$ABJJ;X[]^__*/7*:w}}U#A&H"x?3R3R'R)99(CHHJ		$9X9X)X .=-L-L-R-R-T
)\&"<"<">> %
 
 $$'++OR@,7+O+O(("}))'2)>H ((..0 N1<< 23{Q.G&..G'0'&M_&MO	N (<'BC%)))R8G*;Z)S7)SOI&	T	**
 
   7==#::,5OL)#_5O##G
s   #Hrk   r  c                    t        |       }|j                  d      ^}}t        |      r"|j                  dd       t	        ||g|i |S t        ||      \  } }i ||xs i }	 t        j                  | g|d|i|j	                         }	t        |	      }	|	S # t        $ r }
t        |
      dk(  rt        d      |
 d}
~
wt        $ r0 | j                  t        j                        rt        | dz         d w xY w)	a  Extend `open` function to support remote files using `fsspec`.

    It also has a retry mechanism in case connection fails.
    The `args` and `kwargs` are passed to `fsspec.open`, except `token` which is used for queries to private repos on huggingface.co

    Args:
        file (`str`): Path name of the file to be opened.
        mode (`str`, *optional*, default "r"): Mode in which the file is opened.
        *args: Arguments to be passed to `fsspec.open`.
        download_config : mainly use token or storage_options to support different platforms and auth types.
        **kwargs: Keyword arguments to be passed to `fsspec.open`.

    Returns:
        file object
    r~   r  Nrk   modezCannot seek streaming HTTP filezStreaming is not possible for this dataset because data host server doesn't support HTTP range requests. You can still load this dataset in non-streaming mode by passing `streaming=False` (default)rR  )r`  r   r:   r  r   r   rU  r   rB   r@  r   r   r   r   r  )r  r  rl   r   r   file_strra  rb  r{   r  r   s              r*   rt  rt    s#   " t}H#>>$/HyX

<&Hd4T4V44=hXghD/22?0b2F;;t@@$@@EEG  4H=HO!  q666+y 
  ??6--.#mm s   ')B 	C=&C<C=c                    t        |       j                  d      ^}}t        |      rt        j                  |       S t        | |      \  } }| j                  d      ^}}t        | fi |^}}|j                  d      d   }|j                  d      r|j                  |      st        d|        |j	                  |d      }|D  cg c]0  } t        j                  j                  | j                  d            2 c} S c c} w )	zExtend `os.listdir` function to support remote files.

    Args:
        path (`str`): URL path.
        download_config : mainly use token or storage_options to support different platforms and auth types.

    Returns:
        `list` of `str`
    r~   rk   r   r   rK   zDirectory doesn't exist: F)detail)r`  r   r:   r4   listdirr   r   rx  rw  r   r5   r   rstrip)	r5   rl   ra  rb  r{   r   rc  ry  r  s	            r*   xlistdirr    s     #4=..t4HyXzz$ !B$Xg ho#zz$/943?3Q^^E*2.
C *)=#&?v$FGG

:e
4?DEt  S!12EEEs   	5D)	recursiverl   c                   t        |       j                  d      ^}}t        |      rt        j                  ||      S t	        | |      \  } }| j                  d      ^}}t        | fi |^}}|j                  d      d   }|j                  |      }	t        |j                  t              r|j                  n|j                  d   }
|	D cg c]  }dj                  |
 d| g|z          c}S c c}w )a  Extend `glob.glob` function to support remote files.

    Args:
        urlpath (`str`): URL path with shell-style wildcard patterns.
        recursive (`bool`, default `False`): Whether to match the "**" pattern recursively to zero or more
            directories or subdirectories.
        download_config : mainly use token or storage_options to support different platforms and auth types.

    Returns:
        `list` of `str`
    r~   )r  rk   r   r   r   )
r`  r   r:   globr   r   rC   r   rB   rS   )rP  r  rl   ra  rb  r{   r   rc  ry  globbed_pathsr   globbed_paths               r*   xglobr    s     #7+11$7HyXyyY77 $EW^m#n &}}T2976o6Q^^E*1-

+",R[[#">2;;BKKPRO\ijL		hZs<.9:YFGjjjs   !C2c              +   N  K   t        |       j                  d      ^}}t        |      rt        j                  |fi |E d{    yt        | |      \  } }| j                  d      ^}}t        | fi |^}}|j                  d      d   }|j                  d      r|j                  |      sg S t        |j                  t              r|j                  n|j                  d   }	 |j                  |fi |D ]%  \  }
}}dj                  |	 d|
 g|z         ||f ' y7 ߭w)au  Extend `os.walk` function to support remote files.

    Args:
        urlpath (`str`): URL root path.
        download_config : mainly use token or storage_options to support different platforms and auth types.
        **kwargs: Additional keyword arguments forwarded to the underlying filesystem.


    Yields:
        `tuple`: 3-tuple (dirpath, dirnames, filenames).
    r~   Nrk   r   r   rK   )r`  r   r:   r4   walkr   r   rx  rw  rC   r   rB   rS   )rP  rl   r   ra  rb  r{   r   rc  ry  r   dirpathdirnames	filenamess                r*   xwalkr    s%     #7+11$7HyX778.v... $EW^m#n &}}T2976o6Q^^E*2.
C *)=I",R[[#">2;;BKKPRO,3BGGJ,I&,I 	Z(GXy))zWI67)CDhPYYY	Z 	/s   AD%D#C D%c                        e Zd ZdZ fdZddee   fdZddee   fdZd Z	e
dd       Ze
defd	       Ze
defd
       Ze
defd       Zd Zdeedf   dd fdZdedd fdZ fdZ xZS )xPathzHExtension of `pathlib.Path` to support both local paths and remote URLs.c                     t         |          }|j                  d      ^}}t        |      r|S |j	                  dd      }t
        j                  d|      }||j                  d      rdz  }|S dz  }|S )Nr~   \rK   r   r[  r\  r1   )r   __str__r   r:   rL   #SINGLE_SLASH_AFTER_PROTOCOL_PATTERNsubra   )r   path_strra  rb  path_as_posixr   s        r*   r  zxPath.__str__  s    7?$'~~d39"O ((s3;??}U!7!7!<D CEDr)   rl   c                 .    t        t        |       |      S )zExtend `pathlib.Path.exists` method to support both local and remote files.

        Args:
            download_config : mainly use token or storage_options to support different platforms and auth types.

        Returns:
            `bool`
        rk   )rd  rB   )r   rl   s     r*   r   zxPath.exists  s     s4y/BBr)   c              #   (  K   | j                         }|j                  d      ^}}t        |      r#t        |      j	                  |      E d{    y|rD|d   }t        ||      \  }}|j                  d      d   |i}dj                  ||g|dd       }nd}t        t        ||      fi |xs i ^}}	|j	                  t        ||            }
|
D ]7  } t        |       dj                  |j                   d| g|z                9 y7 w)a]  Glob function for argument of type :obj:`~pathlib.Path` that supports both local paths end remote URLs.

        Args:
            pattern (`str`): Pattern that resulting paths must match.
            download_config : mainly use token or storage_options to support different platforms and auth types.

        Yields:
            [`xPath`]
        r~   Nr   rk   r   r   )rT   r   r:   r   r  r   rS   r   rY  typer   )r   patternrl   
posix_pathra  rb  rP  r{   r   rc  r  r  s               r*   r  z
xPath.glob"  s      ]]_
)//59"H~**7333 #A,+LWfu+v(#*==#7#:O"L!YY''JIabM'JK
"&uZ9Uo>SQSUFBGGE(G$<=M - ] d4jr{{m3|n,M+NQZ+Z![\\] 4s   ADDCDc                 .     | j                   d|z   fi |S )zRglob function for argument of type :obj:`~pathlib.Path` that supports both local paths end remote URLs.

        Args:
            pattern (`str`): Pattern that resulting paths must match.

        Yields:
            [`xPath`]
        z**/)r  )r   r  r   s      r*   rglobzxPath.rglob>  s     tyy3F33r)   r.   c                 R     t        |       t        | j                                     S )zName function for argument of type :obj:`~pathlib.Path` that supports both local paths end remote URLs.

        Returns:
            [`xPath`]
        )r  r^  rT   r   s    r*   parentzxPath.parentI  s      tDz(4==?344r)   c                 l    t        | j                         j                  d      d         j                  S )zName function for argument of type :obj:`~pathlib.Path` that supports both local paths end remote URLs.

        Returns:
            `str`
        r~   r   )r   rT   r   r   r  s    r*   r   z
xPath.nameR  +     T]]_2248;<AAAr)   c                 l    t        | j                         j                  d      d         j                  S )zStem function for argument of type :obj:`~pathlib.Path` that supports both local paths end remote URLs.

        Returns:
            `str`
        r~   r   )r   rT   r   stemr  s    r*   r  z
xPath.stem[  r  r)   c                 l    t        | j                         j                  d      d         j                  S )zSuffix function for argument of type :obj:`~pathlib.Path` that supports both local paths end remote URLs.

        Returns:
            `str`
        r~   r   )r   rT   r   suffixr  s    r*   r  zxPath.suffixd  s+     T]]_2248;<CCCr)   c                 2    t        t        |       g|i |S )a  Extend :func:`xopen` to support argument of type :obj:`~pathlib.Path`.

        Args:
            **args: Arguments passed to :func:`fsspec.open`.
            **kwargs: Keyword arguments passed to :func:`fsspec.open`.

        Returns:
            `io.FileIO`: File-like object.
        )rt  rB   )r   r   r   s      r*   r   z
xPath.openm  s     SY0000r)   rX  .c                 T     t        |       t        | j                         g|       S )zExtend :func:`xjoin` to support argument of type :obj:`~pathlib.Path`.

        Args:
            *p (`tuple` of `str`): Other path components.

        Returns:
            [`xPath`]
        )r  rY  rT   r   rX  s     r*   joinpathzxPath.joinpathy  s%     tDz%4!455r)   c                 $    | j                  |      S r   )r  r  s     r*   __truediv__zxPath.__truediv__  s    }}Qr)   c           	      R   t        |       j                  d      ^}}t        |      r' t        |       t        t        |   |                  S  t        |       dj                   t        |       t        |      j                  |            j                         g|z               S )Nr~   )	rB   r   r:   r  r   with_suffixrS   r   rT   )r   r  ra  rb  r   s       r*   r  zxPath.with_suffix  s    "4yt49"4:c%'"5f"=>??tDz$))ZT$Zh0G0S0STZ0[%\%e%e%g$hkt$tuvvr)   r   )r.   r  )r%   r&   r'   r  r  r	   r   r   r  r  propertyr  rB   r   r  r  r   tupler  r  r  r   r   s   @r*   r  r  
  s    R	Ch~&> 	C]Xn-E ]8	4 5 5 Bc B B Bc B B D D D
1	65c? 	6w 	6 S  W  w wr)   r  c                 r    t        | t              rt        |       S t        t        t        |                   S r   )rC   r  rB   )r5   s    r*   r`  r`    s(    "4/3t9JSs4y9I5JJr)   c                    dd l }t        | d      r |j                  | g|i |S t        |       }  |j                  t	        | d|      g|i |S Nr   r  rbrk   )r4  hasattrr   rB   rt  )filepath_or_bufferrl   r   r   r4  s        r*   
xgzip_openr    s_    !6*tyy+=d=f== !34tyy14Yk\`kdjkkr)   c                    dd l }t        | d      r |j                  | g|i |S t        |       }  |j                  t	        | d|      g|i |S r  )numpyr  loadrB   rt  )r  rl   r   r   nps        r*   xnumpy_loadr    s_    !6*rww);D;F;; !34rwwu/WiZ^ibhiir)   c                     dd l }t        | d      r |j                  | fi |S t        |       } |j	                  dd      dk(  rt        | |      |d<    |j                  t        | d|      fi |S )Nr   r  rN  inferrk   r  )pandasr  read_csvrB   r   r   rt  r  rl   r   pds       r*   xpandas_read_csvr    sz    !6*r{{-888 !34::mW-8$<=Oap$qF=!r{{5!3T?[f_effr)   c           
         dd l }t        | d      r	  |j                  | fi |S t        |       } 	  |j                  t        | d|      fi |S # t        $ r-  |j                  t	        | j                               fi |cY S w xY w# t        $ r9  |j                  t	        t        | d|      j                               fi |cY S w xY wr  )r  r  
read_excelr   r   r  rB   rt  r   s       r*   xpandas_read_excelr    s    !6*	O 2==!3>v>> !!34	 2=='94Q`!alekll  	O 2==);)@)@)B!CNvNN	O  	 2==0$X]]_`dj 	s"   A B	 3BB	?C
Cc                     dd l m} t        | d      r |j                  | fi |S t	        |       }  |j                  t        | d|      fi |S )Nr   r  r  )r  rl   )pyarrow.parquetr'  r  
read_tablerB   rt  )r  rl   r   pqs       r*   xpyarrow_parquet_read_tabler
    sU     !6*r}}/:6:: !34r}}U#5DRabmflmmr)   c                     dd l m} t        | d      r |j                  | fi |S  |j                  t	        | d|      fi |S r  )scipy.iorF  r  loadmatrt  )r  rl   r   sios       r*   xsio_loadmatr    sH    !6*s{{-888s{{5!3T?[f_effr)   c                     t        | d      rt        j                  | |      S t        | d|      5 }t        j                  ||      cddd       S # 1 sw Y   yxY w)a  Extend `xml.etree.ElementTree.parse` function to support remote files.

    Args:
        source: File path or file object.
        parser (`XMLParser`, *optional*, default `XMLParser`): Parser instance.
        download_config : mainly use token or storage_options to support different platforms and auth types.

    Returns:
        `xml.etree.ElementTree.Element`: Root element of the given source document.
    r  )parserr  rk   N)r  ETr   rt  )sourcer  rl   r  s       r*   	xet_parser    sP     vvxxv..64A 	.Q88Af-	. 	. 	.s   AAc                 
   t        | d      r*t        j                  j                  j                  | fi |S t        | d|      5 }t        j                  j                  j                  |fi |cddd       S # 1 sw Y   yxY w)a  Extend `xml.dom.minidom.parse` function to support remote files.

    Args:
        filename_or_file (`str` or file): File path or file object.
        download_config : mainly use token or storage_options to support different platforms and auth types.
        **kwargs (optional): Additional keyword arguments passed to `xml.dom.minidom.parse`.

    Returns:
        :obj:`xml.dom.minidom.Document`: Parsed document.
    r  r  rk   N)r  r+  domminidomr   rt  )filename_or_filerl   r   r  s       r*   xxml_dom_minidom_parser    sm     (ww$$%5@@@#T?K 	6q77??((5f5	6 	6 	6s   *A99Bc                       e Zd ZdZed        Zed        Zedee	ddf   fd       Z
e	 ddedee   dee	ddf   fd	       Zedd
       Zeddee   dd fd       Zy)ArchiveIterablezIAn iterable of (path, fileobj) from a TAR archive, used by `iter_archive`c              #   $  K   t        j                  | d      }|D ]o  }|j                  }|j                         s |#t        j
                  j                  |      j                  d      rR|j                  |      }||f g |_	        q ~y w)Nzr|*)fileobjr  r]   __)
tarfiler   r   isregr4   r5   r   r   extractfilemembers)r  streamtarinfo	file_pathr  s        r*   	_iter_tarzArchiveIterable._iter_tar  s     ae4 	 GI==? ww	*55kB))'2HX%%FN	  s   BBc              #   ,  K   t        j                  |       }|j                         D ]h  }|j                  }|j	                         r |#t
        j                  j                  |      j                  d      rR|j                  |      }||f j y w)Nr  )
zipfileZipFileinfolistrf   is_dirr4   r5   r   r   r   )r  zipfmemberr&  r  s        r*   	_iter_zipzArchiveIterable._iter_zip  s     q!mmo 
	&FI}} ww	*55kByy(HX%%
	&s   BBr.   Nc              #      K   t        |      }|dk(  r| j                  |      E d {    y | j                  |      E d {    y 7 7 w)Nr-  )rO  r/  r'  )clsr  rN  s      r*   _iter_from_fileobjz"ArchiveIterable._iter_from_fileobj  sE     @C%}}Q'''}}Q''' ('s!   %AAAA	A	ArP  rl   c              #      K   t        ||      }t        |d|d      5 }|dk(  r| j                  |      E d {    n| j                  |      E d {    d d d        y 7 '7 # 1 sw Y   y xY ww)Nrk   r  r   )rl   r  r-  )r   rt  r/  r'  )r1  rP  rl   rN  r  s        r*   _iter_from_urlpathz"ArchiveIterable._iter_from_urlpath&  st      /wX 7D/aP 	,TUe#==+++==+++		, 	,++		, 	,s>   A1A%A!A%A#A%	A1!A%#A%%A.*A1c                 (     | | j                   |      S r   )r2  )r1  r  s     r*   from_bufzArchiveIterable.from_buf3  s    3))733r)   c                 *     | | j                   ||      S r   )r4  )r1  urlpath_or_bufrl   s      r*   from_urlpathzArchiveIterable.from_urlpath7  s    3))>?KKr)   r   )r.   r  )r%   r&   r'   r  staticmethodr'  r/  classmethodr   r  r2  rB   r	   r   r4  r6  r9  r(   r)   r*   r  r    s    S   & & (itT0A&B ( ( GK
,
,,4^,D
,	5$$	%
, 
, 4 4 L8N;S L_p L Lr)   r  c                   v    e Zd ZdZe	 ddeeee   f   dee	   de
eddf   fd       Zeddee	   dd fd       Zy)	FilesIterablez8An iterable of paths from a list of directories or filesNurlpathsrl   r.   c           
   #     K   t        |t              s|g}|D ]  }t        ||      r| t        ||      rt	        ||      D ]  \  }}}t        |D cg c]  }|j                  d      r| c}      |d d  t        |      j                  d      rPt        |      D ]"  }|j                  d      rt        ||       $  t        |       y c c}w w)Nrk   r  )
rC   listrp  rz  r  sortedr   rf  rY  r   )	r1  r>  rl   rP  r  r  r  rY   rf   s	            r*   _iter_from_urlpathsz!FilesIterable._iter_from_urlpaths?  s      (D) zH 	1Gw@A49'Sb4c 
70GXy"()qgQXQcQcdoQp')q"rHQK )44[A $*9$5 7#..{;$#GX66	7
7 (00!	1 *rs   ACC.C2A*Cc                 *     | | j                   ||      S r   )rB  )r1  r>  rl   s      r*   from_urlpathszFilesIterable.from_urlpathsW  s    3**HoFFr)   r   )r%   r&   r'   r  r;  r   rB   r@  r	   r   r   rB  rD  r(   r)   r*   r=  r=  <  s{    BZ^1S$s)^,1?G?W1	3d?	#1 1. Gh~6N GZi G Gr)   r=  r   )NNF)NFNTNNNF)r)NN)r  r  r  rF  r   r   r4   rR   rer   r   r  xml.dom.minidomr+  r)  collections.abcr   r   	itertoolsr   pathlibr   r   typingr   r	   r
   r   unittest.mockr   urllib.parser   	xml.etreer   r  rU  r   huggingface_hub.errorsr  fsspec.corer   r   fsspec.utilsr   huggingface_hub.utilsr   r   r   	packagingr   r1   r   r   download.download_configr   filesystemsr   r   r   	_filelockr   r   r   trackr    aiohttp.client_exceptionsr!   r#   ImportError	Exception
get_loggerr%   r   INCOMPLETE_SUFFIXrB   r,   boolr8   r:   r=   rE   rU   rZ   ri   r   r   r   r   r   r   r   	callbacksr   r   r   r  r  r  r  r  rS  
extensionsrN   r   rT  -SINGLE_FILE_COMPRESSION_EXTENSION_TO_PROTOCOLr   compiler  r  fromhexrJ  rK  maxrH  r@  r   rO  r   rY  r^  rd  rf  ri  rm  rp  intru  rz  r~  r  r  r   r  rt  r@  r  r  r  r  r  r`  r  r  r  r  r
  r  r  r  r  r=  )fs_classr   s   00r*   <module>rf     s     	   	  	      %   ' 0 0  ! '     1 % S S  " 5 1   # /L 
		H	%! Cds3 s4 sn3 n4 nYc Yd YJA J! J6 6 6 6,C ,C ,4 x2 		x2vsDy1A(B c $C sDyAQ8R ^b 
8C= 
e;6##00 ;9( 
T 	Tn6
 
"  	% 0!,,  	x000	% 
5	% ! ,1((1 	 S8,,,11 -
 H_$_8X%6%6$_ !&0bjj&= # 
MM*u	MM*u	MM*u	MM(U	MM&6	MM.!4	MM*u	MM*v	( $ U4 0  BDtu  		 	c c `Xc] `$c H^<T `hil`m 4"6>#S #8N+C #*%4I6'6#8N#; #t #(H^$<  8$(>": $d $.t" H ?C	8	8#+N#;	8
3S$sCx.())*	8 ?C5$5$#+N#;5$
3S$sCx.())*5$p,RV , ,x7O ,^F3 F.)A FTRUY F2 !&SW k8P k4ZH^$< Z8AwDL AwHK%T5() K W[ l8N;S l X\ jH^<T j	g(>:R 	gH^<T $nXnE] ngh~6N g.H^4L .$6h~>V 6$>L2 >LBG0 GW(  i 	~1
 %`s$   Q5 84R:4R6R5R	R	