
    bi                         d dl Z d dlZd dlmZ d dlmZ ddlmZ ddlm	Z	m
Z
mZmZmZmZ ddlmZ  ee      Z G d	 d
e j&                        Zddee   defdZdee   defdZddededefdZd Zy)    N)Optional)insecure_hashlib   )config) ExpectedMoreDownloadedFilesErrorExpectedMoreSplitsErrorNonMatchingChecksumErrorNonMatchingSplitsSizesErrorUnexpectedDownloadedFileErrorUnexpectedSplitsError   )
get_loggerc                       e Zd ZdZdZdZdZy)VerificationModea  `Enum` that specifies which verification checks to run.

    The default mode is `BASIC_CHECKS`, which will perform only rudimentary checks to avoid slowdowns
    when generating/downloading a dataset for the first time.

    The verification modes:

    |                           | Verification checks                                                           |
    |---------------------------|------------------------------------------------------------------------------ |
    | `ALL_CHECKS`              | Split checks, uniqueness of the keys yielded in case of the GeneratorBuilder  |
    |                           | and the validity (number of files, checksums, etc.) of downloaded files       |
    | `BASIC_CHECKS` (default)  | Same as `ALL_CHECKS` but without checking downloaded files                    |
    | `NO_CHECKS`               | None                                                                          |

    
all_checksbasic_checks	no_checksN)__name__
__module____qualname____doc__
ALL_CHECKSBASIC_CHECKS	NO_CHECKS     T/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/datasets/utils/info_utils.pyr   r      s      J!LIr   r   expected_checksumsrecorded_checksumsc                 &   | t         j                  d       y t        t        |       t        |      z
        dkD  r)t	        t        t        |       t        |      z
              t        t        |      t        |       z
        dkD  r)t        t        t        |      t        |       z
              | D cg c]  }| |   ||   k7  s| }}|d|z   nd}t        |      dkD  rt        d| d| d      t         j                  d|z          y c c}w )	NzUnable to verify checksums.r   z for  zChecksums didn't matchz:
zY
Set `verification_mode='no_checks'` to skip checksums verification and ignore this errorz&All the checksums matched successfully)loggerinfolensetr   strr   r	   )r   r   verification_nameurlbad_urlsfor_verification_names         r   verify_checksumsr+   ,   s!   !12
3!"S);%<<=A.s37I3JSQcMd3d/eff
3!"S);%<<=A+C4F0G#N`Ja0a,bcc1h5G5LPbcfPg5ghHh;L;XG&77^`
8}q&$%:$;3j gg
 	

 KK8;PPQ is   5DDexpected_splitsrecorded_splitsc                 J   | t         j                  d       y t        t        |       t        |      z
        dkD  r)t	        t        t        |       t        |      z
              t        t        |      t        |       z
        dkD  r)t        t        t        |      t        |       z
              | D cg c],  }| |   j                  ||   j                  k7  r| |   ||   d. }}t        |      dkD  rt        t        |            t         j                  d       y c c}w )NzUnable to verify splits sizes.r   )expectedrecordedz$All the splits matched successfully.)	r"   r#   r$   r%   r   r&   r   num_examplesr
   )r,   r-   name
bad_splitss       r   verify_splitsr4   ?   s	   45
3#o"667!;%c#o*>_AU*U&VWW
3#o"667!;#CO(<s??S(S$TUU $4 --1F1S1SS %T*8MNJ 
 :)#j/::
KK67s   51D pathrecord_checksumreturnc                 &   |r_t        j                         }t        | d      5 t        fdd      D ]  }|j	                  |        |j                         }ddd       nd}t        j                  j                  |       dS # 1 sw Y   +xY w)z7Compute the file size and the sha256 checksum of a filerbc                  &     j                  d      S )Ni   )read)fs   r   <lambda>z(get_size_checksum_dict.<locals>.<lambda>V   s    affWo r   r   N)	num_byteschecksum)	r   sha256openiterupdate	hexdigestosr5   getsize)r5   r6   mchunkr?   r<   s        @r   get_size_checksum_dictrI   Q   s    ##%$ 	%5s;   {{}H	% 	%
 .HEE	% 	%s   6BBc                 N    | r#t         j                  r| t         j                  k  S y)zCheck if `dataset_size` is smaller than `config.IN_MEMORY_MAX_SIZE`.

    Args:
        dataset_size (int): Dataset size in bytes.

    Returns:
        bool: Whether `dataset_size` is smaller than `config.IN_MEMORY_MAX_SIZE`.
    F)r   IN_MEMORY_MAX_SIZE)dataset_sizes    r   is_small_datasetrM   ^   s#     11f7777r   )N)T)enumrE   typingr   huggingface_hub.utilsr   r!   r   
exceptionsr   r   r	   r
   r   r   loggingr   r   r"   Enumr   dictr+   r4   r&   boolrI   rM   r   r   r   <module>rV      s     	  2     
H	tyy ,R$ RT R&88D> 8D 8$
F 
Ft 
Ft 
Fr   