
    bi8                     ~    d Z ddlmZ ddlmZ deeeeef   defdZ	 G d de
      Z G d	 d
e
      Z G d d      Zy)a  
Hashing function for dataset keys using `hashlib.md5`

Requirements for the hash function:

- Provides a uniformly distributed hash from random space
- Adequately fast speed
- Working with multiple input types (in this case, `str`, `int` or `bytes`)
- Should be platform independent (generates same hash on different OS and systems)

The hashing function provides a unique 128-bit integer hash of the key provided.

The split name is being used here as the hash salt to avoid having same hashes
in different splits due to same keys
    )Union)insecure_hashlib	hash_datareturnc                     t        | t        t        f      r| S t        | t              r| j	                  dd      } n't        | t
              rt        |       } nt        |       | j                  d      S )z|
    Returns the input hash_data in its bytes form

    Args:
    hash_data: the hash salt/key to be converted to bytes
    \/zutf-8)
isinstancebytes	bytearraystrreplaceintInvalidKeyErrorencode)r   s    K/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/datasets/keyhash.py	_as_bytesr   &   sh     )eY/0	Is	# %%dC0		Is	#	N	 i((G$$    c                   "     e Zd ZdZ fdZ xZS )r   z6Raises an error when given key is of invalid datatype.c                     d| _         d| dt        |       | _        d| _        t        |   | j                    | j                   | j                          y )Nz7
FAILURE TO GENERATE DATASET: Invalid key type detectedz
Found Key z	 of type z-
Keys should be either str, int or bytes type)prefixtypeerr_msgsuffixsuper__init__)selfr   	__class__s     r   r   zInvalidKeyError.__init__@   sP    P%i[	$y/9JKFDKK=t{{mDEr   __name__
__module____qualname____doc__r   __classcell__r   s   @r   r   r   =   s    @F Fr   r   c                   $     e Zd ZdZd fd	Z xZS )DuplicatedKeysErrorz(Raise an error when duplicate key found.c                 r   || _         || _        || _        d| _        t	        |      dk  rddj                  |       d| | _        n.ddj                  |d d        dt	        |      dz
   d| | _        |rd|z   nd	| _        t        | %  | j                   | j                   | j                          y )
Nz3Found multiple examples generated with the same key   z
The examples at index z, z have the key z... (z more) have the key 
 )
keyduplicate_key_indicesfix_msgr   lenjoinr   r   r   r   )r   r,   r-   r.   r   s       r   r   zDuplicatedKeysError.__init__J   s    %:"K$%+5dii@U6V5WWefiejkDL5dii@UVYWY@Z6[5\\abef{b|  @B  cB  bC  CW  X[  W\  ]DL(/dWnRDKK=t{{mDEr   )r+   r   r%   s   @r   r'   r'   G   s    2
F 
Fr   r'   c                   8    e Zd ZdZdefdZdeeeef   defdZ	y)	KeyHasherz,KeyHasher class for providing hash using md5	hash_saltc                 J    t        j                  t        |            | _        y )N)r   md5r   
_split_md5)r   r3   s     r   r   zKeyHasher.__init__Z   s    *..y/CDr   r,   r   c                     | j                   j                         }t        |      }|j                  |       t	        |j                         d      S )zReturns 128-bits unique hash of input key

        Args:
        key: the input key to be hashed (should be str, int or bytes)

        Returns: 128-bit int hash key   )r6   copyr   updater   	hexdigest)r   r,   r5   byte_keys       r   hashzKeyHasher.hash]   s>     oo""$S>

83==?B''r   N)
r    r!   r"   r#   r   r   r   r   r   r=    r   r   r2   r2   W   s1    6E# E(c3o. (3 (r   r2   N)r#   typingr   huggingface_hub.utilsr   r   r   r   r   r   	Exceptionr   r'   r2   r>   r   r   <module>rB      sY   "   2%sC	9: %u %.Fi FF) F ( (r   