
    bi                         d dl Z d dlmZmZ d dlmZ ddlmZmZ  ej                  e
      Z G d d      Zed        Zd	 Zd
 Zee j                   defd              Zy)    N)PoolRLock)tqdm   )experimentalloggingc                       e Zd ZdZy)ParallelBackendConfigN)__name__
__module____qualname__backend_name     U/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/datasets/parallel/parallel.pyr
   r
      s    Lr   r
   c	                 n    t         j                  t        | ||||||||	      S t        | ||||||||	      S )a  
    **Experimental.** Apply a function to iterable elements in parallel, where the implementation uses either
    multiprocessing.Pool or joblib for parallelization.

    Args:
        function (`Callable[[Any], Any]`): Function to be applied to `iterable`.
        iterable (`list`, `tuple` or `np.ndarray`): Iterable elements to apply function to.
        num_proc (`int`): Number of processes (if no backend specified) or jobs (using joblib).
        types (`tuple`): Additional types (besides `dict` values) to apply `function` recursively to their elements.
        disable_tqdm (`bool`): Whether to disable the tqdm progressbar.
        desc (`str`): Prefix for the tqdm progressbar.
        single_map_nested_func (`Callable`): Map function that applies `function` to an element from `iterable`.
            Takes a tuple of function, data_struct, types, rank, disable_tqdm, desc as input, where data_struct is an
            element of `iterable`, and `rank` is used for progress bar.
    )r
   r   _map_with_multiprocessing_pool_map_with_joblib)	functioniterablenum_procbatched
batch_sizetypesdisable_tqdmdescsingle_map_nested_funcs	            r   parallel_mapr      sV    " ))1-h':ulTXZp
 	
 (Hgz5,PTVl r   c	                    |t        |      k  r|n
t        |      }g }	t        |      D ][  }
t        |      |z  }t        |      |z  }||
z  t        |
|      z   }||z   |
|k  rdndz   }|	j                  | ||| ||||
||f       ] t        |      t	        d |	D              k7  r*t        dt        |       dt	        d |	D                     t        j                  d| dt        |       d	|	D cg c]  }t        |d          c}        d
\  }}|st               ft        j                  }}t        |||      5 }|j                  ||	      }d d d        t        j                  d| d       D cg c]  }|D ]  }|  }}}t        j                  dt        |       d       |S c c}w # 1 sw Y   cxY wc c}}w )N   r   c              3   8   K   | ]  }t        |d            ywr    Nlen.0is     r   	<genexpr>z1_map_with_multiprocessing_pool.<locals>.<genexpr>7   s     :!C!I:   zHError dividing inputs iterable among processes. Total number of objects z
, length: c              3   8   K   | ]  }t        |d            ywr"   r#   r%   s     r   r(   z1_map_with_multiprocessing_pool.<locals>.<genexpr>;   s     93qt99r)   z	Spawning z processes for z objects in slices of )NN)initargsinitializerz	Finished z
 processesz	Unpacked z objects)r$   rangeminappendsum
ValueErrorloggerinfor   r   set_lockr   map)r   r   r   r   r   r   r   r   r   
split_kwdsindexdivmodstartendr'   r+   r,   poolmappedproc_resobjs                         r   r   r   +   s    $s8}4x#h-HJx r(mx'(mh&ec%o-ck%#+Q158XeC%8':uV[]ikopqr 8}:z:::''*8}o 69j99:<
 	
 KK
H:_S]O;QfpRqabSVWXYZW[S\RqQrs 'Hk!&
DMM+	h{	C >t0*=>
KK)H:Z01"(=hH=Sc=c=F=
KK)CK=12M Sr
> > >s   2F0 F59G5F>c	           	          	 dd l 		j                  t        j                  |      5   	j	                          	fd|D              cd d d        S # 1 sw Y   y xY w)Nr   )n_jobsc              3   \   K   | ]#  } j                        |d dd f       % y w)NT)delayed)r&   r?   r   r   r   joblibr   r   s     r   r(   z#_map_with_joblib.<locals>.<genexpr>U   s?      !
 3FNN12Hc7JX]_ceiko3pq!
s   ),)rD   parallel_backendr
   r   Parallel)
r   r   r   r   r   r   r   r   r   rD   s
   `  ```  `@r   r   r   M   sZ    
 		 	 !6!C!CH	 	U 
 v  !
 !
!
 

 
 
s   $AA#r   c              #      K   | t         _        | dk(  rddlm}  |        	 d dt         _        y# dt         _        w xY ww)a  
    **Experimental.**  Configures the parallel backend for parallelized dataset loading, which uses the parallelization
    implemented by joblib.

    Args:
        backend_name (str): Name of backend for parallelization implementation, has to be supported by joblib.

     Example usage:
     ```py
     with parallel_backend('spark'):
       dataset = load_dataset(..., num_proc=2)
     ```
    sparkr   )register_sparkN)r
   r   joblibsparkrI   )r   rI   s     r   rE   rE   [   s<       *6&w.
2-1*T*s   A1 A>A)
contextlibmultiprocessingr   r   	tqdm.autor   utilsr   r   
get_loggerr   r2   r
   r   r   r   contextmanagerstrrE   r   r   r   <module>rR      sw     '  ) 
		H	%   4D
 23 2  2r   