
    biX                        d dl mZ d dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZ ddZddZd Zd	 Zd
 Zd Zd Zd Zd Zd Zy)    )annotationsN)partial)islice)Bagc                J    t        | ||      }|j                  t        |      S )a  Chooses k unique random elements from a bag.

    Returns a new bag containing elements from the population while
    leaving the original population unchanged.

    Parameters
    ----------
    population: Bag
        Elements to sample.
    k: integer, optional
        Number of elements to sample.
    split_every: int (optional)
        Group partitions into groups of this size while performing reduction.
        Defaults to 8.

    Examples
    --------
    >>> import dask.bag as db
    >>> from dask.bag import random
    >>> b = db.from_sequence(range(5), npartitions=2)
    >>> list(random.sample(b, 3).compute())  # doctest: +SKIP
    [1, 3, 5]
    
populationksplit_every)_samplemap_partitions_finalize_sampler	   r
   r   ress       J/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/dask/bag/random.pysampler      s&    0 Z1+
FC.22    c                J    t        | ||      }|j                  t        |      S )a7  
    Return a k sized list of elements chosen with replacement.

    Parameters
    ----------
    population: Bag
        Elements to sample.
    k: integer, optional
        Number of elements to sample.
    split_every: int (optional)
        Group partitions into groups of this size while performing reduction.
        Defaults to 8.

    Examples
    --------
    >>> import dask.bag as db
    >>> from dask.bag import random
    >>> b = db.from_sequence(range(5), npartitions=2)
    >>> list(random.choices(b, 3).compute())  # doctest: +SKIP
    [1, 1, 5]
    r   )_sample_with_replacementr   r   r   s       r   choicesr   (   s&    , #jA;
WC.22r   c                4   g }g }d}| D ];  }|\  }}|j                  |       ||z  }t        |      }	|j                  ||	f       = ||kD  r|s||fS g }
|D ]  \  }}	|	dkD  s||	|z  z  }|
|g|	z  z  }
 |rt        j                  nt
        } |||
|      |fS )aq  
    Reduce function used on the sample and choice functions.

    Parameters
    ----------
    reduce_iter : iterable
        Each element is a tuple coming generated by the _sample_map_partitions function.
    replace: bool
        If True, sample with replacement. If False, sample without replacement.

    Returns a sequence of uniformly distributed samples;
    r   )r	   weightsr
   )extendlenappendrndr   &_weighted_sampling_without_replacement)reduce_iterr
   replacens_kssnis_in_ik_ipp_isample_funcs                r   _sample_reducer*   B   s     E
A	A !
c		S#hc3Z ! 	1uW!t 	A S7q/C#A
 ")#++.TK!Q!4a77r   c                   t        t        |            D cg c]1  }t        j                  t	        j
                               ||   z  |f3 }}t        j                  ||      D cg c]
  }| |d       c}S c c}w c c}w )zk
    Source:
        Weighted random sampling with a reservoir, Pavlos S. Efraimidis, Paul G. Spirakis
       )ranger   mathlogr   randomheapqnlargest)r	   r   r
   r#   eltxs         r   r   r   h   sl    
 >C3w<=P
QDHHSZZ\"WQZ/3
QC
Q&+nnQ&<=Jqt== R=s   6A9'A>c                    |dk  rt        d      | j                  t        t        |      t        t        |d      t
        |      S )Nr   z(Cannot take a negative number of samplesr
   Fr
   r   out_typer   )
ValueError	reductionr   _sample_map_partitionsr*   r   r   s      r   r   r   q   sJ    1uCDD&!,!U3	    r   c                B    | d   }t        |      |k  rt        d      |S )Nr   zSample larger than population)r   r:   )r   r
   r   s      r   r   r   |   s'    ^F
6{Q899Mr   c                   g d}}t        |       }t        ||      D ]  }|j                  |       |dz  } t        j                  t        j
                  t        j                               |z        }|dz
  t        |      z   }t        ||      D ]u  \  }}||k(  rf||t        j                  |      <   |t        j                  t        j
                  t        j                               |z        z  }|t        |      z  }|dz  }w ||fS )z
    Reservoir sampling strategy based on the L algorithm
    See https://en.wikipedia.org/wiki/Reservoir_sampling#An_optimal_algorithm
    r   r,   )iterr   r   r.   expr/   r   r0   
_geometric	enumerate	randrange)	r	   r
   	reservoirstream_lengthstreamewnxtr#   s	            r   r<   r<      s      "1}I*FFA  	#**,'!+,Aq5JqM
!C&!$ 18*+IcmmA&'$((3::<01455A:a= C m##r   c                r    | j                  t        t        |      t        t        |d      t        |      S )Nr6   Tr7   r8   )r;   r   '_sample_with_replacement_map_partitionsr*   r   r   s      r   r   r      s7    71=!T2	    r   c           	     "   t        |       }t        |      }t        |      D cg c]  }| c}d}}t        |      D cg c]  }t        j                          }}|D cg c]  }t        |       }	}t        |	      }
t        |d      D ]p  \  }}||
k(  rat        |	      D ]H  \  }}||
k(  s|||<   ||xx   t        j                         z  cc<   |	|xx   t        ||         z  cc<   J t        |	      }
|dz  }r ||fS c c}w c c}w c c}w )z
    Reservoir sampling with replacement, the main idea is to use k reservoirs of size 1
    See Section Applications in http://utopia.duth.gr/~pefraimi/research/data/2007EncOfAlg.pdf
    r,   )r?   nextr-   r   r0   rA   minrB   )r	   r
   rF   rG   _rD   rE   rH   wirI   min_nxtr#   jr"   s                 r   rK   rK      s    *FVA+084a4a}I$Qx(!(A($%
&b:b>
&C
&#hG&!$ 	1<!# /1<#$IaLaDCJJL(DFj1..F	/
 #hG	 m###  5(
&s   	DD Dc                    t        t        j                  t        j                  dd            t        j                  d| z
        z        dz   S )Nr   r,   )intr.   r/   r   uniform)r'   s    r   rA   rA      s7    txxAq)*TXXa!e_<=AAr   )N)r,   N)
__future__r   r1   r.   r0   r   	functoolsr   	itertoolsr   dask.bag.corer   r   r   r*   r   r   r   r<   r   rK   rA    r   r   <module>r[      sN    "      3834#8L>$2$8Br   