
    bi                         d dl Zd ZddZy)    Nc                    || z  | j                         z  }t        j                  |      }t        ||j                         z
        }|dkD  r||z
  }t        j                  t        j
                  |            ddd   }|D ]^  }t        j                  ||k(        \  }	t        t        |	      |      }
|j                  |	|
d      }	||	xx   dz  cc<   ||
z  }|dk(  s^ n |j                  t        j                        S )a  Computes approximate mode of multivariate hypergeometric.
    This is an approximation to the mode of the multivariate
    hypergeometric given by class_counts and n_draws.
    It shouldn't be off by more than one.
    It is the mostly likely outcome of drawing n_draws many
    samples from the population given by class_counts.
    Args
    ----------
    class_counts : ndarray of int
        Population per class.
    n_draws : int
        Number of draws (samples to draw) from the overall population.
    rng : random state
        Used to break ties.
    Returns
    -------
    sampled_classes : ndarray of int
        Number of samples drawn from each class.
        np.sum(sampled_classes) == n_draws

    r   NF)sizereplace   )sumnpfloorintsortuniquewhereminlenchoiceastypeint64)class_countsn_drawsrng
continuousflooredneed_to_add	remaindervaluesvalueindsadd_nows              R/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/datasets/utils/stratify.pyapproximate_moder       s    0 <',*:*:*<<Jhhz"G g-.KQ(	9-.tt4  	EhhyE12GT
 #d)[1G::d%:@DDMQM7"Ka	 >>"((##    c           	   #   <  K   t        j                  | d      \  }}|j                  d   }t        j                  |      }t        j                  |      dk  rt        d      ||k  rt        d||fz        ||k  rt        d||fz        t        j                  t        j                  |d	      t        j                  |      d
d       }	t        |      D ]  }
t        |||      }||z
  }t        |||      }g }g }t        |      D ]c  }|j                  ||         }|	|   j                  |d      }|j                  |d
||           |j                  |||   ||   ||   z           e |j                  |      }|j                  |      }||f  y
w)a  

    Provides train/test indices to split data in train/test sets.
    It's reference is taken from StratifiedShuffleSplit implementation
    of scikit-learn library.

    Args
    ----------

    n_train : int,
        represents the absolute number of train samples.

    n_test : int,
        represents the absolute number of test samples.

    random_state : int or RandomState instance, default=None
        Controls the randomness of the training and testing indices produced.
        Pass an int for reproducible output across multiple function calls.

    n_splits : int, default=10
        Number of re-shuffling & splitting iterations.
    T)return_inverser      zMinimum class count errorzLThe train_size = %d should be greater or equal to the number of classes = %dzKThe test_size = %d should be greater or equal to the number of classes = %d	mergesort)kindNr   clip)mode)r	   r   shapebincountr   
ValueErrorsplitargsortcumsumranger    permutationtakeextend)yn_trainn_testr   n_splitsclasses	y_indices	n_classesr   class_indices_n_iclass_counts_remainingt_itraintestir0   perm_indices_class_is                      r   )stratified_shuffle_split_generate_indicesrC   6   s    . 1T:GYa I;;y)L	vvla455Z^egp]qq
 	
 	Y]cen\oo
 	
 HHRZZ	DbiiP\F]^a_aFbcM8_ |Wc:!-!35vsCy! 	HA//,q/:K#0#3#8#86#8#R LL-hA78KK,SVc!fs1voFG		H
 &t$Tk!s   FF)
   )numpyr	   r    rC    r!   r   <module>rG      s    /$d5r!   