
    bi6                     ,    d dl mZ d dlZ G d d      Zy)    )OptionalNc                      e Zd ZdZ	 	 	 	 	 	 	 	 ddee   dee   dee   dee   dedee   d	ed
ee   fdZd Z	d Z
d Zdej                  fdZedeej                  ej                  ej                  f   fd       Zeddeej                  ej                  f   fd       ZddZddZdej                  fdZeddededefd       Zy)IncrementalPCAay  
    An implementation of Incremental Principal Components Analysis (IPCA) that leverages PyTorch for GPU acceleration.
    Adapted from https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/decomposition/_incremental_pca.py

    This class provides methods to fit the model on data incrementally in batches, and to transform new data based on
    the principal components learned during the fitting process.

    Args:
        n_components (int, optional): Number of components to keep. If `None`, it's set to the minimum of the
            number of samples and features. Defaults to None.
        copy (bool): If False, input data will be overwritten. Defaults to True.
        batch_size (int, optional): The number of samples to use for each batch. Only needed if self.fit is called.
            If `None`, it's inferred from the data and set to `5 * n_features`. Defaults to None.
        svd_driver (str, optional): name of the cuSOLVER method to be used for torch.linalg.svd. This keyword
            argument only works on CUDA inputs. Available options are: None, gesvd, gesvdj, and gesvda. Defaults to
            None.
        lowrank (bool, optional): Whether to use torch.svd_lowrank instead of torch.linalg.svd which can be faster.
            Defaults to False.
        lowrank_q (int, optional): For an adequate approximation of n_components, this parameter defaults to
            n_components * 2.
        lowrank_niter (int, optional): Number of subspace iterations to conduct for torch.svd_lowrank.
            Defaults to 4.
        lowrank_seed (int, optional): Seed for making results of torch.svd_lowrank reproducible.
    Nn_componentscopy
batch_size
svd_driverlowrank	lowrank_qlowrank_niterlowrank_seedc	                     || _         || _        || _        || _        || _        || _        || _        || _        d | _        | j                  r| j                          y y )N)
r   r   r   r	   r
   r   r   r   n_features__validate_lowrank_params)	selfr   r   r   r	   r
   r   r   r   s	            U/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/peft/utils/incremental_pca.py__init__zIncrementalPCA.__init__.   s_     )	$$"*(<<))+     c                     | j                   ,| j                  t        d      | j                  dz  | _         y | j                   | j                  k  rt        d      y )NzKn_components must be specified when using lowrank mode with lowrank_q=None.   z8lowrank_q must be greater than or equal to n_components.)r   r   
ValueError)r   s    r   r   z'IncrementalPCA._validate_lowrank_paramsG   sY    >>!  ( !noo!..2DN^^d///WXX 0r   c                 Z    t         j                  j                  |d| j                        S )NF)full_matricesdriver)torchlinalgsvdr	   r   Xs     r   _svd_fn_fullzIncrementalPCA._svd_fn_fullO   s"    ||tOOr   c                 J   | j                   d u}t        j                  j                  |      5  |rt        j                  | j                          t        j
                  || j                  | j                        \  }}}|||j                  fcd d d        S # 1 sw Y   y xY w)N)enabled)qniter)	r   r   randomfork_rngmanual_seedsvd_lowrankr   r   mH)r   r   seed_enabledUSVs         r   _svd_fn_lowrankzIncrementalPCA._svd_fn_lowrankR   s    ((4\\""<"8 	!!$"3"34''T^^4CUCUVGAq!a:		 	 	s   A BB"returnc                 :   t         j                  t         j                  g}t        |t         j                        s&t        j
                  |t         j                        }n| j                  r|j                         }|j                  \  }}| j                  nU| j                  |kD  rt        d| j                   d| d      | j                  |kD  rt        d| j                   d|       |j                  |vr|j                  t         j                        }|S )z
        Validates and converts the input data `X` to the appropriate tensor format.

        Args:
            X (torch.Tensor): Input data.

        Returns:
            torch.Tensor: Converted to appropriate format.
        )dtypezn_components=z invalid for n_features=z<, need more rows than columns for IncrementalPCA processing.z6 must be less or equal to the batch number of samples )r   float32float64
isinstanceTensortensorr   cloneshaper   r   r1   to)r   r   valid_dtypes	n_samples
n_featuress        r   _validate_datazIncrementalPCA._validate_dataZ   s     u}}5!U\\*Qemm4AYY	A !	:$+ 1 122J:, WM M  * 1 122hirhst  77,&U]]#Ar   c                 .   | j                   d   dk(  r|||fS |dkD  r|t        d      |t        d      t        j                  | j                   d   g| j                        }||z   }|>t        j
                  | j                   d   t        j                  | j                        }n||z  }| j                  dt        j                        }||z   |z  }||z  }	| |	z
  }
|
j                  dt        j                        j                         }|
j                          |
j                  dt        j                        }|||z  z  }|||z  }n?||z  }|j                         |z  }||z   ||z  ||z  |z
  j                         z  z   }||z  }|||fS )aj  
        Computes the incremental mean and variance for the data `X`.

        Args:
            X (torch.Tensor): The batch input data tensor with shape (n_samples, n_features).
            last_mean (torch.Tensor): The previous mean tensor with shape (n_features,).
            last_variance (torch.Tensor): The previous variance tensor with shape (n_features,).
            last_sample_count (torch.Tensor): The count tensor of samples processed before the current batch.

        Returns:
            Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: Updated mean, variance tensors, and total sample count.
        r   z6last_mean should not be None if last_sample_count > 0.z:last_variance should not be None if last_sample_count > 0.device   )r1   r@   )dimr1   )r8   r   r   r6   r@   zerosr3   sumsquaresquare_double)r   	last_meanlast_variancelast_sample_countnew_sample_countupdated_sample_countlast_sumnew_sumupdated_meanTtemp
correctionnew_unnormalized_varianceupdated_variancelast_unnormalized_variancelast_over_new_countupdated_unnormalized_variances                    r   _incremental_mean_and_varz(IncrementalPCA._incremental_mean_and_var}   s     771:?m->>>q   !YZZ$ !]^^ <<QXXF03CC{{1771:U]]188TH #44H%%AU]]%3 7*.BB&&1uXX!5==X9@@B
$(HH%--H$H!!Z2B%BB! 8;OO)69J)J&"3":":"<?O"O*+,%(<<K^@^ah@h?p?p?rrs *
  =?SS-/CCCr   c                    |r[t        j                  t        j                  |       d      }t        j                  | |t	        | j
                  d         f         }nZt        j                  t        j                  |      d      }t        j                  |t	        |j
                  d         |f         }| |d| j
                  d    j                  dd      z  } ||j                  dd      z  }| |fS )a[  
        Adjusts the signs of the singular vectors from the SVD decomposition for deterministic output.

        This method ensures that the output remains consistent across different runs.

        Args:
            u (torch.Tensor): Left singular vectors tensor.
            v (torch.Tensor): Right singular vectors tensor.
            u_based_decision (bool, optional): If True, uses the left singular vectors to determine the sign flipping.
                Defaults to True.

        Returns:
            Tuple[torch.Tensor, torch.Tensor]: Adjusted left and right singular vectors tensors.
        r   rB   rA   N)r   argmaxabssignranger8   view)uvu_based_decisionmax_abs_colssignsmax_abs_rowss         r   	_svd_flipzIncrementalPCA._svd_flip   s       <<		!!<LJJquQWWQZ/@!@ABE <<		!!<LJJqqwwqz!2L!@ABE	U<QWWQZ %%a,,	UZZA!tr   c                    |r| j                  |      }|j                  \  }}| j                  
d|z  | _        | j                  || j                  | j                  xs d      D ]  }| j                  ||   d        | S )a\  
        Fits the model with data `X` using minibatches of size `batch_size`.

        Args:
            X (torch.Tensor): The input data tensor with shape (n_samples, n_features).
            check_input (bool, optional): If True, validates the input. Defaults to True.

        Returns:
            IncrementalPCA: The fitted IPCA model.
           r   )min_batch_sizeF)check_input)r=   r8   r   gen_batchesr   partial_fit)r   r   rk   r;   r<   batchs         r   fitzIncrementalPCA.fit   s     ##A&A !	:??"*nDO%%iQUQbQbQgfg%h 	:EQuX59	: r   c                 t   t        | d       }|r| j                  |      }|j                  \  }}|rYd| _        d| _        t        j                  dg|j                        | _        || _	        | j                  st        ||      | _
        || j                  k7  rt        d      | j                  || j                  | j                  | j                        \  }}}|r||z  }nt        j                  |d      }	||	z  }t        j                  | j                  j!                         |z  |z        }
|
| j                  |	z
  z  }t        j"                  | j$                  j'                  d      | j(                  z  ||f      }| j*                  r| j-                  |      \  }}}n| j/                  |      \  }}}| j1                  ||d	      \  }}|d
z  |dz
  z  }|d
z  t        j2                  ||z        z  }|| _        |d| j                   | _        |d| j                   | _        || _        || _        |d| j                   | _        |d| j                   | _        | j                  ||fvr$|| j                  d j                         | _        | S t        j                  d|j                        | _        | S )ak  
        Incrementally fits the model with batch data `X`.

        Args:
            X (torch.Tensor): The batch input data tensor with shape (n_samples, n_features).
            check_input (bool, optional): If True, validates the input. Defaults to True.

        Returns:
            IncrementalPCA: The updated IPCA model after processing the batch.
        components_Nr   r?   z]Number of features of the new batch does not match the number of features of the first batch.rZ   )r[   rA   F)rc   r   rA   g        )hasattrr=   r8   mean_var_r   r6   r@   n_samples_seen_r   r   minr   rX   meansqrtrG   vstacksingular_values_r`   rq   r
   r.   r    rg   rD   explained_variance_explained_variance_ratio_noise_variance_)r   r   rk   
first_passr;   r<   col_meancol_varn_total_samplescol_batch_meanmean_correction_factormean_correctionr+   r,   Vtexplained_varianceexplained_variance_ratios                    r   rm   zIncrementalPCA.partial_fit   s    !}55
##A&A !	: DJDI#(<<AHH#ED )D$$$'	:$>!)))o  .2-K-Ktzz499d&:&:.
*'? MA"ZZq1NA%*ZZ1E1E1L1L1NQ`1`dm0m%n"4

^8STO))..w7$:J:JJ#A <<++A.HAq"((+HAq"q"u=2T_q%89#$a4%))Go4M*N#N .1 1 12 !"5D$5$5 6
	#56I8I8I#J )ABUDDUDU)V&Y
$;;#5d6G6G6I#J#O#O#QD   $)<<AHH#ED r   c                     || j                   z
  }t        j                  |j                         | j                  j
                        j                  |j                        S )a  
        Applies dimensionality reduction to `X`.

        The input data `X` is projected on the first principal components previously extracted from a training set.

        Args:
            X (torch.Tensor): New data tensor with shape (n_samples, n_features) to be transformed.

        Returns:
            torch.Tensor: Transformed data tensor with shape (n_samples, n_components).
        )rs   r   mmrG   rq   rP   r9   r1   r   s     r   	transformzIncrementalPCA.transform-  sC     

Nxx
D$4$4$6$67::177CCr   nrj   c              #      K   d}t        t        | |z              D ]   }||z   }||z   | kD  rt        ||       |}" || k  rt        ||        yyw)a  Generator to create slices containing `batch_size` elements from 0 to `n`.

        The last slice may contain less than `batch_size` elements, when `batch_size` does not divide `n`.

        Args:
            n (int): Size of the sequence.
            batch_size (int): Number of elements in each batch.
            min_batch_size (int, optional): Minimum number of elements in each batch. Defaults to 0.

        Yields:
            slice: A slice of `batch_size` elements.
        r   N)r_   intslice)r   r   rj   start_ends         r   rl   zIncrementalPCA.gen_batches<  sp      s1
?+, 	A*$C^#a's##E	 19q/! s   AA)NTNNFN   N)T)r   )__name__
__module____qualname____doc__r   r   boolstrr   r   r    r.   r   r5   r=   staticmethodtuplerX   rg   ro   rm   r   rl    r   r   r   r      sZ   6 '+#$($(#'&*,sm, tn, SM	,
 SM, , C=, , sm,2YP!5<< !F 6D	u||U\\5<<7	86D 6Dp %ell8R2S  2,ENDell D "s " "S " "r   r   )typingr   r   r   r   r   r   <module>r      s     ~" ~"r   