
    bij                     R    d dl mZ  G d de      Z G d de      Z G d de      Zy)	    )Optimizer1Statec                   6     e Zd Z	 	 	 	 	 	 	 	 	 	 	 d fd	Z xZS )RMSpropc                 v    |dk(  rt        d      |rt        d      t        | 	  d||||f||||	|
||       y)a  
        Base RMSprop optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`, defaults to 1e-2):
                The learning rate.
            alpha (`float`, defaults to 0.99):
                The alpha value is the decay rate of the squared gradients of the optimizer.
            eps (`float`, defaults to 1e-8):
                The epsilon value prevents division by zero in the optimizer.
            weight_decay (`float`, defaults to 0.0):
                The weight decay value for the optimizer.
            momentum (`float`, defaults to 0):
                The momentum value speeds up the optimizer by taking bigger steps.
            centered (`bool`, defaults to `False`):
                Whether the gradients are normalized by the variance. If `True`, it can help training at the expense of additional compute.
            optim_bits (`int`, defaults to 32):
                The number of bits of the optimizer state.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
        r   )RMSprop with alpha==0.0 is not supported!"Centered RMSprop is not supported!rmspropNNotImplementedErrorsuper__init__)selfparamslralphaepsweight_decaymomentumcentered
optim_bitsargsmin_8bit_sizepercentile_clipping
block_wise	__class__s                U/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/bitsandbytes/optim/rmsprop.pyr   zRMSprop.__init__	   s[    X A:%&QRR%&JKKH	
    ){Gz?Gz?:0yE>r   r   F    N   d   T__name__
__module____qualname__r   __classcell__r   s   @r   r   r      s0     <
 <
r   r   c                   4     e Zd Z	 	 	 	 	 	 	 	 	 	 d fd	Z xZS )RMSprop8bitc                 v    |dk(  rt        d      |rt        d      t        | 	  d||||f||d||	|
|       y)a  
        8-bit RMSprop optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`, defaults to 1e-2):
                The learning rate.
            alpha (`float`, defaults to 0.99):
                The alpha value is the decay rate of the squared gradients of the optimizer.
            eps (`float`, defaults to 1e-8):
                The epsilon value prevents division by zero in the optimizer.
            weight_decay (`float`, defaults to 0.0):
                The weight decay value for the optimizer.
            momentum (`float`, defaults to 0):
                The momentum value speeds up the optimizer by taking bigger steps.
            centered (`bool`, defaults to `False`):
                Whether the gradients are normalized by the variance. If `True`, it can help training at the expense of additional compute.
            optim_bits (`int`, defaults to 32):
                The number of bits of the optimizer state.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
        r   r   r   r	      Nr
   r   r   r   r   r   r   r   r   r   r   r   r   r   s               r   r   zRMSprop8bit.__init__I   s[    V A:%&QRR%&JKKH	
r   
r   r   r    r   r   FNr"   r#   Tr$   r)   s   @r   r+   r+   H   s-     ;
 ;
r   r+   c                   4     e Zd Z	 	 	 	 	 	 	 	 	 	 d fd	Z xZS )RMSprop32bitc                 v    |dk(  rt        d      |rt        d      t        | 	  d||||f||d||	|
|       y)a  
        32-bit RMSprop optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`, defaults to 1e-2):
                The learning rate.
            alpha (`float`, defaults to 0.99):
                The alpha value is the decay rate of the squared gradients of the optimizer.
            eps (`float`, defaults to 1e-8):
                The epsilon value prevents division by zero in the optimizer.
            weight_decay (`float`, defaults to 0.0):
                The weight decay value for the optimizer.
            momentum (`float`, defaults to 0):
                The momentum value speeds up the optimizer by taking bigger steps.
            centered (`bool`, defaults to `False`):
                Whether the gradients are normalized by the variance. If `True`, it can help training at the expense of additional compute.
            optim_bits (`int`, defaults to 32):
                The number of bits of the optimizer state.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
        r   r   r   r	   r!   Nr
   r.   s               r   r   zRMSprop32bit.__init__   s[    X A:%&QRR%&JKKH	
r   r/   r$   r)   s   @r   r1   r1      s-     <
 <
r   r1   N)bitsandbytes.optim.optimizerr   r   r+   r1    r   r   <module>r5      s0   
 9=
o =
@<
/ <
~=
? =
r   