
from bitsandbytes.optim.optimizer import Optimizer1State


class SGD(Optimizer1State):
    def __init__(
        self,
        params,
        lr,
        momentum=0,
        dampening=0,
        weight_decay=0.0,
        nesterov=False,
        optim_bits=32,
        args=None,
        min_8bit_size=4096,
        percentile_clipping=100,
        block_wise=True,
    ):
        """
        Base SGD optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`):
                The learning rate.
            momentum (`float`, defaults to 0):
                The momentum value speeds up the optimizer by taking bigger steps.
            dampening (`float`, defaults to 0):
                The dampening value reduces the momentum of the optimizer.
            weight_decay (`float`, defaults to 0.0):
                The weight decay value for the optimizer.
            nesterov (`bool`, defaults to `False`):
                Whether to use Nesterov momentum.
            optim_bits (`int`, defaults to 32):
                The number of bits of the optimizer state.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
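
        Example:

            A minimal usage sketch, not taken from the upstream docs: the linear
            model, random input, and hyperparameter values are illustrative
            placeholders, and a CUDA device is assumed for the quantized state.

            ```python
            import torch
            import bitsandbytes as bnb

            model = torch.nn.Linear(64, 2).cuda()
            # momentum must be non-zero; optim_bits selects 8-bit or 32-bit state.
            optimizer = bnb.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, optim_bits=8)

            loss = model(torch.randn(16, 64, device="cuda")).sum()
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            ```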
        """
        if momentum == 0:
            raise NotImplementedError("SGD without momentum is not supported!")
        # (momentum, dampening) is forwarded through Optimizer1State's betas slot;
        # the 0.0 fills its eps slot, which plain SGD does not use.
        super().__init__(
            "momentum",
            params,
            lr,
            (momentum, dampening),
            0.0,
            weight_decay,
            optim_bits,
            args,
            min_8bit_size,
            percentile_clipping,
            block_wise,
        )


class SGD8bit(Optimizer1State):
    def __init__(
        self,
        params,
        lr,
        momentum=0,
        dampening=0,
        weight_decay=0.0,
        nesterov=False,
        args=None,
        min_8bit_size=4096,
        percentile_clipping=100,
        block_wise=True,
    ):
        """
        8-bit SGD optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`):
                The learning rate.
            momentum (`float`, defaults to 0):
                The momentum value speeds up the optimizer by taking bigger steps.
            dampening (`float`, defaults to 0):
                The dampening value reduces the momentum of the optimizer.
            weight_decay (`float`, defaults to 0.0):
                The weight decay value for the optimizer.
            nesterov (`bool`, defaults to `False`):
                Whether to use Nesterov momentum.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
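
        Example:

            A minimal usage sketch, not taken from the upstream docs; the model and
            hyperparameters are placeholders and a CUDA device is assumed:

            ```python
            import torch
            import bitsandbytes as bnb

            model = torch.nn.Linear(1024, 1024).cuda()
            # Tensors with fewer than `min_8bit_size` elements keep 32-bit state;
            # larger ones get block-wise quantized 8-bit momentum buffers.
            optimizer = bnb.optim.SGD8bit(model.parameters(), lr=0.01, momentum=0.9)

            loss = model(torch.randn(8, 1024, device="cuda")).sum()
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            ```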
        """
        if momentum == 0:
            raise NotImplementedError("SGD without momentum is not supported!")
        super().__init__(
            "momentum",
            params,
            lr,
            (momentum, dampening),
            0.0,
            weight_decay,
            8,  # optim_bits is fixed to 8 for this class
            args,
            min_8bit_size,
            percentile_clipping,
            block_wise,
        )


class SGD32bit(Optimizer1State):
    def __init__(
        self,
        params,
        lr,
        momentum=0,
        dampening=0,
        weight_decay=0.0,
        nesterov=False,
        args=None,
        min_8bit_size=4096,
        percentile_clipping=100,
        block_wise=True,
    ):
        """
        32-bit SGD optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`):
                The learning rate.
            momentum (`float`, defaults to 0):
                The momentum value speeds up the optimizer by taking bigger steps.
            dampening (`float`, defaults to 0):
                The dampening value reduces the momentum of the optimizer.
            weight_decay (`float`, defaults to 0.0):
                The weight decay value for the optimizer.
            nesterov (`bool`, defaults to `False`):
                Whether to use Nesterov momentum.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
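
        Example:

            A minimal sketch, not taken from the upstream docs, assuming a CUDA
            device; this class behaves like `SGD` with the optimizer state kept
            in 32 bits:

            ```python
            import torch
            import bitsandbytes as bnb

            model = torch.nn.Linear(64, 2).cuda()
            optimizer = bnb.optim.SGD32bit(model.parameters(), lr=0.01, momentum=0.9)

            loss = model(torch.randn(16, 64, device="cuda")).sum()
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            ```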
        """
        if momentum == 0:
            raise NotImplementedError("SGD without momentum is not supported!")
        super().__init__(
            "momentum",
            params,
            lr,
            (momentum, dampening),
            0.0,
            weight_decay,
            32,  # optim_bits is fixed to 32 for this class
            args,
            min_8bit_size,
            percentile_clipping,
            block_wise,
        )