
    biL$                     |    d dl Z d dlmZ d dlmZ  G d de      Z G d de      Z G d d	e      Z G d
 de      Zy)    N)	Optimizer)Optimizer1Statec                   2     e Zd Z	 	 	 	 	 	 	 	 	 d fd	Z xZS )LARSc                 `    |dk(  rt        d      t        | 	  d||||fd||||	|
|d       y)aG  
        Base LARS optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`):
                The learning rate.
            momentum (`float`, defaults to 0):
                The momentum value speeds up the optimizer by taking bigger steps.
            dampening (`float`, defaults to 0):
                The dampening value reduces the momentum of the optimizer.
            weight_decay (`float`, defaults to 1e-2):
                The weight decay value for the optimizer.
            nesterov (`bool`, defaults to `False`):
                Whether to use Nesterov momentum.
            optim_bits (`int`, defaults to 32):
                The number of bits of the optimizer state.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            max_unorm (`float`, defaults to 0.02):
                The maximum gradient norm.
        r   'LARS without momentum is not supported!lars        F	max_unorm
block_wiseNNotImplementedErrorsuper__init__)selfparamslrmomentum	dampeningweight_decaynesterov
optim_bitsargsmin_8bit_sizepercentile_clippingr   	__class__s               R/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/bitsandbytes/optim/lars.pyr   zLARS.__init__   sT    R q=%&OPPy! 	 	
    )	r   r   r   F    N   d   {Gz?__name__
__module____qualname__r   __classcell__r   s   @r   r   r      s*    
 8
 8
r   r   c                   0     e Zd Z	 	 	 	 	 	 	 	 d fd	Z xZS )LARS8bitc                 `    |dk(  rt        d      t        | 	  d||||fd|d|||	|
d       y)	a  
        8-bit LARS optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`):
                The learning rate.
            momentum (`float`, defaults to 0):
                The momentum value speeds up the optimizer by taking bigger steps.
            dampening (`float`, defaults to 0):
                The dampening value reduces the momentum of the optimizer.
            weight_decay (`float`, defaults to 1e-2):
                The weight decay value for the optimizer.
            nesterov (`bool`, defaults to `False`):
                Whether to use Nesterov momentum.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            max_unorm (`float`, defaults to 0.02):
                The maximum gradient norm.
        r   r   r	   r
      Fr   Nr   r   r   r   r   r   r   r   r   r   r   r   r   s              r   r   zLARS8bit.__init__H   sT    L q=%&OPPy! 	 	
r   r   r   r   FNr!   r"   r#   r$   r)   s   @r   r+   r+   G   '    
 5
 5
r   r+   c                   0     e Zd Z	 	 	 	 	 	 	 	 d fd	Z xZS )	LARS32bitc                 `    |dk(  rt        d      t        | 	  d||||fd|d|||	|
d       y)	a  
        32-bit LARS optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`):
                The learning rate.
            momentum (`float`, defaults to 0):
                The momentum value speeds up the optimizer by taking bigger steps.
            dampening (`float`, defaults to 0):
                The dampening value reduces the momentum of the optimizer.
            weight_decay (`float`, defaults to 1e-2):
                The weight decay value for the optimizer.
            nesterov (`bool`, defaults to `False`):
                Whether to use Nesterov momentum.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            max_unorm (`float`, defaults to 0.02):
                The maximum gradient norm.
        r   r   r	   r
   r    Fr   Nr   r.   s              r   r   zLARS32bit.__init__   sT    L q=%&OPPy! 	 	
r   r/   r$   r)   s   @r   r2   r2      r0   r   r2   c                   f     e Zd Z	 	 	 	 	 	 d fd	Z fdZ ej                         dd       Z xZS )PytorchLARSc                     |dk  rt        d|       |dk  rt        d|       |dk  rt        d|       t        ||||||      }|r|dk  s|dk7  rt        d      t        	|   ||       y )Nr
   zInvalid learning rate: zInvalid momentum value: zInvalid weight_decay value: )r   r   r   r   r   r   r   z8Nesterov momentum requires a momentum and zero dampening)
ValueErrordictr   r   )
r   r   r   r   r   r   r   r   defaultsr   s
            r   r   zPytorchLARS.__init__   s     86rd;<<c>7zBCC#;L>JKK%
 Q)q.WXX*r   c                 j    t         |   |       | j                  D ]  }|j                  dd        y )Nr   F)r   __setstate__param_groups
setdefault)r   stategroupr   s      r   r;   zPytorchLARS.__setstate__   s5    U#&& 	0EZ/	0r   c                 \   d}|$t        j                         5   |       }ddd       | j                  D ]f  }|d   }|d   }|d   }|d   }|d   }|d   }	|d   D ]<  }
|
j                  | j                  |
   }|
j                  }|d	k7  r|j                  |
|
      }|d	k7  ro|j                  dd      }|)t        j                  |      j                         }||d<   n%|j                  |      j                  |d|z
  
       |r	|||z  z   }n|}d}|dkD  rg|
j                  t         j                  k(  sJ t        j                  |
j                               }t        j                        }|||z  kD  r||z  |z  }|
j                  |	 |z  
       ? i |S # 1 sw Y   xY w)zPerforms a single optimization step.

        Args:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        Nr   r   r   r   r   r   r   r   )alphamomentum_buffer   g      ?r
   )torchenable_gradr<   gradr>   addgetclonedetachmul_add_dtypefloat32norm)r   closurelossr?   r   r   r   r   r   r   pr>   d_pbufupdateupdate_scalepnormunorms                     r   stepzPytorchLARS.step   s    ""$ !y! && '	9E 0LZ(Hk*IZ(Hk*ItB8_ 966>

1ff1$''!<'8Cq=))$5t<C{#kk#.55736/0*//1y=/I!$sX~!5!$"s?77emm333!JJqxxz2E!JJv.Ey500'05'85'@vbS<%78?9'	9R Y! !s   F!!F+)g{Gz?r   r   r   Fr#   )N)	r%   r&   r'   r   r;   rD   no_gradrY   r(   r)   s   @r   r5   r5      s=     +:0
 U]]_5 5r   r5   )	rD   torch.optimr   bitsandbytes.optim.optimizerr   r   r+   r2   r5    r   r   <module>r^      sE   
  ! 89
? 9
x6
 6
r6
 6
rY) Yr   