
    bi]-                         d dl mZ  G d de      Z G d de      Z G d de      Z G d d	e      Z G d
 de      Z G d de      Zy)    )Optimizer1Statec                   2     e Zd Z	 	 	 	 	 	 	 	 	 d fd	Z xZS )Lionc                 <    t         |   d|||d||||||	|
       y)aj  
        Base Lion optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`, defaults to 1e-4):
                The learning rate.
            betas (`tuple(float, float)`, defaults to (0.9, 0.999)):
                The beta values are the decay rates of the first and second-order moment of the optimizer.
            weight_decay (`float`, defaults to 0):
                The weight decay value for the optimizer.
            optim_bits (`int`, defaults to 32):
                The number of bits of the optimizer state.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
            is_paged (`bool`, defaults to `False`):
                Whether the optimizer is a paged optimizer or not.
        lion        is_pagedNsuper__init__)selfparamslrbetasweight_decay
optim_bitsargsmin_8bit_sizepercentile_clipping
block_wiser
   	__class__s              R/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/bitsandbytes/optim/lion.pyr   zLion.__init__	   s<    L 	 	 	
    )	-C6?g?gGz?r       N   d   TF__name__
__module____qualname__r   __classcell__r   s   @r   r   r      s*     3
 3
r   r   c                   0     e Zd Z	 	 	 	 	 	 	 	 d fd	Z xZS )Lion8bitc
                 <    t         
|   d|||d|d|||||	       y)a   
        8-bit Lion optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`, defaults to 1e-4):
                The learning rate.
            betas (`tuple(float, float)`, defaults to (0.9, 0.999)):
                The beta values are the decay rates of the first and second-order moment of the optimizer.
            weight_decay (`float`, defaults to 0):
                The weight decay value for the optimizer.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
            is_paged (`bool`, defaults to `False`):
                Whether the optimizer is a paged optimizer or not.
        r   r      r	   Nr   r   r   r   r   r   r   r   r   r   r
   r   s             r   r   zLion8bit.__init__@   s<    F 	 	 	
r   r   r   r   Nr   r   TFr    r%   s   @r   r'   r'   ?   '     0
 0
r   r'   c                   0     e Zd Z	 	 	 	 	 	 	 	 d fd	Z xZS )	Lion32bitc
                 <    t         
|   d|||d|d|||||	       y)a  
        32-bit Lion optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`, defaults to 1e-4):
                The learning rate.
            betas (`tuple(float, float)`, defaults to (0.9, 0.999)):
                The beta values are the decay rates of the first and second-order moment of the optimizer.
            weight_decay (`float`, defaults to 0):
                The weight decay value for the optimizer.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
            is_paged (`bool`, defaults to `False`):
                Whether the optimizer is a paged optimizer or not.
        r   r   r   r	   Nr   r*   s             r   r   zLion32bit.__init__t   s<    F 	 	 	
r   r+   r    r%   s   @r   r.   r.   s   r,   r   r.   c                   0     e Zd Z	 	 	 	 	 	 	 	 d fd	Z xZS )	PagedLionc
                 <    t         
|   d|||d||||||	d       y)a  
        Paged Lion optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`, defaults to 1e-4):
                The learning rate.
            betas (`tuple(float, float)`, defaults to (0.9, 0.999)):
                The beta values are the decay rates of the first and second-order moment of the optimizer.
            weight_decay (`float`, defaults to 0):
                The weight decay value for the optimizer.
            optim_bits (`int`, defaults to 32):
                The number of bits of the optimizer state.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
        r   r   Tr	   Nr   )r   r   r   r   r   r   r   r   r   r   r   s             r   r   zPagedLion.__init__   s<    F 	 	 	
r   )r   r   r   r   Nr   r   Tr    r%   s   @r   r1   r1      s'     0
 0
r   r1   c                   .     e Zd Z	 	 	 	 	 	 	 d fd	Z xZS )PagedLion8bitc	                 <    t         	|   d|||d|d||||d       y)a  
        Paged 8-bit Lion optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`, defaults to 1e-4):
                The learning rate.
            betas (`tuple(float, float)`, defaults to (0.9, 0.999)):
                The beta values are the decay rates of the first and second-order moment of the optimizer.
            weight_decay (`float`, defaults to 0):
                The weight decay value for the optimizer.
            optim_bits (`int`, defaults to 32):
                The number of bits of the optimizer state.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
        r   r   r)   Tr	   Nr   
r   r   r   r   r   r   r   r   r   r   s
            r   r   zPagedLion8bit.__init__   s<    D 	 	 	
r   r   r   r   Nr   r   Tr    r%   s   @r   r4   r4      $     /
 /
r   r4   c                   .     e Zd Z	 	 	 	 	 	 	 d fd	Z xZS )PagedLion32bitc	                 <    t         	|   d|||d|d||||d       y)a  
        Paged 32-bit Lion optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`, defaults to 1e-4):
                The learning rate.
            betas (`tuple(float, float)`, defaults to (0.9, 0.999)):
                The beta values are the decay rates of the first and second-order moment of the optimizer.
            weight_decay (`float`, defaults to 0):
                The weight decay value for the optimizer.
            optim_bits (`int`, defaults to 32):
                The number of bits of the optimizer state.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
        r   r   r   Tr	   Nr   r6   s
            r   r   zPagedLion32bit.__init__  s<    D 	 	 	
r   r7   r    r%   s   @r   r:   r:     r8   r   r:   N)bitsandbytes.optim.optimizerr   r   r'   r.   r1   r4   r:    r   r   <module>r>      sW   
 94
? 4
n1
 1
h1
 1
h1
 1
h0
O 0
f0
_ 0
r   