
    bi1v                         d dl mZmZ d dlmZ d dlmZ d dlm	Z	 d dl
Z
d dlmZ d dlmZ  G d d      Z G d	 d
      Z G d de
j$                  j&                        Z G d de      Z G d de      Zy)    )abcdefaultdict)deepcopy)chain)OptionalN)sync_gpuc                       e Zd Zd Zy)MockArgsc                 2    |D ]  }t        | |||           y N)setattr)selfinitial_datakeys      W/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/bitsandbytes/optim/optimizer.py__init__zMockArgs.__init__   s"     	2CD#|C01	2    N)__name__
__module____qualname__r    r   r   r
   r
      s    2r   r
   c                   D    e Zd ZdZdZd Zd Zed        Zd Z	d	dZ
d Zy)
GlobalOptimManagerzK
    A global optimizer manager for enabling custom optimizer configs.
    Nc                     t        d      )NzCall get_instance() instead)RuntimeErrorr   s    r   r   zGlobalOptimManager.__init__   s    899r   c                 J    i | _         i | _        d | _        d| _        g | _        y )NF)
pid2configindex2config	optimizeruses_config_overridemodule_weight_config_tripler   s    r   
initializezGlobalOptimManager.initialize    s(    $)!+-(r   c                     | j                   0| j                  |       | _         | j                   j                          | j                   S r   )	_instance__new__r#   )clss    r   get_instancezGlobalOptimManager.get_instance'   s6    == KK,CMMM$$&}}r   c                    t        |      }t        |d   t              sd|ig}t        |      D ]Z  \  }}t        |d         D ]D  \  }}t	        |      | j
                  v s| j
                  t	        |         | j                  ||f<   F \ y )Nr   params)list
isinstancedict	enumerateidr   r   )r   r*   param_groupsgroup_indexgroupp_indexps          r   register_parametersz&GlobalOptimManager.register_parameters.   s    F|,q/40%|45L"+L"9 	WK'h8 W
a5DOO+@DPRSTPU@VD%%{G&<=W	Wr   c                 t   d| _         t        |t        j                  j                        r|g}t        |t        j
                        r|g}|
||J ||i}|_|D ]Y  }t        |      | j                  v r(| j                  t        |         j                  |       B|| j                  t        |      <   [ yy)a  
        Override initial optimizer config with specific hyperparameters.

        The key-values of the optimizer config for the input parameters are overridden
        This can be both, optimizer parameters like `betas` or `lr`, or it can be
        8-bit specific parameters like `optim_bits` or `percentile_clipping`.

        Arguments:
           parameters (`torch.Tensor` or `list(torch.Tensors)`):
             The input parameters.
           key (`str`):
             The hyperparameter to override.
           value:
             The hyperparameter value.
           key_value_dict (`dict`):
             A dictionary with multiple key-values to override.

        Example:

        ```py
        import torch
        import bitsandbytes as bnb

        mng = bnb.optim.GlobalOptimManager.get_instance()

        model = MyModel()
        mng.register_parameters(model.parameters()) # 1. register parameters while still on CPU

        model = model.cuda()
        # use 8-bit optimizer states for all parameters
        adam = bnb.optim.Adam(model.parameters(), lr=0.001, optim_bits=8)

        # 2. override: the parameter model.fc1.weight now uses 32-bit Adam
        mng.override_config(model.fc1.weight, 'optim_bits', 32)
        ```
        TN)	r!   r,   torchnn	ParameterTensorr/   r   update)r   
parametersr   valuekey_value_dictr4   s         r   override_configz"GlobalOptimManager.override_config8   s    J %)!j%(("4"45$Jj%,,/$J?u0!)))!5\N% <a5DOO+OOBqE*11.A-;DOOBqE*	< &r   c                 @    | j                   j                  |||f       y r   )r"   append)r   module
param_nameconfigs       r   register_module_overridez+GlobalOptimManager.register_module_overridem   s    ((//V0LMr   )NNN)r   r   r   __doc__r%   r   r#   classmethodr(   r5   r?   rE   r   r   r   r   r      s>     I:.  W3<jNr   r   c                        e Zd Zd fd	Zd Z fdZddZd Zd Z e	j                         dd       Zd Zd	 Zd
 Ze	j                  fdZd Z xZS )Optimizer8bitc                    t         |   ||       d| _        i | _        || _        t
        j                  j                         | _        t        j                         | _
        h d| _        |dk(  r| j                          yy)az  
        Base 8-bit optimizer class.

        Arguments:
            params (`torch.Tensor`):
                The input parameters to optimize.
            optim_bits (`int`, defaults to 32):
                The number of bits of the optimizer state.
            is_paged (`bool`, defaults to `False`):
                Whether the optimizer is a paged optimizer or not.
        F>   max1max2qmap1qmap2state1state2absmax1absmax2new_max1new_max2	gnorm_vec	unorm_vec   N)superr   initialized	name2qmapis_pagedFGlobalPageManagerr(   page_mngr   mngnon_castable_tensor_keys	fill_qmap)r   r*   defaults
optim_bitsr[   	__class__s        r   r   zOptimizer8bit.__init__r   sr     	*  ++88:%224)
% ?NN r   c                     t        j                  d      | j                  d<   t        j                  d      | j                  d<   y )NT)signeddynamicFudynamic)r\   create_dynamic_maprZ   r   s    r   ra   zOptimizer8bit.fill_qmap   s4    $%$8$8$Ey!%&%9%9%Gz"r   c                 $    t         |   |       y r   )rX   __setstate__)r   staterd   s     r   rk   zOptimizer8bit.__setstate__   s    U#r   c                     t        |      } j                  }|d   }t        |      t        |      k7  rt        d      d |D        }d |D        }t	        d t        ||      D              rt        d      t        t        j                  d |D              t        j                  d |D                    D ci c]  \  }}||
 }	}} fd	t        t              }
|d
   j                         D ]   \  }}||	v r|	|   } ||      |
|<   ||
|<   " d }t        ||      D cg c]  \  }} |||       }}} j                  |
|d       yc c}}w c c}}w )a1  Load an optimizer state.

        Arguments:
            state_dict (`dict`):
                An optimizer state (should be returned from a call to `state_dict`) to load.
            move_to_device (`bool`, defaults to `True`):
                Whether to move the optimizer's state to the device.
        r0   z<loaded state dict has a different number of parameter groupsc              3   8   K   | ]  }t        |d            ywr*   Nlen.0gs     r   	<genexpr>z0Optimizer8bit.load_state_dict.<locals>.<genexpr>   s     71c!H+&7   c              3   8   K   | ]  }t        |d            ywro   rp   rr   s     r   ru   z0Optimizer8bit.load_state_dict.<locals>.<genexpr>   s     =1c!H+&=rv   c              3   ,   K   | ]  \  }}||k7    y wr   r   )rs   p_lens_lens      r   ru   z0Optimizer8bit.load_state_dict.<locals>.<genexpr>   s     N,%u~Ns   z]loaded state dict contains a parameter group that doesn't match the size of optimizer's groupc              3   &   K   | ]	  }|d      ywro   r   rr   s     r   ru   z0Optimizer8bit.load_state_dict.<locals>.<genexpr>   s     #FAAhK#F   c              3   &   K   | ]	  }|d      ywro   r   rr   s     r   ru   z0Optimizer8bit.load_state_dict.<locals>.<genexpr>   s     #@AAhK#@r|   c                     t        |t        j                        rJ j                         r8|j                  t        j
                  k7  r|j                   j                        }|S t        |t              rV|j                         D ]A  \  }}|j                  v r"s|j                   j                        ||<   6  |      ||<   C |S t        |t        j                        r t        |       fd|D              S |S )zBMake a deep copy of value, casting all tensors to device of param.c              3   0   K   | ]  } |        y wr   r   )rs   vcastparams     r   ru   z>Optimizer8bit.load_state_dict.<locals>.cast.<locals>.<genexpr>   s     "Aa4q>"As   )r,   r7   r:   is_floating_pointdtypeuint8tor-   itemsr`   devicecontainer_abcsIterabletype)r   r=   kr   r   move_to_devicer   s   `   r   r   z+Optimizer8bit.load_state_dict.<locals>.cast   s    %. **,1K!HHU[[1EE4(!KKM 2DAqD999)'(ttELL'9E!H#'q>a2 E>#:#:;"tE{"A5"AAAr   rl   c                     | d   |d<   |S )Nr*   r   )r2   	new_groups     r   update_groupz3Optimizer8bit.load_state_dict.<locals>.update_group   s    "'/Ihr   )rl   r0   N)r   r0   rq   
ValueErroranyzipr   from_iterabler   r-   r   rk   )r   
state_dictr   groupssaved_groups
param_lens
saved_lensold_idr4   id_maprl   r   r   r   r   rt   ngr0   r   s   ` `               @r   load_state_dictzOptimizer8bit.load_state_dict   st    j)
""!.1v;#l++[\\77
==
N#j*2MNNo  !###F#FF###@#@@
 AI
 
	2 D!w'--/ 	DAqF{q	#E1~ea		 :=V\9RS2Q+SSE<HI_
\ Ts   >E,Ec                    t        | j                        D ]  \  }}t        |d         D ]  \  }}|| j                  v s| j                  |   }|j                         D ][  \  }}t	        |t
        j                        s!t        |dd      }|r1|j                  |j                        | j                  |   |<   ]   y )Nr*   r[   F)
r.   r0   rl   r   r,   r7   r:   getattrr   r   )	r   gindexr2   pindexr4   valuesr   r   r[   s	            r   to_gpuzOptimizer8bit.to_gpu   s    &t'8'89 	BMFE&uX7 B	

?!ZZ]F & B1%a6'.q*e'DH#+3444>

1a 0	BB	Br   c                 :   | j                   j                  D ]  \  }}}t        ||      }|J t        |t        j
                        st        |t        j                        sJ d}t        | j                        D ]  \  }}|r ot        |d         D ]  \  }}	|r "t        |	      t        |      k(  s"|| j                   j                  t        |	      <   | j                   j                  t        |	         | j                   j                  ||f<   d}   y )NFr*   T)r_   r"   r   r,   r7   r:   r9   r.   r0   r/   r   r   )
r   rB   attrrD   pmodulefoundr   r2   r   r4   s
             r   check_overrideszOptimizer8bit.check_overrides   s   $(HH$H$H 	% FD&fd+G&&&gu||4
7EOO8\\\E!*4+<+<!= %!*5?!; %IFA!u7+ 6<++BqE2BF((BUBUVXYZV[B\--vv.>? $%%	%r   c                 L   d}|$t        j                         5   |       }ddd       | j                  s'| j                          | j	                          d| _        d}t        | j                        D ]  \  }}t        |d         D ]s  \  }}|j                  | j                  |   }t        |      dk(  r| j                  ||||       | j                  |       | j                  ||||       t        |       u  | j                  r|t        |       |S # 1 sw Y   xY w)zPerform a single optimization step.

        Arguments:
            closure (`Callable`, *optional*, defaults to `None`):
                A closure that reevaluates the model and returns the loss.
        NTr*   r   )r7   enable_gradrY   r   r   r.   r0   gradrl   rq   
init_stateprefetch_stateupdate_stepr   r[   )r   closurelossr4   r   r2   r   rl   s           r   stepzOptimizer8bit.step  s    ""$ !y!   "KKM#D &t'8'89 
	MFE&uX7 		66>

1u:?OOE1ff=##A&  66:	
	 ==Q] QK5! !s   DD#c                 |   i }|d   |d<   |d   |d<   |d   |d<   |d   |d<   |j                  dd      |d<   |j                  dd      |d<   |j                  d	d      |d	<   | j                  j                  |d
<   | j                  j                  |d<   | j                  j                  |d<   | j                  j
                  |d<   | j                  j                  |d<   | j                  j                  |d<   ||f| j                  j                  v r*|j                  | j                  j                  ||f          |S )Nbetasepsweight_decaylralpha        t_alphar   t_beta3rc   min_8bit_sizepercentile_clipping
block_wise	max_unorm
skip_zeros)getargsrc   r   r   r   r   r   r_   r   r;   )r   r   r   r2   rD   s        r   
get_configzOptimizer8bit.get_config,  s0   .weu!&~!6~T{t))GS1w!IIi3y!IIi3y#yy33|"&))"9"9(,		(E(E$%#yy33|"ii11{#yy33|Ftxx444MM$((//0@ABr   c                     t        d      )Nz(init_state method needs to be overriddenNotImplementedErrorr   r2   r4   r   r   s        r   r   zOptimizer8bit.init_state@  s    !"LMMr   c                     t        d      )Nz-The update_step method needs to be overriddenr   r   s        r   r   zOptimizer8bit.update_stepC  s    !"QRRr   c                 R   | j                   r|j                         dk  r"t        j                  |||j                        S t        j                  |j                  ||j                  d}t        j                  |d       | j                  j                  j                  |       |S )Ng     j@r   r   r   )r[   numelr7   
zeros_liker   r\   	get_pagedshapefillr^   paged_tensorsrA   )r   r4   r   buffs       r   get_state_bufferzOptimizer8bit.get_state_bufferF  st    }}	C##AU188DD ;;uQXXFDFF4OMM''..t4Kr   c                     | j                   rZ| j                  |   }|d   }t        |dd      }|r6t        j                  |d          d|v rt        j                  |d          y y y y )NrO   r[   FrP   )r[   rl   r   r\   prefetch_tensor)r   r4   rl   s1r[   s        r   r   zOptimizer8bit.prefetch_stateP  sk    ==JJqMExBr:u5H!!%/2u$%%eHo6 % 	 r   )    F)Tr   )r   r   r   r   ra   rk   r   r   r   r7   no_gradr   r   r   r   float32r   r   __classcell__rd   s   @r   rI   rI   q   sh    #JH$HJT	B%& U]]_# #J(NS ). 7r   rI   c                        e Zd Z	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddee   dee   f fdZ ej                         d        Z ej                         d        Z	 xZ
S )Optimizer2Stater   r   c           	         d|k  st        d|       d|k  st        d|       t        |t              rW|j                  dd      j                  dd      j	                         j                  d      }|D cg c]  }t        |       }}t        t        |            D ]&  }d||   cxk  rdk  rn t        d	| d
||           d|k  st        d|       t        |||||||      }t        | -  ||||       |8i }||d<   |	|d<   |
|d<   ||d<   ||d<   ||d<   t        |      | _        || _        y|| _        || _        yc c}w )ah  
        Base 2-state update optimizer class.

        Arguments:
            optimizer_name (`str`):
                The name of the optimizer.
            params (`torch.Tensor`):
                The input parameters to optimize.
            lr (`float`, defaults to 1e-3):
                The learning rate.
            betas (`tuple`, defaults to (0.9, 0.999)):
                The beta values for the optimizer.
            eps (`float`, defaults to 1e-8):
                The epsilon value for the optimizer.
            weight_decay (`float`, defaults to 0.0):
                The weight decay value for the optimizer.
            optim_bits (`int`, defaults to 32):
                The number of bits of the optimizer state.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
            max_unorm (`float`, defaults to 0.0):
                The maximum value to normalize each block with.
            skip_zeros (`bool`, defaults to `False`):
                Whether to skip zero values for sparse gradients and models to ensure correct updates.
            is_paged (`bool`, defaults to `False`):
                Whether the optimizer is a paged optimizer or not.
            alpha (`float`, defaults to 0.0):
                The alpha value for the AdEMAMix optimizer.
            t_alpha (`Optional[int]`, defaults to `None`):
                Number of iterations for alpha scheduling with AdEMAMix.
            t_beta3 (`Optional[int]`, defaults to `None`):
                Number of iterations for beta scheduling with AdEMAMix.

        r   Invalid learning rate: Invalid epsilon value: ( ),      ? Invalid beta parameter at index : Invalid weight_decay value: )r   r   r   r   r   r   r   Nrc   r   r   r   r   r   )r   r,   strreplacestripsplitfloatrangerq   r-   rX   r   r
   r   optimizer_name)r   r   r*   r   r   r   r   rc   r   r   r   r   r   r   r[   r   r   r   birb   rd   s                        r   r   zOptimizer2State.__init__\  s   x by6rd;<<cz6se<==eS!MM#r*223;AACII#NE',-!U1X-E-s5z" 	UA%((S( #CA3bq
!STT	U l";L>JKKCl%Y`jq
 	:x@<D!+D$1D!*=D&'!+D )D!+D DI - DI,5 .s   ;Ec                 p   | j                  |||      }|d   dk(  rt        j                  }n*|d   dk(  rt        j                  }nt	        d|d          |j                         |d   k  rt        j                  }| j                  |   }d|d<   |t        j                  k(  rJ| j                  |t        j                        |d	<   | j                  |t        j                        |d
<   np|t        j                  k(  r\|d   dk(  rd| j                  vr| j                          | j                  d   j                  |j                        | j                  d<   | j                  d   j                  |j                        | j                  d<   | j                  |t        j                        |d	<   | j                  d   |d<   | j                  |t        j                        |d
<   | j                  d   |d<   |d   rd}|j                         }	|	|z  t        |	|z        z   }
t        j                  |
ft        j                  |j                        |d<   t        j                  |
ft        j                  |j                        |d<   nt        j                  dt        j                  |j                        |d<   t        j                  dt        j                  |j                        |d<   t        j                  dt        j                  |j                        |d<   t        j                  dt        j                  |j                        |d<   |d   dk  r$t        j                  d|j                        |d<   |d   dkD  r%t        j                  d|j                        |d <   y y )!Nrc   r   rW   (Amount of optimizer bits not supported: r   r   r   r   rO   rP   rg   rh   rM   rN   r      r   rQ   rR      rK   rS   rL   rT   r   d   r   r   rU   r   r   rV   r   r7   r   r   r   r   rl   r   rZ   ra   r   r   boolzerosr   r2   r4   r   r   rD   r   rl   	blocksizenblockss              r   r   zOptimizer2State.init_state  s   7,2%MMEL!Q&KKE%(PQWXdQePf&ghh779vo..MME

1fEMM!"33AU]]3KE(O"33AU]]3KE(Oekk!V}!DNN2NN$,0NN9,E,H,H,Ry)-1^^J-G-J-J188-Tz*"33AU[[3IE(O!^^I6E'N"33AU[[3IE(O!^^J7E'Nl#	GGIy.DY,??#(;;yVWV^V^#_i #(;;yVWV^V^#_i  %Dahh Wf$)KKEMMRSRZRZ$[j! %Dahh Wf$)KKEMMRSRZRZ$[j!'(3.!&VAHH!EE++$!&T!((!CE+ %r   c                 r   |j                   j                         |_         |j                  j                         |_        | j                  |   }|j                  }| j	                  |||      }|dxx   dz  cc<   |d   }|d   dk  r#t        j                  ||d   ||d         \  }	}
}nd}|d   j                  t        j                  k(  rt        j                  | j                  |||d   |d   d	   |d
   ||d   |d   |d   d   t        |d         dk\  r|d   d   nd|j                  dd      |d   ||d   dkD  r|d   nd |d   |d          y |d   j                  t        j                  k(  r|d   st        j                  | j                  |||d   |d   |d   d	   |d   d   |d
   ||d   |d   |d   |d   |d   |d   |d   |d   ||d   dkD  r|d   nd |d          |d   |d   c|d<   |d<   |d   |d   c|d<   |d<   y |d   j                  t        j                  k(  r|d   rt        j                   | j                  |||d   |d   |d   d	   |d   d   t        |d         dk\  r|d   d   nd|j                  dd      |d
   ||d   |d   |d   |d   |d   |d   ||d           y y y )!Nr   r   r   r   rU   r   rO   r   r   r   r   rP         r   r   r   r   rV   r   r   r   r   rM   rN   rK   rL   rS   rT   )gnorm_scalerV   r   rQ   rR   r  r   )data
contiguousr   rl   r   r\   r   r   r7   r   optimizer_update_32bitr   rq   r   r   optimizer_update_8bitoptimizer_update_8bit_blockwiser   r2   r4   r   r   rl   r   rD   r   current_gnorm
clip_valuer  s               r   r   zOptimizer2State.update_step  sR    ""$""$

1vv7fV}'(3.565J5Jk",-	62M:{ K?  EKK/$$##hw"uthw"&)&/&:a&?w"S

7C(~&&,[&9C&?k"T -!,/#( 8_""ekk1&:N####hhw"w"utggffj!j!~&'06{0Cc0I%,t -)0 05Z/@%-,E&M5,/4Z/@%-,E&M5,8_""ekk1f\6J--##hhw"w"&)&/&:a&?w"S

7C(utggi i ~&'!,/' 7K1r   )MbP?)?g+?:0yE>r   r   N   r   Tr   FFr   NN)r   r   r   r   intr   r7   r   r   r   r   r   s   @r   r   r   [  s    
 !%!%%]-" ##]-$ #%]-~ U]]_1D 1Df U]]_\ \r   r   c                        e Zd Z	 	 	 	 	 	 	 	 	 	 	 	 d fd	Z ej
                         d        Z ej
                         d        Z xZS )Optimizer1Statec                    d|k  st        d|       d|k  st        d|       t        t        |            D ]&  }d||   cxk  rdk  rn t        d| d||           d|k  st        d|       t        ||||      }t        |   ||||       |8i }||d
<   |	|d<   |
|d<   ||d<   ||d<   ||d<   t        |      | _        || _        y	|| _        || _        y	)a  
        Base 1-state update optimizer class.

        Arguments:
            optimizer_name (`str`):
                The name of the optimizer.
            params (`torch.Tensor`):
                The input parameters to optimize.
            lr (`float`, defaults to 1e-3):
                The learning rate.
            betas (`tuple`, defaults to (0.9, 0.0)):
                The beta values for the optimizer.
            eps (`float`, defaults to 1e-8):
                The epsilon value for the optimizer.
            weight_decay (`float`, defaults to 0.0):
                The weight decay value for the optimizer.
            optim_bits (`int`, defaults to 32):
                The number of bits of the optimizer state.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
            max_unorm (`float`, defaults to 0.0):
                The maximum value to normalize each block with.
            skip_zeros (`bool`, defaults to `False`):
                Whether to skip zero values for sparse gradients and models to ensure correct updates.
            is_paged (`bool`, defaults to `False`):
                Whether the optimizer is a paged optimizer or not.
        r   r   r   r   r   r   r   )r   r   r   r   Nrc   r   r   r   r   r   )	r   r   rq   r-   rX   r   r
   r   r   )r   r   r*   r   r   r   r   rc   r   r   r   r   r   r   r[   r   rb   rd   s                    r   r   zOptimizer1State.__init__P  s/   d by6rd;<<cz6se<==s5z" 	UA%((S( #CA3bq
!STT	U l";L>JKK2U,O:x@<D!+D$1D!*=D&'!+D )D!+D DI - DI,r   c                    | j                  |||      }|d   dk(  rt        j                  }n*|d   dk(  rt        j                  }nt	        d|d          |j                         |d   k  rt        j                  }| j                  |   }d|d<   |t        j                  k(  r&| j                  |t        j                        |d	<   nk|t        j                  k(  rW|d   dk(  rSd
| j                  vr| j                          | j                  d
   j                  |j                        | j                  d
<   | j                  |t        j                        |d	<   | j                  d
   |d<   |d   r[d}|j                         }	|	|z  t        |	|z        z   }
t        j                  |
ft        j                  |j                        |d<   nft        j                  dt        j                  |j                        |d<   t        j                  dt        j                  |j                        |d<   |d   dk  r$t        j                  d|j                        |d<   |d   dkD  r%t        j                  d|j                        |d<   y y )Nrc   r   rW   r   r   r   r   r   rO   rg   rM   r   r   r   rQ   r   rK   rS   r   r   r   r   rU   r   r   rV   r   r   s              r   r   zOptimizer1State.init_state  s   7,2%MMEL!Q&KKE%(PQWXdQePf&ghh779vo..MME

1fEMM!"33AU]]3KE(Oekk!V}!DNN2NN$,0NN9,E,H,H,Ry)"33AU[[3IE(O!^^I6E'Nl#	GGIy.DY,??#(;;yVWV^V^#_i  %Dahh Wf$)KKEMMRSRZRZ$[j!'(3.!&VAHH!EE++$!&T!((!CE+ %r   c                 |   |j                   j                         |_         |j                  j                         |_        | j                  |   }|j                  }| j	                  |||      }|dxx   dz  cc<   |d   }|d   dk  r#t        j                  ||d   ||d         \  }	}
}nd}|d   j                  t        j                  k(  r\t        j                  | j                  |||d   |d   d	   |d
   ||d   d |d   d   dd|d   ||d   dkD  r|d   nd |d   |d          y |d   j                  t        j                  k(  r{|d   svt        j                  | j                  |||d   d |d   d	   |d   d   |d
   ||d   |d   d |d   d |d   d |d   ||d   dkD  r|d   nd |d          |d   |d   c|d<   |d<   y |d   j                  t        j                  k(  rZ|d   rTt        j                  | j                  |||d   d |d   d	   |d   d   dd|d
   ||d   |d   d |d   d |d   ||d          y y y )Nr   r   r   r   rU   r   rO   r   r   r   r   r   r   r   rV   r   r  r   rM   rK   rS   )r   rQ   r  )r  r  r   rl   r   r\   r   r   r7   r   r  r   r   r	  r
  r  s               r   r   zOptimizer1State.update_step  s    ""$""$

1vv7fV}'(3.565J5Jk",-	62M:{ K?  EKK/$$##hw"utw"~&&,[&9C&?k"T -!,/#( 8_""ekk1&:N####hw"w"utgfj!~&&,[&9C&?k"T -). 05Z/@%-,E&M5,8_""ekk1f\6J--##hw"w"utgi ~&'!,/' 7K1r   )r  )r  r   r  r   r   Nr  r   Tr   FF)	r   r   r   r   r7   r   r   r   r   r   s   @r   r  r  O  sm    
 K-Z U]]_)D )DV U]]_Z Zr   r  )collectionsr   r   r   copyr   	itertoolsr   typingr   r7   bitsandbytes.functional
functionalr\   bitsandbytes.utilsr   r
   r   optim	OptimizerrI   r   r  r   r   r   <module>r!     sk   
 ;     # '2 2XN XNvg7EKK)) g7Tqm qhUm Ur   