
    bi2                     
   d dl mZ d dlZd dlmZmZ d dlZd dlmZ	 d dl
mZ  G d dej                  j                        Z G d de      Z G d	 d
e      Z G d de      Z G d de      Z G d de      Z G d de      Zy)    )IterableN)LiteralOptional)Optimizer2Statec                        e Zd ZdZ	 	 	 	 	 	 	 ddeej                  j                     dede	eeef   dededede
e   d	e
e   f fd
Z ej                         dd       Z xZS )_ReferenceAdEMAMixz4
    Reference: https://hf.co/papers/2409.03137
    paramslrbetasalphaepsweight_decayt_beta3t_alphac	           	      J    t        |||||||      }	t        
| 	  ||	       y )N)r
   r   r   r   r   r   r   )dictsuper__init__)selfr	   r
   r   r   r   r   r   r   defaults	__class__s             V/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/bitsandbytes/optim/ademamix.pyr   z_ReferenceAdEMAMix.__init__   s1     e<Y`jq
 	*    c           
         d }|$t        j                         5   |       }d d d        | j                  D ]?  }d|v r|dxx   dz  cc<   nd|d<   |d   }|d   }|d   \  }}}|d   }	|d   }
|d   }|d	   }|d
   D ]  }|j                  |j                  }| j                  |   }t        |      dk(  r>|j                  dg|j                               |d<   t        j                  |      |d<   |d   d   |d   d   |d   }}}d||d   z  z
  }d||d   z  z
  }|
t        |d   |	z  |
z  |	      }	|ct        j                  |      }t        j                  |      }|d   |z  }t        t        j                  ||z  d|z
  |z  ||z  z   z        |      }|j                  |      j                  |d|z
         |j                  |      j                  |d|z
         |j                  |      j                  ||d|z
         |j!                         |dz  z  j#                  |      }|j%                  |      |	|z  z   |z  }|j                  ||       |j                  | |z          B |S # 1 sw Y   [xY w)Nstep   r
   r   r   r   r   r   r   r	   r      m1_m2nu)r   )valueg      ?)torchenable_gradparam_groupsgradstatelen	new_zerossize
zeros_likeminmathlogexpmul_add_addcmul_sqrtadddiv)r   closurelossgroupr
   r   beta1beta2beta3r   r   r   r   pr$   r%   m1m2r   bias_correction1bias_correction2ln_beta1ln_beta3
step_scaledenomupdates                             r   r   z_ReferenceAdEMAMix.step!   s   ""$ !y! && =	%Ef" !ftB,C"'.E5%'NEI&GI&G 0L8_ /%66>vv

1 u:? &'[[!affh%@E'N"'"2"21"5E$K"7^A.gq0A5;B#$uf'=#= #$uf'=#=  &f 5 ?GE &#xxH#xxH!&v!8J(X"5A
Nh;V[ehp[p:q!rsE ##DE	#:##DE	#:''d!e)'D &6&;<AA#F&&!12URZ?5H A\2 sV|$_/%=	%~ E! !s   I::J)MbP?g?g+?gH.?      @:0yE>{Gz?NN)N)__name__
__module____qualname____doc__r   r!   nn	Parameterfloattupler   intr   no_gradr   __classcell__r   s   @r   r   r      s     ,@"!%!%+++,+ + UE5()	+
 + + + #+ #+" U]]_F Fr   r   c                   ,    e Zd Z	 	 	 	 	 	 	 	 	 	 ddeej
                  j                     dedeeeef   dede	e
   de	e
   deded	ed
   de
def fdZ ej                         d        Z ej                          fd       Zej"                  fdZ xZS )AdEMAMixr	   r
   r   r   r   r   r   r   
optim_bits       min_8bit_sizeis_pagedc                 B    t         |   d||||||	d |
dd||||       y )Nademamixd   Tr	   r
   r   r   r   rW   argsr[   percentile_clipping
block_wiser\   r   r   r   r   r   )r   r	   r
   r   r   r   r   r   r   rW   r[   r\   r   s               r   r   zAdEMAMix.__init__l   sD     	%!' # 	 	
r   c                    | j                  |||      }|d   dk(  rt        j                  }n*|d   dk(  rt        j                  }nt	        d|d          |j                         |d   k  rt        j                  }| j                  |   }d|d<   |t        j                  k(  r!d| j                  vr| j                          | j                  d   j                  |j                        x| j                  d<   |d	<   | j                  d
   j                  |j                        x| j                  d
<   |d<   d}|j                         }	|	|z  t        |	|z        z   }
t        j                  d|
ft        j                  |j                        |d<   t        j                  |
ft        j                  |j                        |d<   | j                  ||      |d<   | j                  ||      |d<   y )NrW   rZ   rY   z(Amount of optimizer bits not supported: r[   r   r   dynamicqmap1udynamicqmap2   r   dtypedeviceabsmax1absmax2)rl   state1state2)
get_configr!   float32uint8NotImplementedErrornumelr%   	name2qmap	fill_qmaptorm   boolzeros_get_state_double_bufferget_state_buffer)r   r6   r:   gindexpindexconfigrl   r%   	blocksizenblockss              r   
init_statezAdEMAMix.init_state   s    7,2%MMEL!Q&KKE%(PQWXdQePf&ghh779vo..MME

1fEKK. 9=	9R9U9UVWV^V^9__DNN9%g:>..:T:W:WXYX`X`:aaDNN:&wI	A9nQ](;;F${{Av;emmTUT\T\]E)${{F9EMMRSRZRZ[E)777Gh///?hr   c                 D   | j                  |||      }|d   |d   t        | 	  ||||       y |j                  j	                         |_        |j
                  j	                         |_        | j                  |   }|j
                  }|dxx   dz  cc<   |d   }|d   \  }	}
}|d   }|d   }|d   }|t        ||z  |z  |      }n|}|at        j                  |	      }t        j                  |      }||z  }t        t        j                  ||z  d|z
  |z  ||z  z   z        |      }n|}|d   j                  t        j                  k(  rSt        j                  | j                   |||d   |	|d   ||d	   |d
   |
|||d   d|d   dkD  r|d   nd |d   |d          y |d   j                  t        j"                  k(  r]t        j$                  | j                   |||d   |d
   |d   d   |d   d   |||d   ||d	   |d   |d   |d   |d   |d   d|d          y y )Nr   r   r   r   r   r   rp   r   r
   rq   r   g      ?	max_unormg        	unorm_vec
skip_zeros)gnorm_scaler   r   r   r   rg   ri   rn   ro   )r   r   )rr   r   update_stepdata
contiguousr$   r%   r*   r+   r,   r-   rl   r!   rs   Foptimizer_update_32bitoptimizer_namert   optimizer_update_8bit_blockwise)r   r6   r:   r~   r   r   r%   r$   r   r7   r8   r9   r   r   r   alpha_tr?   r@   rA   beta3_tr   s                       r   r   zAdEMAMix.update_step   s   7)$	):)BGq&&9 ""$""$

1vvfV}$Wouew## $,0%8GG xxHxxHJ(X-A
Nh3NS]`hSh2ijkmrG G ?  EMM1$$##huth~&06{0Cc0I%,t -!,/#& 8_""ekk1--##hhw"w"utggi i ~&!,/' 2r   c                    | j                   r|j                         dk  r4t        j                  dg|j	                         ||j
                        S t        j                  dg|j	                         ||j
                  d}t        j                  |d       | j                  j                  j                  |       |S )Ng     j@r   rk   r   )r\   rv   r!   r{   r(   rm   r   	get_pagedfillpage_mngpaged_tensorsappend)r   r:   rl   buffs       r   r|   z!AdEMAMix._get_state_double_buffer  s    }}	C;;~AFFH~U188LL;;QVVXeAHHMDFF4OMM''..t4Kr   )
rD   rE   rF   NNrG   rH   rZ      F)rI   rJ   rK   r   r!   rM   rN   rO   rP   r   rQ   r   rz   r   rR   r   r   rs   r|   rS   rT   s   @r   rV   rV   k   s    ,@!%!%"%'!
++,
 
 UE5()	

 
 #
 #
 
 
 EN
 
 
@ U]]_$@ $@L U]]_R Rh 16 r   rV   c                        e Zd Z	 	 	 	 	 	 	 	 	 ddeej
                  j                     dedeeeef   dede	e
   de	e
   deded	e
d
ef fdZ xZS )AdEMAMix8bitr	   r
   r   r   r   r   r   r   r[   r\   c                 :    t         |   ||||||||d|	|
       y )NrY   
r
   r   r   r   r   r   r   rW   r[   r\   rd   r   r	   r
   r   r   r   r   r   r   r[   r\   r   s              r   r   zAdEMAMix8bit.__init__  s8     	%' 	 	
r   	rD   rE   rF   NNrG   rH   r   FrI   rJ   rK   r   r!   rM   rN   rO   rP   r   rQ   rz   r   rS   rT   s   @r   r   r     s     ,@!%!%"!
++,
 
 UE5()	

 
 #
 #
 
 
 
 
 
r   r   c                        e Zd Z	 	 	 	 	 	 	 	 ddeej
                  j                     dedeeeef   dede	e
   de	e
   deded	e
f fd
Z xZS )PagedAdEMAMix8bitr	   r
   r   r   r   r   r   r   r[   c
                 8    t         
|   |||||||||	d
       y NT)	r
   r   r   r   r   r   r   r[   r\   rd   r   r	   r
   r   r   r   r   r   r   r[   r   s             r   r   zPagedAdEMAMix8bit.__init__0  5     	%' 	 	
r   rD   rE   rF   NNrG   rH   r   rI   rJ   rK   r   r!   rM   rN   rO   rP   r   rQ   r   rS   rT   s   @r   r   r   /       ,@!%!%"!
++,
 
 UE5()	

 
 #
 #
 
 
 
 
r   r   c                        e Zd Z	 	 	 	 	 	 	 	 	 ddeej
                  j                     dedeeeef   dede	e
   de	e
   deded	ed
   de
f fdZ xZS )PagedAdEMAMixr	   r
   r   r   r   r   r   r   rW   rX   r[   c                 :    t         |   |||||||||	|
d       y )NTr   rd   )r   r	   r
   r   r   r   r   r   r   rW   r[   r   s              r   r   zPagedAdEMAMix.__init__K  s8     	%!' 	 	
r   )	rD   rE   rF   NNrG   rH   rZ   r   )rI   rJ   rK   r   r!   rM   rN   rO   rP   r   rQ   r   r   rS   rT   s   @r   r   r   J  s     ,@!%!%"%'!
++,
 
 UE5()	

 
 #
 #
 
 
 EN
 
 
r   r   c                        e Zd Z	 	 	 	 	 	 	 	 	 ddeej
                  j                     dedeeeef   dede	e
   de	e
   deded	e
d
ef fdZ xZS )AdEMAMix32bitr	   r
   r   r   r   r   r   r   r[   r\   c                 B    t         |   d|||||dd |	dd|
|||       y )Nr^   rZ   r_   Tr`   rd   r   s              r   r   zAdEMAMix32bit.__init__h  sD     	%' # 	 	
r   r   r   rT   s   @r   r   r   g  s     ,@!%!%"!
++,
 
 UE5()	

 
 #
 #
 
 
 
 
 
r   r   c                        e Zd Z	 	 	 	 	 	 	 	 ddeej
                  j                     dedeeeef   dede	e
   de	e
   deded	e
f fd
Z xZS )PagedAdEMAMix32bitr	   r
   r   r   r   r   r   r   r[   c
                 8    t         
|   |||||||||	d
       y r   rd   r   s             r   r   zPagedAdEMAMix32bit.__init__  r   r   r   r   rT   s   @r   r   r     r   r   r   )collections.abcr   r+   typingr   r   r!   bitsandbytes.functional
functionalr   bitsandbytes.optim.optimizerr   optim	Optimizerr   rV   r   r   r   r   r    r   r   <module>r      s|    $  $  # 8].. ]@d dN
8 
:
 
6
H 
:
O 
B
 
r   