
    bi7                        d dl mZ d dlZd dlmZ d dlZd dlZd dlmZm	Z	 d dl
mZ d Z G d dej                  j                        Z G d d	ej                  j                        Z G d
 dej                  j                        Zd Z	 	 	 ddej(                  dej(                  dej(                  dej(                  deej(                     dedefdZ	 	 	 ddej(                  dej(                  dej(                  dej(                  deej(                     dedefdZ	 	 	 	 ddej(                  dej(                  deej(                     dee	   fdZy)    )reduceN)Optional)GlobalOutlierPoolerMatmulLtStatec                 8    t        t        j                  | d      S )N   )r   operatormul)iterables    d/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/bitsandbytes/research/autograd/_functions.pyprodr      s    (,,!,,    c                   .    e Zd Zedd       Zed        Zy)MatMulFP8MixedNc                    d| _         t        |j                        dk(  rd| _         || _        || _        |j                  }|j                  d   |d   k(  r?t        j                  |j                  d d |dd  z   |j                  |j                        S t        j                  |j                  d d |d d z   |j                  |j                        S t        j                  |||      \  }	}
t        j                  |	|
|      j                  |j                        }t        j                  |j                         |	      \  }}
t        j                  ||
      j                  |j                        }t        j                   ||      }|| _        || _        || _        || _        |j                  |j                  c| _        | _        t/        | j0                  d d
       r||f| _        |S d| _        |S )NFr   Tr   dtypedevicecode	blocksizer   r      NN)is_emptyr   shapeABtorchemptyr   r   Fquantize_blockwisedequantize_blockwisetoquantizefloat
dequantizematmulfw_codebw_codebszbsz2dtype_Adtype_Banyneeds_input_gradtensorsctxr   r    outr+   r,   r-   r.   B_shapecAstatefp8AcBfp8Boutputs                  r   forwardzMatMulFP8Mixed.forward   s    =ACLCECEggGwwr{gaj({{1773B<'!"+#=QWWUVU]U]^^{{1773B<'"1+#=QWWUVU]U]^^ ((CH	E%%b%3?BB177KJJqwwyw7	E||B&))!''2dD)
 #$77AGG S[s##BQ'(d)CK  'CKr   c                    | j                   rCt        j                  | j                        t        j                  | j                        d d d d d fS | j
                  \  }}}}}}}| j                  \  }}d\  }}t        j                  || j                  | j                        \  }	}
t        j                  |	|
| j                        j                  |j                        }|rVt        j                  ||j                         j                  |j                              j                  |j                        }|rt!        |j"                        dk(  r!|j%                  dd      j'                         }n |j%                  dd      j'                         }t        j                  |j                  |j                        |      j                  |j                        }||d d d d d fS )Nr   r   r      r   r   r   )r   r!   
zeros_liker   r    r2   r3   r#   r$   r,   r.   r%   r&   r   r*   tlenr   	transpose
contiguous)r5   grad_output	req_gradA	req_gradB_r   r    grad_Agrad_B	cgrad_outr9   fp8outAts                r   backwardzMatMulFP8Mixed.backward?   s   <<##CEE*E,<,<SUU,CT4QUW[]aaa.1.B.B+	9aAq!{{1# //#++Y\YaYab	5''	5CHHMPPQ\QbQbc \\&!##%((6<<*@ADDQWWMF177|q [[A&113[[A&113 \\"%%(9(9":KHKKAGGTFvtT4t;;r   NNN   rQ   __name__
__module____qualname__staticmethodr>   rO    r   r   r   r      +     ' 'R "< "<r   r   c                   .    e Zd Zedd       Zed        Zy)MatMulFP8GlobalNc                    d| _         t        |j                        dk(  rd| _         || _        || _        |j                  }|j                  d   |d   k(  r?t        j                  |j                  d d |dd  z   |j                  |j                        S t        j                  |j                  d d |d d z   |j                  |j                        S t        j                  |j                         |      \  }	}
t        j                  |	|
      j                  |j                        }t        j                  |j                         |      \  }}
t        j                  ||
      j                  |j                        }t        j                  ||      }|| _        || _        || _        || _        |j                  |j                  c| _        | _        t+        | j,                  d d       r||f| _        |S d	| _        |S )
NFr   Tr   r   r   r   r   r   )r   r   r   r   r    r!   r"   r   r   r#   r'   r(   r)   r&   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   s                  r   r>   zMatMulFP8Global.forwardi   s    =ACLCECEggGwwr{gaj({{1773B<'!"+#=QWWUVU]U]^^{{1773B<'"1+#=QWWUVU]U]^^ JJqwwyw7	E||B&))!''2JJqwwyw7	E||B&))!''2dD)
 #$77AGG S[s##BQ'(d)CK  'CKr   c                 ~   | j                   rCt        j                  | j                        t        j                  | j                        d d d d d fS | j
                  \  }}}}}}}| j                  \  }}d\  }}t        j                  |j                         | j                        \  }	}
t        j                  |	|
      j                  |j                        }|rVt        j                  ||j                         j                  |j                              j                  |j                        }|rt!        |j"                        dk(  r!|j%                  dd      j'                         }n |j%                  dd      j'                         }t        j                  |j                         | j(                        \  }}
t        j                  ||
      j                  |j                        }t        j                  |j                  |j                        |      j                  |j                        }||d d d d d fS )Nr   r   r@   r   r   r   )r   r!   rA   r   r    r2   r3   r#   r'   r(   r,   r)   r&   r   r*   rB   rC   r   rD   rE   r+   )r5   rF   rG   rH   rI   r   r    rJ   rK   rL   r9   rM   rN   r8   fp8Ats                  r   rO   zMatMulFP8Global.backward   s   <<##CEE*E,<,<SUU,CT4QUW[]aaa.1.B.B+	9aAq!{{1# ::k&7&7&9L	5i/22;3D3DE \\&!##%((6<<*@ADDQWWMF177|q [[A&113[[A&113

288:CKK@IBLLU+..qww7E\\%((6<<"8&ADDQWWMFvtT4t;;r   rP   rR   rW   r   r   rZ   rZ   e   rX   r   rZ   c                   :    e Zd Zeddee   fd       Zed        Zy)SwitchBackBnbNr9   c                 0   |xs
 t               }d| _        t        |j                        dk(  rd| _        || _        || _        || _        |j                  d   |j                  d   k(  rIt        j                  |j                  d d |j                  dd  z   |j                  |j                        S t        j                  |j                  d d |j                  d d z   |j                  |j                        S |j                  }|j                  t        j                         |_        |j                  t        j                  k7  r#t        j                   d|j                   d       t#        |j                        d	k(  r-|j%                  d|j                  d         j'                         }t)        j*                  |j-                  t        j                        |j.                  
      \  }}}	}
}|j.                  dkD  r~|||j0                  rF|}d|d d |f<   |d d |f   }|d d |f   j3                         j'                         |_        ||_        na|j8                  U|j:                  j                  df|_        n7|j0                  s)|j8                  |j:                  j                  df|_        d }|j0                  rt=        |dd       d u}|j?                          xr! |j                  d   |jA                  d      k(  }|r|j'                         }|jB                  r|r|j8                  ||jE                          t)        j*                  |j-                  t        j                              \  |_        |_#        |_$        |_%        }|j:                  j                  df|_        nd}||j0                  s||_        |j:                  d d |j6                  jM                         f   jO                         }||jH                  j%                  dd      z  dz  j3                         j'                         j-                  |j                        |_        d|d d |j6                  jM                         f<   |d d |j6                  jM                         f   }|j8                  d   }t#        |      d	k(  r|d   |d   |d   f}n
|d   |d   f}t)        jP                  ||j:                        }||j                  t        j                  k(  r=t)        jR                  ||	|jH                  |      j-                  |j                        }nMt)        jR                  ||	|jH                  d       j-                  |j                        }|jU                  |       |%#|t        jV                  ||j4                        z  }|| _,        || _-        |j                  |j                  |d n|j                  c| _.        | _/        | _0        tc        | jd                  d d       r||f| _3        |
|j6                  f| _4        n"g d| _3        d| _4        | jk                  d d        t#        |      d	k(  rt        jN                  nd } ||j%                  |            S )NFr   Tr   r   r   z'MatMul8bitLt: inputs will be cast from z to float16 during quantizationr@   )	threshold        rowgradg     _@)biasr   NNNr   c                     | S )NrW   )xs    r   <lambda>z'SwitchBackBnb.forward.<locals>.<lambda>,  s    ! r   )6r   r   r   r   r   r    re   r!   r"   r   r   outlier_poolr   get_instancefloat16warningswarnrC   viewrE   r#   int8_double_quantr&   ra   has_fp16_weightsrB   subBidxSBCBgetattris_contiguousstrideis_trainingreset_gradsCBtSCBSCBtlongcloneint8_linear_matmulint8_mm_dequantadd_r*   r9   
grad_shaper/   r0   
dtype_biasr1   r2   r3   tensor_statessave_for_backward)r5   r   r    r6   re   r9   input_shapeCACAtSCASCAtoutlier_colsrs   subAhas_gradis_transposedrI   outliersshapeBoutput_shapeout32r=   
clone_funcs                          r   r>   zSwitchBackBnb.forward   sR   ( =ACLCECECHwwr{aggaj({{1773B<!''!"+#=QWWUVU]U]^^{{1773B<!''"1+#=QWWUVU]U]^^ gg%!4!A!A!CE 77emm#MMCAGG9Lklm qww<1r1772;'224A+,+>+>qttEMM?R^c^m^m+n(CdL??S \%=%%"1c6
Cyq#vY[[]557
	88# %6EH))ehh.>!HHNNE2D !!q&$/t;H ! 11OaggajAHHQK6OMLLN!!(uxx7G!!# ''U]](;<HIIJ!HHNNE2H#E,B,B$EIxx599>>#3 34::<H"UYY^^B%::UBEEGRRTWWXYX_X_`EJ&'Bq%))..""#Q		(()D!{q 'NKNF1IFL'NF1I6L $$R2 <4::6&&uc5994HKKAGGTF&&uc5994HKKAGGTFKK #(8ell444F 	$3477AGGT\T_c_i_i0S[#.s##BQ'(a.CK!%uyy 1C,CK ,C!!$-$'$5$:U[[
&++l344r   c                    | j                   rn| j                  d nt        j                  | j                        }t        j                  | j                        t        j                  | j
                        d |d fS | j                  \  }}}}}| j                  \  }}}	| j                  \  }
}| j                  }d x}x}}|r|j                  d| j                        }t        |j                        dk(  r-|j                  d|j                  d         j                         }t!        j"                  |j%                  t        j&                              \  }}}}}|r$t        j(                  |j+                         |	      }|r|j,                  |j,                  j%                  | j.                  d      j1                  |j2                  j5                  d      j7                  d            }t        j(                  ||      j9                  | j:                        j%                  | j.                        }nt=        d	      ||d |d fS )
Nr   )r   r@   r   T)copyr   g@ ?z7State must contain either CBt or CB matrix for backward)r   re   r!   rA   r   r    r2   r3   r   r9   sumr   rC   r   reshaperE   r#   rp   r&   rl   r*   rB   ru   r/   mul_r|   	unsqueezer
   ro   r   	Exception)r5   rF   	bias_gradrG   rH   rI   req_gradBiasr   r   r   r   rs   r9   rJ   rK   	grad_biasCgradCgradtSCgradSCgradtr   ru   s                         r   rO   zSwitchBackBnb.backward/  s   << # 0e6F6Fsxx6PI##CEE*E,<,<SUU,CT9VZZZ363G3G0	9aq{{T1%%	c		&***)#@I {  !Q&%--b+2C2CB2GHSSUK787J7J;>>Z_ZgZgKh7i4vvw \\+--/15Fxx#XX[[4[8==eii>Q>QRS>T>X>XYd>efk26;;CNNKNNs{{[ YZZvtY44r   rf   )rS   rT   rU   rV   r   r   r>   rO   rW   r   r   r_   r_      s8    r5x7N r5 r5h !5 !5r   r_   c                 (   | j                   d   }|j                   d   |k(  r|j                   d   n|j                   d   }g d}d\  }}t        |      D ]  \  }}|||dz      kD  s|} n t        |      D ]  \  }}|||dz      kD  s|} ||fS  ||fS )Nr   r   r   )i   i   rQ   i         @   r   )rQ   rQ   )r   	enumerate)	input_matrixweight_matrixinput_featuresoutput_featuresarrayr-   r.   iks	            r   get_block_sizesr   T  s    !''+N0=0C0CA0F.0Xm))!,^k^q^qrs^tO4EIC%  1E!a%L(C %  1U1q5\)D9
 9r   r   r    r+   r,   r6   r-   r.   c           	      j    |dk(  s|dk(  rt        | |      \  }}t        j                  | ||||||      S Nr   )r   rZ   applyr   r    r+   r,   r6   r-   r.   s          r   matmul_fp8_globalr   e  s>     byDBJ#Aq)	T  AsGWc4HHr   c           	      j    |dk(  s|dk(  rt        | |      \  }}t        j                  | ||||||      S r   )r   r   r   r   s          r   matmul_fp8_mixedr   s  s>     byDBJ#Aq)	T1c7GS$GGr   r9   c                 h    |xs
 t               }|dkD  r||_        t        j                  | ||||      S )Nrb   )r   ra   r_   r   )r   r    r6   r9   ra   re   s         r   switchback_bnbr     s7     $]_E3#q!S$66r   )Nr   r   )NNrb   N)	functoolsr   r	   typingr   rm   r!    bitsandbytes.autograd._functionsr   r   bitsandbytes.functional
functionalr#   r   autogradFunctionr   rZ   r_   r   Tensorintr   r   r   rW   r   r   <module>r      s        O #-Q<U^^,, Q<hQ<enn-- Q<hX5ENN++ X5v, #'I||I||I \\I \\	I
 
%,,	I 
I I& #'H||H||H \\H \\	H
 
%,,	H 
H H" #'%)	7||7||7 
%,,	7 M"	7r   