
    biZ&                        d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZmZ d dlmZ d d	lmZ  G d
 dej(                  j*                        Z G d dej(                  j*                        Z G d dej(                  j*                        Z G d dej2                        Z eed      Z eedd      Z eed      Z G d dej(                  j*                        Z G d dej2                        Zy)    )partialN)dequantize_rowwise)int8_matmul_mixed_dequantize)int8_matmul_rowwise_dequantize)!quantize_columnwise_and_transpose)quantize_globalquantize_global_transpose)quantize_rowwise)is_triton_availablec                   ,    e Zd Zed        Zed        Zy)_switchback_globalc                    |j                  d|j                  d            }t        |      \  }}t        |      \  }}||f| _         t        ||j                         |||      j                   g |j                         d d d S Nviewsizer
   r   save_for_backwardr   t	ctxX_3DWbiasXX_int8state_XW_int8state_Ws	            _/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/bitsandbytes/nn/triton_based_modules.pyforwardz_switchback_global.forward   s     IIb$))B-( +1-)!, !"1 ]+FFHHJRVW\\s^b^g^g^ijmkm^nsprss    c                 (   |j                  d|j                  d            }d x}x}}| j                  \  }}| j                  d   r\t	        |      \  }}	t        |      \  }
} t        ||
j                         |	|d       j                  g |j                         d d d }| j                  d   r=t        j                  |j                         |j                  |j                              }| j                  d   r|j                  d      }|||fS Nr   r         dim)reshaper   r   needs_input_gradr
   r	   r   r   r   torchmatmultodtypesum)r   G_3DGgrad_Xgrad_W	grad_biasr   r   G_int8state_Gr   r   s               r    backwardz_switchback_global.backward)   s    LLTYYr]+&***)$$1" /q1OFG7:OFGb1&&((*gwX\]bb Sb!F "\\!##%agg7F"!Ivy((r"   N__name__
__module____qualname__staticmethodr!   r7    r"   r    r   r      s*    t t ) )r"   r   c                   ,    e Zd Zed        Zed        Zy)_switchback_vectorrizec                    |j                  d|j                  d            }||f| _        t        |      \  }}t        |      \  }} t	        ||j                         |||      j                   g |j                         d d d S r   )r   r   r   r
   r   r   r   s	            r    r!   z_switchback_vectorrize.forwardE   s     IIb$))B-( !1 +1-*1- _-ffhhj'7TXY^^u`d`i`i`klomo`purtuur"   c                 (   | j                   \  }}|j                  d|j                  d            }d x}x}}| j                  d   r\t	        |      \  }}	t        |      \  }
} t        ||
j                         |	|d       j                  g |j                         d d d }| j                  d   r=t        j                  |j                         |j                  |j                              }| j                  d   r|j                  d      }|||fS r$   )r   r)   r   r*   r
   r   r   r   r   r+   r,   r-   r.   r/   )r   r0   r   r   r1   r2   r3   r4   r5   r6   r   r   s               r    r7   z_switchback_vectorrize.backwardT   s   $$1LLTYYr]+&***)" /q1OFG?BOFGd3FFHHJQXZ^_dd Sb!F "\\!##%agg7F"!Ivy((r"   Nr8   r=   r"   r    r?   r?   D   s*    v v ) )r"   r?   c                   ,    e Zd Zed        Zed        Zy) _switchback_global_mem_efficientc                    |j                  d|j                  d            }|j                         }t        |      \  }}~t        |      \  }}	||||	f| _         t        ||j                         ||	|      j                   g |d d d S r   r   )
r   r   r   r   r   X_3D_szr   r   r   r   s
             r    r!   z(_switchback_global_mem_efficient.forwardo   s     IIb$))B-())+ +1-)!, !' @ ]+FFHHJRVW\\o^efigi^jolnoor"   c                 n   |j                  d|j                  d            }|j                         }d x}x}}| j                  \  }}}	}
| j                  d   rKt	        ||      }~t        j                  |j                         |j                  |j                              }~| j                  d   r|j                  d      }| j                  d   r_t        |      \  }}~|	j                         j                         }	 t        ||	j                         ||
d       j                  g |d d d }|||fS )Nr   r%   r&   r   r'   )r)   r   r   r*   r   r+   r,   r   r-   r.   r/   r
   
contiguousr   r   )r   r0   r1   G_3D_szr2   r3   r4   r   r   r   r   real_Xr5   r6   s                 r    r7   z)_switchback_global_mem_efficient.backward   s$    LLTYYr]+))+&***)+.+@+@("'8F\\!##%177);<F"!I".q1OFGXXZ**,Fb1&&((*gwX\]bbudklomodpurtuFvy((r"   Nr8   r=   r"   r    rC   rC   n   s*    p p" ) )r"   rC   c                   L     e Zd Z	 	 	 	 	 d	dededededef
 fdZd Zd Z xZS )
SwitchBackLinearin_featuresout_featuresr   vector_wise_quantizationmem_efficientc                     t         |   |||||       t               st        d      || _        | j                  r%t
        | _        |rt        d       t        d       y y |rt        | _        y t        | _        y )NzCould not import triton. Please install triton to use SwitchBackLinear.
                               Alternatively, you can use bnb.nn.SwitchBackLinearBnb, but it will be slowerz<mem efficient is not supported for vector-wise quantization.r%   )super__init__r   ImportErrorrN   r?   _fnprintexitrC   r   )	selfrL   rM   r   devicer.   rN   rO   	__class__s	           r    rR   zSwitchBackLinear.__init__   s{     	lD&%H"$ o p p )A%((-DHTUQ  ;-r"   c                     t        d       | j                  rt        | j                        \  }}nt	        | j                        \  }}| j                  d|       | j                  d|       | `y )Nz=> preparing for eval.r   r   )rU   rN   r
   weightr   register_buffer)rW   r   r   s      r    prepare_for_evalz!SwitchBackLinear.prepare_for_eval   s_     	&'((.t{{;OFG-dkk:OFGXv.Y0Kr"   c                    | j                   r1| j                  j                  || j                  | j                        S t        | d      s1| j                  j                  || j                  | j                        S |j                  d|j                  d            }t        |      \  }}| j                  r^ t        || j                  j                         || j                  | j                        j                  g |j                         d d d S  t        || j                  j                         || j                  | j                        j                  g |j                         d d d S )Nr   r   )trainingrT   applyr[   r   hasattrr   r   r
   rN   r   r   r   r   r   )rW   xr   r   r   s        r    r!   zSwitchBackLinear.forward   s5   ==88>>!T[[$))<< 4*xx~~adii@@ r166":&A.q1OFG,,u5fdkkmmowX\XdXdfjfofopuu VVXcr] 
 t3FDKKMMOWVZVbVbdhdmdmnss VVXcr] r"   )TNNFF)	r9   r:   r;   intboolrR   r]   r!   __classcell__)rY   s   @r    rK   rK      sS    
 ).#.. . 	. #'. .:&r"   rK   F)rN   T)rN   rO   c                   .    e Zd Zedd       Zed        Zy)StandardLinearFunctionNc                 <   |j                  d|j                  d            }| j                  |||       |j                  |j	                               }|#||j                  d      j                  |      z  } |j                   g |j                         d d d S )Nr   r   )r   r   r   r,   r   	unsqueeze	expand_as)r   inputr[   r   r   outputs         r    r!   zStandardLinearFunction.forward   s    JJr5::b>*a.fhhj)dnnQ'11&99Fv{{2EJJL"-2r22r"   c                    | j                   \  }}}|j                  d|j                  d            }d x}x}}| j                  d   rM |j	                  |j                  |j                              j                  g |j                         d d d }| j                  d   r8|j                         j	                  |j                  |j                              }| | j                  d   r|j                  d      }|||fS )Nr   r   r%   r&   )
saved_tensorsr)   r   r*   r,   r-   r.   r   r   r/   )	r   grad_output_3Drk   r[   r   grad_output
grad_inputgrad_weightr4   s	            r    r7   zStandardLinearFunction.backward   s    !//vt$,,R1D1DR1HI/33
3[9"N++FIIk6G6G,HINNoP^PcPcPefigiPjolnoJ"%--/00+:K:K1LMK 4 4Q 7#*I;	11r"   Nr8   r=   r"   r    rg   rg      s(    3 3 2 2r"   rg   c                       e Zd Zd Zy)StandardLinearc                 X    t         j                  || j                  | j                        S rs   )rg   r`   r[   r   )rW   rb   s     r    r!   zStandardLinear.forward  s    %++At{{DIIFFr"   N)r9   r:   r;   r!   r=   r"   r    ru   ru     s    Gr"   ru   ) 	functoolsr   r+   torch.nnnn&bitsandbytes.triton.dequantize_rowwiser   0bitsandbytes.triton.int8_matmul_mixed_dequantizer   2bitsandbytes.triton.int8_matmul_rowwise_dequantizer   5bitsandbytes.triton.quantize_columnwise_and_transposer   #bitsandbytes.triton.quantize_globalr   r	   $bitsandbytes.triton.quantize_rowwiser
    bitsandbytes.triton.triton_utilsr   autogradFunctionr   r?   rC   LinearrK   SwitchBackLinearGlobal"SwitchBackLinearGlobalMemEfficientSwitchBackLinearVectorwiserg   ru   r=   r"   r    <module>r      s       E B @))00 ))X')U^^44 ')T))u~~'>'> ))XFryy FR !!1ER %,-=X]mq%r "$%5PTU 2U^^44 2:GRYY Gr"   