
    biM                        d dl Z d dlZd dlmZ d dlZd dlZd dlmZ ddlm	Z	m
Z
  e	       rd dlZd dlmZ d dlmZmZ  ej                   dd      j#                         d	v xr< ej$                  j'                         xr  ej$                  j)                         d    d
k\  Zer e
       rd dlmZ  ed      ZndZej2                  j4                  ej2                  j6                  ej2                  j8                  hZej2                  j<                  ej2                  j>                  ej2                  j@                  ej2                  jB                  ej2                  jD                  ej2                  jF                  hZ$ej2                  jJ                  ej2                  jL                  ej2                  jN                  ej2                  jP                  ej2                  jR                  hZ*ej2                  jV                  ej2                  jX                  ej2                  jZ                  ej2                  j\                  ej2                  j^                  ej2                  j`                  ej2                  jb                  ej2                  jd                  ej2                  jf                  h	Z4e$e*z  e4z  Z5e$e*z  e4z  Z6e$e*z  Z7dejp                  dejp                  de9dejp                  fdZ:d Z;dg fdZ<g fdZ=dZ>dZ?d Z@d ZAd ZBd,dZCd,dZDd,dZEd,dZFd,dZGd,d ZHd,d!ZId,d"ZJd,d#ZKd,d$ZLd,d%ZMej                  ZNej2                  j8                  eMej2                  jD                  eCej2                  jB                  eDej2                  j@                  eEej2                  j>                  eFej2                  j<                  eGej2                  jR                  eHej2                  jP                  eIej2                  jN                  eJej2                  jL                  eKej2                  jJ                  eLiZO ePeOj                               ZRd& ZSd' ZT G d( d)ej                  j                        ZV G d* d+ej                        ZXy)-    N)nullcontext   )is_accelerate_availableis_kernels_available)init_empty_weights)add_hook_to_moduleremove_hook_from_moduleDIFFUSERS_GGUF_CUDA_KERNELSfalse)1trueyes   )
get_kernelzIsotr0py/ggmlxqweightqweight_typereturnc                    |t         v r| |j                  z  S |t        v rt        j                  |   \  }}|j
                  d   |j
                  d   |z  |z  f}t        j                  ||g| }| |j                  | j                        j                  z  }|j                         S t        j                  |      }t        d|       )Nr      z$Unsupported GGUF quantization type: )UNQUANTIZED_TYPESTDEQUANT_TYPESggufGGML_QUANT_SIZESshapeopsggml_dequantizetodtypeGGMLQuantizationTypeNotImplementedError	as_tensor)r   r   r   
block_size	type_sizer   weightys           Z/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/diffusers/quantizers/gguf/utils.py_fused_mul_mat_ggufr)   O   s    ((799} }$ $ 5 5l C
Iq!7==#3y#@:#MN$$WlCUC		!''"$$$ ;;= 00>!$H"WXX    c                 $   t        t        j                  | j                  j                        }| j
                  }i }t        j                  |j                        }|j                         D ]  }||j                  v s||   ||<     |di |}|S )a  
    Creates a new hook based on the old hook. Use it only if you know what you are doing ! This method is a copy of:
    https://github.com/huggingface/peft/blob/748f7968f3a31ec06a1c2b0328993319ad9a150a/src/peft/utils/other.py#L245 with
    some changes
     )getattr
acceleratehooks	__class____name____dict__inspect	signature__init__keys
parameters)old_hookold_hook_clsold_hook_attrfiltered_old_hook_attrold_hook_init_signatureknew_hooks          r(   _create_accelerate_new_hookr?   m   s     :++X-?-?-H-HIL%%M%//0E0EF! 9'222(5a(8"1%9 545HOr*    c                 H   d }t        | j                               }|sy | j                         D ]  \  }}||z   dz   }	t        ||||	|       t	        |t
        j                        s8 |||	      sB||vsGt               rt        nt        }
 |
       5  t        |j                  |j                  |j                  d u|      | j                  |<   d d d        t        |      | j                  |   _        | j                  |   j#                  d        | S # 1 sw Y   HxY w)Nc                 >    |dz   }|| v xr t        | |   t              S )Nr&   )
isinstanceGGUFParameter)
state_dictprefix
weight_keys      r(   _should_convert_to_ggufz:_replace_with_gguf_linear.<locals>._should_convert_to_gguf   s(    h&
Z']Jz*7M},]]r*   .)compute_dtypeF)listchildrennamed_children_replace_with_gguf_linearrC   nnLinearr   r   r   
GGUFLinearin_featuresout_featuresbias_modulestype
source_clsrequires_grad_)modelrJ   rE   rF   modules_to_not_convertrH   has_childrennamemodulemodule_prefixctxs              r(   rN   rN   ~   s   ^ ()L,,. 7f+!&-]Tjk vryy)'
MB22(?(A${C '1&&''KKt+"/	(t$ /36lENN4 +NN4 //6'7* L s   =DD!	c                    | j                         D ]W  \  }}t        |t              r||vr|j                  j                  }t        |dd       }t               rt        nt        } |       5  t        j                  |j                  |j                  |j                  d u|      }d d d        t        j                  t        |j                              _        |||_        t!        |d      r.|j"                  }t%        |      }	t'        |       t)        ||	       |j+                  |       || j,                  |<   t/        |j1                               }
|
sLt3        ||       Z | S # 1 sw Y   xY w)NrT   )device_hf_hook)rM   rC   rQ   r&   ra   r-   r   r   r   rO   rP   rR   rS   rT   	Parameterdequantize_gguf_tensorhasattrrb   r?   r	   r   r   rU   rK   rL   #_dequantize_gguf_and_restore_linear)rY   rZ   r\   r]   ra   rT   r_   
new_moduler8   r>   r[   s              r(   rf   rf      sA   ,,. Pffj)d:P.P]]))F6640D(?(A${C YY&&''KKt+!	
 !#-CFMM-R SJ"&
 vz*!??6x@'/":x8MM&!#-ENN4 FOO-./8NO=P@ L5 s   .:E..E7	      c                     | j                  t        j                        j                  t        j                        } | d d df   | d d df   dz  z  | d d df   dz  z  | d d df   dz  z  j                  d      S )Nr   r            r      )viewtorchuint8r   int32	unsqueeze)r   s    r(   	to_uint32rt      so    	u{{u{{+AadGa1gl"Qq!tW]2Qq!tW]BMMaPPr*   c                     | j                   d   }t        |      |t        |      z
  gz   }t        j                  | |d      S )Nr   dim)r   rK   sumrp   split)blocksargsn_maxdimss       r(   split_block_dimsr~      s<    LLOE:T*++D;;vt++r*   c                    | j                   d   }| j                  t        j                        } | j	                  |ddf      } t        j
                  | | j                   d   dz  d      \  }}}t        j                  |dz  |dz  |dz	  d	z  z  gd
      }t        j                  |dz  |dz	  |dz	  d	z  z  gd
      }|j	                  |df      |j	                  |df      fS )Nr   r      rv   ?      rl   0   rk   )r   ro   rp   rq   reshapery   cat)scalesn_blocksdmm_dscmins          r(   get_scale_minr      s    ||AH[[%F^^Xq!,-FFFLL$4$9rBIAq#	AHsTzqAvo>?R	HB
))QXqa1f_=>B
GCJJ!}%s{{Ha='ABBr*   c                     t        | d      \  }}|j                  t        j                        j	                  |      }|j                  t        j
                        }||z  S )Nrl   )r~   ro   rp   float16r   int8)rz   r$   r%   r    r   r   s         r(   dequantize_blocks_Q8_0r      sJ    FA&DAq	u}}  'A	uzzAq5Lr*   c                    | j                   d   }t        | ddd      \  }}}}|j                  t        j                        j                  |      }|j                  t        j                        j                  |      }t        |      }|j                  |df      t        j                  d|j                  t        j                        j                  dd      z	  }|j                  |dd|dz  f      t        j                  ddg|j                  t        j                        j                  dddd      z	  }	|dz  j                  t        j                        }|	dz  j                  |df      }	|	|dz  z  }||z  |z   S )	Nr   rl   r   r       ra   r    r   r   )r   r~   ro   rp   r   r   rt   r   arangera   rr   tensorrq   )
rz   r$   r%   r    r   r   r   qhqsqls
             r(   dequantize_blocks_Q5_1r      s>   ||AH#FAq!4LAq"b	u}}  'A	u}}  'A	2B	XqM	"ell2ahhekk&Z&b&bcdfh&i	iB	Xr1jAo6	75<<	
Aqxxu{{<gaAq
B q&U[[	!B
t)		h^	,B	rQwBFa<r*   c                    | j                   d   }t        | dd      \  }}}|j                  t        j                        j                  |      }t        |      }|j                  |d      t        j                  d|j                  t        j                        j                  dd      z	  }|j                  |dd|dz        t        j                  ddg|j                  t        j                        j                  dddd      z	  }|dz  j                  t        j                        }|dz  j                  |d      }||dz  z  j                  t        j                        d	z
  }||z  S )
Nr   rl   r   r   r   r   r   r   rm   )r   r~   ro   rp   r   r   rt   r   r   ra   rr   r   rq   r   )	rz   r$   r%   r    r   r   r   r   r   s	            r(   dequantize_blocks_Q5_0r      s%   ||AH A.IAr2	u}}  'A	2B	Ha	 ELLAHHEKK$X$`$`abdf$g	gB	Hb!Z1_	5	
Aqxxu{{:gaAq
B q&U[[	!B
t)		Xr	*B
a.		UZZ	(2	-Br6Mr*   c                    | j                   d   }t        | dd      \  }}}|j                  t        j                        j                  |      }|j                  t        j                        j                  |      }|j                  |dd|dz  f      t        j                  ddg|j                  t        j                        j                  dddd      z	  }|dz  j                  |d      }||z  |z   S )Nr   rl   r   r   r   r   r   )
r   r~   ro   rp   r   r   r   r   ra   rq   )rz   r$   r%   r    r   r   r   r   s           r(   dequantize_blocks_Q4_1r     s    ||AH1-HAq"	u}}  'A	u}}  'A	Xr1jAo6	75<<	
Aqxxu{{<gaAq
B t)		Xr	*BFa<r*   c                    | j                   d   }t        | d      \  }}|j                  t        j                        j                  |      }|j                  |dd|dz  f      t        j                  ddg|j                  t        j                        j                  d      z	  }|dz  j                  |df      j                  t        j                        d	z
  }||z  S )
Nr   rl   r   r   r   r   r   r   rl   r   r   rk   )r   r~   ro   rp   r   r   r   r   ra   rq   r   )rz   r$   r%   r    r   r   r   s          r(   dequantize_blocks_Q4_0r      s    ||AHVQ'EAr	u}}  'A	Xr1jAo6	75<<	
Aqxxu{{<gl
B t)		h^	,	/	/

	;a	?Br6Mr*   c                    | j                   d   }t        | t        dz  t        dz  t        dz        \  }}}}|j                  t        j
                        j                  |      }|j                  t        j                        j                  |      }||z  j                  |t        dz  df      }|j                  |dddf      t	        j                  ddg|j                  t        j                        j                  d	      z	  }|d
z  j                  |ddf      }|j                  |dddf      t	        j                  g d|j                  t        j                        j                  d      z	  }|dz  j                  |ddf      }||dz  z  j                  t        j
                        dz
  }	|	j                  |t        dz  df      }	||	z  j                  |t        f      S )Nr   rl   r   rm   r   r   @   r   r   r   r   r   rl   r      r   r   r   r   r   )r   r~   QK_Kro   rp   r   r   r   r   r   ra   rq   )
rz   r$   r%   r    r   r   r   r   r   qs
             r(   dequantize_blocks_Q6_Kr   -  s   ||AH 	DAItrzB

	 [[$''.F	u}}  'A	
Vh
A67A	Xr1b)	*ellAq6!((Z_ZeZe.f.n.n/ 
B t)		hB/	0B	Xr1b)	*ell<PQPXPX`e`k`k.l.t.t/ 
B t)		hB/	0B	rQwEJJ'",A			8TRZ,-AE??Hd+,,r*   c                    | j                   d   }t        | ddt        t        dz        \  }}}}}	|j	                  t
        j                        j                  |      }|j	                  t
        j                        j                  |      }t        |      \  }
}||
z  j                  |ddf      }||z  j                  |ddf      }|	j                  |dddf      t        j                  ddg|j                  t
        j                        j                  d	      z	  }|j                  |dddf      t        j                  dd|j                  t
        j                        j                  d
      z	  }|dz  j                  |ddf      }|dz  j                  |ddf      }||dz  z  }||z  |z
  j                  |t        f      S )Nr   rl   rk   r   r   r   r   r   r   r   r   rk   r   r   )r   r~   K_SCALE_SIZEr   ro   rp   r   r   r   r   r   ra   rq   r   )rz   r$   r%   r    r   r   dminr   r   r   r   r   dmr   r   s                  r(   dequantize_blocks_Q5_Kr   I  s   ||AH.vq!\4ST9UAtVR	u}}  'A99U]]#&&u-D&!EB	
R(B*+A
(		Xr1-	.B	Xr1b)	*ellAq6!((Z_ZeZe.f.n.n/ 
B 
Xr1b)	*ell1aX]XcXc.d.l.l/ 
B t)		hB/	0B
t)		hB/	0B
bAgAEBJ4 011r*   c                    | j                   d   }t        | ddt              \  }}}}|j                  t        j
                        j                  |      }|j                  t        j
                        j                  |      }t        |      \  }	}
||	z  j                  |ddf      }||
z  j                  |ddf      }|j                  |dddf      t	        j                  ddg|j                  t        j                        j                  d      z	  }|d	z  j                  |ddf      }||z  |z
  j                  |t        f      S )
Nr   rl   r   r   r   r   r   r   r   )r   r~   r   ro   rp   r   r   r   r   r   ra   rq   r   )rz   r$   r%   r    r   r   r   r   r   r   r   r   s               r(   dequantize_blocks_Q4_Kr   c  s)   ||AH*61aFAtVR	u}}  'A99U]]#&&u-D&!EB	
R(B*+A
(		Xr1-	.B	Xr1b)	*ellAq6!((Z_ZeZe.f.n.n/ 
B t)		hB/	0BFRK  (D!122r*   c                    | j                   d   }t        | t        dz  t        dz  d      \  }}}}|j                  t        j
                        j                  |      }|d d d df   |d d dd f   }
}	|	j                  |ddf      t	        j                  ddg|j                  t        j                        j                  d      z	  }	|	j                  |df      }	|
j                  |ddf      t	        j                  g d	|j                  t        j                        j                  d
      z	  }
|
j                  |df      }
|	dz  |
dz  dz  z  }|j                  t        j                        dz
  }||z  j                  |ddf      }|j                  |dddf      t	        j                  g d	|j                  t        j                        j                  d      z	  }|j                  |ddd      t	        j                  dd|j                  t        j                        j                  d      z	  }|j                  |dt        dz  f      dz  }|j                  |dt        dz  f      dz  dz  }|j                  t        j                        |dz  j                  t        j                        z
  }||z  j                  |t        f      S )Nr   rk   r   ri   r   r   )r   rl   r   rm   r   )r   r   r   r   r   r   r   r   r   rl   )r   r~   r   ro   rp   r   r   r   r   ra   rq   r   r   )rz   r$   r%   r    r   hmaskr   r   r   lscaleshscalesdlr   r   r   s                  r(   dequantize_blocks_Q3_Kr   w  sr   ||AH+FDAItqy"ME2vq	u}}  'Aa!e}fQUmWGooxA./5<<Aqxx_d_j_j3k3s3s4 G ooxn-GooxA./5<<QXXU[[4giG ooxn-Gn'D.Q!67FYYuzz"R'F
f*		xQ/	0B	Xr1b)	*ell<PQPXPX`e`k`k.l.t.t/ 
B 
xQ	+u||AqY^YdYd/e/m/m0 
B 
Xr42:.	/!	3B
**hDBJ/
01
4	9B
ejjR1WLL44AFXt,--r*   c                    | j                   d   }t        | t        dz  t        dz  d      \  }}}}|j                  t        j
                        j                  |      }|j                  t        j
                        j                  |      }||dz  z  j                  |t        dz  df      }	||dz	  z  j                  |t        dz  df      }
t	        j                  g d|j                  t        j                        j                  d	      }|j                  |d
ddf      |z	  dz  }|j                  |t        dz  df      }|	|z  |
z
  }|j                  |d
f      S )Nr   rm   r   rl   r   r   r   r   r   r   r   r   )r   r~   r   ro   rp   r   r   r   r   ra   rq   )rz   r$   r%   r    r   r   r   r   r   r   mlshifts               r(   dequantize_blocks_Q2_Kr     s4   ||AH*642:tqy!LFB4	u}}  'A99U]]#&&u-D v|
	%	%xQ&?	@B
&A+
	'	'42:q(A	BBLLahhekkJRRS_`E
**hAr*
+u
4	9B	Xtrz2.	/B	b2B::xn%%r*   c                     | j                  t        j                        j                  t        j                        dz  j                  t        j
                        S )Nrm   )ro   rp   int16r   rr   float32)rz   r$   r%   r    s       r(   dequantize_blocks_BF16r     s8    KK$''4:@@OOr*   c                 (    g | d d | d   |z  |z  S )Nr   r,   )r   r%   r$   s      r(   _quant_shape_from_byte_shaper     s&    =U3BZ=ri/*<==r*   c                 n   t        | d      s| S | j                  }t        |   }t        |   \  }}| j	                  t
        j                        } t        | j                  ||      }| j                         |z  }| j                  ||f      } ||||      }|j                  |      }|j                         S )N
quant_type)re   r   dequantize_functionsr   ro   rp   rq   r   r   numelr   r#   )	r   r   
dequant_fnr$   r%   r   r   rz   dequants	            r(   rd   rd     s    6<(""J%j1J,Z8J	[[%F(y*ME||~*H^^Xy12FY7Gooe$Gr*   c                   H     e Zd ZddZd Zed        Zed fd	       Z xZ	S )rD   c                     ||nt        j                  d      }t         j                  j                  | ||      }||_        t
        |   \  }}t        |j                  ||      |_        |S Nr   )	rp   emptyTensor_make_subclassr   r   r   r   quant_shape)clsdatarequires_gradr   selfr$   r%   s          r(   __new__zGGUFParameter.__new__  s^    'tU[[^||**3mD$ 0 <
I7

IzZr*   c                 t    t         j                  j                  t         j                  | | j                        S N)rp   r   r   r   )r   s    r(   r#   zGGUFParameter.as_tensor  s%    ||**5<<t?Q?QRRr*   c                     | D ]T  }t        |t              r$t        |d   t              r|d   j                  c S t        |t              sH|j                  c S  y r   )rC   rK   rD   r   )r{   args     r(   _extract_quant_typez!GGUFParameter._extract_quant_type  sS    
  	&C#t$CFM)J1v(((#}-~~%		&
 r*   c                    |i }t         	|   ||||      }t        |t        j                        r| j                  |      } | ||      S t        |      t        t        fv rU| j                  |      }|D cg c](  }t        |t        j                        r
 | ||      n|* }} t        |      |      S |S c c}w )N)r   )	super__torch_function__rC   rp   r   r   rV   rK   tuple)
r   functypesr{   kwargsresultr   r   wrappedr0   s
            r(   r   z GGUFParameter.__torch_function__  s    >F+D%vFfell+006Jv*55&\dE]*006Jdjk_`
1ell8Ss14YZZkGk4<((M ls   9-B;)FN)r,   N)
r1   
__module____qualname__r   r#   staticmethodr   classmethodr   __classcell__r0   s   @r(   rD   rD     s6    S 	 	  r*   rD   c                        e Zd Z	 	 	 d	 d fdZdej
                  fdZdej
                  fdZdej
                  fdZ xZ	S )rQ   c                 F    t         |   ||||       || _        || _        y r   )r   r5   rJ   ra   )r   rR   rS   rT   rJ   ra   r0   s         r(   r5   zGGUFLinear.__init__  s'     	lD&A*r*   inputsc                     t         3| j                  j                  r|j                  r| j                  |      S | j	                  |      S r   )r   r&   is_cudaforward_cudaforward_native)r   r   s     r(   forwardzGGUFLinear.forward  s;    ?t{{22v~~$$V,,""6**r*   c                 "   t        | j                        }|j                  | j                        }| j                  %| j                  j                  | j                        nd }t
        j                  j                  j                  |||      }|S r   )	rd   r&   r   rJ   rT   rp   rO   
functionallinear)r   r   r&   rT   outputs        r(   r   zGGUFLinear.forward_native  sj    '44--.37993Htyy||D../d$$++FFDAr*   c                     | j                   j                  }t        |j                  | j                        | j                   |      }| j
                  (|| j
                  j                  | j                        z  }|S r   )r&   r   r)   r   rJ   rT   )r   r   r   r   s       r(   r   zGGUFLinear.forward_cuda!  s_    [[++
$VYYt/A/A%BDKKQ[\99 diill4#5#566Fr*   )FNN)r   N)
r1   r   r   r5   rp   r   r   r   r   r   r   s   @r(   rQ   rQ     sM    
 
 

+ell +
U\\ 5<< r*   rQ   r   )Yr3   os
contextlibr   r   rp   torch.nnrO   utilsr   r   r.   r   accelerate.hooksr   r	   getenvlowercudais_availableget_device_capabilitycan_use_cuda_kernelskernelsr   r   r!   F32F16BF16r   Q4_0Q4_1Q5_0Q5_1Q8_0Q8_1STANDARD_QUANT_TYPESQ2_KQ3_KQ4_KQ5_KQ6_KKQUANT_TYPESIQ1_MIQ1_SIQ2_XXSIQ2_XSIQ2_SIQ3_XXSIQ3_SIQ4_XSIQ4_NLIMATRIX_QUANT_TYPESr   MMVQ_QUANT_TYPESMMQ_QUANT_TYPESr   intr)   r?   rN   rf   r   r   rt   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rK   r6   SUPPORTED_GGUF_QUANT_TYPESr   rd   rc   rD   rP   rQ   r,   r*   r(   <module>r!     s    	 "    B -L BII+W5;;=AUU 3

!3

((*1-2 
 02"
_
%C
C..22D4M4M4Q4QSWSlSlSqSqr """"""""""""  	"""""""""" 	####%%$$##%%##$$$$
  %|36II',69LL &55<< %,, c V[VbVb <" HJbd B GI !R Q
,
C&&
-8243(.B&(P (( ""$:""$:""$:""$:""$:""$:""$:""$:""$:""$:""$:  ""6";";"=> >**EHH&& *Z r*   