
    bi                         d dl mZmZmZmZmZ d dlmZ ddlm	Z	m
Z
mZmZmZmZ ddlmZ erddlmZ  e       rd dlZ e
       rd d	lmZmZ  e       rd
dlmZ  ej0                  e      Z G d de      Zy)    )TYPE_CHECKINGAnyDictListUnion)is_optimum_quanto_version   )get_module_from_nameis_accelerate_availableis_accelerate_versionis_optimum_quanto_availableis_torch_availablelogging   )DiffusersQuantizer)
ModelMixinN)CustomDtypeset_module_tensor_to_device   )_replace_with_quanto_layersc            
       2    e Zd ZdZdZdZddgZ fdZd Zdd	d
dde	de
e	ef   fdZdd	d
dde	ddfdZde
e	eee	f   f   de
e	eee	f   f   fdZd dZd!d"dZdee	   de	dee	   fdZg fdd	dee	   fdZd Zed        Zed        Zedefd       Z xZS )#QuantoQuantizerz0
    Diffusers Quantizer for Optimum Quanto
    TFquanto
acceleratec                 &    t        |   |fi | y N)super__init__)selfquantization_configkwargs	__class__s      g/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/diffusers/quantizers/quanto/quanto_quantizer.pyr   zQuantoQuantizer.__init__)   s    ,77    c                    t               st        d      t        dd      st        d      t               st        d      |j	                  dd       }t        |t              r(t        |j                               dkD  rt        d      y y )	NzhLoading an optimum-quanto quantized model requires optimum-quanto library (`pip install optimum-quanto`)>=z0.2.6zLoading an optimum-quanto quantized model requires `optimum-quanto>=0.2.6`. Please upgrade your installation with `pip install --upgrade optimum-quantoz`Loading an optimum-quanto quantized model requires accelerate library (`pip install accelerate`)
device_mapr   zy`device_map` for multi-GPU inference or CPU/disk offload is currently not supported with Diffusers and the Quanto backend)
r   ImportErrorr   r   get
isinstancedictlenkeys
ValueError)r   argsr!   r'   s       r#   validate_environmentz$QuantoQuantizer.validate_environment,   s    *,z  )w7^ 
 '(r  ZZd3
j$'C
0A,BQ,F L  -G'r$   modelr   param_valueztorch.Tensor
param_name
state_dictc                    
 ddl m}m} ddlm} t        ||      \  
}	| j                  rt        
fd||fD              ryt        
|      rd|	v r
j                   S y)Nr   )QModuleMixinQTensor)PackedTensorc              3   6   K   | ]  }t        |        y wr   )r*   ).0tmodules     r#   	<genexpr>z;QuantoQuantizer.check_if_quantized_param.<locals>.<genexpr>O   s     %]j&;%]s   TweightF)
optimum.quantor6   r7   optimum.quanto.tensor.packedr8   r
   pre_quantizedanyr*   frozen)r   r1   r2   r3   r4   r!   r6   r7   r8   tensor_namer<   s             @r#   check_if_quantized_paramz(QuantoQuantizer.check_if_quantized_paramB   s[     	9=25*E#%]g|E\%]"]-(k2I}}$$r$   target_deviceztorch.devicec                     |j                  dt        j                        }t        ||      \  }}	| j                  rt        ||	|       yt        |||||       |j                          d|j                  _	        y)ze
        Create the quantized parameter by calling .freeze() after setting it to the module.
        dtypeFN)
r)   torchfloat32r
   rA   setattrr   freezer>   requires_grad)
r   r1   r2   r3   rF   r/   r!   rH   r<   rD   s
             r#   create_quantized_paramz&QuantoQuantizer.create_quantized_paramV   sd     

7EMM225*EFK5'z=+W\]MMO*/FMM'r$   
max_memoryreturnc                 ^    |j                         D ci c]  \  }}||dz   }}}|S c c}}w )Ng?)items)r   rO   keyvals       r#   adjust_max_memoryz!QuantoQuantizer.adjust_max_memoryl   s6    6@6F6F6HI(#sc3:oI
I Js   )c                     t        dd      rXt        j                  t        j                  t        j
                  t        j                  d}|| j                  j                     }|S )Nr&   z0.27.0)int8float8int4int2)	r   rI   rW   r   FP8INT4INT2r    weights_dtype)r   target_dtypemappings      r#   adjust_target_dtypez#QuantoQuantizer.adjust_target_dtypep   sQ     x0

%//#((#((	G #4#;#;#I#IJLr$   c                 T    |%t         j                  d       t        j                  }|S )NzVYou did not specify `torch_dtype` in `from_pretrained`. Setting it to `torch.float32`.)loggerinforI   rJ   )r   torch_dtypes     r#   update_torch_dtypez"QuantoQuantizer.update_torch_dtype|   s$    KKpq--Kr$   missing_keysprefixc                 $   ddl m} g }|j                         D ]\  \  }}t        ||      s|D ]E  }||v s
|| d| v s|j	                  d      r#|j	                  d      r5|j                  |       G ^ |D 	cg c]	  }	|	|vs|	 c}	S c c}	w )Nr   )r6   .z.weightz.bias)r?   r6   named_modulesr*   endswithappend)
r   r1   rg   rh   r6   not_missing_keysnamer<   missingks
             r#   update_missing_keysz#QuantoQuantizer.update_missing_keys   s    /!//1 	9LD&&,/+ 9GDvhay4I,I ' 0 0 ; ' 0 0 9(//89	9 (Ea14D+DEEEs   <	BBkeep_in_fp32_modulesc                 X   | j                   j                  | _        t        | j                  t              s| j                  g| _        | j                  j	                  |       t        || j                  | j                   | j                        }| j                   |j                  _         y )N)modules_to_not_convertr    rA   )r    ru   r*   listextendr   rA   config)r   r1   r'   rs   r!   s        r#   $_process_model_before_weight_loadingz4QuantoQuantizer._process_model_before_weight_loading   s     '+&>&>&U&U#$55t<+/+F+F*GD'##**+?@+#'#>#> $ 8 8,,	
 ,0+C+C(r$   c                     |S r    )r   r1   r!   s      r#   #_process_model_after_weight_loadingz3QuantoQuantizer._process_model_after_weight_loading   s    r$   c                      yNTr{   r   s    r#   is_trainablezQuantoQuantizer.is_trainable       r$   c                      yr~   r{   r   s    r#   is_serializablezQuantoQuantizer.is_serializable   r   r$   c                      yr~   r{   r   s    r#   is_compileablezQuantoQuantizer.is_compileable   r   r$   )r_   torch.dtyperP   r   r   )re   r   rP   r   )__name__
__module____qualname____doc__use_keep_in_fp32_modulesrequires_calibrationrequired_packagesr   r0   strr   r   rE   rN   r   intrU   ra   rf   r   rr   ry   r|   propertyr   r   boolr   __classcell__)r"   s   @r#   r   r       sd     $ !<08, $ 	
 cN(00 $0 	0
 &0,DeCHo1E,F 4PSUZ[^`c[cUdPdKe 
FtCy F# FRVWZR[ F( +-	DD #3i	D,       r$   r   )typingr   r   r   r   r   diffusers.utils.import_utilsr   utilsr
   r   r   r   r   r   baser   models.modeling_utilsr   rI   accelerate.utilsr   r   r   
get_loggerr   rc   r   r{   r$   r#   <module>r      sc    8 8 B  & 3 I 2			H	%U( Ur$   