
"""
Adapted from
https://github.com/huggingface/transformers/blob/52cb4034ada381fe1ffe8d428a1076e5411a8026/src/transformers/quantizers/base.py
"""

from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union

from ..utils import is_torch_available
from .quantization_config import QuantizationConfigMixin


if TYPE_CHECKING:
    from ..models.modeling_utils import ModelMixin

if is_torch_available():
    import torch


class DiffusersQuantizer(ABC):
    """
    Abstract class of the HuggingFace quantizer. Supports for now quantizing HF diffusers models for inference and/or
    quantization. This class is used only for diffusers.models.modeling_utils.ModelMixin.from_pretrained and cannot be
    easily used outside the scope of that method yet.

    Attributes
        quantization_config (`diffusers.quantizers.quantization_config.QuantizationConfigMixin`):
            The quantization config that defines the quantization parameters of your model that you want to quantize.
        modules_to_not_convert (`List[str]`, *optional*):
            The list of module names to not convert when quantizing the model.
        required_packages (`List[str]`, *optional*):
            The list of required pip packages to install prior to using the quantizer
        requires_calibration (`bool`):
            Whether the quantization method requires calibrating the model before using it.
    """

    requires_calibration = False
    required_packages = None

    def __init__(self, quantization_config: QuantizationConfigMixin, **kwargs):
        self.quantization_config = quantization_config

        # Handle extra kwargs that are not part of the quantization config.
        self.modules_to_not_convert = kwargs.pop("modules_to_not_convert", [])
        self.pre_quantized = kwargs.pop("pre_quantized", True)

        if not self.pre_quantized and self.requires_calibration:
            raise ValueError(
                f"The quantization method {quantization_config.quant_method} does require the model to be pre-quantized."
                " You explicitly passed `pre_quantized=False` meaning your model weights are not quantized. Make sure "
                "to pass `pre_quantized=True` while knowing what you are doing."
            )

    def update_torch_dtype(self, torch_dtype: "torch.dtype") -> "torch.dtype":
        """
        Some quantization methods require the dtype of the model to be explicitly set to a target dtype. Override
        this method if you need to make sure that behavior is preserved.

        Args:
            torch_dtype (`torch.dtype`):
                The input dtype that is passed in `from_pretrained`.
        """
        return torch_dtype

    def update_device_map(self, device_map: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
        """
        Override this method if you want to replace the existing device map with a new one. E.g. for bitsandbytes,
        since `accelerate` is a hard requirement, if no device_map is passed, the device_map is set to `"auto"`.

        Args:
            device_map (`Union[dict, str]`, *optional*):
                The device_map that is passed through the `from_pretrained` method.
        """
        return device_map

    def adjust_target_dtype(self, torch_dtype: "torch.dtype") -> "torch.dtype":
        """
        Override this method if you want to adjust the `target_dtype` variable used in `from_pretrained` to compute the
        device_map in case the device_map is a `str`. E.g. for bitsandbytes we force-set `target_dtype` to `torch.int8`
        and for 4-bit we pass a custom enum `accelerate.CustomDtype.int4`.

        Args:
            torch_dtype (`torch.dtype`, *optional*):
                The torch_dtype that is used to compute the device_map.
        """
        return torch_dtype

    def update_missing_keys(self, model, missing_keys: List[str], prefix: str) -> List[str]:
        """
        Override this method if you want to adjust the `missing_keys`.

        Args:
            missing_keys (`List[str]`, *optional*):
                The list of missing keys in the checkpoint compared to the state dict of the model
        """
        return missing_keys

    def get_special_dtypes_update(self, model, torch_dtype: "torch.dtype") -> Dict[str, "torch.dtype"]:
        """
        Returns dtypes for modules that are not quantized - used for the computation of the device_map in case one
        passes a str as a device_map. The method will use the `modules_to_not_convert` that is modified in
        `_process_model_before_weight_loading`. `diffusers` models don't have any `modules_to_not_convert` attributes
        yet but this can change soon in the future.

        Args:
            model (`~diffusers.models.modeling_utils.ModelMixin`):
                The model to quantize
            torch_dtype (`torch.dtype`):
                The dtype passed in `from_pretrained` method.
        """
        # Keep the modules excluded from quantization in the dtype requested by the user.
        return {
            name: torch_dtype
            for name, _ in model.named_parameters()
            if any(m in name for m in self.modules_to_not_convert)
        }

    def adjust_max_memory(self, max_memory: Dict[str, Union[int, str]]) -> Dict[str, Union[int, str]]:
        """Adjust the max_memory argument for infer_auto_device_map() if extra memory is needed for quantization."""
        return max_memory

    def check_if_quantized_param(
        self,
        model: "ModelMixin",
        param_value: "torch.Tensor",
        param_name: str,
        state_dict: Dict[str, Any],
        **kwargs,
    ) -> bool:
        """
        Checks if a loaded state_dict component is part of a quantized param + some validation; only defined for
        quantization methods that require creating new parameters for quantization.
        """
        return False

    def create_quantized_param(self, *args, **kwargs) -> "torch.nn.Parameter":
        """
        Takes needed components from state_dict and creates quantized param.
        """
        return

    def check_quantized_param_shape(self, *args, **kwargs):
        """
        Checks if the quantized param has the expected shape.
        """
        return True

    def validate_environment(self, *args, **kwargs):
        """
        This method is used to check for potential conflicts with arguments that are passed in `from_pretrained`. You
        need to define it for all future quantizers that are integrated with diffusers. If no explicit checks are
        needed, simply return nothing.
        """
        return

    def preprocess_model(self, model: "ModelMixin", **kwargs):
        """
        Setting model attributes and/or converting model before weights loading. At this point the model should be
        initialized on the meta device so you can freely manipulate the skeleton of the model in order to replace
        modules in-place. Make sure to override the abstract method `_process_model_before_weight_loading`.

        Args:
            model (`~diffusers.models.modeling_utils.ModelMixin`):
                The model to quantize
            kwargs (`dict`, *optional*):
                The keyword arguments that are passed along `_process_model_before_weight_loading`.
        """
        model.is_quantized = True
        model.quantization_method = self.quantization_config.quant_method
        return self._process_model_before_weight_loading(model, **kwargs)

    def postprocess_model(self, model: "ModelMixin", **kwargs):
        """
        Post-process the model post weights loading. Make sure to override the abstract method
        `_process_model_after_weight_loading`.

        Args:
            model (`~diffusers.models.modeling_utils.ModelMixin`):
                The model to quantize
            kwargs (`dict`, *optional*):
                The keyword arguments that are passed along `_process_model_after_weight_loading`.
        """
        return self._process_model_after_weight_loading(model, **kwargs)

    def dequantize(self, model):
        """
        Potentially dequantize the model to retrieve the original model, with some loss in accuracy / performance. Note
        not all quantization schemes support this.
        """
        model = self._dequantize(model)

        # Delete quantizer and quantization config
        del model.hf_quantizer

        return model

    def get_cuda_warm_up_factor(self):
        """
        The factor to be used in `caching_allocator_warmup` to get the number of bytes to pre-allocate to warm up cuda.
        A factor of 2 means we allocate all bytes in the empty model (since we allocate in fp16), a factor of 4 means
        we allocate half the memory of the weights residing in the empty model, etc...
        """
        return 2

    def _dequantize(self, model):
        raise NotImplementedError(
            f"{self.quantization_config.quant_method} has no implementation of `dequantize`, please raise an issue on GitHub."
        )

    @abstractmethod
    def _process_model_before_weight_loading(self, model, **kwargs): ...

    @abstractmethod
    def _process_model_after_weight_loading(self, model, **kwargs): ...

    @property
    @abstractmethod
    def is_serializable(self): ...

    @property
    @abstractmethod
    def is_trainable(self): ...

    @property
    def is_compileable(self) -> bool:
        """Flag indicating whether the quantized model can be compiled"""
        return False