
    bi                         d dl mZmZmZmZmZmZ ddlmZ erddl	m
Z
 ddlmZmZmZmZmZmZmZ  e       r e       rd dlZdd	lmZmZmZmZmZ  ej2                  e      Z G d
 de      Zy)    )TYPE_CHECKINGAnyDictListOptionalUnion   )DiffusersQuantizer   )
ModelMixin)get_module_from_nameis_accelerate_availableis_accelerate_versionis_gguf_availableis_gguf_versionis_torch_availableloggingN   )GGML_QUANT_SIZESGGUFParameter#_dequantize_gguf_and_restore_linear_quant_shape_from_byte_shape_replace_with_gguf_linearc                   N    e Zd ZdZ fdZd Zdeeee	ef   f   deeee	ef   f   fdZ
ddZddZd	 Zd
dded   dedeeef   def
dZ	 	 dd
dded   dedddeeeef      deee      fdZg fd
ddee   fdZddZed        Zedefd       Zedefd       Zd Z xZS ) GGUFQuantizerTc                     t        |   |fi | |j                  | _        |j                  | _        |j                  | _        t        | j                  t              s| j                  g| _        y y N)super__init__compute_dtypepre_quantizedmodules_to_not_convert
isinstancelist)selfquantization_configkwargs	__class__s      c/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/diffusers/quantizers/gguf/gguf_quantizer.pyr   zGGUFQuantizer.__init__'   sf    ,770>>0>>&9&P&P#$55t<+/+F+F*GD' =    c                     t               rt        dd      rt        d      t               rt	        dd      rt        d      y )N<z0.26.0zoLoading GGUF Parameters requires `accelerate` installed in your environment: `pip install 'accelerate>=0.26.0'`z0.10.0zhTo load GGUF format files you must have `gguf` installed in your environment: `pip install gguf>=0.10.0`)r   r   ImportErrorr   r   )r%   argsr'   s      r)   validate_environmentz"GGUFQuantizer.validate_environment1   sL    &(,A#x,P B  !"oc8&Dz  'Er*   
max_memoryreturnc                 ^    |j                         D ci c]  \  }}||dz   }}}|S c c}}w )Ng?)items)r%   r0   keyvals       r)   adjust_max_memoryzGGUFQuantizer.adjust_max_memory<   s6    6@6F6F6HI(#sc3:oI
I Js   )c                 z    |t         j                  k7  rt        j                  d| d       t         j                  S )Nztarget_dtype z3 is replaced by `torch.uint8` for GGUF quantization)torchuint8loggerinfo)r%   target_dtypes     r)   adjust_target_dtypez!GGUFQuantizer.adjust_target_dtypeA   s.    5;;&KK-~5hij{{r*   c                 "    || j                   }|S r   )r    )r%   torch_dtypes     r)   update_torch_dtypez GGUFQuantizer.update_torch_dtypeF   s    ,,Kr*   c                     |j                   }|j                   }|j                  }t        |   \  }}t        |||      }	|	|k7  rt	        | d|	 d|       y)Nz% has an expected quantized shape of: z, but received shape: T)shape
quant_typer   r   
ValueError)
r%   
param_namecurrent_paramloaded_paramloaded_param_shapecurrent_param_shaperC   
block_size	type_sizeinferred_shapes
             r)   check_quantized_param_shapez)GGUFQuantizer.check_quantized_param_shapeK   sx    )//+11!,,
 0 <
I56H)U_`00,CNCSSij|i}~  r*   modelr   param_value)r   ztorch.TensorrE   
state_dictc                 &    t        |t              ryy)NTF)r#   r   )r%   rN   rO   rE   rP   r'   s         r)   check_if_quantized_paramz&GGUFQuantizer.check_if_quantized_paramZ   s     k=1r*   target_deviceztorch.deviceunexpected_keysc                 .   t        ||      \  }}	|	|j                  vr|	|j                  vrt        | d|	 d      |	|j                  v r|j	                  |      |j                  |	<   |	|j                  v r|j	                  |      |j                  |	<   y y )Nz- does not have a parameter or a buffer named .)r   _parameters_buffersrD   to)
r%   rN   rO   rE   rS   rP   rT   r'   moduletensor_names
             r)   create_quantized_paramz$GGUFQuantizer.create_quantized_paramg   s     35*Ef000[5Wx'TU`Taabcdd&,,,.9nn].KF{+&//)+6>>-+HFOOK( *r*   keep_in_fp32_modulesc                     |j                  dd       }| j                  j                  |       | j                  D cg c]  }||	 c}| _        t        || j                  || j                         y c c}w )NrP   )r"   )getr"   extendr   r    )r%   rN   
device_mapr]   r'   rP   rZ   s          r)   $_process_model_before_weight_loadingz2GGUFQuantizer._process_model_before_weight_loadingz   sk     ZZd3
##**+?@<@<W<W&n&[a[mv&n#!4%%z$JeJe	
 'os   A4A4c                     |S r    )r%   rN   r'   s      r)   #_process_model_after_weight_loadingz1GGUFQuantizer._process_model_after_weight_loading   s    r*   c                      yNFrd   r%   s    r)   is_serializablezGGUFQuantizer.is_serializable       r*   c                      yrg   rd   rh   s    r)   is_trainablezGGUFQuantizer.is_trainable   rj   r*   c                      y)NTrd   rh   s    r)   is_compileablezGGUFQuantizer.is_compileable   s    r*   c                 r   |j                   j                  dk(  }|rrt        j                  d       t	        t
        d      rt
        j                  j                         nt
        j                  j                         }|j                  |       t        || j                        }|r|j                  d       |S )NcpuzModel was found to be on CPU (could happen as a result of `enable_model_cpu_offload()`). So, moving it to accelerator. After dequantization, will move the model back to CPU again to preserve the previous device.accelerator)devicetyper:   r;   hasattrr8   rq   current_acceleratorcudacurrent_devicerY   r   r"   )r%   rN   is_model_on_cpurr   s       r)   _dequantizezGGUFQuantizer._dequantize   s    ,,++u4KK f
 5-0 !!557ZZ..0 
 HHV3E4;V;VWHHUOr*   )r<   torch.dtyper1   rz   )r?   rz   r1   rz   )NN)rN   r   )__name__
__module____qualname__use_keep_in_fp32_modulesr   r/   r   strr   intr6   r=   r@   rM   r   boolrR   r   r   r\   rb   re   propertyri   rl   rn   ry   __classcell__)r(   s   @r)   r   r   $   s   #HDeCHo1E,F 4PSUZ[^`c[cUdPdKe 


 :; 	
 cN 
& 04/3II :;I 	I
 &I T#s(^,I "$s),I. +-	

 #3i	
    d     r*   r   )typingr   r   r   r   r   r   baser
   models.modeling_utilsr   utilsr   r   r   r   r   r   r   r8   r   r   r   r   r   
get_loggerr{   r:   r   rd   r*   r)   <module>r      sc    B B % 3   -/  
		H	%E& Er*   