
    biz5                     4   d Z ddlZddlmZ ddlmZ ddlmZmZmZm	Z	 ddl
mZ  e       r
ddlZddlmZ  e       rddlZ e       rddlZdd	lmZ dd
lmZmZ  e	j,                  e      Z	 	 	 	 ddZddZdddZd Z	 	 	 	 ddZ	 	 ddZdeeef   fdZ y)z
Adapted from
https://github.com/huggingface/transformers/blob/c409cd81777fb27aadc043ed3d8339dbc020fb3b/src/transformers/integrations/bitsandbytes.py
    N)	signature)Union   )is_accelerate_availableis_bitsandbytes_availableis_torch_availablelogging   )QuantizationMethod)init_empty_weights)add_hook_to_moduleremove_hook_from_modulec           	         | j                         D ]9  \  }}|g }|j                  |       t        |t        j                        r||vrdj                  |      t        fd|D              st               5  |j                  }|j                  }|j                         dk(  rTt        j                  j                  |||j                  du|j                  |j                        | j                   |<   d}n|j"                  ||j"                  v rndt%        t'        t        j                  j(                        j*                        v rd|j,                  ini }	t        j                  j(                  |||j                  du|j.                  f|j0                  |j2                  d|	| j                   |<   d}t5        |      | j                   |   _        | j                   |   j9                  d	       ddd       t;        t%        |j=                                     d
kD  rt?        |||||      \  }
}|jA                  d       < | |fS # 1 sw Y   YxY w)z
    Private method that wraps the recursion for module replacement.

    Returns the converted model and a boolean that indicates if the conversion has been successful or not.
    N.c              3   :   K   | ]  }|d z   v xs |k(    ywr   N .0keycurrent_key_name_strs     b/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/diffusers/quantizers/bitsandbytes/utils.py	<genexpr>z+_replace_with_bnb_linear.<locals>.<genexpr>>   .      Y\s22T?S8ST   llm_int8)has_fp16_weights	thresholdTquant_storage)compress_statistics
quant_typeFr   )has_been_replaced)!named_childrenappend
isinstancennLinearjoinanyr   in_featuresout_featuresquantization_methodbnbLinear8bitLtbiasllm_int8_has_fp16_weightllm_int8_threshold_modulesllm_int8_skip_moduleslistr   
Linear4bit
parametersbnb_4bit_quant_storagebnb_4bit_compute_dtypebnb_4bit_use_double_quantbnb_4bit_quant_typetype
source_clsrequires_grad_lenchildren_replace_with_bnb_linearpop)modelmodules_to_not_convertcurrent_key_namequantization_configr"   namemoduler+   r,   extra_kwargs_r   s              @r   rA   rA   *   sU    ,,. ;!f#!%fbii(T9O-O#&88,<#=  `v  () &?"("4"4K#)#6#6L*>>@JN/2vv/B/B'("KKt3-@-Y-Y&9&L&L 0C 0t, -1) 0EEQ $(;(Q(Q Q  $3d9SVVEVEV;W;b;b6c#c "12E2\2\ ]%' )
 47663D3D + , &4 7 3 J J	4
 5H4a4a+>+R+R4 #/4ENN40 15-6:6lENN4(3NN4(77>M&?N tFOO%&'!+#;& #"3$ A  	R w;!x ###c&? &?s   >E=II	c                     t        | |||      \  } }t        d | j                         D              }|st        j	                  d       | S )a  
    Helper function to replace the `nn.Linear` layers within `model` with either `bnb.nn.Linear8bit` or
    `bnb.nn.Linear4bit` using the `bitsandbytes` library.

    References:
        * `bnb.nn.Linear8bit`: [LLM.int8(): 8-bit Matrix Multiplication for Transformers at
          Scale](https://huggingface.co/papers/2208.07339)
        * `bnb.nn.Linear4bit`: [QLoRA: Efficient Finetuning of Quantized
          LLMs](https://huggingface.co/papers/2305.14314)

    Parameters:
        model (`torch.nn.Module`):
            Input model or `torch.nn.Module` as the function is run recursively.
        modules_to_not_convert (`List[`str`]`, *optional*, defaults to `[]`):
            Names of the modules to not convert in `Linear8bitLt`. In practice we keep the `modules_to_not_convert` in
            full precision for numerical stability reasons.
        current_key_name (`List[`str`]`, *optional*):
            An array to track the current key of the recursion. This is used to check whether the current key (part of
            it) is not in the list of modules to not convert (for instances modules that are offloaded to `cpu` or
            `disk`).
        quantization_config ('transformers.utils.quantization_config.BitsAndBytesConfig'):
            To configure and manage settings related to quantization, a technique used to compress neural network
            models by reducing the precision of the weights and activations, thus making models more efficient in terms
            of both storage and computation.
    c              3      K   | ]E  \  }}t        |t        j                  j                  t        j                  j                  f       G y wN)r&   r.   r'   r6   r/   r   rJ   replaced_modules      r   r   z*replace_with_bnb_linear.<locals>.<genexpr>   s<      A 	?SVV%6%68K8K$LMs   AAzYou are loading your model in 8bit or 4bit but no linear modules were found in your model. Please double check your model architecture, or submit an issue on github if you think this is a bug.)rA   r*   named_modulesloggerwarning)rC   rD   rE   rF   rJ   r"   s         r   replace_with_bnb_linearrS   u   sY    4 (/EGWYlmHE1 "'"5"5"7  	
 L    c                    t        | t        j                  j                        st	        dt        |        d      | j                  j                  }|dvr| S |dk(  rt        j                  j                  | j                  | j                        }d|j                   d}|r#d|j                   d| }|j                  |      }t        j!                  |       |S |j"                  | j"                  |_        t%        t        j                  d	      r5t        j                  j'                  | j                  |j"                        }n,| j                  |j"                  j)                  d
d      z  dz  }|r|j                  |      }|S )z
    Helper function to dequantize 4bit or 8bit bnb weights.

    If the weight is not a bnb quantized weight, it will be returned as is.
    z1Input weight should be of type nn.Parameter, got z instead)
Params4bit
Int8ParamsrV   z(The model is going to be dequantized in z - if you want to upcast it to another dtype, make sure to pass the desired dtype when quantizing the model through `bnb_4bit_quant_type` argument of `BitsAndBytesConfig`z.The model is going to be first dequantized in z and type-casted to int8_vectorwise_dequantr#      g   @ ?)r&   torchr'   	Parameter	TypeErrorr<   	__class____name__r.   
functionaldequantize_4bitdataquant_statedtypetorQ   warning_onceSCBhasattrrX   view)weightstaterc   cls_nameoutput_tensormsgdequantizeds          r   dequantize_bnb_weightro      sX    fehh001KDQWL>Yabcc((H33<66v{{FDVDVW89L9L8M  Nx  yB=CVCVBWWklqkrsC),,U3MC yyJJ	s~~89nn<<V[[%))T kkEIINN2q$99<PP!nnU+rT   c                 $   t        t        j                  | j                  j                        }| j
                  }i }t        j                  |j                        }|j                         D ]  }||j                  v s||   ||<     |di |}|S )a  
    Creates a new hook based on the old hook. Use it only if you know what you are doing ! This method is a copy of:
    https://github.com/huggingface/peft/blob/748f7968f3a31ec06a1c2b0328993319ad9a150a/src/peft/utils/other.py#L245 with
    some changes
    r   )getattr
acceleratehooksr]   r^   __dict__inspectr   __init__keysr7   )old_hookold_hook_clsold_hook_attrfiltered_old_hook_attrold_hook_init_signatureknew_hooks          r   _create_accelerate_new_hookr      s     :++X-?-?-H-HIL%%M%//0E0EF! 9'222(5a(8"1%9 545HOrT   c           	      Z   |j                         }|dk(  rt        j                  j                  nt        j                  j                  }| j                         D ]  \  }}	|g }|j                  |       t        |	|      rK||vrFdj                  |      t        fd|D              s t        |	dd      }
|	j                  j                  }t               5  t        j                  j                  |	j                   |	j"                  |
du      }ddd       |dk(  r|	j$                  }nd}t        j                  j'                  t)        |	j                  ||            _        |
|
|_        t-        |	d      r.|	j.                  }t1        |      }t3        |	       t5        ||       |j7                  |       || j8                  |<   d}t;        t=        |	j?                                     d	kD  rtA        |	|||||
      \  }}|jC                  d        | |fS # 1 sw Y   xY w)ap  
    Converts a quantized model into its dequantized original version. The newly converted model will have some
    performance drop compared to the original model before quantization - use it only for specific usecases such as
    QLoRA adapters merging.

    Returns the converted model and a boolean that indicates if the conversion has been successful or not.
    r   Nr   c              3   :   K   | ]  }|d z   v xs |k(    ywr   r   r   s     r   r   z*_dequantize_and_replace.<locals>.<genexpr>   r   r   r0   )r0   _hf_hookTr   )rc   rD   rE   rF   r"   r#   )"r-   r.   r'   r/   r6   r$   r%   r&   r)   r*   rq   ri   devicer   rZ   r(   r+   r,   rj   r[   ro   r0   rg   r   r   r   r   rd   r3   r?   r5   r@   _dequantize_and_replacerB   )rC   rc   rD   rE   rF   r"   quant_method
target_clsrG   rH   r0   r   
new_modulerj   rx   r~   rJ   r   s                    @r   r   r      s    '::<L(4
(B$$HYHYJ,,. 1!f#!%fj)d:P.P#&88,<#=  `v  vvt4--') q!&1C1CVEXEX_cko_o!pJq  :-"LLE E$)HH$6$67LV]]\ach7i$j
!#&*JO 6:.%H:8DH+F3&z8<f%'1t$$(!tFOO%&'!+#:'=!1$7"3$ A  	R c1!d ###Gq qs   9H  H*	c                     t        | | j                  ||      \  } }t        d | j                         D              }|st        j                  d       | S )N)rc   rD   rF   c              3   j   K   | ]+  \  }}t        |t        j                  j                         - y wrM   )r&   rZ   r'   r(   rN   s      r   r   z)dequantize_and_replace.<locals>.<genexpr>)  s(      9KO
?EHHOO4s   13zkSome linear modules were not dequantized. This could lead to unexpected behaviour. Please check your model.)r   rc   r*   rP   rQ   rR   )rC   rD   rF   rJ   r"   s        r   dequantize_and_replacer     s`    
 'kk5/	HE1  OTObObOd  y	
 LrT   returnc                     t        | d      xr, | j                  xr t        | dd       t        j                  k(  }t        | d      xr, | j
                  xr t        | dd       t        j                  k(  }|xs |||fS )Nis_loaded_in_4bitr-   is_loaded_in_8bit)rg   r   rq   r   BITS_AND_BYTESr   )rH   is_loaded_in_4bit_bnbis_loaded_in_8bit_bnbs      r   _check_bnb_statusr   4  s    +, 	^$$	^F148<N<]<]]  	+, 	^$$	^F148<N<]<]] 
 !9$9;PRgggrT   )NNNF)NNN)NN)ri   ztorch.nn.Parameterrc   ztorch.dtype)!__doc__ru   r   typingr   utilsr   r   r   r	   rF   r   rZ   torch.nnr'   bitsandbytesr.   rr   r   accelerate.hooksr   r   
get_loggerr^   rQ   rA   rS   ro   r   r   r   boolr   r   rT   r   <module>r      s   
    d d 4 -L			H	%
  H$V'V"J(  E$T  ,htTz!2 hrT   