
from __future__ import annotations

import math
import warnings
from operator import attrgetter
from typing import Literal, Optional

import torch

from peft.config import PeftConfig
from peft.mapping import PEFT_TYPE_TO_CONFIG_MAPPING, PEFT_TYPE_TO_PREFIX_MAPPING
from peft.tuners.lora import Conv2d, Linear, LoraConfig, LoraLayer

from .other import get_pattern_key, infer_device
from .peft_types import PeftType
from .save_and_load import _insert_adapter_name_into_state_dict, load_peft_weights


# Config keys whose values must match between the loaded adapter and the adapter that is hot-swapped in
# (see `check_hotswap_configs_compatible`). So far, only LoRA is supported.
CONFIG_KEYS_TO_CHECK = {PeftType.LORA: ["use_rslora", "lora_dropout", "alpha_pattern", "use_dora"]}


def _update_scaling(lora_module, adapter_name, new_scaling):
    """
    Update the value of the scalings of the LoRA module.

    Takes into consideration that scalings can be tensors from prepare_model_for_compiled_hotswap.
    """
    if lora_module.scaling[adapter_name] == new_scaling:
        return

    if isinstance(lora_module.scaling[adapter_name], torch.Tensor):
        # update in-place so that a compiled model keeps referencing the same tensor object
        lora_module.scaling[adapter_name].fill_(new_scaling)
    elif isinstance(lora_module.scaling[adapter_name], (float, int)):
        lora_module.scaling[adapter_name] = new_scaling
    else:
        raise ValueError(
            "Something went wrong when trying to set the new scale value, expected to find the old value to be of "
            f"type float or torch.Tensor, got {type(lora_module.scaling[adapter_name])} instead."
        )


def _convert_scalings_to_tensor(model) -> bool:
    """
    Convert the LoRA scaling values into torch.tensors to prevent recompilation if they change.

    Returns:
        bool:
            Returns `True` if an appropriate adapter was found, else `False`.
    """
    found_adapter = False
    for module in model.modules():
        if not isinstance(module, LoraLayer):
            continue

        found_adapter = True
        scaling = module.scaling
        for key, val in scaling.items():
            if isinstance(val, float):
                # create a 0-dim tensor on the same device as the layer weight
                scaling[key] = torch.tensor(val, device=module.weight.device)
            elif isinstance(val, torch.Tensor):
                # already converted
                continue
            else:
                raise ValueError(
                    "Something went wrong while trying to convert the scalings, expected to find values of type "
                    f"float but found {type(val)} instead."
                )
    return found_adapter
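

# Editorial note, not part of the PEFT API: the commented-out sketch below illustrates why the scalings are turned
# into tensors and why `_update_scaling` updates tensors in-place. A plain float scaling becomes a constant (or a
# guard) in a compiled graph, so loading an adapter with a different alpha/rank would force a re-compile; a 0-dim
# tensor can have its value changed without creating a new Python object. `SimpleNamespace` stands in for a real
# LoRA layer here and is purely hypothetical.
#
#     from types import SimpleNamespace
#
#     lora_module = SimpleNamespace(scaling={"default": 2.0})
#     _update_scaling(lora_module, "default", new_scaling=4.0)   # float: the dict entry is simply reassigned
#
#     lora_module.scaling["default"] = torch.tensor(4.0)
#     _update_scaling(lora_module, "default", new_scaling=0.5)   # tensor: updated in-place via fill_, same object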
    FT)devicezkSomething went wrong while trying to convert the scalings, expected to find values of type float but found r   )modulesr   r   r   itemsr   r   tensorweightr)   r   r!   r"   )modelfound_adaptermoduler   keyvals         r%   _convert_scalings_to_tensorr3   8   s     M--/ &),.. 	HC#u%$||C8L8LMU\\2 !!%c96 	 r'   c                n   | j                   }|r|j                  d      n|j                  d      }||k(  r| S ||kD  rt        d| d| d      |j                  \  }}|rgt	        j
                  |||j                  |j                        }||d|ddf<   t        j                  j                  ||| j                  du      }nft	        j
                  |||j                  |j                        }||ddd|f<   t        j                  j                  ||| j                  du      }|j                   j                  |j                  k7  r0t        d	|j                   d
|j                   j                   d      | j                  g|j                  j                  | j                  j                  k7  r:t        d| j                  j                   d
|j                  j                   d      ||j                   _        | j                  %| j                  j                  |j                  _        |S )a  
    Get a new Linear layer for LoRA with padded weights according to the target rank.

    Args:
        lora_module (nn.Module):
            The LoRA sub-module (e.g. module.lora_A[adapter_name]).
        target_rank (int):
            The desired rank to pad to.
        is_lora_A (bool):
            True if this is the LoRA A matrix, False if LoRA B.

    Returns:
        nn.Linear:
            A newly created and padded Linear layer. If the rank already fit, the original layer is returned.
    r   r   -Trying to pad the adapter to the target rank #, but the original rank is larger (). This is not possible.r)   dtypeN)biaszYSomething went wrong when trying to pad the LoRA Linear weights, the new shape should be  but l was found. Please open an issue on PEFT (https://github.com/huggingface/peft/issues) and report this error.zVSomething went wrong when trying to pad the LoRA Linear bias, the new shape should be )r-   sizer!   shaper   zerosr)   r9   nnr   r:   data)	r#   target_rank	is_lora_Ar-   original_rankout_featuresin_featurespadded	new_layers	            r%   _get_padded_linearrI   S   s#     F '0FKKNV[[^M #{";K= I68
 	

 !'L+ [+fmmSYS_S_`$*~~q !HHOOK;CSCS[_C_O`	 \;v}}TZT`T`a$*q.=. !HHOOKKDTDT\`D`Oa	 -g||nE)"2"2"8"8!9 :RR
 	

 	$9>>+?+?;CSCSCYCY+Yd%%&eINN,@,@+A BRR
 	
 #I#)..33	r'   c           	        | j                   }|j                  \  }}}}|r|n|}||k(  r| S ||kD  rt        d| d| d      |rt        j                  |||||j
                  |j                        }	||	d|ddddddf<   t        j                  j                  ||| j                  | j                  | j                  | j                  du| j                        }
nt        j                  |||||j
                  |j                        }	||	ddd|ddddf<   t        j                  j                  ||| j                  | j                  | j                  | j                  du| j                        }
|
j                   j                  |	j                  k7  r0t        d|	j                   d|
j                   j                   d	      | j                  g|
j                  j                  | j                  j                  k7  r:t        d
| j                  j                   d|
j                  j                   d	      |	|
j                   _        | j                  %| j                  j                  |
j                  _        |
S )a  


def _get_padded_conv2d(lora_module: torch.nn.Module, target_rank: int, is_lora_A: bool) -> torch.nn.Conv2d:
    """
    Get a new Conv2d layer for LoRA with padded weights according to the target rank.

    Args:
        lora_module (nn.Module):
            The LoRA sub-module (e.g. module.lora_A[adapter_name]).
        target_rank (int):
            The desired rank to pad to.
        is_lora_A (bool):
            True if this is the LoRA A matrix, False if LoRA B.

    Returns:
        nn.Conv2d:
            A newly created and padded Conv2d layer. If the rank already fit, the original layer is returned.
    """
    weight = lora_module.weight
    out_channels, in_channels, kh, kw = weight.shape
    # for LoRA A, the rank is the number of output channels; for LoRA B, it is the number of input channels
    original_rank = out_channels if is_lora_A else in_channels
    if original_rank == target_rank:
        # nothing to do
        return lora_module

    if original_rank > target_rank:
        raise ValueError(
            f"Trying to pad the adapter to the target rank {target_rank}, but the original rank is larger "
            f"({original_rank}). This is not possible."
        )

    if is_lora_A:
        # LoRA A affects the output channels: pad with zero filters
        padded = torch.zeros(target_rank, in_channels, kh, kw, device=weight.device, dtype=weight.dtype)
        padded[:out_channels, :, :, :] = weight
        new_layer = torch.nn.Conv2d(
            in_channels,
            target_rank,
            kernel_size=lora_module.kernel_size,
            stride=lora_module.stride,
            padding=lora_module.padding,
            bias=lora_module.bias is not None,
            groups=lora_module.groups,
        )
    else:
        # LoRA B affects the input channels: pad with zero input channels
        padded = torch.zeros(out_channels, target_rank, kh, kw, device=weight.device, dtype=weight.dtype)
        padded[:, :in_channels, :, :] = weight
        new_layer = torch.nn.Conv2d(
            target_rank,
            out_channels,
            kernel_size=lora_module.kernel_size,
            stride=lora_module.stride,
            padding=lora_module.padding,
            bias=lora_module.bias is not None,
            groups=lora_module.groups,
        )

    # sanity checks before assigning the padded data
    if new_layer.weight.shape != padded.shape:
        raise ValueError(
            "Something went wrong when trying to pad the LoRA Conv2d weights, the new shape should be "
            f"{padded.shape} but {new_layer.weight.shape} was found. Please open an issue on PEFT "
            "(https://github.com/huggingface/peft/issues) and report this error."
        )
    if (lora_module.bias is not None) and (new_layer.bias.shape != lora_module.bias.shape):
        raise ValueError(
            "Something went wrong when trying to pad the LoRA Conv2d bias, the new shape should be "
            f"{lora_module.bias.shape} but {new_layer.bias.shape} was found. Please open an issue on PEFT "
            "(https://github.com/huggingface/peft/issues) and report this error."
        )

    new_layer.weight.data = padded
    if lora_module.bias is not None:
        new_layer.bias.data = lora_module.bias.data
    return new_layer


def _pad_lora_weights(model: torch.nn.Module, target_rank: int) -> bool:
    """
    Pad LoRA weights in a model to a target rank while preserving the original behavior.

    Args:
      model (nn.Module): The model containing LoRA modules (with lora_A and lora_B).
      target_rank (int): The target rank to pad to.

    Returns:
        bool:
            Returns `True` if an appropriate adapter was found, else `False`.
    """
    found_adapter = False

    for module in model.modules():
        # decide which padding function to use based on the type of the LoRA layer
        if isinstance(module, Linear):
            pad_fn = _get_padded_linear
        elif isinstance(module, Conv2d):
            pad_fn = _get_padded_conv2d
        else:
            continue

        for adapter_name, lora_A_module in module.lora_A.items():
            new_layer = pad_fn(lora_A_module, target_rank=target_rank, is_lora_A=True)
            module.lora_A[adapter_name] = new_layer

        for adapter_name, lora_B_module in module.lora_B.items():
            new_layer = pad_fn(lora_B_module, target_rank=target_rank, is_lora_A=False)
            module.lora_B[adapter_name] = new_layer

        found_adapter = True

    return found_adapter
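

# Editorial note, not part of the PEFT API: a commented-out sketch of what the padding above does for a single
# lora_A/lora_B pair. Padding a rank-8 adapter to rank 16 appends zero rows to lora_A and zero columns to lora_B, so
# lora_B(lora_A(x)) is unchanged while every adapter ends up with identically shaped weights (which is what avoids
# re-compilation when a higher-rank adapter is hot-swapped in later). The feature sizes below are made up.
#
#     lora_A = torch.nn.Linear(64, 8, bias=False)   # rank 8: weight shape (8, 64)
#     lora_B = torch.nn.Linear(8, 64, bias=False)   # rank 8: weight shape (64, 8)
#     padded_A = _get_padded_linear(lora_A, target_rank=16, is_lora_A=True)    # weight shape (16, 64)
#     padded_B = _get_padded_linear(lora_B, target_rank=16, is_lora_A=False)   # weight shape (64, 16)
#     x = torch.randn(2, 64)
#     torch.testing.assert_close(padded_B(padded_A(x)), lora_B(lora_A(x)))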
    FT)rB   rC   )	r*   r   r   rI   r
   rS   lora_Ar+   lora_B)	r.   rB   r/   r0   pad_fnr$   lora_A_modulerH   lora_B_modules	            r%   _pad_lora_weightsrZ      s     M--/ ff%'F''F  ,2==+>+>+@ 	4'L-}+QUVI*3FMM,'	4
 ,2==+>+>+@ 	4'L-}+QVWI*3FMM,'	4 )* r'   error)rB   configcheck_compiledc                  t        | d      xs t        | dd      }|r?|dk(  rt        d      |dk(  rt        j                  d       n|dk7  rt        d	| d
      t        |       }|t        | |      }nd}|s|st        d      |sy|yt        |t              sd|i}|j                         D ]6  }||_
        |j                  s|j                  D ]  }||j                  |<    8 y)a	  
    Helper function that prepares the model so that it can later be compiled and then used with hot-swapping.

    It is necessary to call this function on the model for hot-swapping to work if both of these are true:

    - the different LoRA adapters have different ranks and/or different alpha values (i.e. scalings)
    - you plan to torch.compile the model and want to avoid re-compilation

    It is important to call this function *after* the first LoRA adapter has been loaded (i.e. the one that will be
    swapped out) but *before* the model is compiled.

    Even with this function, hot-swapping LoRA adapters that target different layers is still not supported.

    Note: This function modifies the model in-place. If you want to restore the model to its initial state, you will
    have to reload it.

    Args:
        model (`nn.Module`):
            The model with the loaded adapter, before compilation.
        target_rank (`int`, *optional*):
            The target rank to pad the LoRA weights to. Should be the maximum rank among all LoRA adapters that will be
            hot-swapped. If not specified, the target ranks will not be changed.
        config (`LoraConfig` or `dict[str, LoraConfig]`, *optional*):
            Optionally pass the `LoraConfig`s of the LoRA adapters. If passed, the rank in the configs will be updated
            to `target_rank`.
        check_compiled (`str`, *optional*, defaults to `"error"`):
            How to handle the case when the model is already compiled, which should generally be avoided. The options
            are:
              - "error" (default): raise an error
              - "warn": issue a warning
              - "ignore": do nothing

    Raises:
        ValueError
            If the model is already compiled or if no adpater layer was found, raise an error.

    Example:

        ```py
        base_model = ...
        model = PeftModel.from_pretrained(base_model, path_adapter_0)
        # Prepare the model to allow hotswapping even if ranks/scalings of 2nd adapter differ.
        # You can skip this step if all ranks and scalings are identical.
        prepare_model_for_compiled_hotswap(model, target_rank=highest_lora_rank)
        model = torch.compile(model)
        # do inference with adapter 0
        # replace the "default" lora adapter with the new one
        hotswap_adapter(model, path_adapter_1, adapter_name="default", torch_device=device)
        # do inference with adapter 1
        ```

    	_orig_mod_compiled_call_implFr[   zDCall prepare_model_for_compiled_hotswap *before* compiling the modelwarnzprepare_model_for_compiled_hotswap was called with a model that is already compiled. This will likely result in re-compilation, hurting performance. Call the function before compiling the model.ignorezCcheck_compiles should be one of 'error', 'warn', or 'ignore', got 'z
' instead.N)rB   zNo adapter layers found on the model, make sure call `prepare_model_for_compiled_hotswap` after loading the first adapter and before loading the second adapter.dummy)hasattrgetattrr!   warningsra   r3   rZ   r   dictvaluesrrank_pattern)	r.   rB   r\   r]   is_compiledconversion_found_adapterpadding_found_adapterlora_configr1   s	            r%   "prepare_model_for_compiled_hotswapro     s&   v %-]@UW\1]KW$cddv%MMo x'UVdUeeop   ;5A 1%[ Q %$(=G
 	

 fd#6"}} <###"// <0;((-<<r'   c           
        t        | d      }t        t        | dd            }| j                         D ch c]  }||v s||v s| }}g }	|j	                         D ]?  \  }
}	  t        |
      |       }|r|j                  d|
z          /|j                  |
       A |	r ddj                  |	       d}t        |      |D ]s  }
|
j                  d      }
dj                  |
j                  d      dd       }| j                  |      } t        |
      |       }|j                  j                  d	       u |j	                         D ]  \  }
}dj                  |
j                  d      dd       }| j                  |      }t!        |j"                  j%                         |
      }t!        |j&                  j%                         |
      }|j"                  j)                  ||j*                        }|j&                  j)                  ||j,                        }|j.                  r|t1        j2                  |      z  }n||z  }t5        |||
        t        |
      |       }|j7                  |j                  j8                        }|s%|s#	 t:        j<                  j?                  ||       d|j@                  |j@                  k(  r'|j                  jC                  |j                         |jE                         dvrtG        d|jE                          d      |j@                  d   |j@                  d   kD  rR|j                  j                  d       |j                  d|j@                  d    jC                  |j                         D|j@                  d   |j@                  d   kD  rX|j                  j                  d       |j                  ddd|j@                  d   f   jC                  |j                         tI        d|
 d|j@                   d|j@                   d       yc c}w # t        $ r |	j                  |
       Y w xY w# t        $ r d}Y w xY w)a  


def hotswap_adapter_from_state_dict(
    model: torch.nn.Module,
    state_dict: dict[str, torch.Tensor],
    adapter_name: str,
    config: LoraConfig,
    parameter_prefix: str = "lora_",
) -> None:
    """
    Swap out the adapter weights from the model with the weights from state_dict.

    As of now, only LoRA is supported.

    This is a low-level function that assumes that the adapters have been checked for compatibility and that the
    state_dict has been correctly mapped to work with PEFT. For a high level function that performs this work for you,
    use `hotswap_adapter` instead.

    Args:
        model (`nn.Module`):
            The model with the loaded adapter.
        state_dict (`dict[str, torch.Tensor]`):
            The state dict of the new adapter, which needs to be compatible (targeting same modules etc.).
        adapter_name (`str`):
            The name of the adapter that should be hot-swapped, e.g. `"default"`. The name will remain the same after
            swapping.
        config (`LoraConfig`):
            The config of the LoRA adapter. This is used to determine the scaling and rank of the adapter.
        parameter_prefix (`str`, *optional*, defaults to `"lora_"`):
            The prefix used to identify the adapter's keys in the state dict. For LoRA, this would be `"lora_"` (the
            default).

    Raises:
        RuntimeError
            If the old and the new adapter are not compatible, a RuntimeError is raised.

    """
    # Ensure that all the keys of the new adapter correspond exactly to the keys of the old adapter, otherwise
    # hot-swapping is not possible.
    is_compiled = hasattr(model, "_orig_mod")
    is_compiled_inplace = bool(getattr(model, "_compiled_call_impl", None))
    # TODO: there is probably a more precise way to identify the adapter keys
    missing_keys = {k for k in model.state_dict() if (parameter_prefix in k) and (adapter_name in k)}
    unexpected_keys = []

    # first: dry run, not swapping anything
    for key, new_val in state_dict.items():
        try:
            old_val = attrgetter(key)(model)
        except AttributeError:
            unexpected_keys.append(key)
            continue

        if is_compiled:
            missing_keys.remove("_orig_mod." + key)
        else:
            missing_keys.remove(key)

    if unexpected_keys:
        msg = f"Hot swapping the adapter did not succeed, unexpected keys found: {', '.join(unexpected_keys)}."
        raise RuntimeError(msg)

    # Keys of the loaded adapter that receive no new value are zeroed out, so that the corresponding LoRA weights are
    # effectively disabled.
    for key in missing_keys:
        # strip the prefix added by torch.compile, if any
        key = key.removeprefix("_orig_mod.")
        old_val = attrgetter(key)(model)
        old_val.data.fill_(0.0)

    # actual swapping
    for key, new_val in state_dict.items():
        # get the LoRA parent module (the layer that holds the scaling dict); keys look like
        # "...<layer>.lora_A.<adapter_name>.weight", so strip the last three components
        module_name = ".".join(key.split(".")[:-3])
        module = model.get_submodule(module_name)

        # update the scaling, taking rank_pattern/alpha_pattern and rsLoRA into account
        r_key = get_pattern_key(config.rank_pattern.keys(), key)
        alpha_key = get_pattern_key(config.alpha_pattern.keys(), key)
        rank = config.rank_pattern.get(r_key, config.r)
        alpha = config.alpha_pattern.get(alpha_key, config.lora_alpha)
        if config.use_rslora:
            scaling = alpha / math.sqrt(rank)
        else:
            scaling = alpha / rank
        _update_scaling(module, adapter_name, new_scaling=scaling)

        # swap the weight itself
        old_val = attrgetter(key)(model)
        new_val = new_val.to(device=old_val.data.device)

        # swap_tensors is the cleanest way to replace the weight, but it does not work for compiled models because of
        # the weakrefs torch.compile keeps on the tensors; in that case, fall back to copying the data in-place
        if not is_compiled and not is_compiled_inplace:
            try:
                torch.utils.swap_tensors(old_val, new_val)
                continue
            except Exception:
                pass

        if old_val.shape == new_val.shape:
            old_val.data.copy_(new_val.data)
            continue

        # The shapes differ, which means the old weight was padded by prepare_model_for_compiled_hotswap; copy the new
        # weight into the padded buffer and leave the rest as zeros.
        if old_val.dim() not in (2, 4):
            raise NotImplementedError(
                f"Trying to hotswap an adapter whose weight has {old_val.dim()} dimensions, but only Conv2d and "
                "Linear are supported"
            )

        if old_val.shape[0] > new_val.shape[0]:
            old_val.data.fill_(0)
            old_val.data[: new_val.shape[0]] = new_val.data
        elif old_val.shape[1] > new_val.shape[1]:
            old_val.data.fill_(0)
            old_val.data[:, : new_val.shape[1]] = new_val.data
        else:
            raise ValueError(
                f"Incompatible shapes found for LoRA weights {key}: {old_val.shape} vs {new_val.shape}. Please ensure "
                "that all ranks are padded to the largest rank among all LoRA adapters by using "
                "peft.utils.hotswap.prepare_model_for_compiled_hotswap."
            )
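

# Editorial note, not part of the PEFT API: a commented-out sketch of the scaling arithmetic applied above for each
# swapped module. Given the new adapter's rank and alpha (possibly overridden per-module by rank_pattern and
# alpha_pattern), the effective scaling is alpha / rank for classic LoRA and alpha / sqrt(rank) for rsLoRA; the
# numbers are made up for illustration.
#
#     rank, alpha = 16, 32
#     assert alpha / rank == 2.0             # classic LoRA scaling
#     assert alpha / math.sqrt(rank) == 8.0  # rsLoRA scaling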


def check_hotswap_configs_compatible(config0: PeftConfig, config1: PeftConfig) -> None:
    """
    Check if two configs are compatible for hot-swapping.

    Only LoRA parameters are checked for now.

    To hot-swap two adapters, their configs must be compatible. Otherwise, the results could be incorrect. E.g. if
    they use different alpha values, after hot-swapping, the alphas from the first adapter would still be used with
    the weights from the 2nd adapter, which would result in incorrect behavior. There is probably a way to swap these
    values as well, but that's not implemented yet, and we need to be careful not to trigger re-compilation if the
    model is compiled (so no modification of the dict).
    """
    if config0.peft_type != config1.peft_type:
        msg = f"Incompatible PEFT types found: {config0.peft_type.value} and {config1.peft_type.value}"
        raise ValueError(msg)

    if config0.peft_type not in CONFIG_KEYS_TO_CHECK:
        msg = (
            f"Hotswapping only supports {', '.join(CONFIG_KEYS_TO_CHECK.keys())} but "
            f"{config0.peft_type.value} was passed."
        )
        raise ValueError(msg)
    config_keys_to_check = CONFIG_KEYS_TO_CHECK[config0.peft_type]

    config0 = config0.to_dict()
    config1 = config1.to_dict()
    sentinel = object()
    for key in config_keys_to_check:
        val0 = config0.get(key, sentinel)
        val1 = config1.get(key, sentinel)
        if val0 != val1:
            raise ValueError(f"Configs are incompatible: for {key}, {val0} != {val1}")
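

# Editorial note, not part of the PEFT API: a commented-out sketch of how the check above behaves. Differences in
# rank or alpha are allowed (they are handled via scaling updates and optional weight padding), while differences in
# the keys listed in CONFIG_KEYS_TO_CHECK raise a ValueError. The target module name is made up.
#
#     config0 = LoraConfig(r=8, lora_alpha=16, target_modules=["q_proj"])
#     config1 = LoraConfig(r=16, lora_alpha=32, target_modules=["q_proj"])
#     check_hotswap_configs_compatible(config0, config1)   # fine: only rank/alpha differ
#
#     config2 = LoraConfig(r=8, lora_alpha=16, use_rslora=True, target_modules=["q_proj"])
#     check_hotswap_configs_compatible(config0, config2)   # raises ValueError: use_rslora differs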


def hotswap_adapter(model, model_name_or_path: str, adapter_name: str, torch_device: Optional[str] = None, **kwargs):
    """Substitute old adapter data with new adapter data, keeping the rest the same.

    As of now, only LoRA is supported.

    This function is useful when you want to replace the loaded adapter with a new adapter. The adapter name will
    remain the same, but the weights and other parameters will be swapped out.

    If the adapters are incompatible, e.g. targeting different layers or having different alpha values, an error will
    be raised.

    Example:

    ```py
    >>> import torch
    >>> from transformers import AutoModelForCausalLM
    >>> from peft import PeftModel
    >>> from peft.utils.hotswap import hotswap_adapter

    >>> model_id = ...
    >>> inputs = ...
    >>> device = ...
    >>> model = AutoModelForCausalLM.from_pretrained(model_id).to(device)

    >>> # load lora 0
    >>> model = PeftModel.from_pretrained(model, "path-adapter-0")
    >>> model = torch.compile(model)  # optionally compile the model
    >>> with torch.inference_mode():
    ...     output_adapter_0 = model(inputs)

    >>> # replace the "default" lora adapter with the new one
    >>> hotswap_adapter(model, "path-adapter-1", adapter_name="default", torch_device=device)
    >>> with torch.inference_mode():
    ...     output_adapter_1 = model(inputs).logits
    ```

    Args:
        model ([`~PeftModel`]):
            The PEFT model with the loaded adapter.
        model_name_or_path (`str`):
            The name or path of the model to load the new adapter from.
        adapter_name (`str`):
            The name of the adapter to swap, e.g. `"default"`. The name will stay the same after swapping.
        torch_device (`str`, *optional*, defaults to None):
            The device to load the new adapter onto.
        **kwargs (`optional`):
            Additional keyword arguments used for loading the config and weights.

    """
    if torch_device is None:
        torch_device = infer_device()

    # load the config of the new adapter and check that it is compatible with the loaded one
    config_cls = PEFT_TYPE_TO_CONFIG_MAPPING[
        PeftConfig._get_peft_type(
            model_name_or_path,
            subfolder=kwargs.get("subfolder", None),
            revision=kwargs.get("revision", None),
            cache_dir=kwargs.get("cache_dir", None),
            use_auth_token=kwargs.get("use_auth_token", None),
            token=kwargs.get("token", None),
        )
    ]
    config = config_cls.from_pretrained(model_name_or_path, **kwargs)
    # config keys that could affect the model output besides what is determined by the state_dict
    check_hotswap_configs_compatible(model.active_peft_config, config)

    # load the new weights and remap the state_dict so that it matches the keys of the loaded adapter
    state_dict = load_peft_weights(model_name_or_path, device=torch_device, **kwargs)

    parameter_prefix = PEFT_TYPE_TO_PREFIX_MAPPING[config.peft_type]
    peft_model_state_dict = _insert_adapter_name_into_state_dict(
        state_dict, adapter_name=adapter_name, parameter_prefix=parameter_prefix
    )

    hotswap_adapter_from_state_dict(
        model=model,
        state_dict=peft_model_state_dict,
        adapter_name=adapter_name,
        config=config,
        parameter_prefix=parameter_prefix,
    )