
from __future__ import annotations

import copy
from contextlib import contextmanager
from functools import partial
from typing import Optional, Union

import torch
import torch.nn as nn

from peft.tuners.lora.layer import LoraLayer
from peft.tuners.lora.model import LoraModel
from peft.tuners.tuners_utils import BaseTuner
from peft.utils.constants import DUMMY_TARGET_MODULES
from peft.utils.save_and_load import set_peft_model_state_dict

from .. import lora
from .classifier import XLoraClassifier
from .config import XLoraConfig
from .layer import XLoraConv2dLayer, XLoraEmbeddingLayer, XLoraLinearLayer


def convert_layers_to_xlora(
    base: nn.Module,  # PeftModel
    xloramodel: nn.Module,  # XLoraModel
    config: XLoraConfig,
) -> tuple[int, torch.device | None]:
    """
    Returns the number of swapped layers and the device of the last LoRA layer seen (or None if no layer was swapped).
    """
    total_swapped = 0
    all_layers = []

    device = None
    for module in base.modules():
        # Wrap every LoRA layer (Linear, Embedding, Conv2d) so that its forward consumes the
        # classifier-predicted scalings; the wrapper is installed by rebinding `module.forward`.
        if isinstance(module, lora.Linear):
            device = module.lora_A[next(iter(module.lora_A))].weight.device
            new_layer = XLoraLinearLayer(
                model=xloramodel,
                target=module,
                target_forward=module.forward,
                layer_number=total_swapped,
                config=config,
            )
            all_layers.append(new_layer)
            module.forward = new_layer.forward
            total_swapped += 1
        elif isinstance(module, lora.Embedding):
            device = module.lora_embedding_A[next(iter(module.lora_embedding_A))].device
            new_layer = XLoraEmbeddingLayer(
                model=xloramodel,
                target=module,
                target_forward=module.forward,
                layer_number=total_swapped,
                config=config,
            )
            all_layers.append(new_layer)
            module.forward = new_layer.forward
            total_swapped += 1
        elif isinstance(module, lora.Conv2d):
            device = module.lora_A[next(iter(module.lora_A))].weight.device
            new_layer = XLoraConv2dLayer(
                model=xloramodel,
                target=module,
                target_forward=module.forward,
                layer_number=total_swapped,
                config=config,
            )
            all_layers.append(new_layer)
            module.forward = new_layer.forward
            total_swapped += 1

    return (total_swapped, device)
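

# The swap in `convert_layers_to_xlora` relies on plain attribute assignment: binding a callable to
# `module.forward` on an instance shadows the class's `forward` for that instance only, and
# `nn.Module.__call__` picks up the instance attribute. A minimal standalone sketch of the pattern
# (the toy `nn.Linear` and `wrapped_forward` below are illustrative, not part of this module):
#
#     import torch
#     import torch.nn as nn
#
#     linear = nn.Linear(4, 4)
#     original_forward = linear.forward
#
#     def wrapped_forward(x: torch.Tensor) -> torch.Tensor:
#         # ...mix in per-token adapter scalings here, then delegate...
#         return original_forward(x)
#
#     linear.forward = wrapped_forward  # calls to linear(...) now route through the wrapper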
 ddlm} |j                  |      \  }}| |
       }|| j                  vrH |	j                  |f||d|}d|_        || j                  |<   | j                  | j                  |        ||f||d	|}i }|j                         D ]n  }|}|j                  d
      r|j                  d      r;||j!                  d      dz   d }|j                  d
      s)|j                  d      r;d
|z   }||   ||<   p |j#                  dd      }t%        | |||      }t'        |j(                        dkD  rt+        d|j(                         t-        | d      r| j/                  ||       yy)z
    This method emulates the behavior of `PeftModel.from_pretrained`. Updates to `PeftModel.from_pretrained` may need
    to be reflected here.

    All params pertain to the adapter (adapter name, model id; `i` is the adapter number, zero-indexed).
    """
    from peft.peft_model import PeftModel
    from peft.tuners.lora.config import LoraConfig
    from peft.utils.other import infer_device
    from peft.utils.save_and_load import load_peft_weights

    hf_hub_download_kwargs, kwargs = PeftModel._split_kwargs(kwargs)
    if torch_device is None:
        torch_device = infer_device()

    if adapter_name not in lora_model.peft_config:
        # The adapter is not known yet: load its config and inject the LoRA layers.
        lora_peft_config = LoraConfig.from_pretrained(
            model_id,
            ephemeral_gpu_offload=ephemeral_gpu_offload,
            subfolder=subfolder,
            **hf_hub_download_kwargs,
        )
        lora_peft_config.inference_mode = False
        lora_model.peft_config[adapter_name] = lora_peft_config
        lora_model.inject_adapter(lora_model.model, adapter_name)

    adapter_weights = load_peft_weights(model_id, device=torch_device, subfolder=subfolder, **hf_hub_download_kwargs)

    # Fix the state dict keys to match what PEFT expects.
    new_adapter_weights = {}
    for old_key in adapter_weights.keys():
        key: str = old_key
        # Remove all prefixes until the key is rooted at `model.` (but not `model.model.`).
        while not (key.startswith("model.") and not key.startswith("model.model.")):
            key = key[key.find(".") + 1 :]
        # We always want model.model
        key = "model." + key
        new_adapter_weights[key] = adapter_weights[old_key]

    # Load the weights into the model.
    ignore_mismatched_sizes = kwargs.get("ignore_mismatched_sizes", False)
    load_result = set_peft_model_state_dict(
        lora_model,
        new_adapter_weights,
        adapter_name=adapter_name,
        ignore_mismatched_sizes=ignore_mismatched_sizes,
    )
    if len(load_result.unexpected_keys) > 0:
        raise ValueError(
            f"Got unexpected keys! Please raise an issue and tag @EricLBuehler.\n\nunexpected_keys={load_result.unexpected_keys}"
        )

    if hasattr(lora_model, "_cast_adapter_dtype"):
        lora_model._cast_adapter_dtype(adapter_name=adapter_name, autocast_adapter_dtype=autocast_adapter_dtype)


class XLoraModel(BaseTuner):
    """
    Creates an X-LoRA (Mixture of LoRA experts) model from a pretrained transformers model. Currently, this X-LoRA
    implementation only works with models with a transformer architecture.

    The method is described in detail in https://huggingface.co/papers/2402.07148.

    Args:
        model ([`torch.nn.Module`]): The model to be adapted.
        config ([`XLoraConfig`]): The configuration of the X-LoRA model.
        adapter_name (`str`): The name of the adapter; it does not affect the LoRA adapter names.

    Returns:
        `torch.nn.Module`: The X-LoRA model.

    Example:
        ```py
        >>> import torch
        >>> from transformers import AutoModelForCausalLM, AutoConfig, BitsAndBytesConfig
        >>> from peft import XLoraConfig, get_peft_model, prepare_model_for_kbit_training

        >>> model_config = AutoConfig.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
        >>> config = XLoraConfig(
        ...     task_type="CAUSAL_LM",
        ...     hidden_size=model_config.hidden_size,
        ...     xlora_depth=4,
        ...     adapters={
        ...         "adapter_1": "./path/to/the/checkpoint/",
        ...         "adapter_2": "./path/to/the/checkpoint/",
        ...         "adapter_n": "./path/to/the/checkpoint/",
        ...     },
        ... )
        >>> int8_config = BitsAndBytesConfig(load_in_8bit=True)
        >>> model = AutoModelForCausalLM.from_pretrained(
        ...     "mistralai/Mistral-7B-Instruct-v0.1",
        ...     trust_remote_code=True,
        ...     attn_implementation="flash_attention_2",
        ...     device_map="cuda:0",
        ...     torch_dtype=torch.bfloat16,
        ...     quantization_config=int8_config,
        ... )
        >>> model = prepare_model_for_kbit_training(model)
        >>> xlora_model = get_peft_model(model, config)
        ```
    """

    def __init__(
        self,
        model: nn.Module,
        config: Union[dict[str, XLoraConfig], XLoraConfig],
        adapter_name: str,
        torch_device: Optional[str] = None,
        ephemeral_gpu_offload: bool = False,
        autocast_adapter_dtype: bool = True,
        **kwargs,
    ) -> None:
        """
        Create a new X-LoRA model

        Args:
            model (`nn.Module`):
                Base model to apply X-LoRA to.
            config ([`XLoraConfig`]):
                X-LoRA configuration object.
            adapter_name (`str`):
                Adapter name for the X-LoRA adapter.
            torch_device (`str`, *optional*, defaults to None):
                (For loading the LoRA adapters) The device to load the adapter on. If `None`, the device will be
                inferred.
            ephemeral_gpu_offload (`bool`, *optional*, defaults to `False`):
                (For loading the LoRA adapters) Whether to use ephemeral GPU offloading for partially loaded modules.
                Defaults to `False`.
            autocast_adapter_dtype (`bool`, *optional*, defaults to `True`):
                (For loading the LoRA adapters) Whether to autocast the adapter dtype. Defaults to `True`. Right now,
                this will only cast adapter weights using float16 and bfloat16 to float32, as this is typically
                required for stable training, and only affects select PEFT tuners.
            kwargs: (`optional`):
                (For loading the LoRA adapters) Additional arguments to modify the way the adapter is loaded, e.g. the
                token for Hugging Face Hub.
        Nnone	use_cachez`use_cache` must be False_subfolders)rN   r:   rO   rP   r6   r<   r7   F )"nnModule__init__r   dictcopyr   target_moduleslayer_replicationbiasr	   xlora_configrN   rM   r   r^   rL   adaptersitemszipr_   	enumeraterY   strset_adapterlistrF   _maybe_freeze_all_adaptersr/   rJ   r   internal_xlora_classifierinternal_xlora_scalingsdisabled)selfr   r   r:   rP   r6   r<   rQ   confbase_lora_configrN   rB   adapters_itemsi_adapter_namerO   r7   r*   r"   	n_classesxlora_classifiers                        r.   rc   zXLoraModel.__init__   sF   F 			4 fd#,'DD  99T?*>'-1* &u&6E
 $5<<-%,,2H2H899$--3354$$m4 !5!5!;!;!=t?P?P?\?\]N(11779N4$$m4;D^;T 
7,M8i- 	#!$Q%!-*?+A'	 	
 1:.0I 
,,M8- 	#!$Q%!-*?+A"	 	
 	##D)=)=)B)B)D$EF'') 7!
v ,,-	*5+y-Y_` *:&'+$r0   c                    | j                          | j                  j                  s%| j                         D ]  \  }}d|v sd|_         y y )Nlora_F)evalri   use_trainable_adaptersnamed_parametersrequires_grad)ru   nameparams      r.   rq   z%XLoraModel._maybe_freeze_all_adapters5  sJ    		  77#446 0ed?*/E'0 8r0   c                h    d|d<    | j                   j                  |i |}| j                          |S )NFr^   )rN   generaterq   )ru   argsrQ   ress       r.   r   zXLoraModel.generate<  s8    #{&doo&&77'')
r0   c              /      K   d d  fd} j                   s' j                  j                  j                  |d      }d   j                   s(D ]  }|j	                           j	                          y y w)Nc                    ||d<   ||fS )Nscalingsr`   )r   r   rQ   r   s       r.   scalings_injection_hookzFXLoraModel._enable_peft_forward_hooks.<locals>.scalings_injection_hookE  s    !)F:<r0   c                   |d   }|d   }|j                  |        j                  j                  |i |}g }j                         D ]D  } t	        | t
              st        |      }| j                  |d      }|j                  |       F t        j                         5  j                  j                          	 |j                         }	d|	d<   d|	d<   	  j                  j                  j                  |i |	}
|D ]  }|j!                           	 j                  j#                          	 d d d         j                  |d
i|}g }j                         D ]D  } t	        | t
              st        |      }| j                  |d      }|j                  |       F |y # |D ]  }|j!                           w xY w# j                  j#                          w xY w# 1 sw Y   xY w)	Nr   r   )r   Twith_kwargsoutput_hidden_statesreturn_dictresult)updaterr   make_dummy_scalingsr   r   r   r   register_forward_pre_hookr$   torchno_gradrN   disable_adapter_layersre   r   r#   removeenable_adapter_layers)r,   r   rQ   	args_realkwargs_realdummy_scalingshook_handlespre_forwardhandlescaling_pass_kwargsbase_outputxlora_scalingshandles_to_remover   ru   s               r.   r   z:XLoraModel._enable_peft_forward_hooks.<locals>.pre_forwardL  s   
 QIq'Kv&OT;;OOQZj^ijNL,,. 0fi0")*AN"[K#==kW[=\F ''/	0  <668<*5*:*:*<'BF'(>?9='6,&Cdoo&;&;&C&CY&fRe&f '3 ,F"MMO, OO99;<  <T;;QZj;j^ijN L,,. 0fi0")*AN"[K#==kW[=\F ''/	0 !-! '3 ,F"MMO, OO99;< <s<   &G)G
&F.G
G).GG

G&&G))G2Tr   )rt   rN   r   r   r   )ru   generate_argsgenerate_kwargsr   forward_handler   r   r   s   `     @@r.   _enable_peft_forward_hooksz%XLoraModel._enable_peft_forward_hooksC  sx     	 
 !-	-^ }}!__22LL[fjLkN 	}}+   !!#	 s   A9A>c                z    	 t         |   |      S # t        $ r |dk(  r t        | j                  |      cY S w xY w)z1Forward missing attributes to the wrapped module.rN   )super__getattr__AttributeErrorgetattrrN   )ru   r   	__class__s     r.   r   zXLoraModel.__getattr__  sC    	27&t,, 	2|#4??D11	2s    %::c                    | S Nr`   )rB   _model_configs     r.   _prepare_adapter_configz"XLoraModel._prepare_adapter_config  s
     r0   c                     y r   r`   ru   s    r.    _mark_only_adapters_as_trainablez+XLoraModel._mark_only_adapters_as_trainable  s    r0   c                    d| _         y NFrt   r   s    r.   r   z XLoraModel.enable_adapter_layers  s	    r0   c                    d| _         y )NTr   r   s    r.   r   z!XLoraModel.disable_adapter_layers  s	    r0   c                     y r   r`   )ru   lora_configr:   r   target_nameparentcurrent_keys          r.   _create_and_replacezXLoraModel._create_and_replace  s     	r0   c                     yr   r`   )r   rW   s     r.   _check_target_module_existsz&XLoraModel._check_target_module_exists  s     r0   c                :     | j                   j                  |i |S r   )rN   r   )ru   r   rQ   s      r.   r#   zXLoraModel.forward  s    $t$$d5f55r0   c                >    | j                   }||j                  _        y)z
        Sparsely select the specified top_k LoRA experts instead of the default dense method. Set to None to use dense.
        This is reflected in the config.
        """
        classifier: XLoraClassifier = self.internal_xlora_classifier  # type: ignore
        classifier.config.top_k_lora = value

    def set_global_scaling_weight(self, weight: float):
        """
        Set the global LoRA weight, a scalar to multiply the output of each LoRA adapter by. This is by default 1. This
        is reflected in the config.
        """
        classifier: XLoraClassifier = self.internal_xlora_classifier  # type: ignore
        classifier.config.global_scaling_weight = weight

    def set_scaling_pass_value(self, value: float | None):
        """
        Set the scaling pass value, the value to set the scalings to during the scaling pass. If the value is None, the
        scaling pass value will be 1/n where n is the number of adapters.
        """
        classifier: XLoraClassifier = self.internal_xlora_classifier  # type: ignore
        classifier._set_override_scaling_pass_value(value)

    def get_global_scaling_weight(self) -> float:
        """
        Get the global LoRA weight.
        """
        classifier: XLoraClassifier = self.internal_xlora_classifier  # type: ignore
        return classifier.config.global_scaling_weight

    def get_latest_scalings(self) -> Optional[torch.Tensor]:
        """
        Returns the latest scalings prediction, or None if no scalings have been predicted. The tensor is of shape
        (batch_size, seq_len, n_layers, n_classes).
        """
        return self.internal_xlora_scalings

    def get_scalings_log(self) -> list[torch.Tensor]:
        """
        Returns a shallow copy (copying only the list itself, not the tensors) of the list containing the scalings
        log. Editing the list does not change the underlying log. The tensors are of shape (batch_size, seq_len,
        n_layers, n_classes). The seq_len dim may vary with input dimension.
        """
        classifier: XLoraClassifier = self.internal_xlora_classifier  # type: ignore
        return classifier.log_scalings.copy()

    def enable_scalings_logging(self):
        """
        Enable scalings logging.
        """
        classifier: XLoraClassifier = self.internal_xlora_classifier  # type: ignore
        classifier.scalings_logging = True

    def disable_scalings_logging(self):
        """
        Disable scalings logging, without clearing the log.
        """
        classifier: XLoraClassifier = self.internal_xlora_classifier  # type: ignore
        classifier.scalings_logging = False

    def clear_scalings_log(self):
        """
        Clear the scalings log.
        """
        classifier: XLoraClassifier = self.internal_xlora_classifier  # type: ignore
        classifier.log_scalings.clear()

    def get_bucketed_scalings_log(self) -> dict[int, tuple[list[int], list[torch.Tensor]]]:
        """
        Returns bucketed scalings, bucketed by seq_len. Each value consists of the positions (the first) and the
        associated tensors. The positions are paired with the associated tensors and give the position in the scaling
        log.
        """
        classifier: XLoraClassifier = self.internal_xlora_classifier  # type: ignore
        return classifier._get_bucketed_scalings()
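

# A minimal usage sketch of the runtime-control API above. The model id and adapter paths are
# hypothetical, and this assumes the `PeftModel` returned by `get_peft_model` forwards these
# attribute lookups to the wrapped `XLoraModel`:
#
#     from transformers import AutoModelForCausalLM
#     from peft import XLoraConfig, get_peft_model
#
#     base = AutoModelForCausalLM.from_pretrained("some/base-model", use_cache=False)
#     config = XLoraConfig(
#         task_type="CAUSAL_LM",
#         hidden_size=base.config.hidden_size,
#         adapters={"adapter_1": "./path/to/adapter_1/", "adapter_2": "./path/to/adapter_2/"},
#     )
#     model = get_peft_model(base, config)
#
#     model.set_topk_lora(1)           # sparse mixing: keep only the top-1 adapter per token
#     model.enable_scalings_logging()
#     # ... run model.generate(...) on tokenized inputs ...
#     log = model.get_bucketed_scalings_log()  # {seq_len: (positions, tensors)}
#     model.clear_scalings_log()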