import torch.nn as nn

from peft.utils import _freeze_adapter, _get_submodules

from .config import AdaptionPromptConfig, prepare_config
from .layer import AdaptedAttention, AdaptedAttentionGPT
from .utils import is_adaption_prompt_trainable


class AdaptionPromptModel(nn.Module):
    """
    Implements adaption prompts as described in https://huggingface.co/papers/2303.16199.

    The top L attention modules are replaced with AdaptedAttention modules that wrap the original ones, but insert
    trainable prompts with gates (for zero init).
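
    Schematically, the wrapped attention computes (a sketch of the paper's zero-init gating, not a verbatim copy of
    the layer implementation): ``output = attn(x) + gate * softmax(q @ K_p.T / sqrt(d)) @ V_p``, where ``K_p`` and
    ``V_p`` are projected from the learned prompt and ``gate`` is a learnable factor initialized to zero, so training
    starts from the unmodified base model.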

    Notes on the multi-adapter pattern:
    - We store the states of different adapters by keeping a dictionary of AdaptedAttention modules indexed by adapter
      name.
    - Every time we switch adapters, we remove the modules of the currently active adapter from the model, store them
      in the dictionary, and replace them with the modules of the new adapter.
    - To avoid duplicated and potentially inconsistent state, the currently active adapter is always removed from the
      dictionary.
    - Disabling the adapter would also result in the modules being removed from the model.
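
    Example (a minimal usage sketch; it assumes the top-level ``peft`` helpers ``get_peft_model`` and
    ``AdaptionPromptConfig``, and uses the ``huggyllama/llama-7b`` checkpoint purely for illustration)::

        >>> from peft import AdaptionPromptConfig, get_peft_model
        >>> from transformers import AutoModelForCausalLM

        >>> model = AutoModelForCausalLM.from_pretrained("huggyllama/llama-7b")
        >>> config = AdaptionPromptConfig(adapter_len=10, adapter_layers=30, task_type="CAUSAL_LM")
        >>> model = get_peft_model(model, config)  # the initial adapter is registered as "default"
        >>> model.print_trainable_parameters()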
    """

    def __init__(self, model, configs: dict, adapter_name: str):
        super().__init__()
        self.model = model
        # Store adapter configs by name.
        self.peft_config: dict[str, AdaptionPromptConfig] = {}
        # Store lists of the parents of the affected attention modules by adapter name.
        # We keep references to the parents so we can swap the adapters in and out of the model.
        self._parents: dict[str, list[nn.Module]] = {}
        # Store lists of cached AdaptedAttention modules by adapter name.
        self._cached_adapters: dict[str, list] = {}
        # The name of the currently active adapter.
        self._active_adapter = None
        # Whether the adapter is enabled.
        self._enabled = True
        self.forward = self.model.forward
        self.add_adapter(adapter_name, configs[adapter_name])
        self._mark_only_adaption_prompts_as_trainable(self.model)

    def add_adapter(self, adapter_name: str, config: AdaptionPromptConfig) -> None:
        """Add an adapter with the given name and config."""
        config = prepare_config(config, self.model)
        if adapter_name in self.peft_config:
            raise ValueError(f"Adapter with name '{adapter_name}' already exists.")

        parents = []
        for name, _ in self.model.named_modules():
            if name.endswith(config.target_modules):
                par, _, _ = _get_submodules(self.model, name)
                parents.append(par)
        if len(parents) < config.adapter_layers:
            raise ValueError(
                f"Config specifies more adapter layers '{config.adapter_layers}' than the model has '{len(parents)}'."
            )
        # Note that if the target modules are not held in an ordered container (Sequential, ModuleList, ...), the
        # behavior is undefined, as we assume here that the module order matches the order of the decoder layers.
        parents = parents[-config.adapter_layers :]
        self._parents[adapter_name] = parents

        # The active adapter is only None during initialization.
        # If the adapter is disabled, there are no adapted modules to remove.
        if self._active_adapter is not None and self._enabled:
            self._remove_adapted_attentions(self._active_adapter)
        self._active_adapter = adapter_name
        self.peft_config[adapter_name] = config
        self._create_adapted_attentions(config, parents)
        if not self._enabled:
            self._remove_adapted_attentions(self._active_adapter)

        if config.inference_mode:
            _freeze_adapter(self.model, adapter_name)

    def set_adapter(self, adapter_name: str) -> None:
        """Set the model to use the adapter with the given name."""
        if self._active_adapter == adapter_name:
            return
        if adapter_name not in self.peft_config:
            raise ValueError(f"Adapter with name '{adapter_name}' does not exist.")

        if self._enabled:
            self._remove_adapted_attentions(self._active_adapter)
            self._set_adapted_attentions(adapter_name)

        self._active_adapter = adapter_name

    def enable_adapter_layers(self):
        """Enable adapter layers by swapping in cached AdaptedAttention modules."""
        self._enabled = True
        self._set_adapted_attentions(self._active_adapter)

    def disable_adapter_layers(self):
        """Disable adapter layers by swapping out AdaptedAttention modules."""
        self._enabled = False
        self._remove_adapted_attentions(self._active_adapter)

    def _create_adapted_attentions(self, config: AdaptionPromptConfig, parents: list[nn.Module]) -> None:
        """Wrap the original attention modules with newly created AdaptedAttention modules."""
        for par in parents:
            if self.model.config.model_type == "gpt2":
                attn = AdaptedAttentionGPT(
                    model_type=self.model.config.model_type,
                    adapter_len=config.adapter_len,
                    model=getattr(par, config.target_modules),
                )
            else:
                attn = AdaptedAttention(
                    model_type=self.model.config.model_type,
                    adapter_len=config.adapter_len,
                    model=getattr(par, config.target_modules),
                )
            setattr(par, config.target_modules, attn)

    def _set_adapted_attentions(self, adapter_name: str) -> None:
        """Replace the original attention modules with cached AdaptedAttention modules."""
        cached = self._cached_adapters[adapter_name]
        del self._cached_adapters[adapter_name]
        config = self.peft_config[adapter_name]
        for i, par in enumerate(self._parents[adapter_name]):
            setattr(par, config.target_modules, cached[i])

    def _remove_adapted_attentions(self, adapter_name: str) -> None:
        """Remove AdaptedAttention modules from the model and store them in the cache."""
        config = self.peft_config[adapter_name]
        adapted_attentions = []
        for par in self._parents[adapter_name]:
            attn = getattr(par, config.target_modules)
            adapted_attentions.append(attn)
            # Restore the original attention module that the AdaptedAttention wraps.
            setattr(par, config.target_modules, attn.model)
        self._cached_adapters[adapter_name] = adapted_attentions

    def _mark_only_adaption_prompts_as_trainable(self, model: nn.Module) -> None:
        """Freeze all parameters of the model except the adaption prompts."""
        for n, p in model.named_parameters():
            if not is_adaption_prompt_trainable(n):
                p.requires_grad = False

    def __getattr__(self, name: str):
        """Forward missing attributes to the wrapped module."""
        try:
            return super().__getattr__(name)  # defer to nn.Module's logic
        except AttributeError:
            if name == "model":  # prevent infinite recursion if the class is not fully initialized
                raise
            return getattr(self.model, name)