
    bi[{                   `   d dl mZ d dlZd dlZd dlmZ d dlmZmZm	Z	 d dl
Z
d dlmZ d dlmc mZ d dl
mZ d dlmZ d dlmZ d dlmZmZ d d	lmZmZmZmZ d d
lmZ ddlm Z   G d d      Z! G d de      Z" G d dejF                  e"      Z$ G d dejF                  e"      Z% G d dejF                  e"      Z& G d de&      Z' G d de&      Z( G d de&      Z) G d dejF                  e"      Z* G d d ejF                        Z+d! Z, G d" d#ejF                  e"      Z-	 d%	 	 	 	 	 	 	 	 	 d&d$Z.y)'    )annotationsN)contextmanager)AnyOptionalUnion)svd_lowrank)Conv1D)
BufferDict)BaseTunerLayercheck_adapters_to_merge)dequantize_module_weightgather_params_ctxget_bnb_param_typeskip_init_on_device)	transpose   )
LoraConfigc                  j    e Zd ZdZedd       Zed	d       Zed
d       Zed	d       Zedd       Z	y)LoraVarianta{  
    Base class for LoRA variants, e.g. DoRA.

    This class should be subclassed and the methods below should be implemented accordingly. The methods should be
    implemented as static methods, this makes it easier to combine variants.

    Note for developers: These methods are prone to change and should thus considered to be "private". Use at your own
    discretion.
    c                    t         )zKInitialization code for the LoRA variant, it's called within `update_layer`NotImplementedError)moduleadapter_names     Q/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/peft/tuners/lora/layer.pyinitzLoraVariant.init3   
     "!    c                    t         )zZSafe merging of the weights from `merge(..., safe_merge=True)`, should return a new tensorr   r   active_adapterorig_weights      r   
merge_safezLoraVariant.merge_safe8   r   r   c                     y)zdUnsafe merging of the weights from `merge(..., safe_merge=False)`, should modify the weight in-placeN r    s      r   merge_unsafezLoraVariant.merge_unsafe=       r   c                     y)zFRemove the adapter weights from the original weights, then return themNr%   r    s      r   unmergezLoraVariant.unmergeA   r'   r   c                    t         )a  
        The forward pass of the LoRA variant, should return the overall result (not just the diff)

        Args:
            module (LoraLayer): The module on which the forward pass is called
            active_adapter (str): The name of the active adapter
            x (torch.Tensor): The input to the forward call
            result (torch.Tensor): The result from the base model
        r   )r   r!   xresults       r   forwardzLoraVariant.forwardE   s
     "!r   N)r   	LoraLayerr   strreturnNone)r   r.   r!   r/   r"   torch.Tensorr0   r2   )r   r.   r!   r/   r"   r2   r0   r1   )
r   r.   r!   r/   r+   r2   r,   r2   r0   r2   )
__name__
__module____qualname____doc__staticmethodr   r#   r&   r)   r-   r%   r   r   r   r   (   so     " " " " s s U U 
" 
"r   r   c                      e Zd ZU dZded<   dZded<   dddZddZ	 	 	 	 d	 	 	 	 	 	 	 ddZd	 Z	d
 Z
d Zd Zd Z ej                         d        ZddZddZddZddZd d!dZd Z	 	 	 	 	 	 	 	 	 	 d"dZy)#r.   )lora_Alora_Blora_embedding_Alora_embedding_Bztuple[str, ...]adapter_layer_names)r
lora_alphascalinglora_dropoutother_param_namesc                n	   || _         i | _        i | _        i | _        t	        j
                  i       | _        t	        j
                  i       | _        t	        j
                  i       | _        t	        j                  i       | _
        t	        j                  i       | _        d| _        g | _        i | _        i | _        t         j                  j                         | _        i | _        || _        d| _        i | _        || _        | j/                         }t1        |t        j2                        r|j4                  |j6                  }}nct1        |t        j8                        r|j:                  |j<                  }}n/t1        |t        j>                        r|j:                  |j<                  }}nt1        |t        j@                        r|j:                  |j<                  }}nt1        |t        jB                        r|jD                  |jF                  }}nt1        |tH              rGtK        |jL                  d      r|jL                  jN                  n|jL                  jP                  \  }}n<t1        |t        jR                        rB|jT                  stW        d| jX                   d      |jZ                  d|jZ                  z  }}ntK        |d      r&tK        |d      r|j\                  |j^                  }}ntK        |d	      r&tK        |d
      r|j`                  |jb                  }}n|tK        |d      r3|jX                  jd                  dk(  r|j4                  |j6                  }}n=tK        |d      r2|jX                  jd                  dk(  r|j4                  |j6                  }}n|jX                  jd                  dk(  r|j4                  |j6                  }}ntK        |d      r2|jX                  jd                  dk(  r|j4                  |j6                  }}n|jX                  jd                  dk(  r|j4                  |j6                  }}n]tK        |d      r%tK        |d      r|j4                  |j6                  }}nd\  }}tg        jh                  dtk        |       dtl               || _        || _        y )NFTds_shapez=Only same dim for query/key/value is supported as of now for .   
infeaturesoutfeatures
input_sizeoutput_size	codebooksQuantizedLinearw_bitWQLinear_GEMM
EetqLinearW_q	HQQLinearPatchedLinearin_featuresout_features)NNzUnsupported layer type 'z(' encountered, proceed at your own risk.)7
base_layerr>   r?   r@   nn
ModuleDictrA   r9   r:   ParameterDictr;   r<   _disable_adaptersmerged_adaptersuse_dora	lora_biastorchlora_magnitude_vector_cachesephemeral_gpu_offloadcast_input_dtype_enabledlora_variantkwargsget_base_layer
isinstanceLinearrS   rT   Conv1din_channelsout_channelsConv2dConv3d	Embeddingnum_embeddingsembedding_dimr	   hasattrweightrD   shapeMultiheadAttention_qkv_same_embed_dim
ValueError	__class__	embed_dimrG   rH   rI   rJ   r3   warningswarntypeUserWarning)selfrU   r`   rc   rS   rT   s         r   __init__zLoraLayer.__init__Y   s   $MM"-mmB'mmB' " 0 0 4 " 0 0 4!&!)+*,%*XX%8%8%:"')+@".2%46((*
j")),(2(>(>
@W@WK
BII.(2(>(>
@W@WK
BII.(2(>(>
@W@WK
BII.(2(>(>
@W@WK
BLL1(2(A(A:C[C[K
F+.5j6G6G.T
!!**ZdZkZkZqZq &K 
B$9$9:11 #`aeaoao`ppq!rss(2(<(<a*BVBV>VKZ.7:}3U(2(=(=z?U?UKZ.7:}3U(2(=(=z?U?UKZ-*2F2F2O2OSd2d(2(>(>
@W@WKZ)j.B.B.K.K.^(2(>(>
@W@WK!!**l:(2(>(>
@W@WKZ'J,@,@,I,I[,X(2(>(>
@W@WK!!**o=(2(>(>
@W@WK z=1gj.6Y,6,B,BJD[D[\,6)\MM*4
+;*<<degr '(r   c                    y)a  Return a matching LoRA variant for this layer type.

        Given the init arguments of this layer, return the correct LoRA variant, if any. E.g., if `use_dora=True`, this
        method should return the DoRA variant for the given layer.

        If there is no fitting variant, return None.

        Note: If this layer type does not support the LoRA variant at all, please raise an error during __init__ as is
        convention, and not here.

        Nr%   )r{   r[   rc   s      r   resolve_lora_variantzLoraLayer.resolve_lora_variant   s     r   c                0   t               j                         }|d= |dk  rt        d|       | j                  |||
      }||| j                  |<   || j
                  |<   || j                  |<   |dkD  rt        j                  |      }nt        j                         }| j                  j                  t        j                  ||i             t        j                  | j                  |d      | j                  |<   t        j                  || j                   |	      | j"                  |<   |	| j$                  |<   |r&|t'        j(                  |      z  | j*                  |<   n||z  | j*                  |<   || j,                  |<   t/        |t0              rQ|j3                  d	      r@t5        | j7                         j8                        5  | j;                  ||       d d d        nt/        |t0              rQ|j3                  d
      r@t5        | j7                         j8                        5  | j=                  ||       d d d        n7t/        |t0              rQ|j?                         dk(  r>t5        | j7                         j8                        5  | jA                  |       d d d        n|dk(  r>t5        | j7                         j8                        5  | jC                  |       d d d        n|dk(  r7t        jD                  jG                  | j"                  |   j8                         nW|dk(  r>t5        | j7                         j8                        5  | jI                  |       d d d        n|r| jK                  ||       | jM                  |       || j                  v r  | j                  |   jD                  | fi | | jO                  | jP                         y # 1 sw Y   dxY w# 1 sw Y   pxY w# 1 sw Y   |xY w# 1 sw Y   xY w# 1 sw Y   xY w)Nr{   r   ?`r` should be a positive integer value but the value passed is r[   
use_qaloraqalora_group_size        pFbiaspissacordaoloraloftqeva
orthogonal))localscopyrt   r~   rb   r>   r?   rV   DropoutIdentityrA   updaterW   rf   rS   r9   rT   r:   r\   mathsqrtr@   r[   re   r/   
startswithr   rd   rp   
pissa_init
corda_initlower
olora_init
loftq_initr   zeros_orthogonal_initreset_lora_parameters%_move_adapter_to_device_of_base_layerset_adapteractive_adaptersr{   r   r>   r?   rA   init_lora_weights
use_rslorar[   r   r\   r   rc   rb   lora_dropout_layers                 r   update_layerzLoraLayer.update_layer   s    6N 6^_`^abcc00*HY 1 
 #.:Dl+ |(2%#!#l!;!#  >P/Q!RS$&IId.>.>$NL!$&IIa1B1B$SL!'0|$)3diil)BDLL&)3aDLL&&.l# '-2C2N2Nw2W"4#6#6#8#?#?@ A.?@A A)3/4E4P4PQX4Y"4#6#6#8#?#?@ A.?@A A)3/4E4K4K4MQX4X"4#6#6#8#?#?@ .-. .')"4#6#6#8#?#?@ .-. .%'GGNN4;;|4;;<,."4#6#6#8#?#?@ 3$$\23 3&&|5FG22<@4,,,0Dl+00@@--.1A AA A. .. .
3 3s<   O-O(O4P PO%(O14O= P	Pc                (   |du ry || j                   j                         v r5|du rLt        j                  j	                  | j                   |   j
                  t        j                  d             nj|j                         dk(  rIt        j                  j                  | j                   |   j
                  d| j                  |   z         nt        d|      t        j                  j                  | j                  |   j
                         | j                  |   r6t        j                  j                  | j                  |   j                         || j                   j                         v rt        j                  j                  | j                   |          t        j                  j                  | j"                  |          | j                  |   r7t        j                  j                  | j"                  |   j                         y y y )	NFT   )agaussianr   )stdz)Unknown initialization init_lora_weights=)r9   keysrV   r   kaiming_uniform_rp   r   r   r   normal_r>   rt   r   r:   r\   r   r;   r<   )r{   r   r   s      r   r   zLoraLayer.reset_lora_parameters   s   %4;;++-- D( ((\)B)I)ITYYWX\(Z"((*j8L 9 @ @a$&&Q]J^F^_ #M;L:N!OPPGGNN4;;|4;;<~~l+t{{<8==>4005577 GGNN400>?GGOOD11,?@~~l+t44\BGGH , 8r   c                   | j                         }|j                  }t        |      }|j                  }|rt	        |      }nC|t
        j                  t
        j                  t
        j                  fv r|}nt        d| d      | j                  |   }| j                  |   }|j                  t
        j                        }t
        j                  j                  |j                        \  }	}
|	d d d |f   |
d | }}|j!                         | j"                  |   j                  _        |j!                         | j$                  |   j                  _        |xj                  || j$                  |   j                  z  | j"                  |   j                  z  z  c_        |dk(  r_|j'                  ||j(                  |j*                  |j,                  |j.                        j                  |j0                        }||_        y |dk(  rI|j'                  ||j2                  |j4                        j                  |j0                        }||_        y |j                  |      }||j                  _        y )Nz.Unsupported data type for the base layer. Got rE   4bit)
quant_typequant_storagecompress_statisticsr   8bit)requires_gradhas_fp16_weights)rd   rp   r   dtyper   r]   float32float16bfloat16	TypeErrorr@   r>   tolinalgqrdata
contiguousr9   r:   ru   r   r   r   r   devicer   r   )r{   r   rU   r"   bnb_param_typer   weight_tensorscale_factorr>   QRQrRrs                r   r   zLoraLayer.olora_init  s#   ((*
 ''+K8!!4Z@Mu}}emmU^^DD'MLUGSTUVV||L1FF< %((7||}11211bqb51Ra5B02L!((-02L!((-lT[[-F-M-MMPTP[P[\hPiPpPpppV#'11&11)77$/$C$C")) 2  b##$  !.Jv%'11)77!,!=!= 2  b##$	 
 !.J),,U3M%2J"r   c           	     J   | j                         j                  }|j                  }|t        j                  t        j
                  t        j                  fvrt        d      t        |j                  t        j                        | j                        }|dk(  r~t        j                  j                  |j                  d      \  }}}|d d d | j                  |   f   }|d | j                  |    }	|	| j                  |   z  }	|d | j                  |    }
nt!        |j#                  d            dk(  rgt%        |j                  | j                  |   t'        |j#                  d      d               \  }}	}|	| j                  |   z  }	|j)                         }
nt+        d	| d
      t        j,                  t        j.                  |	            |
z  }|t        j,                  t        j.                  |	            z  }|| j0                  |   j                  _        || j2                  |   j                  _        |j                  | j                  |   |z  |z  z
  }t        |j                  |      | j                        }|| j                         j                  _        y )NzPlease initialize PiSSA under float32, float16, or bfloat16. Subsequently, re-quantize the residual model to help minimize quantization errors.r   F)full_matrices_niter_   )niterzLinit_lora_weights should be 'pissa' or 'pissa_niter_[number of iters]', got 	 instead.)rd   rp   r   r]   r   r   r   r   r   r   fan_in_fan_outr   svdr   r>   r@   lensplitr   inttrt   diagr   r9   r:   )r{   r   r   rp   r   VSUhVrSrUhrUrr9   r:   s                 r   r   zLoraLayer.pissa_initF  s@   $$&--u~~FFe  699U]]3T5H5HI'||''5'IHAq"1,|,,,-B)TVVL)*B$,,|,,B+tvvl+,C"((349$TVVL1=N=T=TU^=_`b=c9dJBB $,,|,,B$$&C^_p^qqz{  EJJrN+c1ejjB0006L!((-06L!((-t||L9FBVKK699U+T-@-@A,2$$)r   c           	        | j                         }|j                  }|j                  }|t        j                  t        j
                  t        j                  fvrt        d      |j                  t        j                        }|j                  j                  d      }|j                  j                  d      }t        |d      st        d      |j                  }|j                  }	|j                  }
|j                   }| j"                  |   }t        j$                  |
      j'                         s#t        j(                  |
      j'                         rt        d      t        j$                  |	      j'                         s#t        j(                  |	      j'                         rt        d      t        j$                  |      j'                         s#t        j(                  |      j'                         rt        d      |	j                  d      |k7  s|	j                  d      |k7  r#t        d	|	j                          d
| d| d      |
j                  d      |k7  r t        d|
j                          d
| d      |j                  d      |k7  s|j                  d      |k7  r#t        d|j                          d
| d| d      |
| j*                  |   z  }
|j-                         j/                  |
j1                         j3                  dd            j5                         }|	j/                  |
j1                               j5                         }|| j6                  |   j                  _	        || j8                  |   j                  _	        |j                  | j*                  |   |z  |z  z
  }|j                  |      }|| j                         j                  _	        |`y )NzPlease initialize CorDA under float32, float16, or bfloat16. Subsequently, re-quantize the residual model to help minimize quantization errors.r   r   eigensz`eigens` attribute not found for layer, please run `preprocess_corda` first. More information can be found at examples/corda_finetuning/README.md.zdInvalid value found in matrix S. Please file an issue at https://github.com/huggingface/peft/issues.zdInvalid value found in matrix U. Please file an issue at https://github.com/huggingface/peft/issues.zdInvalid value found in matrix V. Please file an issue at https://github.com/huggingface/peft/issues.zMatrix U size mismatch: z vs. (z, z). Please make sure the `lora_config` and `model` argument of `preprocess_corda` is consistent with `get_peft_model`. If you're using cache in `preprocess_corda`, please make sure the cache is built with the same model and LoRA rank.zMatrix S size mismatch: z,). Please make sure the `lora_config` and `model` argument of `preprocess_corda` is consistent with `get_peft_model`. If you're using cache in `preprocess_corda`, please make sure the cache is built with the same model and LoRA rank.zMatrix V size mismatch: r   )rd   rp   r   r]   r   r   r   r   r   r   sizero   rt   r   U_WCS_WCV_WCr>   isnananyisinfr@   r   mulr   viewr   r9   r:   )r{   r   r   linearrp   r   out_dimin_dimr   Ur   r   r>   r9   r:   s                  r   r   zLoraLayer.corda_initi  sI   $$&u~~FFe  5==)++""1%!!!$ vx(X  KKKKKKFF<  ;;q>5;;q>#5#5#7v  ;;q>5;;q>#5#5#7v  ;;q>5;;q>#5#5#7v 
 66!9166!9>*1668*F7)2aS Ip p 
 66!9>*1668*F1# >Y Y 
 66!9!&&)q.*1668*F6("QC Hp p  	
T\\,'' 1668==Q/0;;=qvvx++-06L!((-06L!((-t||L9FBVKK5!,2$$) Mr   c                   ddl m} | j                         j                  }| j                  j                  dd      | j                  |   | j                  j                  dd      d} ||fi |\  }}}|| j                  j                         v r<|| j                  |   j                  _	        || j                  |   j                  _	        || j                  j                         v r<|| j                  |   j                  _	        || j                  |   j                  _	        || j                         j                  _	        y )Nr   )r   
loftq_bits   
loftq_iterr   )num_bitsreduced_ranknum_iter)peft.utils.loftq_utilsr   rd   rp   rc   getr>   r9   r   r   r:   r;   r<   )r{   r   r   rp   rc   qweightr9   r:   s           r   r   zLoraLayer.loftq_init  s   5$$&--a8 FF<0a8
 #-V">v">4;;++--4:DKK%,,14:DKK%,,14005577>DD!!,/66;>DD!!,/66;,3$$)r   c                D   | j                   |   }|dz  dk7  rt        d| d      t        j                  ||      }t        j                  j                  |      \  }}|dd dd d f   }|dd dd d f   }| j                         j                  j                  }t        j                  | j                  |dz        j                  |      j                  dz  }	t        j                  |dz  | j                        j                  j                  |      dz  }
t        j                  |	j                         j!                  |            | j"                  |   _        t        j                  |
j                         j!                  |            | j$                  |   _        y )Nr   r   zAOrthogonal initialization requires the LoRA rank to be even, got r   r   g      $@)r>   rt   r]   randnr   r   rd   rp   r   rS   mmTrT   rV   	Parameterr   r   r9   r:   )r{   r   rankXr   _q_oddq_evenr   r9   r:   s              r   r   zLoraLayer.orthogonal_init  sW    vvl#!8q=`ae`ffopqqKKd#||q!1!$Q$'
14a47##%,,22T--tqy9<<UCEELTQY(9(9:<<??G$N+-<<8I8I8K8N8Nu8U+VL!(+-<<8I8I8K8N8Nu8U+VL!(r   c                "    || j                   |<   y N)r_   r{   keyvalues      r   _cache_storezLoraLayer._cache_store  s    !Sr   c                <    | j                   j                  |      }|S r  )r_   popr  s      r   
_cache_popzLoraLayer._cache_pop  s      %r   c                    || j                   vry|| j                  |   z  | j                  |   z  | j                   |<   y)zSet the scale of the given adapter to the initial scale multiplied by the provided factor

        The initial scale is determined by the configured `r` (rank) and `lora_alpha`.
        N)r@   r?   r>   )r{   adapterscales      r   	set_scalezLoraLayer.set_scale  s<    
 $,,& %(@ @466'? RWr   c                    |dk(  ry| j                   D ]6  }|| j                  j                         vr | j                  |xx   |z  cc<   8 y)zHMultiply the current scale of all active adapters by the provided factorr   N)r   r9   r   r@   r{   r  r!   s      r   scale_layerzLoraLayer.scale_layer  sM    A:"22 	2NT[[%5%5%77LL(E1(		2r   Nc                    | j                   D ]e  }|| j                  j                         vr |-| j                  |   | j                  |   z  | j
                  |<   O| j
                  |xx   |z  cc<   g y)zDivide the current scale of all active adapters by the provided factor. If `scale=None` is passed, reset to
        initial scale

        The initial scale is determined by the configured `r` (rank) and `lora_alpha`.

        N)r   r9   r   r?   r>   r@   r  s      r   unscale_layerzLoraLayer.unscale_layer  ss     #22 	6NT[[%5%5%77}/3~/NQUQWQWXfQg/g^,^,5,	6r   c                n   |j                  dd      }|yt        |      t        |      k7  r&dt        |       dt        |       d}t        |      | j                  rd}t        |      |D ch c]
  }|dk7  s	| }}|D ]+  }| j                  j                  |d      s d	}t        |       yc c}w )
MCheck if the arguments are compatible with the configs and state of the modeladapter_namesNzNLength of `adapter_names` should be the same as the number of inputs, but got z and z respectively.z`Cannot pass `adapter_names` when there are merged adapters, please call `unmerge_adapter` first.__base__Fz1Cannot pass `adapter_names` when DoRA is enabled.)r   r   rt   mergedr[   )	r{   r+   argsrc   r  msgnameunique_adaptersr   s	            r   _check_forward_argszLoraLayer._check_forward_args  s    

?D9 q6S''`}%&eCF8>C  S/!;; uCS/! -:PDTZ=O4PP+ 	&L}}  u5I o%	& Qs   0
B2;B2c          	         | j                   |g|i |}|j                  }t        |      }g }|D ]5  }	|j                  t	        |      D 
cg c]  \  }
}||	k(  s|
 c}}
       7 t	        |      D ]  \  }}|dk(  r|| j
                  j                         vr)| j
                  |   }| j                  |   }| j                  |   }| j                  |   }|||      j                  |j                  j                        } | | ||                  |z  }|||   xx   |j                  |      z  cc<    |S c c}}
w Nr  )rU   r   setappend	enumerater9   r   r:   rA   r@   r   rp   )r{   r+   r  r  rc   r,   torch_result_dtyper  sub_batch_indices_listr  indexitemir!   r9   r:   dropoutr@   	sub_batchlora_outputs                       r   _mixed_batch_forwardzLoraLayer._mixed_batch_forward   s[   
 !4T4V4#\\m,!#& 	rG"))ImD\*p[UD`dho`o5*pq	r "+?!; 	TA~+T[[%5%5%77[[0F[[0F''7Gll>2G 034778K8KLI 	(:!;<wFK)!,-@R1SS-	T" ' +qs   D?D?)F)rU   	nn.Moduler`   boolr0   r1   r[   r,  r0   zOptional[LoraVariant]FFF    r[   r,  r   r,  r\   r,  r   r   )r  r/   r  r   r0   r1   )r  r/   r0   r   )r  r/   r  float | intr0   r1   )r  r1  r0   r1   r  )r  zOptional[float | int]r0   r1   
r+   r2   r  r   r  	list[str]rc   r   r0   r2   )r3   r4   r5   r=   __annotations__rB   r|   r~   r   r   r   r   r   r   r]   no_gradr   r  r	  r  r  r  r  r*  r%   r   r   r.   r.   S   s    +gg)WWK)Z,  !#J/ J/ J/ J/ J/XI2+3Z!3FHT4* U]]_W W "S	26 &6&):COR	r   r.   c                       e Zd Z	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d	 fdZd
dZdddZddZddZddZd fdZ	 xZ
S )rf   c           
         t         |           t        j                  | |fi | || _        || _        | j                  ||||||	|
|       || _        y )Nr?   rA   r   r   r[   r\   )superr|   r.   r   _active_adapterr   is_target_conv_1d_layer)r{   rU   r   r>   r?   rA   r   r;  r   r   r[   r\   rc   ru   s                r   r|   zLinear.__init__M  sj     	46v6,+!%/! 	 		
 (?$r   c               "    |sy ddl m}  |       S )Nr   )DoraLinearVariant)variantsr=  )r{   r[   rc   r=  s       r   r~   zLinear.resolve_lora_variantm      / ""r   c                   t        | |      }|sy|D ]l  }|| j                  j                         v s!| j                         }|r`|j                  j
                  j                         }|j                  }|| j                  vr&| j                  |      }||j                  |      z  }n | j                  |   j                  | ||      }t        j                  |      j                         st        d| d      ||j                  _        | j                   |   rG|j"                  | j$                  |   j"                  | j&                  |   z  z   }t        j                  |      j                         st        d| d      |j                  |      |j"                  _        n|| j                  vr1| j                  |      }|j                  xj
                  |z  c_        n*| j                  |   j)                  | ||j                         | j                   |   rF|j"                  xj
                  | j$                  |   j"                  | j&                  |   z  z  c_        | j*                  j-                  |       o ya^  
        Merge the active adapter weights into the base weights

        Args:
            safe_merge (`bool`, *optional*):
                If True, the merge operation will be performed in a copy of the original weights and check for NaNs
                before merging the weights. This is useful if you want to check if the merge operation will produce
                NaNs. Defaults to `False`.
            adapter_names (`list[str]`, *optional*):
                The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
                to `None`.
        N1NaNs detected in the merged weights. The adapter  seems to be broken)r   r9   r   rd   rp   r   cloner   rb   get_delta_weightr   r#   r]   isfiniteallrt   r\   r   r:   r@   r&   rZ   r   )	r{   
safe_merger  r!   rU   r"   
orig_dtypedelta_weightnew_biass	            r   mergezLinear.mergeu  s;    0mD+ '	<N!1!1!33!002
 #-"3"3"8"8">">"@K!,!2!2J%T->->>'+'<'<^'L#|z'BB&*&7&7&G&R&RSWYgit&u >>+6::<(OP^O__rs  .9J%%*~~n5#-??T[[5P5U5UX\XdXdesXt5t#t$~~h7;;=","STbSccv w#  08{{:/F
, &T->->>'+'<'<^'L"))..,>.)).9FFt^]g]n]no~~n5",,N0K0P0PSWS_S_`nSo0oo,$$++N;O'	<r   c                   | j                   st        j                  d       yt        | j                        dkD  rE| j                  j                         }|| j                  j                         v r| j                         j                  }|| j                  vrB|j                  }| j                  |      }|xj                  |j                  |      z  c_        n'| j                  |   j                  | ||      }||_        | j                   |   rT| j                         j"                  xj                  | j$                  |   j"                  | j&                  |   z  z  c_        t        | j                        dkD  rDyyW
        This method unmerges all merged adapter layers from the base weights.
         Already unmerged. Nothing to do.Nr   r  rw   rx   r   rZ   r  r9   r   rd   rp   rb   r   rE  r   r   r)   r\   r   r:   r@   r{   r!   rp   rI  rJ  unmergeds         r   r)   zLinear.unmerge  2    {{MM<=$&&'!+!11557N!1!1!33,,.55!):)::!'J#'#8#8#HLKK<??:#>>K#00@HH~_efH"*FK>>.1'')..33t{{>7R7W7WZ^ZfZfguZv7vv3 $&&'!+r   c                   | j                   |   j                  j                  }| j                   |   j                  j                  }|j                  dk(  xr( |t
        j                  k(  xs |t
        j                  k(  }| j                  |   j                  }| j                   |   j                  }|r |j                         }|j                         }t        ||z  | j                        | j                  |   z  }|rl|j                  |      }|j                  |      | j                  |   j                  _        |j                  |      | j                   |   j                  _        |S 
        Compute the delta weight for the given adapter.

        Args:
            adapter (str):
                The name of the adapter for which the delta weight should be computed.
        cpur   )r:   rp   r   r   ry   r]   r   r   r9   floatr   r   r@   r   r   r{   r  r   r   cast_to_fp32weight_Aweight_Boutput_tensors           r   rE  zLinear.get_delta_weight  s.    W%,,33G$++11
 {{e+c%--1G1b5TYTbTbKb;;w'..;;w'..~~'H~~'H!(X"5t7J7JKdll[bNcc),,5,9M 08{{5/ADKK '',/7{{5/ADKK '',r   c           	     v    | j                   |g|i | |j                  dd       }| j                  r4| j                  r| j	                           | j
                  |g|i |}|S | | j                  |g|d|i|}|S | j                  r | j
                  |g|i |}|S  | j
                  |g|i |}|j                  }| j                  j                         }| j                  D ]  }||vr| j                  |   }	| j                  |   }
| j                  |   }| j                  |   }| j                  ||	j                  j                        }|| j                   vr| |
 |	 ||                  |z  z   }| j                   |   j#                  | |||      } |j%                  |      }|S Nr  )r!   r+   r,   )r  r  disable_adaptersr  r)   rU   r*  r   r9   r   r   r:   rA   r@   _cast_input_dtyperp   rb   r-   r   )r{   r+   r  rc   r  r,   r"  lora_A_keysr!   r9   r:   r'  r@   s                r   r-   zLinear.forward  s      4T4V4

?D9  {{$T__Q888F> = &.T..q_4_}_X^_F: 9 [[$T__Q888F6 3 %T__Q888F!'++**,K"&"6"6 !4^4^4++N;,,~6**1fmm.A.AB!):)::#fVGAJ-?&@7&JJF!..~>FF'5%	 G F& YY12Fr   c                *    t         |          }d|z   S Nlora.r9  __repr__r{   repru   s     r   ri  zLinear.__repr__      g }r   	r   r   r   FFTFFF)r   r/   r>   r   r?   r   rA   rZ  r   r,  r;  r,  r   Union[bool, str]r   r,  r[   r,  r\   r,  r0   r1   r-  FNrH  r,  r  Optional[list[str]]r0   r1   r0   r1   r0   r2   r+   r2   r  r   rc   r   r0   r2   r0   r/   )r3   r4   r5   r|   r~   rL  r)   rE  r-   ri  __classcell__ru   s   @r   rf   rf   K  s     !$(-.2 ? ? 	?
 ? ? ? "&? ,? ? ? ? 
?@#9<vw, D&P r   rf   c                       e Zd Z	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZddZd ZdddZddZddZ	 	 	 	 	 	 	 	 	 	 ddZ	ddZ
dd	Zd fd
Z xZS )rl   c           
         |
r&t        d|
 d| j                  j                   d      t        |           t
        j	                  | |       || _        || _        | j                  |||||||	|
       y )Nz
lora_bias=z is not supported for rE   r8  )	rt   ru   r3   r9  r|   r.   r   r:  r   )r{   rU   r   r>   r?   rA   r   r   r   r[   r\   rc   ru   s               r   r|   zEmbedding.__init__  s     z)4J4>>KbKbJccdeff4,,+!%/! 	 		
r   c               "    |sy ddl m}  |       S )Nr   )DoraEmbeddingVariant)r>  r{  )r{   r[   rc   r{  s       r   r~   zEmbedding.resolve_lora_variant9  s    2#%%r   c	                &   t               j                         }	|	d= |dk  rt        d|       | j                  |      }
|
|
| j                  |<   || j
                  |<   || j                  |<   |dkD  rt        j                  |      }nt        j                         }|| j                  |<   t        j                  || j                  f      }t        j                  | j                  |f      }t        j                  |      | j                   |<   t        j                  |      | j"                  |<   || j$                  |<   |r&|t'        j(                  |      z  | j*                  |<   n||z  | j*                  |<   || j,                  |<   |dk(  r| j/                  |       n|r| j1                  ||       | j3                  |       || j                  v r  | j                  |   j4                  | fi |	 | j7                  | j8                         y )Nr{   r   r   r[   r   r   r   )r   r   rt   r~   rb   r>   r?   rV   r   r   rA   r]   r   rS   rT   r   r;   r<   r\   r   r   r@   r[   r   r   r   r   r   r   )r{   r   r>   r?   rA   r   r   r[   r\   rc   rb   r   r]  r^  s                 r   r   zEmbedding.update_layerA  s    6N6^_`^abcc00(0C#.:Dl+ |(2%#!#l!;!#*<,';;4#3#345;; 1 1156.0ll8.Dl+.0ll8.Dl+'0|$)3diil)BDLL&)3aDLL&&.l#'OOL)&&|5FG 	22<@4,,,0Dl+00@@--.r   c                T   t        | |      }|sy|D ]  }|| j                  j                         v s!| j                         }|j                  j
                  }|r|j                  j                  j                         }|| j                  vr$|| j                  |      j                  |      z  }n | j                  |   j                  | ||      }t        j                  |      j                         st        d| d      ||j                  _        nv|| j                  vr>|j                  xj                  | j                  |      j                  |      z  c_        n*| j                  |   j!                  | ||j                         | j"                  j%                  |        yrA  )r   r;   r   rd   rp   r   r   rD  rb   rE  r   r#   r]   rF  rG  rt   r&   rZ   r   )r{   rH  r  r!   rU   rI  r"   s          r   rL  zEmbedding.merger  s    0mD+ 	<N!6!6!;!;!==!002
'..44
 #-"3"3"8"8">">"@K%T->->>#t'<'<^'L'O'OPZ'[[&*&7&7&G&R&RSWYgit&u >>+6::<(OP^O__rs  .9J%%*%T->->>"))..$2G2G2W2Z2Z[e2ff.)).9FFt^]g]n]no$$++N;1	<r   c                f   | j                   st        j                  d       yt        | j                        dkD  r| j                  j                         }| j                         j                  j                  }|| j                  j                         v r| j                         j                  }|| j                  vr4|xj                  | j                  |      j                  |      z  c_        n'| j                  |   j                  | ||      }||_        t        | j                        dkD  ryyrN  )r  rw   rx   r   rZ   r  rd   rp   r   r;   r   rb   r   rE  r   r)   )r{   r!   rI  rp   rS  s        r   r)   zEmbedding.unmerge  s     {{MM<=$&&'!+!11557N,,.55;;J!6!6!;!;!==,,.55!):)::KK4#8#8#H#K#KJ#WWK#00@HH~_efH"*FK $&&'!+r   c                :   | j                   |   j                  }| j                  |   j                  }|j                  dk(  xr( |t
        j                  k(  xs |t
        j                  k(  }| j                  |   }| j                   |   }|r |j                         }|j                         }t        ||z  d      | j                  |   z  }|rN|j                  |      }|j                  |      | j                  |<   |j                  |      | j                   |<   |S )rW  rX  TrY  )r<   r   r;   r   ry   r]   r   r   rZ  r   r@   r   r[  s           r   rE  zEmbedding.get_delta_weight  s	    &&w/66%%g.44
 {{e+c%--1G1b5TYTbTbKb((1((1~~'H~~'H!(X"5t<t||G?TT),,5,9M .6[[-?D!!'*-5[[-?D!!'*r   c          	         | j                   |g|i |}t        |      }g }|D ]5  }|j                  t        |      D 	
cg c]  \  }	}
|
|k(  s|	 c}
}	       7 t        |      D ]  \  }}|dk(  r|| j                  j                         vr)| j                  |   j                  }| j                  |   j                  }| j                  |   }|||      }| j                  ||      }|||   xx   ||z  |z  z  cc<    |S c c}
}	w r  )
rU   r  r   r!  r;   r   r   r<   r@   _embed)r{   r+   r  r  rc   r,   r  r#  r  r$  r%  r&  r!   embedding_Aembedding_Br@   r(  after_As                     r   r*  zEmbedding._mixed_batch_forward  s5   
 !4T4V4m,!#& 	rG"))ImD\*p[UD`dho`o5*pq	r "+?!; 	SA~+T%:%:%?%?%AA//?AAK//?AAKll>2G 034Ikk)[9G)!,-'K2G71RR-	S  % +qs   DDc           	         | j                         }t        j                  |||j                  |j                  |j
                  |j                  |j                        S )N)padding_idxmax_norm	norm_typescale_grad_by_freqsparse)rd   F	embeddingr  r  r  r  r  )r{   inputrp   rU   s       r   r  zEmbedding._embed  sT    ((*
{{"..(( **)<<$$
 	
r   c                    | j                   |g|i | |j                  dd       }| j                  r4| j                  r| j	                           | j
                  |g|i |}|S | | j                  |g|d|i|}|S | j                  r | j
                  |g|i |}|S  | j
                  |g|i |}|j                  }| j                  D ]  }|| j                  vr|| j                  vr_| j                  |   j                  }| j                  |   j                  }	| j                  |   }
| j                  ||      }|||	z  |
z  z   }| j                  |   j                  | |||      } |j!                  |      }|S ra  )r  r  rb  r  r)   rU   r*  r   r   r;   rb   r   r<   r@   r  r-   r   )r{   r+   r  rc   r  r,   r"  r!   r  r  r@   r  s               r   r-   zEmbedding.forward  s      4T4V4

?D9  {{$T__Q888F6 5 &.T..q_4_}_X^_F2 1 [[$T__Q888F. + %T__Q888F!'"&"6"6 !)>)>>!):)::"&"7"7"G"I"IK"&"7"7"G"I"IK"ll>:G"kk![9G#w'<&GGF!..~>FF'5%	 G F" YY12Fr   c                *    t         |          }d|z   S rf  rh  rj  s     r   ri  zEmbedding.__repr__!  rl  r   )r   r   r   FTFFF)rU   r+  r   r/   r>   r   r?   r   rA   rZ  r   r,  r   rn  r   r,  r[   r,  r\   r,  r0   r1   r-  ro  rp  rr  rs  r2  )r  r2   rp   r2   r0   r2   rt  ru  )r3   r4   r5   r|   r~   r   rL  r)   rE  r*  r  r-   ri  rv  rw  s   @r   rl   rl     s     !$.2  
 
  
 	 

  
  
  
 , 
  
  
  
 
 
D&//b*<X+$ D&):COR	<

#J r   rl   c                       e Zd Z	 	 	 	 	 	 	 d		 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d
 fdZd Zd ZdddZddZddZddZ	d fdZ
 xZS )_ConvNdc
           
        t         |           t        j                  | |       |j                  dkD  rt	        j
                  d       ||j                  z  dk7  r3t        d|j                  j                   d|j                   d| d      || _	        |j                  j                         | _        | j                  ||||||||	       y )	Nr   zMLoRA adapter added to ConvNd layer with groups > 1. Merging is not supported.r   zTargeting a z with groups=z
 and rank z. Currently, support is limited to conv layers where the rank is divisible by groups. Either choose a different rank or do not target this specific layer.r8  )r9  r|   r.   groupsrw   rx   rt   ru   r3   r:  rp   dim_kernel_dimr   )r{   rU   r   r>   r?   rA   r   r   r[   r\   rc   ru   s              r   r|   z_ConvNd.__init__(  s     	4,q MMijz   A%z33<<=]:K\K\J]]ghigj kW W   ,%,,002!%/! 	 		
r   c	                h   t               j                         }	|	d= |dk  rt        d|       | j                  |      }
|
|
| j                  |<   || j
                  |<   || j                  |<   |dkD  rt        j                  |      }nt        j                         }|| j                  |<   | j                         }|j                  }|j                  }|j                  }t        |      }d| j                   dz
  z  x}} || j"                  ||||d	
      | j$                  |<    ||| j&                  |||j(                  |      | j*                  |<   || j,                  |<   |r&|t/        j0                  |      z  | j2                  |<   n||z  | j2                  |<   || j4                  |<   |dk(  r| j7                  |       n|r| j9                  ||       | j;                  |       || j                  v r  | j                  |   j<                  | fi |	 | j?                  | j@                         y )Nr{   r   r   r}  r   r   r   r   Fr   )r  r   r   )!r   r   rt   r~   rb   r>   r?   rV   r   r   rA   rd   kernel_sizestridepaddingry   r  rS   r9   rT   r  r:   r\   r   r   r@   r[   r   r   r   r   r   r   )r{   r   r>   r?   rA   r   r   r[   r\   rc   rb   r   rU   r  r  r  
conv_layer
out_kernel
out_strides                      r   r   z_ConvNd.update_layerP  s    6N6^_`^abcc00(0C#.:Dl+ |(2%#!#l!;!#*<,'((*
 ,,""$$*%
"&$*:*:Q*>"??
Z$.t/?/?KQWY`gl$mL!$.t  *jARARYb%
L! (1|$)3diil)BDLL&)3aDLL&&.l#'OOL)&&|5FG 	22<@4,,,0Dl+00@@--.r   c                ,    dd| j                   dz
  z  z   S )N)r   r  r   )r  r{   s    r   _get_dora_factor_viewz_ConvNd._get_dora_factor_view  s    tt//!3444r   c                l   t        | |      }|sy|D ]  }|| j                  j                         v s!| j                         }|j                  j
                  }|j                  dkD  rt        d      |rT|j                  j                  j                         }|| j                  vr&| j                  |      }||j                  |      z  }n | j                  |   j                  | ||      }t        j                  |      j!                         st#        d| d      ||j                  _        | j$                  |   rV|j&                  | j(                  |   j&                  | j*                  |   z  z   }t        j                  |      j!                         st#        d| d      |j                  |      |j&                  _        n|| j                  vr@| j                  |      }|j                  xj                  |j                  |      z  c_        n*| j                  |   j-                  | ||j                         | j$                  |   rF|j&                  xj                  | j(                  |   j&                  | j*                  |   z  z  c_        | j.                  j1                  |        y)a`  
        Merge the active adapter weights inside the base weights

        Args:
            safe_merge (`bool`, *optional*):
                If True, the merge operation will be performed in a copy of the original weights and check for NaNs
                before merging the weights. This is useful if you want to check if the merge operation will produce
                NaNs. Defaults to `False`.
            adapter_names (`list[str]`, *optional*):
                The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
                to `None`.
        Nr   z<Merging is not supported for _ConvNd layers with groups > 1!rB  rC  )r   r9   r   rd   rp   r   r  r   r   rD  rb   rE  r   r#   r]   rF  rG  rt   r\   r   r:   r@   r&   rZ   r   )	r{   rH  r  r!   rU   rI  r"   rJ  rK  s	            r   rL  z_ConvNd.merge  sd    0mD+ ,	<N!1!1!33!002
'..44
$$q(-.lmm #-"3"3"8"8">">"@K%T->->>'+'<'<^'L#|z'BB&*&7&7&G&R&RSWYgit&u >>+6::<(OP^O__rs  .9J%%*~~n5#-??T[[5P5U5UX\XdXdesXt5t#t$~~h7;;=","STbSccv w#  08{{:/F
, &T->->>'+'<'<^'L"))..,//*2MM.)).9FFt^]g]n]no~~n5",,N0K0P0PSWS_S_`nSo0oo,$$++N;Y,	<r   c                   | j                   st        j                  d       yt        | j                        dkD  rE| j                  j                         }|| j                  j                         v r| j                         j                  }|| j                  vrB|j                  }| j                  |      }|xj                  |j                  |      z  c_        n'| j                  |   j                  | ||      }||_        | j                   |   rT| j                         j"                  xj                  | j$                  |   j"                  | j&                  |   z  z  c_        t        | j                        dkD  rDyyrN  rQ  rR  s         r   r)   z_ConvNd.unmerge  rT  r   c                   | j                   |   j                  j                  }| j                  |   j                  j                  }|j
                  dk(  xr( |t        j                  k(  xs |t        j                  k(  }| j                  |   j                  }| j                   |   j                  }|r |j                         }|j                         }| j                         j                  j                         dd dk(  rp|j                  d      j                  d      |j                  d      j                  d      z  j                  d      j                  d      | j                  |   z  }nt| j                  |j!                  dd      |      }| j                         j"                  dkD  r|| j                  |   z  }n"|j!                  dd      | j                  |   z  }|rl|j%                  |      }|j%                  |      | j                  |   j                  _        |j%                  |      | j                   |   j                  _        |S )	rW  rX  r   r   )r   r   rF   r   r   rY  )r:   rp   r   r9   r   ry   r]   r   r   rZ  rd   r   squeeze	unsqueezer@   conv_fnr   r  r   r   r[  s           r   rE  z_ConvNd.get_delta_weight  s    W%,,33G$++11
 {{e+c%--1G1b5TYTbTbKb;;w'..;;w'..~~'H~~'H  '',,.q3v=%--a088;h>N>Nq>Q>Y>YZ[>\\gghijttW%&M !LL););Aq)A8LM""$++a/ -W0E E - 7 71 =W@U U),,5,9M 08{{5/ADKK '',/7{{5/ADKK '',r   c           	     r    | j                   |g|i | |j                  dd       }| j                  r4| j                  r| j	                           | j
                  |g|i |}|S | | j                  |g|d|i|}|S | j                  r | j
                  |g|i |}|S  | j
                  |g|i |}|j                  }| j                  D ]  }|| j                  j                         vr | j                  |   }| j                  |   }	| j                  |   }
| j                  |   }| j                  ||j                  j                        }|| j                   vr| |	 | |
|                  |z  z   }| j                   |   j#                  | |||      } |j%                  |      }|S ra  )r  r  rb  r  r)   rU   r*  r   r   r9   r   r:   rA   r@   rc  rp   rb   r-   r   )r{   r+   r  rc   r  r,   r"  r!   r9   r:   r'  r@   s               r   r-   z_ConvNd.forward  s      4T4V4

?D9  {{$T__Q888F< ; &.T..q_4_}_X^_F8 7 [[$T__Q888F4 / %T__Q888F!'"&"6"6 !)9)9);;^4^4++N;,,~6**1fmm.A.AB!):)::#fVGAJ-?&@7&JJF!..~>FF'5%	 G F& YY12Fr   c                *    t         |          }d|z   S rf  rh  rj  s     r   ri  z_ConvNd.__repr__5  rl  r   )r   r   r   TFFF)rU   r+  r   r/   r>   r   r?   r   rA   rZ  r   rn  r   r,  r[   r,  r\   r,  r0   r1   ro  rp  rr  rs  )r+   r2   r0   r2   ru  )r3   r4   r5   r|   r   r  rL  r)   rE  r-   ri  rv  rw  s   @r   r  r  &  s     !.2 &
&
 &
 	&

 &
 &
 ,&
 &
 &
 &
 
&
P5/n5><@w,,\%N r   r  c                  &     e Zd Z fdZddZ xZS )rj   c                    t        |   |i | | j                  dk(  st        d| j                         t        j
                  | _        y )Nr   z0Conv2d layer kernel must have 4 dimensions, not )r9  r|   r  rt   r  conv2dr  r{   r  rc   ru   s      r   r|   zConv2d.__init__<  I    $)&)1$OPTP`P`Oabccxxr   c               "    |sy ddl m}  |       S )Nr   )DoraConv2dVariant)r>  r  )r{   r[   rc   r  s       r   r~   zConv2d.resolve_lora_variantB  r?  r   r-  r3   r4   r5   r|   r~   rv  rw  s   @r   rj   rj   :       #r   rj   c                  &     e Zd Z fdZddZ xZS )rg   c                    t        |   |i | | j                  dk(  st        d| j                         t        j
                  | _        y )NrF   z0Conv1d layer kernel must have 3 dimensions, not )r9  r|   r  rt   r  conv1dr  r  s      r   r|   zConv1d.__init__M  r  r   c               "    |sy ddl m}  |       S )Nr   )DoraConv1dVariant)r>  r  )r{   r[   rc   r  s       r   r~   zConv1d.resolve_lora_variantS  r?  r   r-  r  rw  s   @r   rg   rg   K  r  r   rg   c                  &     e Zd Z fdZddZ xZS )rk   c                    t        |   |i | | j                  dk(  st        d| j                         t        j
                  | _        y )Nr   z0Conv3d layer kernel must have 5 dimensions, not )r9  r|   r  rt   r  conv3dr  r  s      r   r|   zConv3d.__init__^  r  r   c               "    |sy ddl m}  |       S )Nr   )DoraConv3dVariant)r>  r  )r{   r[   rc   r  s       r   r~   zConv3d.resolve_lora_variantd  r?  r   r-  r  rw  s   @r   rk   rk   \  r  r   rk   c                      e Zd ZdZ	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZedd       Zed d       Zed d       Zed!d       Z	edd       Z
ed"d       Zed!d	       Zedd
       Zed#d       Zed#d       Zed$d       Zed%d       Zed%d       Zd&dZed!d       Zd' fdZd(d)dZd'dZ	 	 	 	 	 	 	 	 d*dZd+dZ fdZd,dZed        Z fdZ fdZd- fdZ  xZ!S ).rr   a  LoRA implemented in a multihead attention layer

    This is currently only implemented for the case of `_qkv_same_embed_dim = True`, i.e. query, key, and value having
    the same dimension.

    Note: LoRA is applied to both the in_proj (query/key/value) and out_proj. There is currently no way to specify only
    one of them. Don't try to apply LoRA to the out_proj of MultiheadAttention by targeting that layer specifically,
    since the forward method of that layer is not being used, hence the LoRA adapter would be ignored.

    This is a little bit hacky because of the way that MultiheadAttention is implemented in PyTorch: There are no
    `nn.Linear` layers which we can hook onto or, in case of output projection, `.forward` is not used. This
    implementation works around these problems by merging the weights before the forward call and unmerging them after
    the forward call.
    c	           
        t        |dd      s#t        d| j                  j                   d      |r"t        | j                  j                   d      t        
|           t        j
                  | |fi |	 t        |j                  t        j                        r.t        |j                  |f||||||d|	| j                  _        n#t        d| j                  j                   d      || _        | j                  ||||||       y )Nrs   Tz?Only same embed for query/key/value is supported as of now for rE   z: does not support DoRA (yet), please set use_dora to False)r>   r?   rA   r   r   r[   z.out_proj must be an instance of nn.Linear for )getattrrt   ru   r3   r9  r|   r.   re   out_projrV   rf   rU   r:  r   )r{   rU   r   r>   r?   rA   r   r   r[   rc   ru   s             r   r|   zMultiheadAttention.__init__}  s    z#8$? QRVR`R`RiRiQjjkl   7 788rstt46v6 j))2995'-##
( %)"3%!
( 
(DOO$ MdnnNeNeMffghii+,:|EVXbcr   c                6    | j                         j                  S r  )rd   rv   r  s    r   rv   zMultiheadAttention.embed_dim      ""$...r   c                6    | j                         j                  S r  )rd   kdimr  s    r   r  zMultiheadAttention.kdim      ""$)))r   c                6    | j                         j                  S r  )rd   vdimr  s    r   r  zMultiheadAttention.vdim  r  r   c                6    | j                         j                  S r  )rd   rs   r  s    r   rs   z&MultiheadAttention._qkv_same_embed_dim  s    ""$888r   c                6    | j                         j                  S r  )rd   	num_headsr  s    r   r  zMultiheadAttention.num_heads  r  r   c                6    | j                         j                  S r  )rd   r'  r  s    r   r'  zMultiheadAttention.dropout  s    ""$,,,r   c                6    | j                         j                  S r  )rd   batch_firstr  s    r   r  zMultiheadAttention.batch_first  s    ""$000r   c                6    | j                         j                  S r  )rd   head_dimr  s    r   r  zMultiheadAttention.head_dim  s    ""$---r   c                6    | j                         j                  S r  )rd   in_proj_weightr  s    r   r  z!MultiheadAttention.in_proj_weight  s    ""$333r   c                6    | j                         j                  S r  )rd   in_proj_biasr  s    r   r  zMultiheadAttention.in_proj_bias  s    ""$111r   c                R    | j                         j                  j                         S r  )rd   r  r  s    r   r  zMultiheadAttention.out_proj  s     ""$--<<>>r   c                6    | j                         j                  S r  )rd   bias_kr  s    r   r  zMultiheadAttention.bias_k      ""$+++r   c                6    | j                         j                  S r  )rd   bias_vr  s    r   r  zMultiheadAttention.bias_v  r  r   c                B     | j                         j                  |i |S r  )rd   merge_masks)r{   r  rc   s      r   r  zMultiheadAttention.merge_masks  s#    0t""$00$A&AAr   c                6    | j                         j                  S r  )rd   add_zero_attnr  s    r   r  z MultiheadAttention.add_zero_attn  s    ""$222r   c                p    t        |   |i |  | j                  j                  j                  |i | y r  )r9  r   rU   r  r  s      r   r   zMultiheadAttention.update_layer  s4    d-f--  --t>v>r   c                   t        | |      }|sy|D ]  }|| j                  j                         v s!| j                         }|j                  j
                  j                  }|r|j                  j                  j                         j                         }|| j                  |      j                  |      z  }t        j                  |      j                         st!        d| d      |j                  j
                  j                  j                         j                         }||j                  j                  |      j                  |      z  }t        j                  |      j                         st!        d| d      |`||_        |j                  j                         `||j                  j                         _        |j                  j#                  |g       n| j                  |      j                  |      }|j                  j                  j                         |z   }	|`|	|_        |j                  j                  |      j                  |      }|j                  j
                  j                  j                         |z   }	|j                  j                         `|	|j                  j                         _        |j                  j#                  |g       | j$                  j'                  |        y)a^  
        Merge the active adapter weights into the base weights

        Args:
            safe_merge (`bool`, *optional*):
                If True, the merge operation will be performed in a copy of the original weights and check for NaNs
                before merging the weights. This is useful if you want to check if the merge operation will produce
                NaNs. Defaults to `False`.
            adapter_names (`List[str]`, *optional*):
                The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
                to `None`.
        NrB  rC  r  )r   r9   r   rd   r  rp   r   r  r   detachrD  rE  r   r]   rF  rG  rt   rL  rZ   r   )
r{   rH  r  r!   rU   rI  orig_weight_inorig_weight_outrJ  weight_mergeds
             r   rL  zMultiheadAttention.merge  s    0mD , /	<N!1!1!33!002
'0077==
 &0%>%>%C%C%J%J%L%R%R%TN"d&;&;N&K&N&Nz&ZZN >>.9==?(OP^O__rs 
 '1&9&9&@&@&E&E&L&L&N&T&T&VO#z':':'K'KN'['^'^_i'jjO >>/:>>@(OP^O__rs  #10>J-"++::<CBQJ''668?''--^<L-M $(#8#8#H#K#KJ#WL$.$=$=$B$B$I$I$Kl$ZM #10=J- $.#6#6#G#G#W#Z#Z[e#fL$.$7$7$>$>$C$C$J$J$L|$[M"++::<CBOJ''668?''--^<L-M$$++N;_/	<r   c                   | j                   st        j                  d       y| j                         }|j                  j
                  j                  j                  }t        | j                        dkD  r[| j                  j                         }|| j                  j                         v r
| j                  |      j                  |      }|j                  j                   |z
  }|`|j#                  dt%        j&                  |d             |j                  j                  |      j                  |      }|j                  j
                  j                  j                   |z
  }|j                  j
                  `|j                  j
                  j#                  dt%        j&                  |d             t        | j                        dkD  r[| j                         j                  j)                          y)rO  rP  Nr   r  Fr   rp   )r  rw   rx   rd   r  rU   rp   r   r   rZ   r  r9   r   rE  r   r  r   register_parameterrV   r   r)   )r{   rU   rI  r!   rJ  
old_weights         r   r)   zMultiheadAttention.unmerge1  s    {{MM<= ((*
((33::@@
$&&'!+!11557N!1!1!33
  $44^DGG
S'66;;lJ
---.>Zgl@mn  *22CCNSVVWab'00;;BBGG,V
''229##..AAbll:UK! $&&'!+( 	&&..0r   c                   |r| j                  ||       | j                         }|j                  }|`|j                  dt	        j
                  |j                  |j                               |j                  j                         }|j                  }|`	|j                  dt	        j
                  |j                  |j                               ||_        |S )z
        Merging and unloading of the MultiheadAttention module

        This requires an extra step for MultiheadAttention, which is why there is this special method instead of
        relying on the normal merge_and_unload code path.
        rH  r  r  r  rp   )
rL  rd   r  r  rV   r   r   r   r  rp   )r{   rL  rH  r  rU   rp   out_proj_layers          r   "unload_and_optionally_merge_modulez5MultiheadAttention.unload_and_optionally_merge_moduleR  s     JJ*MJJ((*
 **%%%&6V[[`f`t`t8uv $,,;;=&&!))(BLL\b\p\p4qr,
r   c                   | j                   |   j                  j                  }| j                   |   j                  j                  }|j                  dk(  xr |t
        j                  k(  }| j                  |   j                  }| j                   |   j                  }|r |j                         }|j                         }||z  | j                  |   z  }|rl|j                  |      }|j                  |      | j                  |   j                  _        |j                  |      | j                   |   j                  _        |S rV  )r:   rp   r   r   ry   r]   r   r9   rZ  r@   r   r   r[  s           r   rE  z#MultiheadAttention.get_delta_weightn  s    W%,,33G$++11
 {{e+F0F;;w'..;;w'..~~'H~~'H!H,W0EE),,5,9M 08{{5/ADKK '',/7{{5/ADKK '',r   c                z    d|v r#t        d| j                  j                   d      t        |   |g|i | y )Nr  rg  z( does not support mixed adapter batches.)r   ru   r3   r9  r  r{   r+   r  rc   ru   s       r   r  z&MultiheadAttention._check_forward_args  sB    f$eDNN$;$;#<<deff#A777r   c                @   |j                   } | j                  |g|i | | j                  r3| j                  r| j	                           | j
                  |g|i |}n| j                  r | j
                  |g|i |}n| j                         j                  }|j                  | j                  k7  r6| j                         j                  j                  }t        d| d| d      | j                  D cg c]  }|| j                  v s| }	}	 | j                  |	        | j
                  |g|i |}| j	                          |d   j                  |      |d   |d   j                  |      f}|S |d   f}|S c c}w # | j	                          w xY w)NzThe out_proj layer of z has merged layers but zJ itself doesn't; please ensure that either both or none have merged layersr  r   r   )r   r  rb  r  r)   rU   rd   r  r   ru   r3   rt   r9   rL  r   )
r{   queryr  rc   previous_dtyper,   r  cls_namer   r   s
             r   r-   zMultiheadAttention.forward  s      888  {{$T__U<T<V<F[[$T__U<T<V<F**,55H''4+?+??  ..0::CC ,XJ6MhZ XB B  +/*>*>SQ!t{{BRqSOS


9(@@@ ),,~.PVWXPYPeq	^0Lu lrrsktu T s   <FF(F Fc                |   | j                         }|j                  }|`|j                  dt        j                  |j
                  |j                               |j                  j                         }|j                  }|`|j                  dt        j                  |j
                  |j                               y )Nr  r  rp   )	rd   r  r  rV   r   r   r   r  rp   )r{   rU   rp   s      r   _restore_weightsz#MultiheadAttention._restore_weights  s     ((*
**%%%&6V[[`f`t`t8uv  ((779
""%%hV[[X^XlXl0mnr   c                B    | j                          t        |   |i |S r  )r  r9  
state_dictr  s      r   r  zMultiheadAttention.state_dict  s$    w!42622r   c                B    | j                          t        |   |i |S r  )r  r9  named_modulesr  s      r   r  z MultiheadAttention.named_modules  s$    w$d5f55r   c                *    t         |          }d|z   S rf  rh  rj  s     r   ri  zMultiheadAttention.__repr__  rl  r   )r   r   r   TFF)r   r/   r>   r   r?   r   rA   rZ  r   rn  r   r,  r[   r,  r0   r1   )r0   r   )r0   zOptional[int])r0   r,  )r0   rZ  )r0   znn.Parameter)r0   r+  )r0   zOptional[nn.Parameter])r0   z,tuple[Optional[torch.Tensor], Optional[int]]rr  ro  rp  )rL  r,  rH  r,  r  rq  r0   znn.MultiheadAttentionrs  )r  r2   r  r   rc   r   r0   r2   ru  )"r3   r4   r5   r6   r|   propertyrv   r  r  rs   r  r'  r  r  r  r  r  r  r  r  r  r   rL  r)   r  rE  r  r-   r   r  r  r  ri  rv  rw  s   @r   rr   rr   m  s   & !.2 +d +d 	+d
 +d +d ,+d +d +d 
+dZ / / * * * * 9 9 / / - - 1 1 . . 4 4 2 2 ? ? , , , ,B 3 3?
E<N1B'+<O	8 D8
'X o o(36
 r   rr   c                  (     e Zd ZdZ fdZd Z xZS )_LoraParameterProxyzThis proxies an `nn.Parameter` that is targeted with LoRA.

    Intended to be used in conjunction with `nn.utils.parametrize`, see `ParamWrapper`.
    c                0    t         |           || _        y r  )r9  r|   rJ  )r{   rJ  ru   s     r   r|   z_LoraParameterProxy.__init__  s    (r   c                    t         j                  j                  j                         5  || j                  z   cd d d        S # 1 sw Y   y xY wr  )rV   utilsparametrizecachedrJ  )r{   Ws     r   r-   z_LoraParameterProxy.forward  s;    XX!!((* 	)t(((	) 	) 	)s   AA)r3   r4   r5   r6   r|   r-   rv  rw  s   @r   r  r    s    
))r   r  c                    t        |t        j                        r| j                  ||       y | j	                  ||       y r  )re   rV   r   r  register_buffer)r   r  r   s      r   _register_parameter_or_bufferr    s0    !R\\"!!$*tQ'r   c                       e Zd ZdZ	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZ	 	 	 	 d	 	 	 	 	 	 	 ddZdddZd Zd Ze	dd       Z
d Zddd	Zdd
Z fdZddZddZd fdZ xZS )ParamWrappera   A LoRA wrapper for `nn.Parameter`. This layer is dispatched if users target a parameter directly with
    `lora_config.target_parameters`

    Note:

    - When accessing the wrapped nn.Parameter directly, e.g. via `module.weight`, the LoRA weights are *not* applied.
    - It is currently not implemented to target multiple parameters on the same module. To achieve this, it is
      currently required to create a separate LoRA adapter (with another adapter name) and activate both at the same
      time.
    c           
     x   t         |           t        j                  | |fi | || _        | j	                         }|j
                  dk(  r |j                  \  | _        | _        | _	        n0d|j                  d   |j                  d   c| _        | _        | _	        |j
                  dvr0t        d| j                  j                   d|j
                   d      |r#t        d| j                  j                   d      |r#t        d| j                  j                   d	      |r#t        d| j                  j                   d
      |r#t        d| j                  j                   d      |r#t        d| j                  j                   d      || _        || _        | j                  |||||	|
||       y )NrF   r   r   )r   rF   rg  z was initialized with z9 dimensional Parameter, but only 2d and 3d are supported.& does not work with lora_dropout != 0.z# does not work with fan_in_fan_out.z# does not work with lora_bias=True.z" does not work with use_dora=True.z1 does not work with is_target_conv_1d_layer=True.r8  )r9  r|   r.   parameter_name	get_paramndimrq   num_expertsrS   rT   rt   ru   r3   r   r:  r   )r{   rU   r   r  r>   r?   rA   r   r;  r   r   r[   r\   rc   paramru   s                  r   r|   zParamWrapper.__init__  s     	46v6, ::?DIKKADd.0ADEu{{ST~W\WbWbcdWeADd.0A::V#//00Fuzzl S( (   uT^^%<%<$==cdeeuT^^%<%<$==`abbuT^^%<%<$==`abbuT^^%<%<$==_`aa"uT^^%<%<$==nopp,+!%/! 	 		
r   c                   t               j                         }|d= |dk  rt        d|       | j                  |||
      }|#t        d| j                  j
                   d      || j                  |<   || j                  |<   |dkD  r#t        d| j                  j
                   d      t        j                         }| j                  j                  t        j                  ||i             t        j                  | j                  || j                  z  d	
      | j                   |<   t        j                  || j                  z  | j"                  |	
      | j$                  |<   |	| j&                  |<   |r&|t)        j*                  |      z  | j,                  |<   n||z  | j,                  |<   || j.                  |<   t1        |t2              rQ|j5                  d      r@t7        | j9                         j:                        5  | j=                  ||       d d d        nt1        |t2              rQ|j5                  d      r@t7        | j9                         j:                        5  | j?                  ||       d d d        n7t1        |t2              rQ|jA                         dk(  r>t7        | j9                         j:                        5  | jC                  |       d d d        n|dk(  r>t7        | j9                         j:                        5  | jE                  |       d d d        n|dk(  r7t        jF                  jI                  | j$                  |   j:                         nW|dk(  r>t7        | j9                         j:                        5  | jK                  |       d d d        n|r| jM                  ||       | jO                  |       || jP                  v r  | jP                  |   jF                  | fi | | jS                  | jT                         y # 1 sw Y   dxY w# 1 sw Y   pxY w# 1 sw Y   |xY w# 1 sw Y   xY w# 1 sw Y   xY w)Nr{   r   r   r   rg  z, does not work with LoRA variants like DoRA.r   r
  Fr   r   r   r   r   r   r   )+r   r   rt   r~   ru   r3   r>   r?   rV   r   rA   r   rW   rf   rS   r  r9   rT   r:   r\   r   r   r@   r[   re   r/   r   r   rd   rp   r   r   r   r   r   r   r   r   r   r   rb   r   r   r   s                 r   r   zParamWrapper.update_layer@  s     6N 6^_`^abcc00*HY 1 
 #uT^^%<%<$==ijkk |(2%# uT^^%<%<$==cdee!#  >P/Q!RS %'IId.>.>DDTDT@T[`$aL!$&IIa$2B2B.BDDUDU\e$fL!'0|$)3diil)BDLL&)3aDLL&&.l# '-2C2N2Nw2W"4#6#6#8#?#?@ A.?@A A)3/4E4P4PQX4Y"4#6#6#8#?#?@ A.?@A A)3/4E4K4K4MQX4X"4#6#6#8#?#?@ .-. .')"4#6#6#8#?#?@ .-. .%'GGNN4;;|4;;<,."4#6#6#8#?#?@ 3$$\23 3&&|5FG22<@4,,,0Dl+00@@--.1A AA A. .. .
3 3s<   P'P"
P.P:QP"P+.P7:QQc                X   | j                         j                  }t        j                  d      | j                         }| j                  | j                  z   D ]  }t        | |d      }t        |t        j                  t        j                  t        f      s@||vrEt        fd|j                         D              rh|j                  j                  s|j                  j                  r$||   j!                  ||j                        ||<   ||   j!                  |      ||<    y)z}
        Move the adapter of the given name to the device of the base layer. Needs special handling for nn.Parameter
        metaNc              3  <   K   | ]  }|j                   k(    y wr  )r   ).0r   r  s     r   	<genexpr>zEParamWrapper._move_adapter_to_device_of_base_layer.<locals>.<genexpr>  s     H188t#Hs   rY  )r  r   r]   r=   rB   r  re   rV   rW   rX   r
   r   
parametersr   is_floating_point
is_complexr   )r{   r   r   r  adapter_layer_nameadapter_layerr  s         @r   r   z2ParamWrapper._move_adapter_to_device_of_base_layer  s     !((||F# "&":":T=S=S"S 	U#D*<dCMmbmmR=M=Mz-Z[=0H]-E-E-GHH{{,,0F0F.;L.I.L.LV[`[f[f.L.gl+.;L.I.L.LV.Tl+	Ur   c                N    t        | j                         | j                        }|S r  )r  rd   r  )r{   r  s     r   r  zParamWrapper.get_param  s"    ++-t/B/BCr   c                @   | j                   dk(  rt        j                  | |g|i |}n| j                  |   j                  }| j
                  |   j                  }|j                  | j                   d|j                  d         }|j                  |j                  d   d| j                         }t        j                  d||      | j                  |   z  }| j                         }| j                         }|j                  |j                  |j                        }|S )Nr   r   r   zo r e, e r i -> e i o)r  rf   rE  r9   rp   r:   reshaperq   r]   einsumr@   rd   r  r   r   r   )	r{   r   r  rc   rJ  r]  r^  rU   r  s	            r   rE  zParamWrapper.get_delta_weight  s    q !224WWPVWL{{<077H{{<077H''(8(8"hnnR>PQH''q(92t?O?OPH <<(?8TW[WcWcdpWqqL((*
 #u||U[[Ar   c              #  L   K   |rt         fd|D              sd  y d }|D ]9  }| j                  vr| j                  |      }&| j                  |      z   };  j                         } j	                         j
                  }t        j                  j                  j                  | j                  t        |             |j                   j                     j                  j                  |       	 d   j                          y #  j                          w xY ww)Nc              3  :   K   | ]  }|j                   v   y wr  )r9   )r  r  r{   s     r   r  z.ParamWrapper._activate_lora.<locals>.<genexpr>  s     )`W'T[[*@)`s   )r   r9   rE  rd   r  r   rV   r   r  register_parametrizationr  r  parametrizationsoriginalrequires_grad__remove_parametrizations)r{   r   rJ  r!   rU   requires_grad_befores   `     r   _activate_lorazParamWrapper._activate_lora  s    c)`P_)`&`- 	TNT[[0##44^D+d.C.CN.SS	T ((*
#~~/==
55++-@-N	
 	##D$7$78AAPPQef	,))+D))+s   C6D$:D >D$D!!D$c                   | j                         }| j                  }||j                  vrt        d      |j                  |   }t	        |      dk(  r-t
        j                  j                  j                  ||d       y t        t        t	        |                  }|D ]  }||   }t        |t              s||=  y  t        j                  d|  d       y )NzbSomething went wrong, please report this issue on PEFT: https://github.com/huggingface/peft/issuesr   F)leave_parametrizedz+Could not find any LoRA parametrization on z], please open an issue on https://github.com/huggingface/peft/issues and report this warning.)rd   r  r"  rt   r   rV   r   r  remove_parametrizationsreversedrangere   r  rw   rx   )r{   rU   r  
param_listreversed_indicesr&  r   s          r   r%  z%ParamWrapper._remove_parametrizations  s    ((*
,,!<!<<t   00@
z?aHH  88^hm8n $E#j/$:;! 
	A]F&"56qM	
	 MM=dV DV Vr   c                J   t        | |      }|sy |D ]  }|| j                  j                         v s!| j                         }t	        || j
                        }|r|j                  j                         }|j                  }| j                  |      }||j                  |      z  }t        j                  |      j                         st        d| d      ||_        n&| j                  |      }|xj                  |z  c_        | j                  j!                  |        y )NrB  rC  )r   r9   r   rd   r  r  r   rD  r   rE  r   r]   rF  rG  rt   rZ   r   )	r{   rH  r  r!   rU   r  r"   rI  rJ  s	            r   rL  zParamWrapper.merge  s   /mD+ 	<N!1!1!33!002

D,?,?@ #(**"2"2"4K!,!2!2J#'#8#8#HL<??:#>>K >>+6::<(OP^O__rs  "-EJ $(#8#8#HLJJ,.J$$++N;/	<r   c                   | j                   st        j                  d       y t        | j                        dkD  r| j                  j                         }|| j                  j                         v ret        | j                         | j                        }|j                  }| j                  |      }|xj                  |j                  |      z  c_        t        | j                        dkD  ry y )NrP  r   )r  rw   rx   r   rZ   r  r9   r   r  rd   r  r   rE  r   r   )r{   r!   r  rI  rJ  s        r   r)   zParamWrapper.unmerge  s    {{MM<=$&&'!+!11557N!1!1!33 3 3 5t7J7JK"[[
#44^D

looj99
 $&&'!+r   c                    |j                  dd      r#t        d| j                  j                   d      t	        |   |g|i | y)r  r  Nrg  z, does not support mixed adapter batches yet.)r   rt   ru   r3   r9  r  r  s       r   r  z ParamWrapper._check_forward_args&  sH    ::ot,uT^^%<%<$==ijkk#A777r   c                    | j                   }|rU| j                  ||       t        |t              r0|j                  ||       |j                   }t        |t              r0|S | j	                         }|S )Nr  )rU   rL  re   r  rd   )r{   rL  rH  r  rU   s        r   r  z/ParamWrapper.unload_and_optionally_merge_module,  sp    __
JJ*MJJZ6  Jm T'22
 Z6
  ,,.Jr   c                    | j                   |g|i | |j                  dd       }| j                  r4| j                  r| j	                           | j
                  |g|i |}|S |#t        d| j                  j                   d      | j                  r | j
                  |g|i |}|S | j                  | j                        5   | j
                  |g|i |}d d d        |S # 1 sw Y   S xY w)Nr  rg  z' does not support mixed batch inference)r  r  rb  r  r)   rU   rt   ru   r3   r'  r   )r{   r+   r  rc   r  r,   s         r   r-   zParamWrapper.forward8  s       4T4V4

?D9  {{$T__Q888F  &uT^^%<%<$==deff[[$T__Q888F  $$T%9%9: =(<T<V<==s   C..C8c                    t         |          }|j                  d      dz   }|d |  d| j                   d||d   }d|z   S )N(r   z
  parameter_name='z',rg  )r9  ri  findr  )r{   rk  idxru   s      r   ri  zParamWrapper.__repr__I  sX    g hhsma Tc/0C0C/DBs34ykR}r   rm  )r   r/   r  r/   r>   r   r?   r   rA   rZ  r   r,  r;  r,  r   rn  r   r,  r[   r,  r\   r,  r0   r1   r.  r0  r  )r   r/   r   zOptional[torch.device]r0   r1   )r   r3  ro  rp  rr  )rL  r,  rH  r,  r  rq  rt  ru  )r3   r4   r5   r6   r|   r   r   r  rE  r   r'  r%  rL  r)   r  r  r-   ri  rv  rw  s   @r   r  r    s2   	  !$(-.2 6
 6
 	6

 6
 6
 6
 6
 "&6
 ,6
 6
 6
 6
 
6
@  !#O/ O/ O/ O/ O/bU,$ , ,6@<@:8
" r   r  c                Z   d }t        | t              r| j                         }n| }|t        | |fd|i|}|S t        |t        j
                  j                        rL|j                         }|j                  dd        |j                  |j                         t        | |fi |}|S t        |t        j
                  j                        r*|j                  |j                         t        | |fi |}|S t        |t        j
                  j                        r*|j                  |j                         t        | |fi |}|S t        |t
        j                        r*|j                  |j                         t        | |fi |}|S t        |t        j
                  j                        r*|j                  |j                         t        | |fi |}|S t        |t        j
                  j                        rP|d   r!t!        j"                  d       dx|d<   |_        |j                  |j                         t        | |fi |}|S t        |t&              rP|d   s!t!        j"                  d       dx|d<   |_        |j                  |j                         t        | |fddi|}|S )Nr  r   zjfan_in_fan_out is set to True but the target module is `torch.nn.Linear`. Setting fan_in_fan_out to False.Fzafan_in_fan_out is set to False but the target module is `Conv1D`. Setting fan_in_fan_out to True.Tr;  )re   r   rd   r  r]   rV   rl   r   r  r   loftq_configrj   rk   rg   rr   rf   rw   rx   r   r	   )targetr   lora_configr  rc   
new_moduletarget_base_layerembedding_kwargss           r   dispatch_defaultr?  R  s    J&.)"113"!!&,`~`Y_`
H G 
%uxx'9'9	:!;;=-t4 8 89v|H7GH
> = 
%uxx	7k../FL;F;
8 7 
%uxx	7k../FL;F;
2 1 
%ryy	1k../FL;F;
, + 
%uxx'B'B	Ck../'GG
& % 
%uxx	7"#MM3 EJIF#${'Ak../FL;F;
  
%v	.&'MMs EIHF#${'Ak../FLY$YRXY
r   r  )
r:  ztorch.nn.Moduler   r/   r;  r   r  zOptional[str]r0   zOptional[torch.nn.Module])/
__future__r   r   rw   
contextlibr   typingr   r   r   r]   torch.nnrV   torch.nn.functional
functionalr  r   transformers.pytorch_utilsr	   peft.tuners._buffer_dictr
   peft.tuners.tuners_utilsr   r   peft.utils.integrationsr   r   r   r   peft.utils.otherr   configr   r   r.   Modulerf   rl   r  rj   rg   rk   rr   r  r  r  r?  r%   r   r   <module>rM     s7   #   % ' '      - / L  ' (" ("Vk kpGRYY	 GTN		9 NbQbii Qh#W #"#W #"#W #"tI tn)")) )"(S299i St
 %)	333 3 "	3 3r   