
    bi̅                     &   d dl Z d dlmZmZ d dlmZmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZmZ d dlZd dlmZ d d	lmZ d d
lmZ d dlmZmZ d dlmZ d dlmZ d dl m!Z!m"Z" ddl#m$Z$ ddl%m&Z&m'Z'm(Z(m)Z) e&e(e)fZ* G d d      Z+ G d de+      Z, G d de+      Z-de.de.fdZ/dej`                  jb                  dejd                  fdZ3dee4ejd                  f   fdZ5de$fd Z6dejn                  fd!Z8d" Z9dej`                  jb                  d#edee$   d$e:d%ee:   d&ee:   d'ee:e.e4e:f   df   d(e;d)e;de.fd*Z<dej`                  jb                  d+e.d,e4fd-Z= ej|                         de9e6e8d.d/d/fdej`                  jb                  d#edee$   d%ee:   d&ee:   d'ee:e.e4e:f   df   d,e4d(e;d)e;de.fd0       Z? ej|                         dde9e6e8d.d/d/fdej`                  jb                  d#ee   d+ee.   d%ee:   d&ee:   d'ee:e.e4e:f   df   d,e4d(e;d)e;fd1       Z@y)2    N)Counterdefaultdict)IterableMapping)nullcontext)deepcopy)partial)cycle)OptionalUnion)tqdm)Conv1D)_find_minimal_target_modulescheck_target_module_exists)#MIN_TARGET_MODULES_FOR_OPTIMIZATION)IncrementalPCA)_get_submodulesget_pattern_key   )
LoraConfig)	Embedding	LoraLayerMultiheadAttention_ConvNdc                       e Zd ZdZ	 	 ddedee   defdZe	de
j                  fd       Z e
j                         d	        Zd
 Zy)_HookzD
    A base class for hooks that prepares layer inputs for EVA.
    Nnameprepare_layer_inputs_fngather_distributed_inputsc                 r    || _         || _        || j                  | _        d | _        y || _        d | _        y N)r   r    _prepare_layer_inputs_fn_default_prepare_layer_inputs_fnmodel_input)selfr   r   r   s       O/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/peft/tuners/lora/eva.py__init__z_Hook.__init__.   sD     	)B&"*,0,Q,QD)   -DD)    returnc                 
   t        | t        j                        rn7t        | t        t        f      r| d   } nt        dt        |        d| d      | j                  dkD  r!| j                  d| j                  d            } | S )Nr   unsupported input type & for prepare_layer_inputs_fn in layer 1, please provide a custom prepare_layer_inputs_fn   )

isinstancetorchTensortuplelist
ValueErrortypendimviewsizelayer_inputr$   
layer_names      r&   r"   z&_Hook._prepare_layer_inputs_fn_default<   s    k5<<0eT]3%a.K)${*;)<<bcmbn oB B 
 a%**2{/?/?/CDKr(   c                 P    | j                  || j                  | j                        S r!   )r#   r$   r   )r%   r;   s     r&   prepare_layer_inputsz_Hook.prepare_layer_inputsL   s!    ,,[$:J:JDIIVVr(   c                    t        j                         rf| j                  rYt        j                         }t	        j
                  |j                  d   g|j                        }t	        j                  ||j                  |j                        }t        j                  ||       |j                         }|j                  t        |      g|j                  dd        }||d |j                  d    t        |      D cg c]  }t	        j                  |       }}t        j                   ||j#                                t%        ||      D 	cg c]
  \  }}	|d |	  }}}	t	        j&                  |d      S |S c c}w c c}	}w )Nr   )device)dtyper@   r   )dim)distis_initializedr   get_world_sizer1   tensorshaper@   emptyrA   all_gather_into_tensortolist	new_zerosmaxrange
zeros_like
all_gather
contiguouszipcat)
r%   r;   
world_size
local_size	all_sizespadded_input_gathered_inputsrF   r9   s
             r&   gather_layer_inputsz_Hook.gather_layer_inputsP   sW    T%C%C,,.J {'8'8';&<[EWEWXJJj6F6F{OaOabI''	:>!((*I '00#i.1Y;CTCTUVUWCX1YZL3>L/;,,Q/0 HMZGXY!u//=YOYOOO\-D-D-FG BE_V_A`avet}aOa 99_!44 Z bs   E>F)NT)__name__
__module____qualname____doc__strr   callableboolr'   staticmethodr1   r2   r"   no_gradr>   rY    r(   r&   r   r   )   sz     7;*.	   "*(!3  $(	  RWR^R^   U]]_W Wr(   r   c                   x     e Zd ZdZdedeeej                  f   f fdZ	 ej                         d        Z xZS )SVDHooka*  
    A forward hook for calculating incremental SVD on layer inputs. The hook is designed to be registered to a PyTorch
    module using the `register_forward_hook` method.

    This hook performs a step of incremental Singular Value Decomposition (SVD) on the inputs of a specified layer
    during the forward pass of a neural network. The hook also tracks convergence of the computed components using
    cosine similarity between the current and previous components.

    Args:
        name (str): Name of the layer to which this hook is attached.
        n_components (int): Number of principal components to compute.
        sim_thresh (Union[float, torch.Tensor]): Similarity threshold for convergence.
        prepare_layer_inputs_fn (Optional[callable]): Function to prepare layer inputs for SVD.
    n_components
sim_threshc                    t        |   di | || _        || _        t	        |t
        j                        rit        |j                        dkD  rQ|j                  d      |k(  xs |j                  d      dk(  }t        |j                        dk(  }|r|st        d      t        |ddd      | _        d | _        t        j                  |ft
        j                        | _        y )	Nr   r   z`if sim_thresh is a tensor with more than 0 dimensions it must have shape (n_components,) or (1,)T*   )rf   copylowranklowrank_seed)rA   rc   )superr'   rf   rg   r0   r1   r2   lenrG   r9   r5   r   svdr$   zerosr`   	converged)r%   rf   rg   base_class_kwargscheck1check2	__class__s         r&   r'   zSVDHook.__init__z   s     	-,-($j%,,/C
8H8H4IA4M__Q'<7R:??1;MQR;RF))*a/Fv v  "%	
  l_EJJGr(   c                    d }t        | j                  d      r2| j                  j                  j                         j	                         }| j                  |      }| j                  |      }|j                  d      | j                  k  r't        d| j                   d| j                   d       y | j                  j                  |j                  t        j                               |y | j                  j                  }t        |j                         dk(  r$|j#                  dd      }|j#                  dd      }t        j$                  j&                  j)                  ||      }|| j*                  k\  | _        y )Ncomponents_r   zskipping SVD for z because there are less than z	 examplesr   r/   )hasattrro   rw   clonedetachr>   rY   r9   rf   printr   partial_fittor1   float32rn   rG   reshapenn
functionalcosine_similarityrg   rq   )r%   modelinputoutputprevious_componentsstates
componentssims           r&   __call__zSVDHook.__call__   s2   "488]+"&(("6"6"<"<">"E"E"G**51))&1;;q>D---%dii[0MdN_N_M``ijkVYYu}}56&XX))
z A%#++Ar2J"5"="=a"Dhh!!33J@ST/r(   )rZ   r[   r\   r]   intr   floatr1   r2   r'   rb   r   __classcell__ru   s   @r&   re   re   j   sL    HH %-.H2 U]]_0 0r(   re   c                   `     e Zd ZdZ fdZed        Z ej                         d        Z	 xZ
S )HashHooka  
    A forward hook for hashing layer inputs. The hook is designed to be registered to a PyTorch module using the
    `register_forward_hook` method.

    This hook hashes the inputs of a specified layer during the forward pass of a neural network and stores the hash
    values for later analysis or comparison.

    Args:
        name (str): Name of the layer to which this hook is attached. hashed_inputs (list): List of hashed inputs.
        prepare_layer_inputs_fn (Optional[callable]): Function to prepare layer inputs for hashing.
    c                 2    t        |   di | g | _        y Nrc   )rm   r'   hashed_inputs)r%   rr   ru   s     r&   r'   zHashHook.__init__   s    -,-r(   c                 d    t        t        | j                  d      j                                     S )Nr/   )hashr3   r8   rJ   )rF   s    r&   hash_fnzHashHook.hash_fn   s#    E&++b/002344r(   c                     | j                  |      }| j                  |      }| j                  j                  | j	                  |j                                      y r!   )r>   rY   r   appendr   cpu)r%   r   r   r   xs        r&   r   zHashHook.__call__   sE    %%e,$$Q'!!$,,quuw"78r(   )rZ   r[   r\   r]   r'   ra   r   r1   rb   r   r   r   s   @r&   r   r      s=    
  5 5 U]]_9 9r(   r   
dictionaryr)   c                     t        t              }| j                         D ]  \  }}||   j                  |        |j                         D ci c]  \  }}t	        |      dkD  s|| c}}S c c}}w )a:  
    Find keys in a dictionary that have the same value.

    This function takes a dictionary and returns a new dictionary containing keys that have the same value. The keys in
    the output dictionary are the values from the input dictionary, and the values are lists of keys that share the
    same value.
    r   )r   r4   itemsr   rn   )r   
value_dictkvs       r&   find_equal_valuesr      sk     T"J  "  11Q '--/>TQ3q6A:AqD>>>s   A/&A/r   c                     t        | j                         D ch c](  }|j                  j                  dk7  s|j                  * c}      }t	        |      dkD  rt        j                  d|        y|d   S c c}w )za
    Get the device of the model's parameters. Useful if some parameters are on meta device.
    metar   z8Could not determine device, model has multiple devices: Nr   )r4   
parametersr@   r6   rn   warningswarn)r   pdevicess      r&   get_device_with_meta_paramsr      sj     e&6&6&8TAHHMMV<SAHHTUG
7|aPQXPYZ[1:	 Us
   A8A8r@   c                    t        | d      r| j                        S t        | t              r= t	        |       | j                         D ci c]  \  }}|t        |       c}}      S t        | t        t        f      r t	        |       fd| D              S t        j                  dt	        |        d       | S c c}}w )zK
    Move the inputs to the specified device. Adapted from hf.Trainer.
    r}   c              3   6   K   | ]  }t        |        y wr!   )move_inputs_to_device).0r   r@   s     r&   	<genexpr>z(move_inputs_to_device.<locals>.<genexpr>   s     M1!V<Ms   zinput of type z) could not be moved to the correct device)rx   r}   r0   r   r6   r   r   r3   r4   r   r   )inputsr@   r   r   s    `  r&   r   r      s     vtyy  &'"tF|V\\^\TQQ 5a @@\]]	FUDM	*tF|MfMMMtF|n4]^_ ]s   C 
peft_configc                 h   t        | t              st        d      | j                  dt	        j
                  | d               j                         }|j                  j                  r<t        | d      r0t	        j                  || d   |j                  j                  k7        }|j                         S )z
    Get the indices of the items that should be used for SVD.

    Attributes:
        model_input (dict): The model inputs.
        peft_config (LoraConfig): The configuration for the LoRA layers.
    zRWhen using `prepare_model_inputs_fn_language_modeling` inputs must be a dictionaryattention_mask	input_idslabels)r0   dictr5   getr1   	ones_liker`   
eva_configuse_label_maskrx   logical_andlabel_mask_valuenonzero)r$   r   masks      r&   )prepare_model_inputs_fn_language_modelingr      s     k4(mnn??+U__[=U-VW\\^D,,h1O  {8'<@V@V@g@g'gh<<>r(   c                     t        | t        j                        rn7t        | t        t        f      r| d   } nt        dt        |        d| d      | |j                  j                            S )a  
    if not all items in the input should be used for SVD, this function can be used to get the indices of the items
    that should be used.

    Attributes:
        layer_input (torch.Tensor): The layer inputs.
        model_input (torch.Tensor):
            The model inputs or if `prepare_model_inputs_fn` is not None the output of this function.
        layer_name (str): The name of the layer.

    Returns:
        torch.Tensor: The input to the SVD.
    r   r+   r,   r-   )	r0   r1   r2   r3   r4   r5   r6   Tunbindr:   s      r&   )prepare_layer_inputs_fn_language_modelingr     st     +u||,	K%	/!!n%d;&7%88^_i^j k> >
 	

 {}}++-..r(   c                      | di |S r   rc   )r   r   s     r&   forward_fn_dictr     s    ?6?r(   
dataloadertarget_module_check_fn
forward_fnprepare_model_inputs_fnr   r   show_progress_barc	           
        0 d }	t        |      dk(  rt        d      t        j                         r|rt	        j
                  d       d}
|j                  j                  }||
kD  r;t        d | j                         D              }||j                  z  }t        ||      }| j                  }t        |       }| j                          t        t!        |            }|t#        ||      }|
 |||      }nt%        |      }i }i 0d}| j'                         D ]  \  }} |||      st)        |t*              r|j-                  |d       }n|}t/        |||      }||_        |j3                  |      }||f||<   |j4                  j7                  t9        |j4                  j;                         |      |j                        }t=        ||z        0|<   ||z  } t)        |t*              r*t        |      dkD  rt        d|j;                                 || |       |j?                         D ci c]  \  }}||d   j@                  d    }}}tC        tE        |      jG                               }|D ci c]  }|d	d  D ]  }||d   
  } }}|D ]"  }!t        0fd
|!D              }"|!D ]  }#|"0|#<   	 $ tC        |j;                               D ]  }|j-                  |      \  }}|jI                          || v r,tK        0|   |j                  jL                  ||jN                  |      }| jQ                  |      }|j3                  |      }||f||<    i tS        tU        |j;                         |j;                                     | }$|rNt        j                         rt        jV                         dk(  r#tY        t!        t[        |            dd      }%d}&nt!        t[        |            }%d}&|j;                         D ci c]  }|d }'}0j]                         }(|%D ]  }|t#        ||      }|
 |||      }nt%        |      }tC        |j;                               D ]  }||   \  }}t_        j`                  |jb                  d |(|          })|'|   s|)r|r|jI                          d }d|'|<   Q|'|   r)|)s'| jQ                  |      }|j3                  |      }d|'|<   ||_        ||f||<    |&rdtC        |'jG                               | jG                         D cg c]  }|'|   	 c}z   }*|%je                  tg        |*       dt        |*       d       ta        |'jG                               r n: || |       ta        d |jG                         D              s} |	||$| |0      }( | j'                         D #+ch c]7  \  }#}+|+jh                  jG                         D ]  }t)        |tj              s|# 9 },}+}#}t        |,      dkD  rt        d|, d      i }-|(j?                         D ]  \  }}.||$|      d   }t_        j`                  |jb                  d |.       st        d| d|. d      |jl                  jn                  d |. }/|j                  jp                  r:|/|jl                  jr                  d |. ju                         jw                  dd	      z  }/|/|-|<    | jy                  |       |2|-j?                         D ci c]  \  }}||j{                  |       }-}}|-S c c}}w c c}}w c c}w c c}w c c}}+}#w c c}}w )Nc           
      r   | j                         D ci c]%  \  }}||d   j                  j                  d ||    ' }}}t        |j                         D 	cg c]  \  }}||   D ]  }	||	f  c}	}} \  }
}t	        j
                  |      j                  d      }t        |d | D cg c]  }|
|   	 c}      }|j                         D ci c]  }||j                  |d       }}|j                         D ]   \  }}||   ||   }}||k\  r||c||<   ||<   " |S c c}}w c c}	}}w c c}w c c}w )Nr   T)
descending)
r   ro   explained_variance_ratio_rQ   r1   stackargsortr   keysr   )hookslayer_hook_mapequal_inputs_maprank_budgetmax_componentsr   hexp_varsr   cr   valuesidxicountsk_hookrank	rank_hooks                     r&   _get_rank_distributionz3_get_eva_state_dict.<locals>._get_rank_distribution.  sO   [`[f[f[hiSWSTVWAqtxx99:MN1<MNNii>3G3G3Ibb4S[\`SabaaVbVbcfkk&!))T):3|+<=a$q'=>/=/B/B/DE!!VZZ1%%EE)//1 	8IAv$Qi)DD (,i%F6NF1I	8  jb=Es   *D"D(,D/D4r   zdataloader is emptyztorch.distributed is initialized and `gather_distributed_inputs` is True, therefore EVA initialization will gather tensors from all ranks. Ensure the model does not receive the same inputs on different ranks.i  c              3   F   K   | ]  }t        |j                          y wr!   )rL   rG   )r   r   s     r&   r   z&_get_eva_state_dict.<locals>.<genexpr>L  s     ?qc!''l?s   !)r   r   r   zaprepare_layer_inputs_fn is a mapping but the following module names were not found in the model: r   c              3   (   K   | ]	  }|     y wr!   rc   )r   nr   s     r&   r   z&_get_eva_state_dict.<locals>.<genexpr>  s     9aq)9s   )rf   rg   r   r   r   F)positionleaveT/z layers have convergedc              3   N   K   | ]  }t        |d    j                  d        yw)r   rw   N)rx   ro   )r   r   s     r&   r   z&_get_eva_state_dict.<locals>.<genexpr>  s     L71Q488]3Ls   #%z?Found active hooks added by EVA that weren't properly removed: zH. Please report this issue at https://github.com/huggingface/peft/issueszLayer z) has not converged but was assigned rank r/   )>rn   r5   rC   rD   r   r   r   rhorL   r   rmintrainingr   evalnextiterr   r   named_modulesr0   r   popr   r$   register_forward_hookrank_patternr   r   r   roundr   r   r4   r   r   removere   taur#   get_submoduler   rQ   get_rankr   r
   rj   r1   allrq   set_descriptionsum_forward_hooksr   ro   rw   whitensingular_values_sqrtr   trainr}   )1r   r   r   r   r   r   r   r   r   r   rho_thresholdr   max_dimrho_ceilr   r@   r   model_inputs_for_hooksr   r   r   modulefnhookhandle
layer_rankr   r   	hash_dictequal_inputsr   vvr   names	max_valuer   r   pbaruse_tqdmconvergence_dict	rank_distrq   layer_convergedmremaining_hookseva_state_dictr   ur   s1                                                   @r&   _get_eva_state_dictr  !  s    :!.// !:T	
 M

 
 
$
$C
]?E,<,<,>??kmm+#x ~~H(/F	JJL $z"#F&vv6*!8!M!)&!1ENK++- "f%dF3-w7(,,T48B(BT2Yrs1--d3Vnd --11K4499;TBKMM

  %Z#%56tz!"  )73<S8TWX8Xo&++-.0
 	
 uf6;kkmDdaAaD&&q))DID ))4;;=>L*6GQ12G2AaDGGG *9599	 	*A )N1	** UZZ\" %yyf##'-"--11$($A$A&?
 $$T*--d3Vnd% SSuzz|<=RAQRN $"5"5"74==?a;ODz*+auEE*%&*/**,7Q577##%I *q*66:F".%<V[%Q"%-f%5"& 	)D ;LD&		$..1B9T?"CDI$T*	f)- &!$'	,,T255d;). &5D.E$K!	)$ "#3#:#:#<=-=-D-D-FA() #A O   C$8#93;O:PPf!gh&&()5&! LU\\^LL*5.BRT_aop	U*qZ &+%8%8%:vvTQADTDTD[D[D]vqaklmotauqvqvOv
?aMoM^ _U U
 	

 Noo' 
!
d^D)*1-yy./Gv NY Y  HH  $'!!((**5D1668@@QGGA t
! 
KK 6D6J6J6LMda!QTT&\/MM] E H@ 8:A$ w2 Ns*   /\.6\4
\: \?5]9]]r  adapter_namec                    | j                   |   }||j                  |j                  |j                  |j                  d}g }g }g }i }i }	| j                         D ]  \  }
}|
j                  dd      }t        |t              s|j                  |       ;|j                  j                  t        |j                  j                         |
      |j                        }|j                  j                  t        |j                  j                         |
      |j                         }|
|v r|j#                  |
      }|j%                  d      }|dk(  r-t'        | |
      \  }}}t)        |||j+                                #||k7  r|j,                  j.                  r|||z  z  }||k7  s0|j0                  |   j2                  j4                  j6                  dk(  r |j8                  d||dd| |j0                  |   j2                  j;                  |       |j                  |       n) |j8                  d||dd| |j                  |       |}||j                  k7  r|||<   ||j                   k7  s||	|<     ||z   }t=        |      t>        k\  rtA        ||      }|| j                   |   _!        || j                   |   _
        |	| j                   |   _        |r tE        jF                  d	| d
tH                y y )N)r  lora_dropout
use_rslorause_dora	lora_biaszbase_model.model. r   r   eva)r   
lora_alphainit_lora_weightsTzuthe following layers were initialized with init_lora_weights=True because they were not found in the eva state_dict: z@
currently the following lora modules are not supported by EVA: rc   )%r   r  r  r  r  r   replacer0   r   r   r   r   r   r   r   alpha_patternr   r   r9   r   setattrget_base_layerr   adjust_scaling_factorslora_Aweightr@   r6   update_layercopy_rn   r   r   target_modulesr   r   UNSUPPORTED_LORA_MODULES)r   r  r  r   update_layer_kwargsmissing_eva_initsnew_target_modulesother_module_namesr   r#  r   r  name_in_base_modelr   alphawnew_rankparentrW   target_names                       r&   _load_eva_state_dictr7    s	   
 ##L1K$#00!,,(( ** LM++-  6f!\\*=rB&),%%&89$$((9Q9Q9V9V9XZ^)_alanano))--K55::<dC[E[E[
 >!""4(AvvayH1})8)E&;V-B-B-DEQ))@@X\)E1}l ; B B I I N NRX X###qh5TYq]pqMM,'..44Q7%%&89Fe!eQde$$%78H{}}$/7L+,K***05M,-A 6F ,.??
"EE9:LN`a5GEl#2 4@El#0 5BEl#155F4G H@@X?Y[	
 r(   defaultTc	                 :   d }	d }
t        | d      }|r|| j                  |   }n|t        d      |r"| j                         }t	        |	t
              }nt               }t	        |
|      }|5  t        | ||||||||	      }ddd       |S # 1 sw Y   S xY w)	a  
    Compute the SVD for each layer in the model.

    This function computes the Singular Value Decomposition (SVD) for each layer in the model. It uses the incremental
    PCA method to compute the SVD components. The function also checks for convergence of the computed components using
    cosine similarity. The rank distribution for each layer is determined based on the explained variance ratio.

    Args:
        model (torch.nn.Module): The model to compute the SVD for. Does not need to be a PeftModel.
        dataloader (Iterable): The dataloader to use for the forward pass.
        peft_config (Optional[LoraConfig]):
            The configuration for the LoRA layers. Only required if `model` is not a PeftModel.
        forward_fn (callable):
            The forward function to use for the forward pass. Takes two arguments: `model` and `inputs`. Default
            behavior is `return model(**inputs)`
        prepare_model_inputs_fn (Optional[callable]):
            This function receives the model inputs and the peft_config and passes the output to
            `prepare_layer_inputs_fn`. Can be used to modify the input to the SVD computation based on the original
            model inputs. For example for language modeling the attention mask is used to determine which indices are
            padding tokens and should not be used for SVD. Any function defined here expects two arguments:
            `model_input` and `peft_config`. `peft.tuners.lora.eva.prepare_model_inputs_fn_language_modeling` is used
            by default.
        prepare_layer_inputs_fn (Union[callable, Dict[str, callable], None]):
            This function receives the layer inputs, the model inputs (potentially modified by
            `prepare_model_inputs_fn`) and the name of the layer and returns the inputs that should be used for SVD for
            that particular layer. Any custom function defined here expects three arguments: `layer_input`,
            `model_input`, and `layer_name` and should return a 2d tensor. The default logic can be found in
            peft.tuners.lora.eva.prepare_layer_inputs_fn_language_modeling and works for language modeling. In this
            case model_inputs is the mask used to determine which indices should be used for SVD (created by
            `prepare_model_inputs_fn_language_modeling`).
        adapter_name (str): The name of the adapter to compute the SVD for.
        gather_distributed_inputs (bool):
            Whether to gather the layer inputs from all ranks. Default is True meaning in a distributed setting the
            layer inputs will be gathered from all ranks for the SVD computation. For non-distributed settings this
            argument is ignored. Set to False if you are using a non-distributed dataloader in a distributed setting.
        show_progress_bar (bool): Whether to show a progress bar. Default is True.

    Returns:
        eva_state_dict (dict): The state dictionary containing the SVD components for each layer.
    c                 8    t        |d      xr t        ||       S )z?check if a module is an adapter module via base_layer attribute
base_layer)rx   r0   )r   r  unsupported_lora_moduless      r&   !target_module_check_fn_peft_modelz=get_eva_state_dict.<locals>.target_module_check_fn_peft_modelf  s    v|,aZH`5a1aar(   c                     d}|j                   t        ||       }t        |t        j                  j
                  t        f      xr |S )z9check if a module is an adapter module via target_modulesT)r+  r   r0   r1   r   Linearr   )r   r  r   is_target_modules       r&   target_module_check_fn_defaultz:get_eva_state_dict.<locals>.target_module_check_fn_defaultj  sA    %%19+tL&588??F";<QAQQr(   r   Nz3peft_config is required if model is not a PeftModel)r<  )r   )	r   r   r   r   r   r   r   r   r   )rx   r   r5   disable_adapterr	   r,  r   r  )r   r   r   r   r   r   r  r   r   r=  rA  is_peft_modelctxr   r  s                  r&   get_eva_state_dictrE  1  s    jbR E=1M ,''5		NOO ##%!(-H`"
 m!()GU`!a	 
,!##9!$;$;&?/


 
 s   1BBc	           
      $   t        | d      st        d      t        | j                        dkD  rt        d      | j                  |   j
                  dk7  rt        d      | |t        d      t        | |||||||	      }t        | ||       y)
a  
    Initialize the weights of the LoRA layers using the EVA method.

    This function initializes the weights of the LoRA layers using the EVA method. It computes the SVD for each adapter
    layer and updates the weights accordingly.

    Args:
        model (PeftModel): The peft model to compute the SVD for.
        dataloader (Optional[Iterable]):
            The dataloader to use for the forward pass. If None, eva_state_dict needs to be provided.
        eva_state_dict (Optional[dict]):
            The state_dict to load into the model. If None, a dataloader needs to be provided and the state_dict will
            be computed using `get_eva_state_dict`.
        forward_fn (callable):
            The forward function to use for the forward pass. Takes two arguments: `model` and `inputs`. Default
            behavior is `return model(**inputs)`
        prepare_model_inputs_fn (Optional[callable]):
            This function receives the model inputs and the peft_config and passes the output to
            `prepare_layer_inputs_fn`. Can be used to modify the input to the SVD computation based on the original
            model inputs. For example for language modeling the attention mask is used to determine which indices are
            padding tokens and should not be used for SVD. Any function defined here expects two arguments:
            `model_input` and `peft_config`. `peft.tuners.lora.eva.prepare_model_inputs_fn_language_modeling` is used
            by default.
        prepare_layer_inputs_fn (Union[callable, Dict[str, callable], None]):
            This function receives the layer inputs, the model inputs (potentially modified by
            `prepare_model_inputs_fn`) and the name of the layer and returns the inputs that should be used for SVD for
            that particular layer. Any custom function defined here expects three arguments: `layer_input`,
            `model_input`, and `layer_name` and should return a 2d tensor. The default logic can be found in
            peft.tuners.lora.eva.prepare_layer_inputs_fn_language_modeling and works for language modeling. In this
            case model_inputs is the mask used to determine which indices should be used for SVD (created by
            `prepare_model_inputs_fn_language_modeling`).
        adapter_name (str): The name of the adapter to initialize the weights for.
        gather_distributed_inputs (bool):
            Whether to gather the layer inputs from all ranks. Default is True meaning in a distributed setting the
            layer inputs will be gathered from all ranks for the SVD computation. For non-distributed settings this
            argument is ignored. Set to False if you are using a non-distributed dataloader in a distributed setting.
        show_progress_bar (bool): Whether to show a progress bar. Default is True.

    Returns:
        model (torch.nn.Module): The model with the initialized LoRA weights.
    r   zmodel must be a PeftModelr   zO`initialize_lora_eva_weights` currently only works with a single active adapterr  zM`initialize_lora_eva_weights` can only be used with `init_lora_weights='eva'`Nz8dataloader is required if eva_state_dict is not provided)r   r   r   r   r   r  r   r   )rx   r5   rn   active_adaptersr   r!  rE  r7  )	r   r   r  r   r   r   r  r   r   s	            r&   initialize_lora_eva_weightsrH    s    j 5-(455 5  !A%jkk &88EAhii WXX+!!$;$;%&?/	
 =r(   )Ar   collectionsr   r   collections.abcr   r   
contextlibr   rj   r   	functoolsr	   	itertoolsr
   typingr   r   r1   torch.distributeddistributedrC   r   transformers.pytorch_utilsr   peft.tuners.tuners_utilsr   r   peft.utils.constantsr   peft.utils.incremental_pcar   peft.utils.otherr   r   configr   layerr   r   r   r   r,  r   re   r   r   r   r   Moduler@   r   r^   r   r   r2   r   r   r_   r`   r  r7  rb   rE  rH  rc   r(   r&   <module>rY     s@    , - "    "     - ] D 5 =  D D &'97C > >B>0e >0F9u 98?$ ?4 ?uxx 5<< %U\\0A*B 
  /W\WcWc /8E88??EE *%E %	E
 "E &h/E #8T#x--@$#FGE  $E E 
EPE
88??E
E
 E
P  )-%42[Js!&*"^88??^^ *%^ "	^
 &h/^ #8T#x--@$#FG^ ^  $^ ^ 
^ ^B  &*%)%42[Js!&*"O>88??O>"O> TNO> "	O>
 &h/O> #8T#x--@$#FGO> O>  $O> O> O>r(   