
    bi*                         d dl Z d dlmZ d dlZd dlmZ d dlmc mZ d dl	m
Z
 d dlmZmZ d dlmZ  G d de      Z G d d	ej"                  e      Zy)
    N)Optional)Conv1D)BaseTunerLayercheck_adapters_to_merge)	transposec                   v    e Zd ZdZdej
                  fdZedefd       Z		 	 dde
deded	ed
ededefdZd Zy)VBLoRALayer)vblora_logits_Avblora_logits_Bvblora_vector_bank
base_layerc                 @   || _         i | _        i | _        t        j                  i       | _        t        j                  i       | _        t        j                  i       | _        d| _	        g | _
        | j                         }t        |t        j                        r|j                  |j                  }}nUt        |t               rEt#        |j$                  d      r|j$                  j&                  n|j$                  j(                  \  }}| _        | _        || _        y )NFds_shape)r   rtopknn
ModuleDictvblora_dropoutParameterDictr
   r   _disable_adaptersmerged_adaptersget_base_layer
isinstanceLinearin_featuresout_featuresr   hasattrweightr   shapekwargs)selfr   r    r   r   s        S/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/peft/tuners/vblora/layer.py__init__zVBLoRALayer.__init__   s    $	 mmB/  "//3!//3 "'!((*
j")),(2(>(>
@W@WK
F+.5j6G6G.T
!!**ZdZkZkZqZq &K '(    returnc                 ,    t        | j                        S N)boolr   )r!   s    r"   mergedzVBLoRALayer.merged9   s    D(())r$   adapter_namer   r   num_vectorsvector_lengthr   init_logits_stdc	                    |dk  rt        d| d      |dk  rt        d| d      | j                  |z  dk7  rt        d| j                   d|       | j                  |z  dk7  rt        d| j                   d|       || j                  |<   || j                  |<   |dkD  rt        j                  |	      }	nt        j                         }	| j                  j                  t        j                  ||	i             t        j                  t        j                  || j                  |z  |      d
      | j                  |<   t        j                  t        j                  | j                  |z  ||      d
      | j                  |<   || _        | j#                  ||       | j%                  |       | j'                  | j(                         y )Nr   z`r` z# should be a positive integer valuez`topk` z`in_features` z& must be divisible by `vector_length` z`out_features`         )pT)requires_grad)
ValueErrorr   r   r   r   r   DropoutIdentityr   updater   	Parametertorchzerosr
   r   r   reset_vblora_logits%_move_adapter_to_device_of_base_layerset_adapteractive_adapters)
r!   r*   r   r   r   r+   r,   r   r-   vblora_dropout_layers
             r"   update_layerzVBLoRALayer.update_layer=   s    6tA3&IJKK19wtf,OPQQm+q0~d.>.>-??efsetuvv},1!$"3"3!44Z[hZij   !|"&		,C#%::#? #%;;= ""2==,@T1U#VW-/\\KK4++}<kJZ^.
\* .0\\KK))]:A{K[_.
\* #5  ?22<@--.r$   c                 H   || j                   j                         v rzt        j                         5  t        j
                  j                  | j                   |   d|       t        j
                  j                  | j                  |   d|       d d d        y y # 1 sw Y   y xY w)Nr   )r
   keysr7   no_gradr   initnormal_r   )r!   r*   r-   s      r"   r9   zVBLoRALayer.reset_vblora_logitsf   s    4//4466 X 4 4\ BAW 4 4\ BAWX X 7X Xs   ABB!N)r/   {Gz?)__name__
__module____qualname__adapter_layer_namesr   Moduler#   propertyr(   r)   strintfloatr>   r9    r$   r"   r	   r	      s    V299 4 * * * !$!%'/'/ 	'/
 '/ '/ '/ '/ '/RXr$   r	   c                   N    e Zd Z	 	 	 	 	 ddedededededededed	ed
df fdZddedee	e      d
dfdZ
ddZdej                  d
ej                  fdZdd
eej                  ej                  f   fdZd
ej                  fdZdej                  d
ej                  fdZ xZS )r   r*   r   r+   r,   r   r   r-   fan_in_fan_outis_target_conv_1d_layerr%   Nc           
          t         t        j                  |           t	        j                  | |fi | |
| _        || _        | j                  ||||||||	       || _        y r'   )	superr   r   r#   r	   rP   _active_adapterr>   rQ   )r!   r   r   r*   r   r+   r,   r   r   r-   rP   rQ   r    	__class__s                r"   r#   zLinear.__init__o   sg      	bii')T:88,+,a{MSacr	
 (?$r$   
safe_mergeadapter_namesc                    t        | |      }|sy|D ]  }|| j                  j                         v s | j                         }|r||j                  j
                  j                         }|| j                  |      z  }t        j                  |      j                         st        d| d      ||j                  _        n.|j                  xj
                  | j                  |      z  c_        | j                  j                  |        y)a^  
        Merge the active adapter weights into the base weights

        Args:
            safe_merge (`bool`, *optional*):
                If True, the merge operation will be performed in a copy of the original weights and check for NaNs
                before merging the weights. This is useful if you want to check if the merge operation will produce
                NaNs. Defaults to `False`.
            adapter_names (`List[str]`, *optional*):
                The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
                to `None`.
        Nz1NaNs detected in the merged weights. The adapter z seems to be broken)r   r
   r@   r   r   datacloneget_delta_weightr7   isfiniteallr2   r   append)r!   rV   rW   active_adapterr   orig_weightss         r"   mergezLinear.merge   s     0mD+ 	<N!5!5!:!:!<<!002
 $.#4#4#9#9#?#?#AL D$9$9.$IIL >>,7;;=(OP^O__rs  .:J%%*%%**d.C.CN.SS*$$++N;	<r$   c                    | j                   st        j                  d       y t        | j                        dkD  r| j                  j                         }|| j                  j                         v r<| j                         j                  xj                  | j                  |      z  c_
        t        | j                        dkD  ry y )Nz Already unmerged. Nothing to do.r   )r)   warningswarnlenr   popr
   r@   r   r   rY   r[   )r!   r_   s     r"   unmergezLinear.unmerge   s    {{MM<=$&&'!+!11557N!5!5!:!:!<<##%,,11T5J5J>5ZZ1 $&&'!+r$   logitsc                     |j                  |d      \  }}t        j                  |d      }|j                  d      ||   z  j	                  d      S )N)dim)r   Fsoftmax	unsqueezesum)r!   rh   r   r   top_k_logitsindicestopk_weightss          r"   _get_low_rank_matrixzLinear._get_low_rank_matrix   sP     &Db 9gyy26&&r*-?-HHMMbQQr$   c                 h   | j                   |   }| j                  |   }| j                  r,|d   j                         j	                         rt        d      | j                  |   j                  |j                        }| j                  |   }|r0|j                         }|j                         }|j                         }| j                  |||      j                  |j                  d   d      }| j                  |||      j                  dd      j                  d|j                  d         }||fS )N)r   r   zoFound infinity values in VB-LoRA logits. Ensure training was not resumed from a `save_only_topk_weights` model.r   rj         )r
   r   trainingisinfanyRuntimeErrorr   todevicer   rM   rt   reshaper   r   )	r!   adaptercast_to_fp32r
   r   r   r   ABs	            r"   _get_lora_matriceszLinear._get_lora_matrices   s2   ..w7..w7 ==_T288:>>@ B  "44W=@@AWAWXyy! -335O-335O!3!9!9!; %%o7I4PXXYhYnYnopYqsuv %%o7I4PYq!_WR..q12 	

 !tr$   c                    | j                   |   j                  }| j                   |   j                  }|j                  dk(  xr |t        j
                  k(  }| j                  ||      \  }}t        ||z  | j                        }|S )z
        Compute the delta weight for the given adapter.

        Args:
            adapter (str):
                The name of the adapter for which the delta weight should be computed.
        cpu)	r
   r}   dtypetyper7   float16r   r   rP   )r!   r   r}   r   r   r   r   output_tensors           r"   r[   zLinear.get_delta_weight   s~     %%g.55$$W-33{{e+F0F&&w=1!!a%)<)<=r$   xc           	         |j                   }| j                  r3| j                  r| j                           | j                  |g|i |}n| j                  r | j                  |g|i |}n | j                  |g|i |}| j
                  D ]  }|| j                  j                         vr | j                  |      \  }}|j                  | j                  |   j                         }| j                  |   }	|t        j                  t        j                   |	|      |      |      z   } |j                  |      }|S r'   )r   disable_adaptersr)   rg   r   r<   r
   r@   r   r|   r   r   rm   linear)
r!   r   argsr    previous_dtyperesultr_   r   r   dropouts
             r"   forwardzLinear.forward   s&     {{$T__Q888F[[$T__Q888F$T__Q888F"&"6"6 G!)=)=)B)B)DD..~>1DD00@FFG--n=!((188GAJ+BA"FFG >*r$   )rw   r/   rD   FF)FN)r%   N)F)rE   rF   rG   rK   rL   rM   r(   r#   r   listra   rg   r7   tensorTensorrt   tupler   r[   r   __classcell__)rU   s   @r"   r   r   m   s     #!%$(-? 	?
 ? ? ? ? ? ? ? "&? 
?2!< !<Xd3i=P !<\` !<F[R5<< RV[VbVb R
u||UZUaUaGaAb <5<<  5<< r$   r   )rc   typingr   r7   torch.nnr   torch.nn.functional
functionalrm   transformers.pytorch_utilsr   peft.tuners.tuners_utilsr   r   peft.utils.otherr   r	   r   rN   r$   r"   <module>r      sG          - L &OX. OXdLRYY Lr$   