import copy
import warnings
from typing import Optional

import torch
import torch.nn as nn
import torch.nn.functional as F

from peft.tuners.tuners_utils import BaseTunerLayer, check_adapters_to_merge


class ShiraLayer(BaseTunerLayer):
    # All names of layers that may contain (trainable) adapter weights
    adapter_layer_names = ("shira_weight",)
    # All names of other parameters that may contain adapter-related parameters
    other_param_names = ("r", "scaling", "shira_indices")

    def __init__(self, base_layer: nn.Module, **kwargs) -> None:
        self.base_layer = base_layer
        self.r = {}
        self.scaling = {}
        self.shira_weight = nn.ParameterDict({})
        self.shira_indices = {}
        self.weight_shape = base_layer.weight.shape
        self._disable_adapters = False
        self.merged_adapters = []

        base_layer = self.get_base_layer()
        if isinstance(base_layer, nn.Linear):
            in_features, out_features = base_layer.in_features, base_layer.out_features
        else:
            raise NotImplementedError("Only nn.Linear layers supported currently")

        self.in_features = in_features
        self.out_features = out_features
        self.kwargs = kwargs

    def update_layer(self, adapter_name, mask, r, init_weights: bool = True):
        if r <= 0:
            raise ValueError(f"`r` should be a positive integer value but the value passed is {r}")

        self.r[adapter_name] = r
        self.scaling[adapter_name] = 1.0
        num_shira_weight = r * (self.in_features + self.out_features)
        if num_shira_weight > self.in_features * self.out_features:
            raise ValueError(
                f"The set rank {r} results in more shira params than the total number of params in the base layer "
                f"{self.in_features * self.out_features} and this is not allowed."
            )

        shira_init_weight = torch.zeros(num_shira_weight) if init_weights else torch.randn(num_shira_weight)
        self.shira_weight[adapter_name] = nn.Parameter(
            shira_init_weight.to(self.base_layer.weight.dtype).to(self.base_layer.weight.device),
            requires_grad=True,
        )

        if mask is not None:
            mask_indices = torch.where(mask == 1)
            self.shira_indices[adapter_name] = torch.cat(
                [mask_indices[0].unsqueeze(0), mask_indices[1].unsqueeze(0)], 0
            ).to(torch.int)
            self.shira_indices[adapter_name] = self.shira_indices[adapter_name].to(self.base_layer.weight.device)
            if self.shira_indices[adapter_name].shape[1] != self.shira_weight[adapter_name].shape[0]:
                raise ValueError(
                    f"The SHiRA indices and weights are not the same dimensions for adapter {adapter_name} "
                    f"in layer {self.base_layer}"
                )

        self._move_adapter_to_device_of_base_layer(adapter_name)
        self.set_adapter(self.active_adapters)

    def reset_shira_parameters(self, adapter_name):
        nn.init.zeros_(self.shira_weight[adapter_name])

    def set_scale(self, adapter, scale):
        if adapter not in self.scaling:
            return
        self.scaling[adapter] = scale


class Linear(nn.Module, ShiraLayer):
    def __init__(
        self,
        base_layer,
        mask,
        adapter_name: str,
        r: int = 0,
        fan_in_fan_out: bool = False,
        init_weights: bool = True,
        **kwargs,
    ) -> None:
        super().__init__()
        ShiraLayer.__init__(self, base_layer, **kwargs)
        self.fan_in_fan_out = fan_in_fan_out
        if self.base_layer is not self.get_base_layer():
            raise ValueError("SHiRA does not support nested base layers")

        self._active_adapter = adapter_name
        self.update_layer(adapter_name, mask, r, init_weights=init_weights)

    def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = None) -> None:
        """
        Merge the active adapter weights into the base weights

        Args:
            safe_merge (`bool`, *optional*):
                If True, the merge operation will be performed in a copy of the original weights and check for NaNs
                before merging the weights. This is useful if you want to check if the merge operation will produce
                NaNs. Defaults to `False`.
            adapter_names (`List[str]`, *optional*):
                The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
                to `None`.
        """
        adapter_names = check_adapters_to_merge(self, adapter_names)
        if not adapter_names:
            # no adapter to merge
            return

        for active_adapter in adapter_names:
            if active_adapter in self.shira_weight.keys():
                base_layer = self.get_base_layer()
                if safe_merge:
                    orig_weights = base_layer.weight.data.clone()
                    orig_weights += self.get_delta_weight(active_adapter)

                    if not torch.isfinite(orig_weights).all():
                        raise ValueError(
                            f"NaNs detected in the merged weights. The adapter {active_adapter} seems to be broken"
                        )

                    base_layer.weight.data = orig_weights
                else:
                    base_layer.weight.data += self.get_delta_weight(active_adapter)
                self.merged_adapters.append(active_adapter)

    def unmerge(self) -> None:
        if not self.merged:
            warnings.warn("Already unmerged. Nothing to do.")
            return
        while len(self.merged_adapters) > 0:
            active_adapter = self.merged_adapters.pop()
            if active_adapter in self.shira_weight.keys():
                self.get_base_layer().weight.data -= self.get_delta_weight(active_adapter)

    def get_delta_weight(self, adapter) -> torch.Tensor:
        """
        Compute the delta weight for the given adapter.

        Args:
            adapter (str):
                The name of the adapter for which the delta weight should be computed.
        """
        self.shira_indices[adapter] = self.shira_indices[adapter].to(self.shira_weight[adapter].device)
        return torch.sparse_coo_tensor(
            self.shira_indices[adapter], self.shira_weight[adapter] * self.scaling[adapter], self.weight_shape
        )

    def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
        if self.disable_adapters:
            if self.merged:
                self.unmerge()
            result = self.base_layer(x, *args, **kwargs)
        elif self.merged:
            result = self.base_layer(x, *args, **kwargs)
        else:
            new_weight = copy.deepcopy(self.base_layer.weight.data)
            for active_adapter in self.active_adapters:
                if active_adapter not in self.shira_weight.keys():
                    continue
                new_weight += self.get_delta_weight(active_adapter)
            result = F.linear(x, new_weight, bias=self.base_layer.bias)

        return result

    def __repr__(self) -> str:
        rep = super().__repr__()
        return "shira." + rep