
    bi;                         d dl Z d dlmZ d dlZd dlmZ d dlmc mZ d dl	m
Z
 d dlmZmZ d dlmZ ddlmZ  G d d	ej$                  j&                        Z G d
 de      Z G d dej,                  e      Zy)    N)Optional)Conv1D)BaseTunerLayercheck_adapters_to_merge)	transpose   )
BufferDictc                   ,    e Zd Zed        Zed        Zy)UniqueBaseGradc                 V    |d d d d d f   |z  |d   z  }| j                  |||       |S )NN)save_for_backward)ctx
randlora_Arandlora_lambdarandlora_gammaouts        U/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/peft/tuners/randlora/layer.pyforwardzUniqueBaseGrad.forward   s8    aDj)J69NNj/>J
    c                 .   | j                   \  }}}|j                  |j                        |j                  |j                        |j                  |j                        }}}t        j                  d|||      }t        j                  d|||      }d ||fS )Nzkbj,kvj,bj->kbzkbj,kvj,kb->bj)saved_tensorstodtypetorcheinsum)r   grad_outputr   r   r   grad_randlora_lambdagrad_randlora_gammas          r   backwardzUniqueBaseGrad.backward%   s    696G6G3
O^MM+++,{001k//0 &4O

  %||,<k:Wef#ll+;[*Vef)+>>>r   N)__name__
__module____qualname__staticmethodr   r     r   r   r   r      s(     
 	? 	?r   r   c                   `    e Zd ZdZdZdej                  fdZede	fd       Z
dedefd	Zd
 Zy)RandLoraLayer)r   r   )r   
randlora_B
base_layerc                 j   || _         i | _        i | _        t        j                  i       | _        t        j                  i       | _        t        j                  i       | _        d | _	        d | _
        d| _        g | _        d| _        | j                         }t        |t        j                         r|j"                  |j$                  }}nUt        |t&              rEt)        |j*                  d      r|j*                  j,                  n|j*                  j.                  \  }}| _        | _        || _        y )NFTds_shape)r)   rscalingnn
ModuleDictrandlora_dropoutParameterDictr   r   r   r(   _disable_adaptersmerged_adapterscast_input_dtype_enabledget_base_layer
isinstanceLinearin_featuresout_featuresr   hasattrweightr+   shapekwargs)selfr)   r=   r8   r9   s        r   __init__zRandLoraLayer.__init__7   s   $ "b 1  "//3 ..r2 1504 "'! )-%((*
j")),(2(>(>
@W@WK
F+.5j6G6G.T
!!**ZdZkZkZqZq &K '(r   returnc                 ,    t        | j                        S r   )boolr3   )r>   s    r   mergedzRandLoraLayer.mergedY   s    D(())r   r   r(   c           	         |dk  rt        d|       || j                  |<   |dkD  rt        j                  |      }nt        j                         }| j
                  j                  t        j                  ||i             t        | j                  | j                        |z  }	|	j                         rt        |	      nt        |	      dz   | _        t        j                  t        j                   || j                        d      | j"                  |<   t        j                  t        j$                  | j                  t        | j                  | j                              t'        | j                  | j                        z  d      | j(                  |<   ||z  | j*                  |<   || _        || _        ||vrt1        | j,                        dk  rt        d      t3        | j,                  j5                               d   }
t3        | j.                  j5                               d   }d	}t'        | j                  | j                        t        | j                  | j                        }}|j6                  d   |k  r)t        |j9                  d
|j6                  d   |            |
j6                  d   |k  r)t        |j9                  d|
j6                  d   |            d}|
j6                  d   | j                  |   k  r6t        |j9                  d|
j6                  d   | j                  |               |j6                  d   | j                  |   k  r6t        |j9                  d
|j6                  d   | j                  |               |
| j,                  |<   || j.                  |<   |r| j;                  |       | j=                  |       | j?                  | j@                         y )Nr   z?`r` should be a positive integer value but the value passed is         )p   T)requires_gradzfThe `randlora_A` and `randlora_B` buffers are empty. This should not happen. Please report this issue.z{} has a size of {} but {} or greater is required; this probably happened because an additional RandLora adapter was added after the first one with incompatible shapes.r(   r   z{} has a size of {} but {} or greater is required; this probably happened because an additional RandLora adapter with a lower rank was added after the first one; loading the adapters in reverse order may solve this.)!
ValueErrorr,   r.   DropoutIdentityr0   updater/   minr8   r9   
is_integerint	num_bases	Parameterr   randnr   onesmaxr   r-   r   r(   lenlistvaluesr<   formatreset_randlora_parameters%_move_adapter_to_device_of_base_layerset_adapteractive_adapters)r>   adapter_namer   r(   r,   randlora_alphar0   init_weightsrandlora_dropout_layerrQ   randlora_A_paramrandlora_B_param
error_tmplmax_dimmin_dims                  r   update_layerzRandLoraLayer.update_layer]   su    6^_`^abcc |c!%'ZZ2B%C"%'[[]"$$R]]LBX3Y%Z[ (($*;*;<q@	+4+?+?+AYs9~XYGY-/\\%++a:Xhl-m\*,.LLJJt~~s4+<+<d>N>N'OP$##T%5%567-
L) &4a%7\" %$z)4??#a' |   $DOO$:$:$<=a@#DOO$:$:$<=a@R   #4#3#3T5F5FGTM]M]_c_p_pIqWG%%a(72 !2!2<AQAWAWXYAZ\c!dee%%b)G3 !2!2<AQAWAWXYAZ\c!dee3 
  %%a(466,+?? !2!2<AQAWAWXYAZ\`\b\bco\p!qrr%%b)DFF<,@@ !2!2<AQAWAWXZA[]a]c]cdp]q!rss,<DOOL),<DOOL)**<822<@--.r   c           	         || j                   j                         v rt        j                         5  t        j
                  j                  | j                   |          t        j
                  j                  | j                  |   dt        | j                  |   j                        z         d d d        y y # 1 sw Y   y xY w)NrG   )r   keysr   no_gradr.   initzeros_	constant_r   rU   r<   )r>   r^   s     r   rZ   z'RandLoraLayer.reset_randlora_parameters   s    4//4466 wt33LAB!!$"5"5l"CQTM`M`amMnMtMtIuEuvw w 7w ws   A=B88CN)r!   r"   r#   adapter_layer_namesother_param_namesr.   Moduler?   propertyrB   rC   r	   rg   rZ   r%   r   r   r'   r'   2   s]    ?4 299  D * * *L/ L/ 	L/\wr   r'   c                   (    e Zd Z	 	 	 	 	 	 ddedededededededed	ed
df fdZddede	e
e      d
dfdZddZdd
eej                  ej                  f   fdZd
ej                  fdZdej                  d
ej                  fdZd
ef fdZ xZS )r7   r   r(   r^   r,   r_   r0   fan_in_fan_outis_target_conv_1d_layerr`   r@   Nc           	          t         t        j                  |           t	        j                  | |fi | || _        || _        | j                  |||||||
       |	| _        y r   )	superr.   r7   r?   r'   rs   _active_adapterrg   rt   )r>   r)   r   r(   r^   r,   r_   r0   rs   rt   r`   r=   	__class__s               r   r?   zLinear.__init__   s_     	bii')tZ:6:,+,
J>Sceqr'>$r   
safe_mergeadapter_namesc                    t        | |      }|sy|D ]-  }|| j                  j                         v s!| j                         }|j                  j
                  }|r|j                  j                  j                         }|| j                  |      z  }t        j                  |      j                         st        d| d      |j                  |      |j                  _        n?| j                  |      }|j                  xj                  |j                  |      z  c_        | j                  j                  |       0 y)a^  
        Merge the active adapter weights into the base weights

        Args:
            safe_merge (`bool`, *optional*):
                If True, the merge operation will be performed in a copy of the original weights and check for NaNs
                before merging the weights. This is useful if you want to check if the merge operation will produce
                NaNs. Defaults to `False`.
            adapter_names (`list[str]`, *optional*):
                The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
                to `None`.
        Nz1NaNs detected in the merged weights. The adapter z seems to be broken)r   r   ri   r5   r;   r   datacloneget_delta_weightr   isfiniteallrJ   r   r3   append)r>   ry   rz   active_adapterr)   
orig_dtypeorig_weightsdelta_weights           r   mergezLinear.merge   s    0mD+ 	<N!5!5!:!:!<<!002
'..44
 $.#4#4#9#9#?#?#AL D$9$9.$IIL >>,7;;=(OP^O__rs  .:__Z-HJ%%*#'#8#8#HL%%**looj.II*$$++N;-	<r   c                    | j                   st        j                  d       yt        | j                        dkD  r| j                         }|j                  j                  }| j                  j                         }|| j                  j                         v r?| j                  |      }|j                  xj                  |j                  |      z  c_        t        | j                        dkD  ryy)zW
        This method unmerges all merged adapter layers from the base weights.
        z Already unmerged. Nothing to do.Nr   )rC   warningswarnrV   r3   r5   r;   r   popr   ri   r~   r|   r   )r>   r)   r   r   r   s        r   unmergezLinear.unmerge   s     {{MM<=$&&'!+,,.J#**00J!11557N!5!5!:!:!<<#44^D!!&&,//**EE& $&&'!+r   c                    | j                   |   }| j                  |   }||j                  }|j                  }|j                  dk(  xr( |t
        j                  k(  xs |t
        j                  k(  }| j                  |   j                  |      }| j                  |   j                  |      }|r@|j                         }|j                         }|j                         }|j                         }t        | j                  | j                        t        | j                  | j                        }
}	|ddd| j                   d|	f   j                  |      }|d|
d| j                   ddf   j                  |      }|j#                  d      }t$        j'                  |||      j#                  d      }|	| j                  k(  r||fS |j(                  |j(                  fS )a4  
        Performs scaling on the smallest random base (randlora_A) and returns randlora_A and randlora_B in the correct
        order to fit the target layers' dimensions

        Args:
            adapter (str):
                The name of the adapter for which the delta weight should be computed.
        NcpurG   )	start_dim)end_dim)r   r(   devicer   typer   float16bfloat16r   r   r   floatrN   r9   r8   rU   rQ   flattenr   applyT)r>   adapterr   r   r(   r   cast_to_fp32r   r   rf   re   sliced_Asliced_Bupdate_Bupdate_As                  r   get_scaled_baseszLinear.get_scaled_bases  s    __W-
__W-
>&&F  
 {{e+c%--1G1b5TYTbTbKb..w7::6B,,W588@#))+J#))+J-335O+113N t00$2B2BCSIZIZ\`\l\lEm
 a!14>>!18G8;<??Ghwh(8$..(8!;<??G ##a#0!''/>RZZcdZe d&&&X%%zz8::%%r   c                     | j                  |      \  }}|j                  |j                  z  j                  }t        || j                        }| j                  |   }||z  S )z
        Compute the delta weight for the given adapter.

        Args:
            adapter (str):
                The name of the adapter for which the delta weight should be computed.
        )r   r   r   rs   r-   )r>   r   r   r   rM   output_tensorr-   s          r   r~   zLinear.get_delta_weight4  s\     "227;(**xzz),,!&$*=*=>,,w'w&&r   xc           	         |j                   }| j                  r3| j                  r| j                           | j                  |g|i |}n| j                  r | j                  |g|i |}n | j                  |g|i |}| j
                  D ]  }|| j                  j                         vr | j                  |   }| j                  ||j                        \  }}	|j                  |	j                         }| j                  |   }
|t        j                  t        j                   ||      |      |	      |
z  z   } |j                  |      }|S )N)r   )r   disable_adaptersrC   r   r)   r]   r   ri   r0   r   r   r   r-   Flinear)r>   r   argsr=   previous_dtyperesultr   dropoutr   r   r-   s              r   r   zLinear.forwardE  s:     {{$T__Q888F[[$T__Q888F$T__Q888F"&"6"6 _!)=)=)B)B)DD//?%)%:%:>RSRZRZ%:%["(DD(,,~6!((188GAJ+I8"TW^"^^_ >*r   c                 *    t         |          }d|z   S )Nz	randlora.)rv   __repr__)r>   reprx   s     r   r   zLinear.__repr__Z  s    g S  r   )r   r   rE   FFT)FN)r@   Nr   )r!   r"   r#   r	   strrP   r   rB   r?   r   rW   r   r   tupler   Tensorr   r~   r   r   __classcell__)rx   s   @r   r7   r7      s    "%$(-!? ? 	?
 ? ? ?  ? ? "&? ? 
?,(< (<Xd3i=P (<\` (<TF .&ellELL>X8Y .&`'5<< '" 5<< *!# ! !r   r7   )r   typingr   r   torch.nnr.   torch.nn.functional
functionalr   transformers.pytorch_utilsr   peft.tuners.tuners_utilsr   r   peft.utils.otherr   _buffer_dictr	   autogradFunctionr   r'   r7   r%   r   r   <module>r      s`          - L & %?U^^,, ?*}wN }w@j!RYY j!r   