
from __future__ import annotations

import importlib
import math
import warnings
from typing import Any, Optional, Union

import torch
import torch.nn as nn
import torch.nn.init as init

from peft.tuners.tuners_utils import BaseTunerLayer, check_adapters_to_merge
from peft.utils import transpose
from peft.utils.integrations import gather_params_ctx

from .layer import LoraLayer


class LoraParallelLinear(nn.Module, LoraLayer):
    """
    When the target layer parallel_linear is RowParallelLinear, the LoRA matrix A is split along its rows so that
    the input and output shapes stay consistent; lora_B then has to be a complete (non-parallel) linear layer.
    Conversely, when the target layer is ColumnParallelLinear, lora_B is split along its columns, while lora_A
    remains a complete linear layer.
    """
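
    # Sharding sketch (illustrative comment, assuming a tensor-parallel world size `tp`; not part of the original
    # source): a RowParallelLinear base layer shards its weight along the input dimension, so lora_A is built as a
    # RowParallelLinear(in_features, r) whose rank-local shard consumes the already-split input, while lora_B stays
    # a dense nn.Linear(r, out_features). For a ColumnParallelLinear base layer the roles flip: lora_A stays a dense
    # nn.Linear(in_features, r) and lora_B is a ColumnParallelLinear(r, out_features) that emits the same
    # out_features / tp column shard as the base layer.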

    def __init__(
        self,
        base_layer,
        adapter_name: str,
        backend,
        r: int = 0,
        lora_alpha: int = 1,
        lora_dropout: float = 0.0,
        fan_in_fan_out: bool = False,
        is_target_conv_1d_layer: bool = False,
        init_lora_weights: Union[bool, str] = True,
        use_rslora: bool = False,
        use_dora: bool = False,
        lora_bias: bool = False,
        **kwargs,
    ):
        if lora_bias:
            raise ValueError(f"{self.__class__.__name__} does not support lora_bias yet, set it to False")

        super().__init__()
        LoraLayer.__init__(self, base_layer=base_layer, **kwargs)

        if use_dora:
            raise ValueError(f"{self.__class__.__name__} does not support DoRA yet, please set it to False")

        self.backend = backend
        self.is_parallel_a = isinstance(base_layer, backend.RowParallelLinear)
        self.fan_in_fan_out = fan_in_fan_out
        self._active_adapter = adapter_name

        megatron_config = kwargs["megatron_config"]
        parallel_linear_kwargs = {"megatron_config": megatron_config}
        init_method = init.xavier_normal_
        if hasattr(megatron_config, "init_method"):
            init_method = megatron_config.init_method
        input_is_parallel = True
        gather_output = False
        if self.is_parallel_a:
            # Match the base RowParallelLinear: lora_A must consume the same (already split) input.
            input_is_parallel = base_layer.input_is_parallel
        else:
            # Match the base ColumnParallelLinear: lora_B must produce the same column shard.
            gather_output = base_layer.gather_output
        self.update_layer(
            adapter_name,
            r,
            lora_alpha=lora_alpha,
            lora_dropout=lora_dropout,
            init_lora_weights=init_lora_weights,
            use_rslora=use_rslora,
            use_dora=use_dora,
            init_method=init_method,
            input_is_parallel=input_is_parallel,
            gather_output=gather_output,
            **parallel_linear_kwargs,
        )

        if is_target_conv_1d_layer:
            raise ValueError(
                f"{self.__class__.__name__} does not support target_conv_1d_layer yet, please set it to False"
            )
        self.is_target_conv_1d_layer = False
 #>>**++mn  (-$    c           	        t               j                         }|d= |dk  rt        d|       || j                  |<   || j                  |<   |dkD  rt        j                  |      }nt        j                         }|| j                  |<   |d   }t        j                  |_        | j                  r^| j                  j                  | j                  |d|	d||	      }t        j                   || j"                  dt        j                  
      }n\t        j                   | j                  |dt        j                  
      }| j                  j%                  || j"                  d|
||      }|| j&                  |<   || j(                  |<   |r&|t+        j,                  |      z  | j.                  |<   n||z  | j.                  |<   || j0                  |<   t3        |t4              rQ|j7                  d      r@t9        | j;                         j<                        5  | j?                  ||       d d d        nt3        |t4              rP|j7                  d      r?t9        | j;                         j<                        5  | jA                  ||       d d d        nt3        |t4              rQ|jC                         dk(  r>t9        | j;                         j<                        5  | jE                  |       d d d        nW|dk(  r>t9        | j;                         j<                        5  | jG                  |       d d d        n|r| jI                  ||       | jK                  |       || jL                  v r  | jL                  |   jN                  | fi | | jQ                  | jR                         y # 1 sw Y   dxY w# 1 sw Y   pxY w# 1 sw Y   |xY w# 1 sw Y   xY w)Nr*   r   z?`r` should be a positive integer value but the value passed is         )pr   FT)
input_sizeoutput_sizebiasr   skip_bias_addr   config)in_featuresout_featuresr7   dtype)r5   r6   r7   r   r   r9   pissacordaoloraloftq)*localscopyr   r,   r   nnDropoutIdentityr   torchfloat32params_dtyper"   r   r!   r:   Linearr;   ColumnParallelLinearlora_Alora_Bmathsqrtscalingr   r    str
startswithr
   get_base_layerweight
pissa_init
corda_initlower
olora_init
loftq_initreset_lora_parameters%_move_adapter_to_device_of_base_layerlora_variantr%   set_adapteractive_adapters)r*   r+   r,   r   r   r   r   r   r   r   r   r/   r.   lora_dropout_layerr   lora_alora_bs                    r0   r(   zLoraParallelLinear.update_layere   sn    6N6^_`^abcc |(2%#!#l!;!#*<,'01BC',}}$\\33++"3"'& 4 F YY14;L;LSX`e`m`mnFYY4+;+;!RW_d_l_lmF\\66 --+'& 7 F %+L!$*L!)3diil)BDLL&)3aDLL&&.l# '-2C2N2Nw2W"4#6#6#8#?#?@ A.?@A A)3/4E4P4PQX4Y"4#6#6#8#?#?@ A.?@A A)3/4E4K4K4MQX4X"4#6#6#8#?#?@ .-. .')"4#6#6#8#?#?@ .-. .&&|5FG 	22<@4,,,0Dl+00@@--.)A AA A. .. .s0   N'8N3N?O'N03N<?OOc           	     >    | j                   |g|i | |j                  dd       }| j                  r9| j                  r| j	                           | j
                  |g|i |\  }}||fS |"t        | j                  j                   d      | j                  r | j
                  |g|i |\  }}||fS  | j
                  |g|i |\  }}|j                  }| j                  D ]  }|| j                  j                         vr | j                  |   }	| j                  |   }
| j                  |   }| j                  |   }| j!                  ||	j"                  j                        }| |
 |	 ||                  |z  z   } |j%                  |      }||fS )Nadapter_namesz* does not support mixed_batch_forward yet.)_check_forward_argspopdisable_adaptersmergedunmerger   r   r   r   r<   r]   rK   keysrL   r   rO   _cast_input_dtyperS   to)r*   xargsr.   rb   resultr7   torch_result_dtypeactive_adapterrK   rL   dropoutrO   s                r0   forwardzLoraParallelLinear.forward   s      4T4V4

?D9   {{*4??1>t>v>LFD& t|% & 7 788bcdd[[*4??1>t>v>LFD t| +4??1>t>v>LFD!'"&"6"6 G!)9)9);;^4^4++N;,,~6**1fmm.A.AB&
);"<w"FFG YY12Ft|r1   c                6   t        | |      }|sy|D ]  }|| j                  j                         v s!| j                         }|r~|j                  j
                  j                         }| j                  |      }||z   }t        j                  |      j                         st        d| d      ||j                  _        n9| j                  |      }|j                  j
                  |z   |j                  _        | j                  j                  |        y)a^  
        Merge the active adapter weights into the base weights

        Args:
            safe_merge (`bool`, *optional*):
                If True, the merge operation will be performed in a copy of the original weights and check for NaNs
                before merging the weights. This is useful if you want to check if the merge operation will produce
                NaNs. Defaults to `False`.
            adapter_names (`list[str]`, *optional*):
                The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
                to `None`.
        """
        adapter_names = check_adapters_to_merge(self, adapter_names)
        if not adapter_names:
            # no adapter to merge
            return

        for active_adapter in adapter_names:
            if active_adapter in self.lora_A.keys():
                base_layer = self.get_base_layer()
                if safe_merge:
                    # Merge into a copy first so a NaN result does not corrupt the base weights.
                    orig_weights = base_layer.weight.data.clone()
                    delta_weight = self.get_delta_weight(active_adapter)
                    orig_weights = orig_weights + delta_weight

                    if not torch.isfinite(orig_weights).all():
                        raise ValueError(
                            f"NaNs detected in the merged weights. The adapter {active_adapter} seems to be broken"
                        )

                    base_layer.weight.data = orig_weights
                else:
                    delta_weight = self.get_delta_weight(active_adapter)
                    base_layer.weight.data = base_layer.weight.data + delta_weight
                self.merged_adapters.append(active_adapter)

    def unmerge(self) -> None:
        """
        This method unmerges all merged adapter layers from the base weights.
        z Already unmerged. Nothing to do.Nr   )rf   warningswarnlenrx   rd   rK   rh   rR   rS   ru   rs   )r*   ro   rS   r|   s       r0   rg   zLoraParallelLinear.unmerge  s     {{MM<=$&&'!+!11557N!1!1!33,,.55#44^D|+ $&&'!+r1   c                   | j                   |   j                  j                  }| j                   |   j                  j                  }|j                  dk(  xr( |t
        j                  k(  xs |t
        j                  k(  }| j                  |   j                  }| j                   |   j                  }|r |j                         }|j                         }t        ||z  | j                        | j                  |   z  }|rl|j                  |      }|j                  |      | j                  |   j                  _        |j                  |      | j                   |   j                  _        |S )z
        Compute the delta weight for the given adapter.

        Args:
            adapter (str):
                The name of the adapter for which the delta weight should be computed.
        """
        device = self.lora_B[adapter].weight.device
        dtype = self.lora_B[adapter].weight.dtype

        # Half-precision matmul is poorly supported on CPU, so compute the delta in float32 there
        # and cast the result (and the stashed weights) back afterwards.
        cast_to_fp32 = device.type == "cpu" and (dtype == torch.float16 or dtype == torch.bfloat16)

        weight_A = self.lora_A[adapter].weight
        weight_B = self.lora_B[adapter].weight

        if cast_to_fp32:
            weight_A = weight_A.float()
            weight_B = weight_B.float()

        output_tensor = transpose(weight_B @ weight_A, self.fan_in_fan_out) * self.scaling[adapter]

        if cast_to_fp32:
            output_tensor = output_tensor.to(dtype=dtype)

            # cast back the weights
            self.lora_A[adapter].weight.data = weight_A.to(dtype)
            self.lora_B[adapter].weight.data = weight_B.to(dtype)

        return output_tensor

    def __repr__(self) -> str:
        rep = super().__repr__()
        return "lora." + rep


def dispatch_megatron(
    target: torch.nn.Module,
    adapter_name: str,
    lora_config,
    **kwargs: Any,
) -> Optional[torch.nn.Module]:
    new_module = None

    if isinstance(target, BaseTunerLayer):
        target_base_layer = target.get_base_layer()
    else:
        target_base_layer = target

    if lora_config.megatron_config:
        megatron_core = importlib.import_module(lora_config.megatron_core)
    else:
        megatron_core = None

    if megatron_core and isinstance(
        target_base_layer,
        (megatron_core.tensor_parallel.ColumnParallelLinear, megatron_core.tensor_parallel.RowParallelLinear),
    ):
        megatron_kwargs = kwargs.copy()
        megatron_config = lora_config.megatron_config
        if isinstance(megatron_config, dict):
            transformer_config_class = megatron_core.transformer.transformer_config.TransformerConfig
            megatron_config = transformer_config_class(**lora_config.megatron_config)
        megatron_kwargs["megatron_config"] = megatron_config
        if megatron_kwargs["fan_in_fan_out"]:
            warnings.warn(
                "fan_in_fan_out is set to True but the target module is `ColumnParallelLinear` or "
                "`RowParallelLinear`. Setting fan_in_fan_out to False."
            )
            megatron_kwargs["fan_in_fan_out"] = lora_config.fan_in_fan_out = False
        new_module = LoraParallelLinear(
            base_layer=target, adapter_name=adapter_name, backend=megatron_core.tensor_parallel, **megatron_kwargs
        )

    return new_module
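
# Minimal dispatch sketch (illustrative; `transformer_config` and `megatron_model` are placeholders, not defined
# in this module). LoRA reaches this dispatcher automatically when a LoraConfig carries Megatron settings:
#
#     from peft import LoraConfig, get_peft_model
#
#     config = LoraConfig(
#         target_modules=["query_key_value", "dense"],
#         megatron_config=transformer_config,  # megatron TransformerConfig (or a plain dict)
#         megatron_core="megatron.core",       # module path resolved via importlib above
#     )
#     model = get_peft_model(megatron_model, config)
#
# Each matching ColumnParallelLinear/RowParallelLinear target is then wrapped in a LoraParallelLinear.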