
    biy                    R   d dl mZ d dlZd dlZd dlZd dlmZ d dlmZm	Z	m
Z
 d dlZd dlmZ d dlmc mZ d dlmZ d dlmZmZ daed        Zd Z G d	 d
e      Z G d dej4                        Z G d de      Z G d dej4                  e      Z G d dej4                  e      Zy)    )annotationsN)contextmanager)AnyOptionalUnion)Function)BaseTunerLayercheck_adapters_to_mergec               +    K   i }| j                         D ]Y  \  }}|j                         }|t        j                  v rt        j                  |   ||<   t	        |      t        j                  |<   [ d | D ]M  }|j                         }||v r||   t        j                  |<   .t        j                  j                  |d       O yw)a  
    A context manager that will add each keyword argument passed to `os.environ` and remove them when exiting.

    Will convert the values in `kwargs` to strings and upper-case all the keys.

    Example:

    ```python
    >>> import os
    >>> from accelerate.utils import patch_environment

    >>> with patch_environment(FOO="bar"):
    ...     print(os.environ["FOO"])  # prints "bar"
    >>> print(os.environ["FOO"])  # raises KeyError
    ```
    N)itemsupperosenvironstrpop)kwargsexisting_varskeyvalues       Q/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/peft/tuners/boft/layer.pypatch_environmentr   &   s     $ Mlln %
Uiik"**!#CM#e*

3	% 
 &iik-+C0BJJsOJJNN3%&s   CCc                    t         t         S ddlm}  t        j                  j                  t              }	 t        dd      5   | d| d| dgd	      }d d d        a t         S # 1 sw Y   xY w# t        $ rA}t        j                  d
| d       t        j                  d       d }Y d }~|a t         S d }~ww xY w)Nr   )loadgcc)CCCXXfbd_cudaz/fbd/fbd_cuda.cppz/fbd/fbd_cuda_kernel.cuT)namesourcesverbosez#Failed to load the CUDA extension: z, check if ninja is available.zHSetting boft_n_butterfly_factor to 1 to speed up the finetuning process.)	_FBD_CUDAtorch.utils.cpp_extensionr   r   pathdirname__file__r   	Exceptionwarningswarn)r   curr_dirr   es       r   get_fbd_cudar+   J   s      /wwx(H%U3 	$:%67H:E\9]^H	 I	 	  ;A3>\]^`aIs/   A4 A(A4 (A1-A4 4	B>=0B99B>c                  0    e Zd ZdZed        Zed        Zy)FastBlockDiaga  
    Implements a custom autograd Function for a fast block diagonal operation using CUDA.

    This function is optimized for 4D tensors where the last two dimensions are equal, representing block diagonal
    matrices for efficient computation on CUDA devices.
    c                `    t               j                  |      d   }| j                  |       |S )a|  
        The forward method for FastBlockDiag.

        Computes the block diagonal operation on the input tensor using a CUDA-optimized function. This method assumes
        that the input is a 4D tensor where the last two dimensions are equal, which represent the blocks to be
        diagonalized.

        Parameters:
        ctx: A context object that can be used to stash information for backward computation.
        input (Tensor): The input tensor of shape (N, D, H, H), where `N` is the batch size,
                        `D` represents one additional dimension (In BOFT, the number of BOFT blocks), and `H` is the
                        size of the square blocks along the last two dimensions (In BOFT, the block size).

        Returns:
        Tensor: The resulting tensor after applying the block diagonal operation,
                will have the shape (N, DxH, DxH).
        r   )r+   forwardsave_for_backward)ctxinputoutputs      r   r/   zFastBlockDiag.forwardo   s.    & ''.q1e$    c                \    | j                   \  }t               j                  ||      d   }|S )Nr   )saved_tensorsr+   backward)r1   grad_outputr2   
grad_inputs       r   r7   zFastBlockDiag.backward   s.    $$!^,,[%@C
r4   N)__name__
__module____qualname____doc__staticmethodr/   r7    r4   r   r-   r-   g   s/      ,  r4   r-   c                  *     e Zd ZdZd fd	Zd Z xZS )MultiplicativeDropoutLayerz?
    Implements the multiplicative dropout layer for BOFT.
    c                0    t         |           || _        y)z
        Initializes the multiplicative dropout layer.

        Parameters:
        p (float): The probability of dropping out a block. Defaults to 0.0.
        N)super__init__p)selfrE   	__class__s     r   rD   z#MultiplicativeDropoutLayer.__init__   s     	r4   c                   | j                   rf|j                  d   |j                  d   k7  rt        d      |j                  \  }}}}t        j                  d|d      j                         }t        | j                  |z        }||z
  }t        j                  t        j                  ||j                        t        j                  ||j                        g      }	|	t        j                  |         j                  d|dd      }	t        j                  ||dd|j                        }
|	|
|<   t        j                  ||j                        j                  ||dd      }d|
z
  |z  |
|z  z   }|S )a  
        Applies multiplicative dropout to the input tensor.

        Parameters:
        x (Tensor): The input tensor of shape (N, D, H, H), where `N` is the batch size, `D` represents
                    one additional dimension (In BOFT, the number of BOFT blocks), and `H` is the size of the square
                    blocks along the last two dimensions (In BOFT, the block size).
        z4The last two dimensions of input should be the same!r   )   devicerK   )trainingshape
ValueErrortorchrandintitemintrE   catonesrM   zerosrandpermvieweyerepeat)rF   xNDH_n_randomnum_to_replace	num_zerosmask	full_mask
eye_matrixs               r   r/   z"MultiplicativeDropoutLayer.forward   s?    ==wwr{aggbk) !WXXJAq!Q }}Q40557H !!_NN*I 99ejjI5;;W`ijiqiqKrstD q)*//1a;DAq!Qqxx@I"&Ih 1QXX6==aAqIJY!#i*&<<Ar4   )        )r:   r;   r<   r=   rD   r/   __classcell__rG   s   @r   rA   rA      s    #r4   rA   c                  X    e Zd ZdZdZdZddZd ZddZdddZ	d	 Z
d
 Zd ZddZd Zy)	BOFTLayerz$
    Implements the BOFT layer.
    )boft_Rboft_s)boft_block_sizeboft_block_numboft_dropoutc                8   || _         i | _        i | _        t        j                  i       | _        t        j                  i       | _        t        j                  i       | _        d| _	        g | _
        d| _        || _        | j                         }t        |t        j                        r|j                   |j"                  }}nJt        |t        j$                        r|j&                  |j(                  }}nt+        dt-        |             || _        || _        y)z
        Initializes the BOFT layer.

        Note, currently only support linear layer and convolutional layer, with further support for other layers to be
        added soon.

        Parameters:
        base_layer: the pretrained model layer
        FTzUnsupported layer type N)
base_layerrn   ro   nn
ModuleDictrp   ParameterDictrl   rm   _disable_adaptersmerged_adapterscast_input_dtype_enabledr   get_base_layer
isinstanceLinearin_featuresout_featuresConv2din_channelsout_channelsrP   type)rF   rr   r   r|   r}   s        r   rD   zBOFTLayer.__init__   s     %! MM"-&&r*&&r*!&!(,%((*
j")),(2(>(>
@W@WK
BII.(2(>(>
@W@WK6tJ7G6HIJJ&(r4   c                L    || j                   vry t        j                  d       y )NGScaling operation for BOFT not supported! Automatically set scale to 1.)scalingr'   r(   )rF   adapterscales      r   	set_scalezBOFTLayer.set_scale   s    $,,&_`r4   c                    |dk(  ry | j                   D ]4  }|| j                  j                         vr t        j                  d       6 y )NrK   r   active_adaptersrl   keysr'   r(   rF   r   active_adapters      r   scale_layerzBOFTLayer.scale_layer   sH    A:"22 	eNT[[%5%5%77MMcd		er4   Nc                    | j                   D ]4  }|| j                  j                         vr t        j                  d       6 y )Nz?Unscaling operation for BOFT not supported! Keeping scale to 1.r   r   s      r   unscale_layerzBOFTLayer.unscale_layer   s>    "22 	]NT[[%5%5%77MM[\		]r4   c           	        t               s
d| _        d}nd| _        |dz
  }|dk  rt        d|dz    d      |dkD  rt        |      }nt	        j
                         }| j                  j                  t	        j                  ||i             |dk(  r|dk7  r| j                  |z  dk7  rt        d	| j                   d
| d      |dk7  rV|t        t        j                  |            kD  rt        d|dz    d| d      |d|z  z  dk7  rt        d| d|dz    d      t        | j                  |z        }n|dk7  r|dk(  r| j                  |z  dk7  rt        d	| j                   d| d      |dk7  rq| j                  |d|z  z  k  r"t        d| j                   d|dz    d| d      | j                  |d|z  z  z  dk7  r"t        d| j                   d|dz    d| d      t        | j                  |z        }nt        d      |dk7  r.|dz  dk7  rt        d| d      |dz  dk7  rt        d| d      t        j                  |dz   | j                  | j                  f      }t        |dz         D ]Q  }	| j!                  | j                  t        |d|	z  z        t        |dz        |      }
| j#                  |
      }|||	<   S | j%                  d|d       t	        j&                  t        j(                  |dz   |||            | j*                  |<   t	        j&                  t        j,                  t        | j.                        d            | j0                  |<   | j3                  ||       || j4                  |<   || j6                  |<   | j9                  |       | j;                  | j<                         y)zf
        Update the linear layer with trainable BOFT weights. Override for other layer types.
        FrK   Tr   -You can only specify boft_n_butterfly_factor ! to be a positive integer number.rg   rE   zin_features (') must be divisible by boft_block_num ()!0Invalid combination of boft_n_butterfly_factor () and boft_block_num (   boft_block_num (J) must be a multiple of 2 raised to the power of boft_n_butterfly_factor (() must be divisible by boft_block_size (z$Invalid combination of in_features (), boft_n_butterfly_factor () and boft_block_size (ZSomething went wrong, please report this error: https://github.com/huggingface/peft/issues) must be an even number!boft_block_size (boft_P
persistentN)r+   fbd_cuda_availablerP   rA   rs   Identityrp   updatert   r|   rT   mathlog2rQ   emptyrangeblock_butterfly_permperm2matregister_buffer	ParameterrW   rl   rV   r}   rm   reset_boft_parametersrn   ro   %_move_adapter_to_device_of_base_layerset_adapterr   )rF   adapter_namern   ro   boft_n_butterfly_factorrp   init_weightsboft_dropout_layerPipermperm_mats               r   update_layerzBOFTLayer.update_layer  s    ~&+D#&'#&*D# #:A"="Q&?@WZ[@[?\\}~ 
 #!;l!K!#  >P/Q!RSaNa$7.0A5 #D$4$4#55\]k\llno  '!+*S>1J-KK$JKbefKfJgg}  M  ~N  NP  Q  "Q(?%?@AE$*>*:  ;E  F]  `a  Fa  Eb  bd  e  "$"2"2n"DEO!n&9/1Q6 #D$4$4#55]^m]nnpq  '!+##!=T:T'UV$>t?O?O>PPl  nE  HI  nI  mJ  Ja  bq  ar  rt  u  ##!=T:T'UVZ[[$>t?O?O>PPl  nE  HI  nI  mJ  Ja  bq  ar  rt  u  !!1!1_!DEN l 
 #a'!Q& #3N3CC\!]^^"a' #4_4EE^!_`` KK014d6F6FHXHXYZ.23 	A,,  #na&A"BCZ[H[D\^uD }}T*HAaD	 	XqU;$&LLKK/!3^_Vef%
L! %'LLC@Q@Q<RTU1V$WL!""<> .=\*,:L)22<@--.r4   c                   |du r_t         j                  j                  | j                  |   dd       t         j                  j                  | j                  |   dd       y|| j                  j                         v rk|du rYt         j                  j                  | j                  |          t         j                  j                  | j                  |          yt        d|      y)	z,
        Reset the BOFT parameters.
        Frg   皙?)meanstdg      ?NTz$Unknown initialization init_weights=)	rs   initnormal_rl   rm   r   zeros_ones_rP   )rF   r   r   s      r   r   zBOFTLayer.reset_boft_parametersm  s     5 GGOODKK5CSOIGGOODKK5CSOI4;;++--t#t{{<89dkk,78 #H</!JKK .r4   c                ~    t        |      }t        j                  ||f      }t        |      D ]  \  }}d|||f<    |S )z
        Convert permutation indices to permutation matrix.

        Args:
        indices: A list of indices representing the permutation.
        rK   )lenrQ   rW   	enumerate)rF   indicesnr   r   idxs         r   r   zBOFTLayer.perm2mat~  sP     L ;;1v&  ( 	!FAs HQV	! r4   c                   |dk(  rt        j                  |      S ||z  dz  |kD  rt        d      t        ||z        }t        j                  |      }d } |||      }t	        d||      D ]  }	|	|z   }
||	|
 }||   ||	|
  |S )a0  
        Define the permutation matrix for the block butterfly permutation.

        Args:
        n: size of the permutation matrix
        b: desired number of blocks after multiplying with the permutation matrix
        r: base block size of the block diagonal matrix, e.g. 2x2, 3x3, 5x5 etc.
        r   r   zInvalid number of blocks!c                   | |z  }t        j                  |       }t        j                  | t         j                        }t        j                  d|d      }t        j                  d|d      }t        j                  ||fd      }t        |      D ]C  \  }}	|t        |	|z        t        |	|z  |z          |t        ||z        t        ||z  |z          E |S )N)dtyper   r   rK   )dim)rQ   aranger   longrU   r   rT   )
brstepinitial_ordersorted_orderevensodds
sorted_seqr   poss
             r   
sort_blockz2BOFTLayer.block_butterfly_perm.<locals>.sort_block  s    q5D!LLOM ;;q

;LLLD!,E<<4+DE4=a8J#J/ k3<I#cTUg,Y\]`cd]dgh]hYi<jSQZ#a!eai.9kr4   )rQ   r   rP   rT   r   )rF   r   r   r   n_butterfly_factor
block_sizer   r   r   r   	block_endtmp_indicess               r   r   zBOFTLayer.block_butterfly_perm  s     "<<?"q519q=899a[
,,q/
	  "*a0q!Z( 	=AJI!!I.K#.|#<GAi 	= r4   c                (   |j                   \  }}}d||j                  dd      z
  z  }t        j                  ||j                        j                  d      j                  |||      }t        j                  j                  ||z   ||z
  d      }|S )z
        Perform the Cayley parametrization on a batch of skew-symmetric matrices.

        Args:
            data: A batch of skew-symmetric matrices of shape (b, r, c).
        g      ?rK   r   rL   r   F)left)	rO   	transposerQ   rZ   rM   	unsqueezeexpandlinalgsolve)rF   datar   r   cskew_matid_matQs           r   cayley_batchzBOFTLayer.cayley_batch  s     **1a$1!5561T[[1;;A>EEaAN LLv0&82C%Pr4   )rr   	nn.ModulereturnNone)r   floatr   r   Nr   r   )   rK   )r:   r;   r<   r=   adapter_layer_namesother_param_namesrD   r   r   r   r   r   r   r   r   r?   r4   r   rk   rk      sJ    
 /M!)Fae]d/LL"&%Nr4   rk   c                       e Zd ZdZ	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d	 fdZd
ddZddZddZddZd fdZ	 xZ
S )r{   z,
    BOFT implemented in a dense layer.
    c
                    t         |           t        j                  | |fi |
 || _        || _        | j                  ||||||       |	| _        y r   )rC   rD   rk   fan_in_fan_out_active_adapterr   is_target_conv_1d_layer)rF   rr   r   rn   ro   r   rp   r   r   r   r   rG   s              r   rD   zLinear.__init__  s]     	46v6,+/>;RT`bn	
 (?$r4   c                   t        | |      }|sy|D ];  }|| j                  j                         v s!| j                         }|j                  j
                  }|r|j                  j                  j                         }| j                  |      \  }}t        j                  |dd      }t        j                  ||j                  |j
                              }t        j                  |dd      }||z  }t        j                  |      j                         st        d| d      |j!                         j                  |      | j"                  j                  _        n| j                  |      \  }}|j                  j                  j                         }t        j                  |dd      }t        j                  ||j                  |j
                              }t        j                  |dd      }||z  }|j!                         j                  |      | j"                  j                  _        | j$                  j'                  |       > y)^  
        Merge the active adapter weights into the base weights

        Args:
            safe_merge (`bool`, *optional*):
                If True, the merge operation will be performed in a copy of the original weights and check for NaNs
                before merging the weights. This is useful if you want to check if the merge operation will produce
                NaNs. Defaults to `False`.
            adapter_names (`List[str]`, *optional*):
                The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
                to `None`.
        Nr   rK   z1NaNs detected in the merged weights. The adapter z seems to be broken)r
   rl   r   ry   weightr   r   cloneget_delta_weightrQ   r   mmtoisfiniteallrP   
contiguousrr   rw   append	rF   
safe_mergeadapter_namesr   rr   
orig_dtypeorig_weightbutterfly_oft_matrm   s	            r   mergezLinear.merge  s    0mD+ 	<N!1!1!33!002
'..44
 #-"3"3"8"8">">"@K040E0En0U-%v"'//+q!"DK"'((+<knnM^MdMd>e"fK"'//+q!"DK"-"6K >>+6::<(OP^O__rs  3>2H2H2J2M2Mj2YDOO**/040E0En0U-%v","3"3"8"8">">"@K"'//+q!"DK"'((+<knnM^MdMd>e"fK"'//+q!"DK"-"6K2=2H2H2J2M2Mj2YDOO**/$$++N;=	<r4   c                   | j                   st        j                  d       yt        | j                        dkD  r@| j                  j                         }| j                         }|j                  j                  }|| j                  j                         v r| j                  |      \  }}|j                  j                  j                         }t        j                  |dd      }t        j                   |j#                         |j%                  |j                              }t        j                  |dd      }|d|z  z  j%                  |      |j                  _        t        | j                        dkD  r?yyzW
        This method unmerges all merged adapter layers from the base weights.
        z Already unmerged. Nothing to do.Nr   rK   )mergedr'   r(   r   rw   r   ry   r   r   rl   r   r   r   r   rQ   r   r   tr   rF   r   rr   r  r	  rm   r  s          r   unmergezLinear.unmerge  s+    {{MM<=$&&'!+!11557N,,.J#**00J!1!1!33,0,A,A.,Q)!6(//44::<#ook1a@#hh'8':':'<knnM^MdMd>ef#ook1a@*5V*D)H)H)T
!!& $&&'!+r4   c                   | j                   |   }| j                  |   }|j                  \  }}}}|j                  ||z  ||      }| j	                  |      }|j                  ||||      }| j
                  rt        j                  |      }	nG|j                  d      }t        j                  t        j                  |       }	|	j                  d      }	| j                  j                  |	j                        }
t        j                   |	|
j#                  ddd            }t        j                   |
|      }|d   }t%        d|j                  d         D ]
  }||   |z  } ||fS )
        Compute the delta weight for the given adapter.

        Args:
            adapter (str):
                The name of the adapter for which the delta weight should be computed.
        r   r   rK   )rl   rm   rO   rY   r   r   r-   applysqueezerQ   
block_diagunbindr   r   r   rM   bmmpermuter   rF   r   rl   rm   r]   r^   r_   r`   orth_rotate_butterflyblock_diagonal_butterflyr   butterfly_oft_mat_batchr	  r   s                 r   r   zLinear.get_delta_weight/  sa    W%W%\\
1aQUAq) $ 1 1& 9 5 : :1aA F""'4':':;P'Q$$9$A$A!$D!','7'7F[9\']$'?'I'I!'L$ 8 ? ?@"')),DfnnUVXY[\F]"^"'))F4K"L3A6q177:; 	OA 7 :=N N	O !&((r4   c           	        |j                   }| j                  r4| j                  r| j                           | j                  |g|i |}n| j                  r | j                  |g|i |}nnt        j                  | j                  |j                  |      }t        j                  t        | j                        df|j                  |      }| j                  D ]  }|| j                  j                         vr!| j                  |   }	| j                  |   }
| j                   |   }|	j"                  \  }}}}|	j%                  ||z  ||      }	| j'                  |	      }|j%                  ||||      } ||      }| j(                  rt*        j-                  |      }nG|j/                  d      }t        j0                  t        j2                  |       }|j5                  d      }| j6                  j9                  |      }|j9                  |      }t        j:                  ||j=                  ddd            }t        j:                  ||      }|d   }t?        d|j"                  d         D ]
  }||   |z  } ||z  }|
|z  } |j9                  | jA                         jB                  jD                  j                         }| jA                         jB                  jD                  }t        jF                  |dd      }|j9                  |      }|j9                  |      }t        jH                  ||      }t        jF                  |dd      }||z  }|j9                  |      }| j                  jJ                  4| j                  jJ                  j9                  |      | j                  _%        tM        jN                  ||| j                  jJ                        }|j9                  |      }|S )NrM   r   rK   r   r   )r2   r   bias)(r   disable_adaptersr  r  rr   rQ   rZ   r|   rM   rV   rT   r}   r   rl   r   rm   rp   rO   rY   r   r   r-   r  r  r  r  r   r   r   r  r  r   ry   r   r   r   r   r  Flinear)rF   r\   argsr   previous_dtyperesultboft_rotation
boft_scaler   rl   rm   dropoutr]   r^   r_   r`   r  r  r   r  r	  r   r  rotated_weightscaled_rotated_weights                            r   r/   zLinear.forwardO  ss     {{$T__Q888F[[$T__Q888F!IId&6&6qxx~^MS):):%;Q$?XfgJ"&"6"6 1!)9)9);;^4^4++N;#\\
1aQUAq1(,(9(9&(A%(=(B(B1aA(N%(/0E(F%**/</B/BCX/Y,,A,I,I!,L)/4/?/?NcAd/e,/G/Q/QRS/T, *+C+F+Fq+I(*/))4Lfnn]^`acdNe*f'*/))F<S*T'$;A$>!q"9"?"?"BC WA(?(BEV(V%W !2M A#j0
=1@ T((*1166<<=A--/66;;K//+q!<K),,^<M%..8K"XXm[AN"__^QBN$2Z$?!$9$<$<^$L!##/'+';';'>'>~'N$XXA.C$//J^J^_F>*r4   c                *    t         |          }d|z   S Nzboft.rC   __repr__rF   reprG   s     r   r.  zLinear.__repr__      g }r4   )   r   r   r   FTF)r   r   rn   rT   ro   rT   r   rT   rp   r   r   boolr   Union[bool, str]r   r3  r   r   FNr  r3  r  zOptional[list[str]]r   r   r   r   z!tuple[torch.Tensor, torch.Tensor]r\   torch.Tensorr#  r   r   r   r   r9  r   r   )r:   r;   r<   r=   rD   r
  r  r   r/   r.  rh   ri   s   @r   r{   r{     s      !'(!$)-(-? ? 	?
 ? "%? ? ? '? "&? 
?00<dU,)@>@ r4   r{   c                       e Zd ZdZ	 	 	 	 	 d		 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d
 fdZd ZdddZddZddZddZ	d fdZ
 xZS )r~   z-
    BOFT implemented in a Conv2d layer.
    c                    t         	|           t        j                  | |       || _        | j	                  ||||||       y r   )rC   rD   rk   r   r   )
rF   rr   r   rn   ro   r   rp   r   r   rG   s
            r   rD   zConv2d.__init__  sF     	4,+/>;RT`bn	
r4   c           	        t               s
d| _        d}nd| _        |dz
  }|dk  rt        d|dz    d      |dkD  rt        |      }nt	        j
                         }| j                  j                  t	        j                  ||i             | j                         }| j                  |j                  d   z  |j                  d   z  }	|dk(  r|dk7  r|	|z  dk7  rt        d	|	 d
| d      |dk7  rV|t        t        j                  |            kD  rt        d|dz    d| d      |d|z  z  dk7  rt        d| d|dz    d      t        |	|z        }n|dk7  r||dk(  rw|	|z  dk7  rt        d	|	 d| d      |dk7  rI|	|d|z  z  k  rt        d|	 d|dz    d| d      |	|d|z  z  z  dk7  rt        d|	 d|dz    d| d      t        |	|z        }nt        d      |dk7  r.|dz  dk7  rt        d| d      |dz  dk7  rt        d| d      t        j                   |dz   |	|	f      }
t#        |dz         D ]G  }| j%                  |	t        |d|z  z        t        |dz        |      }| j'                  |      }||
|<   I | j)                  d|
d       t	        j*                  t        j,                  |dz   |||            | j.                  |<   t	        j*                  t        j0                  dt        | j2                                    | j4                  |<   | j7                  ||       || j8                  |<   || j:                  |<   | j=                  |       | j?                  | j@                         y)zF
        Update the conv2d layer with trainable BOFT weights.
        FrK   Tr   r   r   rg   r   z Convolutional kernel dimension (r   r   r   r   r   r   r   r   z7Invalid combination of convolutional kernel dimension (r   r   r   r   r   r   r   N)!r+   r   rP   rA   rs   r   rp   r   rt   ry   r|   kernel_sizerT   r   r   rQ   r   r   r   r   r   r   rW   rl   rV   r}   rm   r   rn   ro   r   r   r   )rF   r   rn   ro   r   rp   r   r   rr   conv_filter_dimr   r   r   r   s                 r   r   zConv2d.update_layer  s    ~&+D#&'#&*D# #:A"="Q&?@WZ[@[?\\}~ 
 #!;l!K!#  >P/Q!RS ((*
**Z-C-CA-FFI_I_`aIbb aNa$7/14 66GGno}n~  A  B  '!+*S>1J-KK$JKbefKfJgg}  M  ~N  NP  Q  "Q(?%?@AE$*>*:  ;E  F]  `a  Fa  Eb  bd  e  "/^"CDO!n&90A5 66GGop  pA  AC  D  '!+"o<S9S&TU$QRaQbb~  @W  Z[  @[  \  \s  tC  sD  DF  G  #o<S9S&TUYZZ$QRaQbb~  @W  Z[  @[  \  \s  tC  sD  DF  G  !O!CDN l 
 #a'!Q& #3N3CC\!]^^"a' #4_4EE^!_`` KK014oWX.23 	A,,^qQx%@!A3YZGZC[]tD }}T*HAaD	 	XqU;$&LLKK/!3^_Vef%
L! %'LLAs4CTCT?U1V$WL!""<> .=\*,:L)22<@--.r4   c                b   t        | |      }|sy|D ]  }|| j                  j                         v s!| j                         }|j                  j
                  }|r\|j                  j                  j                         }| j                  |      \  }}|j                  | j                  | j                  |j                  d   z  |j                  d   z        }t        j                  |dd      }t        j                  ||j!                  |j
                              }t        j                  |dd      }||z  }|j                  | j                  | j                  |j                  d   |j                  d         }|j#                         j!                  |      | j$                  j                  _        nZ| j                  |      \  }}|j                  j                  j                         }|j                  | j                  | j                  |j                  d   z  |j                  d   z        }t        j                  |dd      }t        j                  ||j!                  |j
                              }t        j                  |dd      }||z  }|j                  | j                  | j                  |j                  d   |j                  d         }|j#                         j!                  |      | j$                  j                  _        | j&                  j)                  |        y)r   Nr   rK   )r
   rl   r   ry   r   r   r   r   r   rY   r}   r|   r>  rQ   r   r   r   r  rr   rw   r  r  s	            r   r
  zConv2d.merge  s    0mD+ '	<N!1!1!33!002
'..44
 #-"3"3"8"8">">"@K040E0En0U-%v"-"2"2))4+;+;j>T>TUV>W+WZdZpZpqrZs+s#K #(//+q!"DK"'((+<knnM^MdMd>e"fK"'//+q!"DK"-"6K"-"2"2))4+;+;Z=S=STU=VXbXnXnopXq#K 3>2H2H2J2M2Mj2YDOO**/040E0En0U-%v","3"3"8"8">">"@K"-"2"2))4+;+;j>T>TUV>W+WZdZpZpqrZs+s#K #(//+q!"DK"'((+<knnM^MdMd>e"fK"'//+q!"DK"-"6K"-"2"2))4+;+;Z=S=STU=VXbXnXnopXq#K 3>2H2H2J2M2Mj2YDOO**/$$++N;O'	<r4   c                   | j                   st        j                  d       yt        | j                        dkD  r| j                  j                         }| j                         }|j                  j                  }|| j                  j                         v rS| j                  |      \  }}|j                  j                  j                         }|j                  | j                  | j                   |j"                  d   z  |j"                  d   z        }t%        j&                  |dd      }t%        j(                  |j+                         |j-                  |j                              }t%        j&                  |dd      }|d|z  z  }|j                  | j                  | j                   |j"                  d   |j"                  d         }|j-                  |      |j                  _        t        | j                        dkD  ryyr  )r  r'   r(   r   rw   r   ry   r   r   rl   r   r   r   r   rY   r}   r|   r>  rQ   r   r   r  r   r  s          r   r  zConv2d.unmergeS  s    {{MM<=$&&'!+!11557N,,.J#**00J!1!1!33,0,A,A.,Q)!6(//44::<)..%%$$z'='=a'@@:CYCYZ[C\\ $ook1a@#hh'8':':'<knnM^MdMd>ef#ook1a@)QZ8)..%%$$**1-**1-	 *5
)C
!!&/ $&&'!+r4   c                   | j                   |   }| j                  |   j                  dd      }|j                  \  }}}}|j	                  ||z  ||      }| j                  |      }|j	                  ||||      }| j                  rt        j                  |      }	nG|j                  d      }t        j                  t        j                  |       }	|	j                  d      }	| j                  j                  |	j                         }
t        j"                  |	|
j%                  ddd            }t        j"                  |
|      }|d   }t'        d|j                  d         D ]
  }||   |z  } ||fS )r  r   rK   r   )rl   rm   r   rO   rY   r   r   r-   r  r  rQ   r  r  r   r   r   rM   r  r  r   r  s                 r   r   zConv2d.get_delta_weights  sn    W%W%//15\\
1aQUAq) $ 1 1& 9 5 : :1aA F""'4':':;P'Q$$9$A$A!$D!','7'7F[9\']$'?'I'I!'L$ 8 ? ?@"')),DfnnUVXY[\F]"^"'))F4K"L3A6q177:; 	OA 7 :=N N	O !&((r4   c           	     	   |j                   }| j                  r4| j                  r| j                           | j                  |g|i |}n}| j                  r | j                  |g|i |}nYt        j                  | j                  | j                  j                  d   z  | j                  j                  d   z  |j                  |j                         }t        j                  t        | j                        df|j                  |j                         }| j                  D ]  }|| j                  j                         vr!| j                  |   }	| j                   |   j#                  dd      }
| j$                  |   }|	j&                  \  }}}}|	j)                  ||z  ||      }	| j+                  |	      }|j)                  ||||      } ||      }| j,                  rt.        j1                  |      }nG|j3                  d      }t        j4                  t        j6                  |       }|j9                  d      }| j:                  j=                  |      }|j=                  |      }t        j>                  ||jA                  ddd            }t        j>                  ||      }|d   }tC        d|j&                  d         D ]
  }||   |z  } ||z  }|
|z  } |j=                  | j                  jD                  jF                  j                         }| j                  jD                  jF                  }|j)                  | j                  | j                  | j                  j                  d   z  | j                  j                  d   z        }t        j"                  |dd      }t        jH                  ||      }t        j"                  |dd      }||z  }|j)                  | j                  | j                  | j                  j                  d   | j                  j                  d         }| jK                  ||j                         }| jK                  | j                  jL                  |j                         }tO        jP                  |||| j                  jR                  d   | j                  jT                  d         }|j=                  |      }|S )Nr   r  rK   r   )r2   r   r  paddingstride)+r   r   r  r  rr   rQ   rZ   r|   r>  rM   rV   rT   r}   r   rl   r   rm   r   rp   rO   rY   r   r   r-   r  r  r  r  r   r   r   r  r  r   r   r   r   _cast_input_dtyper  r!  conv2drD  rE  )rF   r\   r#  r   r$  r%  r&  r'  r   rl   rm   r(  r]   r^   r_   r`   r  r  r   r  r	  r   r  r)  r*  r  s                             r   r/   zConv2d.forward  sA     {{$T__Q888F[[$T__Q888F!II  4??#>#>q#AADOOD_D_`aDbbxxggM
 S):):%;Q$?XYX_X_`J"&"6"6 1!)9)9);;^4^4>>q!D++N;#\\
1aQUAq1(,(9(9&(A%(=(B(B1aA(N%(/0E(F%**/</B/BCX/Y,,A,I,I!,L)/4/?/?NcAd/e,/G/Q/QRS/T,*+C+F+Fq+I(*/))4Lfnn]^`acdNe*f'*/))F<S*T'$;A$>!q"9"?"?"BC WA(?(BEV(V%W !2M A#j0
;1> T__++00667A//0055K%**!!  4??#>#>q#AADOOD_D_`aDbbK  //+q!<K"XXm[AN"__^QBN$2Z$?!$9$>$>!!4#3#3T__5P5PQR5SUYUdUdUpUpqrUs%! &&q*?*E*EFA))$//*>*>@U@[@[\DXX,//2--a0F >*r4   c                *    t         |          }d|z   S r,  r-  r/  s     r   r.  zConv2d.__repr__  r1  r4   )r2  r   r   r   T)rr   r   r   r   rn   rT   ro   rT   r   rT   rp   r   r   r4  r   r   r5  r6  r   r7  r8  r:  )r:   r;   r<   r=   rD   r   r
  r  r   r/   r.  rh   ri   s   @r   r~   r~     s      !'(!)-

 
 	

 
 "%
 
 '
 

&j/X9<vD@)BKZ r4   r~   )
__future__r   r   r   r'   
contextlibr   typingr   r   r   rQ   torch.nnrs   torch.nn.functional
functionalr!  torch.autogradr   peft.tuners.tuners_utilsr	   r
   r!   r   r+   r-   ModulerA   rk   r{   r~   r?   r4   r   <module>rR     s   $ #  	  % ' '     # L 	  &  &F:#H #L2 2jE EPGRYY	 GTORYY	 Or4   