
from __future__ import annotations

import warnings
from dataclasses import dataclass, field
from typing import Literal, Optional, Union

from peft.config import PeftConfig
from peft.utils import PeftType

from .mask_functions import random_mask


@dataclass
class ShiraConfig(PeftConfig):
    """
    This is the configuration class to store the configuration of a [`ShiraModel`].

    Args:
        r (`int`, *optional*, defaults to `32`):
            For a given target module, the number of SHiRA parameters is computed as r(m+n), where the original tensor
            dimensions are m x n. This means the number of SHiRA parameters is the same as that for a LoRA adapter.
            SHiRA is a high rank adapter. Setting this r parameter does not restrict the rank to this value.
        mask_type (`str`, defaults to `random`):
            Type of mask function. Defaults to a random sparse mask. An optional user-defined mask_fn to compute the
            mask value can also be supplied by instantiating `config = ShiraConfig(...)` and then setting
            `config.mask_fn = <your custom mask function>`. For a pretrained weight with shape m x n, the custom mask
            function must return only one mask (shape: m x n) which must be binary 0 or 1 with num_shira_parameters =
            r(m + n) for linear layers. Device and dtype of mask must be same as base layer's weight's device and
            dtype. Please see mask_functions.py for more details and to see the default random sparse mask
            implementation. A usage sketch is shown in the example below.
        random_seed (`int`, *optional*, defaults to `None`):
            random seed for the torch generator for random_mask.
        target_modules (`Union[List[str], str]`):
            List of module names or regex expression of the module names to replace with SHiRA. For example, ['q', 'v']
            or '.*decoder.*(SelfAttention|EncDecAttention).*(q|v)$'. Only linear layers are supported.
        fan_in_fan_out (`bool`):
            Set this to True if the layer to replace stores weight like (fan_in, fan_out). For example, gpt-2 uses
            `Conv1D` which stores weights like (fan_in, fan_out) and hence this should be set to `True`.
        init_weights (`bool`, defaults to `True`):
            Initialize SHiRA weight to have zero values. If set to False, SHiRA weights are initialized to randn values
            instead of zeros and this is used only for testing.
        modules_to_save (`List[str]`):
            List of modules apart from SHiRA layers to be set as trainable and saved in the final checkpoint.
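
    Example (a minimal usage sketch; the base model, its target module names, and `my_custom_mask_fn` below are
    illustrative assumptions, not part of this file):

        ```py
        >>> from transformers import AutoModelForCausalLM
        >>> from peft import ShiraConfig, get_peft_model

        >>> base_model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")
        >>> config = ShiraConfig(r=32, target_modules=["q_proj", "v_proj"])
        >>> # Optionally replace the default random sparse mask with a user-defined mask function
        >>> # (see mask_functions.py for the interface the mask function is expected to follow):
        >>> # config.mask_fn = my_custom_mask_fn
        >>> peft_model = get_peft_model(base_model, config)
        >>> peft_model.print_trainable_parameters()
        ```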
    """

    r: int = field(
        default=32,
        metadata={
            "help": (
                "For a given target module, the number of SHiRA parameters is computed as r(m+n), where the original "
                "tensor dimensions are m x n. This means the number of SHiRA parameters is the same as that for a "
                "LoRA adapter. SHiRA is a high rank adapter. Setting this r parameter does not restrict the rank to "
                "this value."
            )
        },
    )
    mask_type: Literal["random"] = field(
        default="random",
        metadata={
            "help": (
                "Type of mask function. Defaults to a random sparse mask. An optional user-defined mask_fn to compute "
                "the mask value can also be supplied by instantiating `config = ShiraConfig(...)` and then setting "
                "`config.mask_fn = <your custom mask function>`. For a pretrained weight with shape m x n, the custom "
                "mask function must return only one mask (shape: m x n) which must be binary 0 or 1 with "
                "num_shira_parameters = r(m + n) for linear layers. Device and dtype of mask must be same as base "
                "layer's weight's device and dtype. Please see mask_functions.py for more details and to see the "
                "default random sparse mask implementation."
            )
        },
    )
    random_seed: Optional[int] = field(
        default=None,
        metadata={"help": "random seed for the torch generator for random_mask"},
    )
    target_modules: Optional[Union[list[str], str]] = field(
        default=None,
        metadata={
            "help": (
                "List of module names or regex expression of the module names to replace with SHiRA. "
                "For example, ['q', 'v'] or '.*decoder.*(SelfAttention|EncDecAttention).*(q|v)$'. "
                "Only linear layers are supported."
            )
        },
    )
    fan_in_fan_out: bool = field(
        default=False,
        metadata={"help": "Set this to True if the layer to replace stores weight like (fan_in, fan_out)"},
    )
    init_weights: bool = field(
        default=True,
        metadata={
            "help": (
                "Initialize SHiRA weight to have zero values. If set to False, SHiRA weights are initialized to randn "
                "values instead of zeros and this is used only for testing."
            )
        },
    )
    modules_to_save: Optional[list[str]] = field(
        default=None,
        metadata={
            "help": (
                "List of modules apart from SHiRA layers to be set as trainable and saved in the final checkpoint. "
                "For example, in Sequence Classification or Token Classification tasks, the final layer "
                "`classifier/score` are randomly initialized and as such need to be trainable and saved."
            )
        },
    )

    def __post_init__(self):
        super().__post_init__()
        self.peft_type = PeftType.SHIRA
        self.target_modules = (
            set(self.target_modules) if isinstance(self.target_modules, list) else self.target_modules
        )
        if self.mask_type == "random":
            self.mask_fn = random_mask
        else:
            if not self.inference_mode:
                warnings.warn(
                    f"Argument {self.mask_type=} is not recognized, please supply your own masking function by "
                    "calling `config.mask_fn = my_mask_fn`."
                )
            self.mask_fn = None