
    biU^                       d dl mZ d dlZd dlZd dlmZ d dlmZ d dlm	Z	m
Z
 d dlZd dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZmZmZ d dlmZmZmZ ddlmZ ddl m!Z! ddl"m#Z# ddl$m%Z%m&Z& 	 	 	 	 	 	 ddZ' G d de      Z(y)    )annotationsN)asdict)Enum)OptionalUnion)is_bf16_available)tqdm)Conv1D)is_bnb_4bit_availableis_bnb_available)	BaseTunerBaseTunerLayercheck_target_module_exists)6TRANSFORMERS_MODELS_TO_RANDLORA_TARGET_MODULES_MAPPINGModulesToSaveWrapper_get_submodules   )
BufferDict) _maybe_include_all_linear_layers   )RandLoraConfig)LinearRandLoraLayerc                |   t        | t              r@t        j                  | t	               rt        j
                  nt        j                        }n| }t        j                         5  t        j                  j                  j                  |t        j                  d      |      }|cddd       S # 1 sw Y   yxY w)a  
    Kaiming Uniform Initialisation adapted to accept a `torch.Generator` object for PRNG.

    Args:
        tensor_or_shape (`Union[torch.Tensor, tuple[int, ...]]`):
            Tensor to initialise, or shape of new tensor to create and then initialise.
        generator: (`torch.Generator`):
            Generator object that manages the state of the PRNG algorithm in use.

    Returns:
        `torch.Tensor`: The initialised tensor.
    )dtype   )a	generatorN)
isinstancetupletorchemptyr   bfloat16float16no_gradnninitkaiming_uniform_mathsqrt)tensor_or_shaper   tensorbasiss       U/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/peft/tuners/randlora/model.py_kaiming_initr/   +   s      /5)$5$7%..U]]

 !	 ..v1QZ.[  s   'AB22B;c                      e Zd ZU dZdZded<   ddZdddZddZddZ	dd	Z
ed
        Zd Zed        Zd dZed        Zd! fdZd"d#dZd$dZd Zd Zd Zed        Z	 	 	 	 d%	 	 	 	 	 d&dZd'dZ	 d(	 	 	 	 	 d&dZd Z xZS ))RandLoraModela6  
    Creates a RandLoRA model from a pretrained transformers model.

    Args:
        model ([`~transformers.PreTrainedModel`]): The model to be adapted.
        config ([`RandLoraConfig`]): The configuration of the RandLora model.
        adapter_name (`str`): The name of the adapter, defaults to `"default"`.
        low_cpu_mem_usage (`bool`, `optional`, defaults to `False`):
            Create empty adapter weights on meta device. Useful to speed up the loading process.

    Returns:
        `torch.nn.Module`: The RandLora model.

    Example:

        ```py
        >>> from transformers import AutoModelForCausalLM
        >>> from peft import RandLoraConfig, get_peft_model

        >>> base_model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")
        >>> config = RandLoraConfig(r=32)
        >>> model = get_peft_model(base_model, config)
        ```

    **Attributes**:
        - **model** ([`~transformers.PreTrainedModel`]) -- The model to be adapted.
        - **peft_config** ([`RandLoraConfig`]): The configuration of the RandLora model.
    	randlora_strprefixc                   | j                  | j                        }| j                  ||      }t        || j                        }d}| j                  j	                         D ]  \  }}| j                  ||      st        |t        j                        r|j                  |j                  f}n\t        |t              rKt        |j                  d      r|j                  j                  n|j                  j                  }|ddd   }n||}||k7  st!        d t#        ||      D              } |d}t%        |      |S )z
        Finds the largest input and output dimensions across linear layers that have been wrapped with RandLora.

        This will be used for determining the size of the shared randlora_A and randlora_B matrices.
        Nds_shapec              3  :   K   | ]  \  }}t        ||        y wN)max).0r   bs      r.   	<genexpr>z*RandLoraModel._find_dim.<locals>.<genexpr>   s     %]DAqc!Qi%]s   z_No layers types compatible with RandLora were found. Please check `peft_config.target_modules`.)get_model_configmodel_prepare_adapter_configr   named_modules_check_target_module_existsr   r&   r   out_featuresin_featuresr
   hasattrweightr6   shaper    zip
ValueError)	selfconfigmodel_configpeft_configlargest_shapekeymodulemodule_shapemsgs	            r.   	_find_dimzRandLoraModel._find_dimh   s)    ,,TZZ8226<H6{DJJO::335 	^KC33KE&")),%22F4F4FFFF+9@PZ9[v}}55agananatat+DbD1$ ,}, %%]C|<\%] ]#	^&  sCS/!    c                   | j                  |      \  }}t        ||      t        ||      }}t        i |j                        | _        t        i |j                        | _        t        j                  d      j                  |j                        }t        j                  |j                  d|f|      }	||j                  z  }
|
j                         rt        |
      nt        |
      dz   }
t        j                  ||
|j                  f|      }t        j                  |j                         }t        j                  |	j                         }d||dd|z  z  k  <   d||ddd|z  z  z
  kD  <   d||	dd|z  z  k  <   d||	ddd|z  z  z
  kD  <   ||j#                         z  ||j#                         z  }}	|	| j
                  |<   || j                  |<   y)	zn
        Sparse random projections as described in https://cs-people.bu.edu/evimaria/cs565/kdd-rp.pdf
        
persistentcpudevicer   r   r7   r   N)rS   r:   minr   save_projection
randlora_A
randlora_Br!   	Generatormanual_seedprojection_prng_keyrandr
is_integerintzerosrG   std)rJ   rK   adapter_namesparsitylinear_out_dimlinear_in_dimmax_dimmin_dimr   r^   	num_basesr_   randlora_B_sparserandlora_A_sparses                 r.   "_init_randlora_A_randlora_B_sparsez0RandLoraModel._init_randlora_A_randlora_B_sparse   s   
 )-v(>%~}=s>S`?a %RF4J4JK$RF4J4JK OO51==f>X>XY	 ZZ1g 6)L
 fhh&	&/&:&:&<C	N#i.STBT	ZZ)VXX >)T
 "KK
(8(89!KK
(8(89=?*qAL'99:AB*q1H+='==>=?*qAL'99:AB*q1H+='==>  1 5 5 77 1 5 5 77 
 )3%(2%rT   c           
         | j                  |      \  }}t        ||      t        ||      }}t        i |j                        | _        t        i |j                        | _        t        j                  d      j                  |j                        }t        |j                  d|f|      }t        ||      |j                  z  }	|	j                         rt        |	      nt        |	      dz   }	t        j                  t!        |	      D 
cg c]  }
t        |d|j                  f|       c}
d      }||j#                         z  ||j#                         z  }}|| j
                  |<   || j                  |<   y c c}
w )NrV   rX   rY   r   r[   )dim)rS   r:   r\   r   r]   r^   r_   r!   r`   ra   rb   r/   rd   re   rf   catrangerh   )rJ   rK   ri   rk   rl   rm   rn   r   r^   ro   _r_   s               r.   _init_randlora_A_randlora_Bz)RandLoraModel._init_randlora_A_randlora_B   sD   (,v(>%~}=s>S`?a %RF4J4JK$RF4J4JK OO51==f>X>XY	 #FHHa#9YO
 6A	&/&:&:&<C	N#i.STBT	YYQVW`QabA]GQ1YGbhi


 ",jnn.>!>
Z^^M]@]J
(2%(2% cs   !E;c           
        |j                   rF| j                  |      \  }}| j                  ||t        j                  t        ||                   y |j                  r| j                  ||d       y | j                  ||       y )N)rj      )very_sparserS   rr   r)   r*   r\   sparserx   )rJ   r?   rK   ri   rk   rl   s         r.   _pre_injection_hookz!RandLoraModel._pre_injection_hook   sw    ,0NN6,B)NM33tyy^]9[/\ 4  ]]33FLST3U,,V\BrT   c                   t        | j                        dkD  r1|j                  dk7  r"t        | j                  j
                   d      | j                  j                         D ]F  }||u r|j                  |j                  k7  s"t        d|j                  d|j                   d       t        | j                  j                         D ch c]  }|j                   c}      }t        |      dkD  rt        d|       yc c}w )	z
        A helper method to check the config when a new adapter is being added.

        Raise a ValueError if there is something wrong with the config or if it conflicts with existing adapters.

        r   nonezf supports only 1 adapter with bias. When using multiple adapters, set bias to 'none' for all adapters.zcRandLora PRNG initialisation key must be the same for all adapters. Got config.projection_prng_key=z but previous config had .zgRandLora projection weights must be saved for all adapters or none, but got multiple different values: N)
lenrM   biasrI   	__class____name__valuesrb   sortedr]   )rJ   rK   existing_configsave_project_unique_valuess       r.   _check_new_adapter_configz'RandLoraModel._check_new_adapter_config   s       !A%FKK6,A>>**+ ,7 7 
  $//668 		O&(22f6P6PP z_e_y_y^{ |++:+N+N*OqR 		 &,RVRbRbRiRiRk,lV-C-C,l%m")*Q.y-.0  / -ms   Dc                    t        | |      S r9   )r   )randlora_configrO   s     r.   rB   z)RandLoraModel._check_target_module_exists   s    )/3??rT   c                   |t        d      |j                  }t        |d      xr |j                  d u}	||j                  |j
                  |j                  |j                  t        | j                  dd      t        | j                  dd      d}
|	|
d<   t        |t              rJ|j                  || j                  | j                  ||j                  |j
                  |j                         y  | j                  || j                  | j                  ||fi |
}|| j                   vr|j#                  d       | j%                  ||||       y )NzCurrent Key shouldn't be `None`r   is_loaded_in_8bitFis_loaded_in_4bit)rd   randlora_alpharandlora_dropoutfan_in_fan_outinit_weightsloaded_in_8bitloaded_in_4bit)rI   rd   rE   r   r   r   r   r   getattrr?   r   r   update_layerr^   r_   _create_new_moduleactive_adapterrequires_grad__replace_module)rJ   r   ri   targettarget_nameparentcurrent_keyoptional_kwargsrd   r   kwargs
new_modules               r.   _create_and_replacez!RandLoraModel._create_and_replace  s?    >??vv&B6;;d+B-<< / @ @-<<+88%djj2EuM%djj2EuM
 vff%..00,, 100$//<QW[aJ 4#6#66))%0  j&IrT   c                   t        | ||       t        |d      r|j                  }t        |d      s.|j                  |_        t        |d      r|j                  |_        t        |dd       ^t        |d      r|j                  |j                  _        n|j                  |_        |j                  |j                  j                         t        j                  d      |j                         D ]R  \  }}d|v st        fd|j                         D              r.|j                  |j                  j                         T y )N
base_layerr   statemetar2   c              3  <   K   | ]  }|j                   k(    y wr9   rY   )r;   pr   s     r.   r=   z0RandLoraModel._replace_module.<locals>.<genexpr>H  s     I188t+Is   )setattrrE   r   rF   r   r   r   torZ   r!   rA   any
parameters)r   
child_namer   childnamerP   r   s         @r.   r   zRandLoraModel._replace_module.  s    
J/
 5,'$$Ez<0 %Juf%"'**
5'4(4z<0.3kk
%%+#(;;
 MM%,,--.||F#&446 	3LD&d"IV5F5F5HIIIIell112	3rT   c                   |j                         D ]  \  }}| j                  |vsd|_         | j                  D ]  }| j                  |   j
                  }|dk(  r"|dk(  r%|j                         D ]  \  }}d|v sd|_         L|dk(  rR|j                         D ]>  }t        |t              st        |d      s!|j
                  .d|j
                  _        @ t        d| d       y )	NFr   allr   Trandlora_onlyzRequested bias: z, is not implemented.)named_parametersr4   requires_gradactive_adaptersrM   r   modulesr   r   rE   NotImplementedError)rJ   r?   nr   r   r   ms          r.    _mark_only_adapters_as_trainablez.RandLoraModel._mark_only_adapters_as_trainableK  s    **, 	(DAq{{!#"'	( #22 	ZN##N388Dv~u}!224 /DAq{*./ ( 4A!!]368JqvvOa/3,4 *,<TFBW*XYY	ZrT   c                b   t               r
dd l}ddlm} t	               rddlm} |j                  dd      }	|j                  dd      }
|j                  dd      }t        |t              r|j                         }n|}|
rt        |j                  j                        rc|j                         }|j                  |j                  j                  |j                  j                   |j"                  d	        ||||fi |S |rt        |j                  j
                        rc|j                         }|j                  |j$                  |j&                  j(                  |j&                  j*                  d
        ||||fi |S t        |t,        j                  j.                        r'|d   rmt1        j2                  d       dx|d<   | _        nKt        |t6              r,d|d<   |d   s1t1        j2                  d       dx|d<   | _        nt9        d| d      t/        ||||fd|	i|}|S )Nr   r   )Linear8bitLt)
Linear4bitr   Fr   r   )has_fp16_weights	thresholdindex)compute_dtypecompress_statistics
quant_typer   zjfan_in_fan_out is set to True but the target module is `torch.nn.Linear`. Setting fan_in_fan_out to False.Tis_target_conv_1d_layerzafan_in_fan_out is set to False but the target module is `Conv1D`. Setting fan_in_fan_out to True.zTarget module z is not supported. Currently, only the following modules are supported: `torch.nn.Linear`, `transformers.pytorch_utils.Conv1D`.)r   bitsandbytesbnbr   r   r   popgetr   r   get_base_layerr&   copyupdater   r   r   r   r   rF   r   r   r!   r   warningswarnr   r
   rI   )r   r^   r_   ri   r   r   r   r   r   r   r   r   target_base_layereightbit_kwargsfourbit_kwargsr   s                   r.   r   z RandLoraModel._create_new_module`  s0    &) "'zz&%($4e<$4e<fn- & 5 5 7 &j):CFF<O<OP$kkmO""(9(?(?(P(P!2!8!8!B!B.44  j*`P_``
+<cff>O>O P#[[]N!!%6%D%D+<+C+C+W+W"3":":"E"E flJ
]n]])588??;&'7 MRQ'(?+I)6204F,-*+w MQP'(?+I  )J J  	

 
 

 rT   c                z    	 t         |   |      S # t        $ r |dk(  r t        | j                  |      cY S w xY w)z1Forward missing attributes to the wrapped module.r?   )super__getattr__AttributeErrorr   r?   )rJ   r   r   s     r.   r   zRandLoraModel.__getattr__  sB    	-7&t,, 	-w4::t,,	-s    %::c           
        i }| j                   j                         D ]U  \  }}t        |      j                         D ci c]$  \  }}|t        |t              r|j
                  n|& }}}|sQd|d<   W |<   |S c c}}w )NTinference_mode)rM   itemsr   r   r   value)rJ   	inferenceconfig_dictrO   r   kvrK   s           r.   get_peft_config_as_dictz%RandLoraModel.get_peft_config_as_dict  s    **002 	0JCKQRW=K^K^K`a41aaJq$$7Q>aFa+/'(	0 "C	 bs   )A<c                    | j                   j                         D ]*  }t        |t        t        f      s|j                  |       , y r9   )r?   r   r   r   r   enable_adapters)rJ   enabledrP   s      r.   _set_adapter_layersz!RandLoraModel._set_adapter_layers  s<    jj((* 	0F&>3G"HI&&w/	0rT   c                (    | j                  d       y )NTr   )r   rJ   s    r.   enable_adapter_layersz#RandLoraModel.enable_adapter_layers  s       .rT   c                    | j                   D ]<  }| j                  |   j                  }|dk7  s"d| d}t        j                  |       > | j                  d       y )Nr   z>Careful, disabling adapter layers with bias configured to be 'zL' does not produce the same output as the base model would without adaption.Fr   )r   rM   r   r   r   r   )rJ   r   valrR   s       r.   disable_adapter_layersz$RandLoraModel.disable_adapter_layers  sp    "22 	#N"">277Cf}TUXTY ZG G  c"	# 	   /rT   c                    | j                   j                         D ]U  }t        |t              s|j                  r%t        j                  d       |j                          |j                  |       W || _	        y )NzJAdapter cannot be set when the model is merged. Unmerging the model first.)
r?   r   r   r   mergedr   r   unmergeset_adapterr   )rJ   ri   rP   s      r.   r   zRandLoraModel.set_adapter  s^    jj((* 	1F&-0==MM"noNN$""<0	1 +rT   c                ~    | j                   0|d   t        vrt        d      t        t        |d            | _         | S )N
model_typez0Please specify `target_modules` in `peft_config`)target_modulesr   rI   set)rM   rL   s     r.   r@   z%RandLoraModel._prepare_adapter_config  sK    %%-L)1gg !STT),F|T`Gab*K& rT   c                   | j                   j                         D cg c]  \  }}d|vs| }}}d|rdndz   dz   }t        || |      D ]  }	 t        | j                   |      \  }	}
}t        |
d      r8|r|
j                  ||       | j                  |	||
j                         |
       bt        |
t              sst        |	||
j                  |
j                             | j                   S c c}}w # t        $ r Y w xY w)	Nrandloraz
Unloading zand merging  r?   )disabledescr   )
safe_mergeadapter_names)r?   rA   r	   r   r   rE   merger   r   r   r   r   modules_to_saver   )rJ   r   progressbarr   r   rO   rw   key_listr   r   r   r   s               r.   _unload_and_optionally_mergez*RandLoraModel._unload_and_optionally_merge  s    '+jj&>&>&@ZFCJVYDYCZZ~B?'Ik/E 	\C.=djj#.N+ v|,LLJmLT$$V[&:O:O:QSYZF$89V-C-CFDYDY-Z[	\ zz# [
 " s   C2C2C88	DDc                   |t        | j                  j                               vrt        d| d      | j                  |= | j                  j                         D cg c]  \  }}d|vs| }}}d}|D ]P  }t        | j                  |      \  }}}t        |t              s.|j                  |       |B|j                  dd }R |xs g | _
        | j                  ||       yc c}}w )z
        Deletes an existing adapter.

        Args:
            adapter_name (str): Name of the adapter to be deleted.
        zAdapter z does not existr   N)new_active_adapters)listrM   keysrI   r?   rA   r   r   r   delete_adapterr   _delete_auxiliary_adapter)rJ   ri   rO   rw   r   new_adapterr   s          r.   r  zRandLoraModel.delete_adapter  s     tD$4$4$9$9$;<<x~_EFF\* '+jj&>&>&@ZFCJVYDYCZZ 	;C*4::s;LAvq&-0%%l3&"("7"7":K	; */R&&|&U [s   C),C)c                *    | j                  |||      S )aL  
        This method merges the RandLora layers into the base model. This is needed if someone wants to use the base
        model as a standalone model.

        Args:
            progressbar (`bool`):
                whether to show a progressbar indicating the unload and merge process
            safe_merge (`bool`):
                whether to activate the safe merging check to check if there is any potential Nan in the adapter
                weights
            adapter_names (`list[str]`, *optional*):
                The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
                to `None`.

        Example:

        ```py
        >>> from transformers import AutoModelForCausalLM
        >>> from peft import PeftModel

        >>> base_model = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-40b")
        >>> peft_model_id = "smangrul/falcon-40B-int4-peft-lora-sfttrainer-sample"
        >>> model = PeftModel.from_pretrained(base_model, peft_model_id)
        >>> merged_model = model.merge_and_unload()
        ```
        )r   r   r   r   )rJ   r   r   r   s       r.   merge_and_unloadzRandLoraModel.merge_and_unload  s#    : 00#
- 1 
 	
rT   c                &    | j                  d      S )z
        Gets back the base model by removing all the RandLora modules without merging. This gives back the original
        base model.
        F)r   r  r   s    r.   unloadzRandLoraModel.unload2  s    
 00u0==rT   )returnztuple[int, int])rz   )rK   r   ri   r3   rj   rf   r	  None)rK   r   ri   r3   r	  r
  )r?   	nn.ModulerK   r   ri   r3   r	  r
  )rK   r   r	  r
  )r?   r  r	  r
  )r   r3   )F)r   bool)T)TFFN)r   r  r   r  r   zOptional[list[str]])ri   r3   )FFN)r   
__module____qualname____doc__r4   __annotations__rS   rr   rx   r}   r   staticmethodrB   r   r   r   r   r   r   r   r   r   r   r@   r   r  r  r  __classcell__)r   s   @r.   r1   r1   H   s   : FC#J'3R36	C D @ @*JX 3 38Z* C CJ-0
/	0+   ! -1  	
 +6V2 im

59
Re
B>rT   r1   )r+   z$Union[torch.Tensor, tuple[int, ...]]r   ztorch.Generatorr	  ztorch.Tensor))
__future__r   r)   r   dataclassesr   enumr   typingr   r   r!   torch.nnr&   accelerate.utils.importsr   r	   transformers.pytorch_utilsr
   peft.import_utilsr   r   peft.tuners.tuners_utilsr   r   r   
peft.utilsr   r   r   _buffer_dictr   tuners_utilsr   rK   r   layerr   r   r/   r1    rT   r.   <module>r!     sv    #     "   6  - E Z Z  & ; " (9 :o>I o>rT   