
    bi                        d Z ddlmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZ dd
lmZ 	 	 	 	 	 	 	 	 	 	 ddZy)zD
This module contains the implementation of the LoraPlus optimizer.
    )annotations)
attrgetterN)	Optimizer)ALL_LAYERNORM_LAYERS)get_parameter_names   )	PeftModel)	Embeddingc                  t        | t              }|D cg c]	  }d|vs| }}i i i i d}| j                         D ]m  \  }}|j                  s t	        |      |       }	t        |	t              r	||d   |<   =d|v s|j                  dk(  r||v r	||d   |<   ]||d   |<   f||d   |<   o ||d	<   |j                  d
d      }
|j                  dd      }t        |d   j                               |
|dt        |d   j                               |
|dt        |d   j                               |
||z  dt        |d   j                               d||z  dg} ||fi |}g d}|j                  |v rmddl}|j                  j                  j                         }| j!                         D ]2  }	t        |	t"        j                        s|j%                  |	dddi       4 |S c c}w )ug  
    Creates a LoraPlus optimizer.

    Efficient Low Rank Adaptation of Large Models: https://huggingface.co/papers/2402.12354

    Reference: https://github.com/nikhil-ghosh-berkeley/loraplus/

    Args:
        model (`torch.nn.Module`): The model to be optimized.
        optimizer_cls (`torch.optim.Optimizer`): The optimizer class to be used.
        lr (`float`): The learning rate to be used for the optimizer.
        loraplus_lr_ratio (`float`):
            The ratio of learning ηB/ηA where ηA (lr) is passed in as the optimizer learning rate. Should be ≥1. Should
            be set in tandem with the optimizer learning rate (lr); should be larger when the task is more difficult
            and the model needs to update its features to learn well. In this case, it helps to make the learning rate
            slightly smaller (e.g., by a factor of 2) than typical vanilla LoRA learning rates
        loraplus_lr_embedding (optional `float`):
            If LoRA modules are added to embedding layers your can specify a different learning rate for them. Default
            value 1e-6.
        kwargs (`dict`): Additional keyword arguments to be passed to the optimizer.

    Returns:
        `torch.optim.Optimizer`: An instance of the specified optimizer class configured with the model's parameters
        organized into groups with custom learning rates.
    bias)groupAgroupBgroupB_no_decay	embeddingr   lora_B   r   r   r   lrloraplus_weight_decayg        loraplus_lr_embeddinggư>)paramsweight_decayr   )Adam8bit	AdamW8bitPagedAdam8bitPagedAdamW8bitr   Nweight
optim_bits    )r   r   named_parametersrequires_gradr   
isinstancer
   ndimpoplistvalues__name__bitsandbytesoptimGlobalOptimManagerget_instancemodulesnnregister_module_override)modeloptimizer_clsr   loraplus_lr_ratiokwargsdecay_parametersnameparam_groupsparammoduler   r   optimizer_grouped_parameters	optimizereight_bit_namesr'   managers                    S/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/peft/optimizers/loraplus.pycreate_loraplus_optimizerr<       s;   : +52FG)9PV4=OPP	L --/ 1e""!D!%(fi(.3L%d+q''/4X&t,8=./5+0L"4(1 F4L"JJ'>D"JJ'>E <188:;1	
 <4;;=>1'	
 <188:;1((	
 <(9:AACD((	
!$ . :EfEIRO0$$77DDFmmo 	WF&",,/00LRTCUV	W w Qs
   	G#G#)
r.   r	   r/   ztype[Optimizer]r   floatr0   r=   returnr   )__doc__
__future__r   operatorr   torch.nnr,   torch.optimr   transformers.pytorch_utilsr   transformers.trainer_pt_utilsr   
peft_modelr	   tuners.lora.layerr
   r<        r;   <module>rJ      sW    #   ! ; = " )YY%4Y=BYW\YYrI   