
    biw              	       V   d dl Z d dlZd dlZd dlmZ d dlmZ d dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZmZmZmZ d dlmZ d dlmZmZmZmZ d d	lmZ  e       rd d
lmZmZm Z m!Z!m"Z"m#Z#m$Z$ d dl%m&Z& g dZ' G d de	jP                        Z)	 dde)dee*   dee+   de)fdZ, G d de      Z-y)    N)deepcopy)Optional)PartialState)hf_hub_download)EntryNotFoundErrorHFValidationErrorLocalEntryNotFoundErrorRepositoryNotFoundError)	load_file)GenerationMixinPreTrainedModelis_torch_npu_availableis_torch_xpu_available)is_peft_available)
PeftConfig	PeftModelPeftModelForCausalLMPeftModelForSeq2SeqLMPromptLearningConfigget_peft_modelprepare_model_for_kbit_training)is_deepspeed_zero3_enabled)ztransformer.h.{layer}zmodel.decoder.layers.{layer}zgpt_neox.layers.{layer}zmodel.layers.{layer}c                        e Zd ZdZdZdZdZdZ e       se	ne	e
efZ	 d fd	Zed        Ze	 	 	 dd       Zed        Zed	        Ze	 dd
       Zd Zd Zd Zd ZddZ xZS )PreTrainedModelWrappera.  
    A wrapper class around a (`transformers.PreTrainedModel`) to be compatible with the (`~transformers.PreTrained`)
    class in order to keep some attributes and methods of the (`~transformers.PreTrainedModel`) class.

    Attributes:
        pretrained_model (`transformers.PreTrainedModel`):
            The model to be wrapped.
        parent_class (`transformers.PreTrainedModel`):
            The parent class of the model to be wrapped.
        supported_args (`list`):
            The list of arguments that are supported by the wrapper class.
    N)v_head)scorec                    t         |           || _        |j                  | _        |j                  | _        t        |dd      | _        t        |dd      | _        d| _        t        |d      r|j                  | _
        t        |d      r|j                  | _        t        |d      r|j                  | _        || _        || _        d| _        ||| _        y y )Nis_loaded_in_8bitFis_loaded_in_4bitgradient_checkpointing_disablegradient_checkpointing_enableenable_input_require_gradsdefault)super__init__pretrained_modelconfigprepare_inputs_for_generationgetattrr   r   is_sequential_parallelhasattrr    r!   r"   supports_rm_adapterrm_adapter_namepolicy_adapter_namer   )selfr&   score_moduler,   r-   kwargs	__class__s         S/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/trl/models/modeling_base.pyr%   zPreTrainedModelWrapper.__init__S   s     	 0&---=-[-[*!()9;NPU!V!()9;NPU!V&+##%EF2B2a2aD/#%DE1A1_1_D.#%AB.>.Y.YD+#6 .#, #%DJ $    c                 Z   |p|j                  dd      }|j                  dd      }|j                  dd      }|j                  dd      }| j                  |      \  }}	}
|	j                  dd      }nd}d}i }i }	i }
d}t        |t              st        d      d}| j                         }t        |t              rd	|	v r|	d	   nd}d
|	v r|	d
   nd}nt        |dd      }t        |dd      }|s|r d|	vrt        j                  d       d|i|	d<   t               r|t        |t              st        d      t        |t              rzt               r	 t        |d|      }nd}t"        j$                  j'                  t"        j$                  j)                  |d            }|s|t               r|t        j                  d|        |rt        j*                  |      }n4t"        j$                  j-                  |      }t        j*                  |      } | j.                  j*                  |j0                  g|i |	}t3        j*                  ||||      }t        j4                  d       n | j.                  j*                  |g|i |	}||s|rt7        |fi |
}t9        ||      }t        j4                  d       nst        || j:                        rF|}|Yt        |t<              rI|s|rt7        |fi |
}t9        ||      }t        j4                  d       nt        dt?        |             t               rCt        |t2              r3d}tA        |d      r%t        |jB                  tD              rt        d      |s|t        d      |r|| jG                  |||      }|d|d}nddi} | |fi ||}d}t        |t              rt"        j$                  j)                  |d      }t"        j$                  j)                  |d      }t"        j$                  j)                  |d       }t"        j$                  j)                  |d!      }d}t"        j$                  j'                  |      }t"        j$                  j'                  |      s^t"        j$                  j'                  |      s?| jI                  ||||      \  }}}}|!|| jI                  ||||dd!"      \  }}}}d}nd}|rtJ        ntL        jN                  } |ri nd#dd$}!|rP|r0i }"D ](  }#t        ||#|      }|"jQ                   | |fi |!       * n | |s|n|fi |!}"n|jS                         }"||_*        ||_+        |r|jY                  "%       |S # t        t        t        t         f$ r d}Y Bw xY w)&ay  
        Instantiates a new model from a pretrained model from `transformers`. The pretrained model is loaded using the
        `from_pretrained` method of the `transformers.PreTrainedModel` class. The arguments that are specific to the
        `transformers.PreTrainedModel` class are passed along this method and filtered out from the `kwargs` argument.

        Args:
            pretrained_model_name_or_path (`str` or `transformers.PreTrainedModel`):
                The path to the pretrained model or its name.
            *model_args (`list`, *optional*)):
                Additional positional arguments passed along to the underlying model's `from_pretrained` method.
            **kwargs (`dict`, *optional*):
                Additional keyword arguments passed along to the underlying model's `from_pretrained` method. We also
                pre-process the kwargs to extract the arguments that are specific to the `transformers.PreTrainedModel`
                class and the arguments that are specific to trl models. The kwargs also support
                `prepare_model_for_kbit_training` arguments from `peft` library.
        Npeft_configreward_adapterreward_adapter_nameis_trainableFtokenzThe `reward_adapter` argument should be a string representing the name of local path or the Hub id to the Reward Modeling adapter.load_in_8bitload_in_4bitr   r   
device_mapzThe `device_map` argument is not provided. We will override the device_map argument. to set the entire model on the current device. If you want to set the model on multiple devices, please provide a custom `device_map` argument. zLThe `peft_config` argument should be an instance of `peft.PeftConfig` class.zadapter_config.jsonr:   zE`peft_config` argument ignored since a peft config file was found in )r9   r:   zTrained peft adapter loadedzpeft adapter initialisedzNpretrained_model_name_or_path should be a string or a PreTrainedModel, but is Tactive_peft_configz7PromptLearningConfig is not supported for PPO training.z2reward_adapter can only be used with a PeftModel. )r0   r,   r-   r,   zmodel.safetensorspytorch_model.binpytorch_model.bin.index.jsonzmodel.safetensors.index.json)r:   
model_namemodel_index_namecpumap_locationweights_only)
state_dict)-pop_split_kwargsget
isinstancestr
ValueError_get_current_devicer)   loggingwarningr   r   r   r   r	   r   r
   ospathexistsjoinfrom_pretraineddirnametransformers_parent_classbase_model_name_or_pathr   infor   r   (supported_pretrained_model_architecturesr   typer+   r@   r   $add_and_load_reward_modeling_adapter_get_checkpoint_from_hubsafe_load_filetorchloadupdaterI   is_peft_modelcurrent_device	post_init)$clspretrained_model_name_or_path
model_argsr1   r6   r7   r8   r9   trl_model_argspretrained_kwargspeft_quantization_kwargsr:   rd   re   r   r   remote_adapter_configlocal_adapter_presenttrained_adapter_configremote_adapter_dirr&   r0   multi_adapter_argsmodelis_resuming_trainingsafe_filenamefilenamesharded_index_filenamesafe_sharded_index_filename
is_shardeduse_safefiles_to_downloadloading_funcload_kwargsrI   
shard_files$                                       r3   rW   z&PreTrainedModelWrapper.from_pretrainedn   sJ   $  **]D9K#ZZ(8$?N"(**-BDT"U!::ne<LJMJ[J[\bJcGN-/G%))'48EK LN "')$E%j.M U  0023S9ESWhEh 1. AnsESWhEh 1. Ans '(EGZ\a b '(EGZ\a b!2L]8]OO3 02>.Bl+;#::k[eCfkll 3S9 "1,;5-#-) )-%$&GGNN277<<@]_t3u$v!%)>)JPaPc*OO_89; )-7-G-GHe-f*)+9N)O&-7-G-GHZ-[* $Q3#@#@#P#P*BB$EO$Sd$ 
 $-#<#<$&CR^fk$  :;#P3#@#@#P#P1$4>$BS$  *(,=+J,,6,( (66F'T$LL!;<5s7c7cd<&:6F+X$(9'F((2($ $22BK#P 78<=>@ 
 *I6 $+-ABz$779MH %%^__ !;QRR~9CC .2EU D L !-'+#6" #8!? $M(:MnM  $3S9GGLL)FH[\Mww||$ACVWH%'WW\\2OQo%p"*,'',,7TVt*u'Jww~~m4HGGNN8,}0MPSPlPl$1*	 Qm QM+Z9M #(9(AY\YuYu(53##6)G Zv ZVM#4jBV  $H$H-5>5::L ("uVZ.[K#!#J&7 Q
#29&"'$
 #)),x*O;*OPQ ".hhM!i]h!iJ 7AACJ+-OOzO2q +,CEVXop 1,0)1s   V V*)V*c           	         d }d }d}	d}
	 t        |||      }|||
|	fS # t        t        t        t        f$ r t
        j                  j                  |      r|}nV	 t        |||      }nF# t        t        t        t        f$ r* d}	t        j                  dt        |       d| d       Y nw xY w|	rt              5 }t        j                  |      }d d d        n# 1 sw Y   nxY wt               }d   j                         D ]5  \  }t!        fd| j"                  D              s%|j%                  |       7 d}
Y *w xY w)	NTFr?   zA z model is loaded from 'zX', and no v_head weight is found. This IS expected if you are not resuming PPO training.
weight_mapc              3   &   K   | ]  }|v  
 y wN ).0moduleks     r3   	<genexpr>zBPreTrainedModelWrapper._get_checkpoint_from_hub.<locals>.<genexpr>~  s     K66Q;K   )r   r   r	   r   r
   rS   rT   rU   rQ   rR   r]   openjsonrb   setitemsanysupported_modulesadd)rg   r&   rh   index_filenamer:   rC   rD   rz   ru   rs   rx   index_file_namefindexvr   s                  @r3   r_   z/PreTrainedModelWrapper._get_checkpoint_from_hubP  sj    !#
!	"&-HD *J8LLL9 #$;=NPgh 	"ww~~n-"0&55(#'O
 +,CEVXop +0(OOT"2344KLiKj kp q $/* )a IIaLE) ) ) %(E!!,/557 1DAqKS5J5JKK)--a01 "
5	"sS    <EA+*E+A B.+E-B..E>C	EC&	"AE+EEc                     t               }t        j                  j                         s
t	               r|j
                  S t               rd|j
                   S y)a<  
        Get the current device. For GPU & XPU, we return the local process index using the `accelerate.PartialState`
        object to handle corner cases when running scripts in distributed environments.

        Returns:
            current_device (`Union[int, str]`):
                The current device.
        znpu:rE   )r   ra   cudais_availabler   local_process_indexr   )rg   states     r3   rP   z*PreTrainedModelWrapper._get_current_device  sJ     ::""$(>(@,,,#%%33455r4   c                    d}t               rddlm} d}i }i }i }|j                         D ]U  \  }}|| j                  v r|||<   n|||<   |s"|j
                  j                  v s;|||<   ||v sE|j                  |       W |||fS )z|
        Separate the kwargs from the arguments that we support inside `supported_args` and the ones that we don't.
        Fr   )r   T)r   peftr   r   supported_args__code__co_varnamesrJ   )	rg   r1   check_peft_kwargsr   supported_kwargsunsupported_kwargspeft_kwargskeyvalues	            r3   rK   z$PreTrainedModelWrapper._split_kwargs  s    
 "< $ ,,. 
	4JCc((((- %*/"3' 9BBNNN',K$00*..s3
	4  !3[@@r4   c                    |j                  ||d       |j                          t        j                  j	                  |d      }d}t        j                  j                  |      s	 t        |d|      }n|}|rt        nt        j                  }	|ri nd	dd
}
 |	|fi |
}| j                  D ](  t        fd|j                         D              s&} n i }|j                         D ]O  \  }}|v sdj	                  |j!                  d      dd       }|j#                  | j%                               ||<   Q |d   j&                  \  }}t        d |j                         D              }t)        j*                  |||      j#                  | j%                         |j,                        }|j/                  |       |j1                         D ]	  }d|_         |S # t        $ rt t        j                  j	                  |d      }d}t        j                  j                  |      s-	 t        |d|      }n# t        $ r}t        d      |d}~ww xY w|}Y w xY w)a]  
        Add and load a reward modeling adapter. This method can only be used if the model is a `PeftModel` and if you
        have initialized the model with the `reward_modeling_adapter_id` argument, pointing to the id of the reward
        modeling adapter. The latest needs also to contain the score head in order to produce the reward.
        F)r9   zadapter_model.binr?   zadapter_model.safetensorsTzYCould not find adapter model in the Hub, make sure you have the correct adapter model id.NrE   rF   c              3   &   K   | ]  }|v  
 y wr   r   )r   namescore_name_candidates     r3   r   zNPreTrainedModelWrapper.add_and_load_reward_modeling_adapter.<locals>.<genexpr>  s     VD'4/Vr   .weightc              3   $   K   | ]  }d |v  
 yw)biasNr   )r   r   s     r3   r   zNPreTrainedModelWrapper.add_and_load_reward_modeling_adapter.<locals>.<genexpr>  s     L$v~Ls   )r   )devicedtype)load_adaptertrainrS   rT   rV   rU   r   	ExceptionrO   r`   ra   rb   supported_rm_modulesr   keysr   splittorP   shapennLinearr   load_state_dict
parametersrequires_grad)rg   r&   adapter_model_idadapter_namer:   ru   safe_loadinglocal_filenameexcr{   r|   adapter_state_dict
score_name
score_dictr   paramkey_name
num_labels
hidden_dimhas_biasr   r   s                        @r3   r^   z;PreTrainedModelWrapper.add_and_load_reward_modeling_adapter  s\    	%%&6SX%Y 77<< 02EFww~~h'.!0$'", &N)5~5::(buVZ.[).HKH$'$<$< 	 V<N<S<S<UVV1
		 
-335 	KKD%T!88DJJsOBC$89',xx0G0G0I'J
8$	K
 ",H!5!;!;
JL2D2I2I2KLL		*jx@CC**,"(( D 
 	j)%%' 	(E"'E	( a  .77<<(8:UV#ww~~h/	#)8,7"'*
 % #(w"##
 &.N.s7   (G   A
I+H:9I:	IIIIIc                     t         )a  
        Push the pretrained model to the hub. This method is a wrapper around
        `transformers.PreTrainedModel.push_to_hub`. Please refer to the documentation of
        `transformers.PreTrainedModel.push_to_hub` for more information.

        Args:
            *args (`list`, *optional*):
                Positional arguments passed along to the underlying model's `push_to_hub` method.
            **kwargs (`dict`, *optional*):
                Keyword arguments passed along to the underlying model's `push_to_hub` method.
        NotImplementedErrorr/   argsr1   s      r3   push_to_hubz"PreTrainedModelWrapper.push_to_hub  s
     "!r4   c                 <   |j                  d      }|| j                         }||d<   | j                  rM|d   }t        j                  j                  |d      }t        j                  ||       |j                  dd      } | j                  j                  |i |S )a0  
        Save the pretrained model to a directory. This method is a wrapper around
        `transformers.PreTrainedModel.save_pretrained`. Please refer to the documentation of
        `transformers.PreTrainedModel.save_pretrained` for more information.

        Args:
            *args (`list`, *optional*):
                Positional arguments passed along to the underlying model's `save_pretrained` method.
            **kwargs (`dict`, *optional*):
                Keyword arguments passed along to the underlying model's `save_pretrained` method.
        rI   Nr   rA   )rL   rI   rd   rS   rT   rV   ra   saverJ   r&   save_pretrained)r/   r   r1   rI   	save_path_s         r3   r   z&PreTrainedModelWrapper.save_pretrained	  s     ZZ-
*J#-F<  QIY0CDIJJz9-

<.A4t$$44dEfEEr4   c                     t         )z@
        Return the state_dict of the pretrained model.
        r   r   s      r3   rI   z!PreTrainedModelWrapper.state_dict$  s
     "!r4   c                     t         )z
        Post initialization method. This method is called after the model is instantiated and loaded from a checkpoint.
        It can be used to perform additional operations such as loading the state_dict.
        r   r   s      r3   rf   z PreTrainedModelWrapper.post_init*  s
    
 "!r4   c           	         | j                   st        d      | j                  j                  | j                         | j                  j                          t        j                         5   | j                  d||ddd|}|j                  d   }| j                  |      }ddd       | j                  j                  | j                         | j                  j                          S # 1 sw Y   JxY w)a  
        Computes the reward score for a given input. The method has first to enable the adapter and then compute the
        reward score. After that the model disables the reward modeling adapter and enables the default ppo adapter
        again.
        z4This model does not support reward modeling adapter.T)	input_idsattention_maskoutput_hidden_statesreturn_dictr   Nr   )r,   rO   r&   set_adapterr-   evalra   no_gradhidden_statesr   r.   )r/   r   r   r1   base_model_outputlast_hidden_statesscoress          r3   compute_reward_scorez+PreTrainedModelWrapper.compute_reward_score1  s     ''STT 	))$*>*>?""$]]_ 
	4 5 5 5 !#-%) 	!
 ! "3!@!@!DZZ 23F
	4 	))$*B*BC""$
	4 
	4s   +8C,,C5)NNFN)NrA   rB   )reward_model_adapterNr   )__name__
__module____qualname____doc__rY   r   r   r   r   r   r   r   r\   r%   classmethodrW   r_   rP   rK   r^   r   r   rI   rf   r   __classcell__r2   s   @r3   r   r   ;   s     !%N#% !" 
35JK - dh&6 _ _B  &71M 1Mf  " A A: \`D DL"F6""r4   r   rr   num_shared_layerspatternreturnc                 >   t               rt        d      | j                         D cg c]  \  }}|	 }}}t        |       }|/|D ]  }|j	                  |      }d|_         |j                         S ||j                  |      }n5t        D ],  j                  |      t        fd|D              s*} n |t        d      g }	g }
d}| j                         D ]0  \  }}||v rd}|r|	j                  |        |
j                  |       2 |	D ]+  }| j	                  |      }d|_        |j	                  |      }- |
D ]  }|j	                  |      }d|_         |#t        |
      dk(  rt        j                  d       |j                         S c c}}w )	a3  
    Creates a static reference copy of a model. Note that model will be in `.eval()` mode.

    Args:
        model (`PreTrainedModelWrapper`): The model to be copied.
        num_shared_layers (`int`, *optional*):
            The number of initial layers that are shared between both models and kept frozen.
        pattern (`str`, *optional*): The shared layers are selected with a string pattern
            (e.g. "transformer.h.{layer}" for GPT2) and if a custom pattern is necessary it can be passed here.

    Returns:
        `PreTrainedModelWrapper`
    zDeepSpeed ZeRO-3 is enabled and is not compatible with `create_reference_model()`. Please instantiate your reference model directly with `AutoModelForCausalLM.from_pretrained()`.F)layerc              3   &   K   | ]  }|v  
 y wr   r   )r   r   pattern_candidates     r3   r   z)create_reference_model.<locals>.<genexpr>u  s     I$,Ir   z#Layer pattern could not be matched.Tr   zNPattern passed or found, but no layers matched in the model. Check for a typo.)r   rO   named_parametersr   get_parameterr   r   formatLAYER_PATTERNSr   appendlenrQ   rR   )rr   r   r   nr   parameter_names	ref_model
param_namer   shared_param_listunshared_param_listshared_parameterr   _param
_ref_paramr   s                  @r3   create_reference_modelr  P  s     "# A
 	
 &+%;%;%=>TQq>O>I  ) 	(J++J7E"'E	( ~~ ..'8.9!/ 	 1 8 8?P 8 QIII+		 >?? ..0 -fd?$$$T*&&t,- ( 9
##J/#,,Z8
	9 * $
''
3#$ s#671<hi>>g ?s   Fc                   v     e Zd ZdZdZdZdZd
 fd	Zd Z e	j                         d        Zd Zd Zd	 Z xZS )GeometricMixtureWrappera  
    Geometric Mixture generation wrapper that samples from the logits of two model's geometric mixture.

    Args:
        model (`PreTrainedModel`): The model to be wrapped.
        ref_model (`PreTrainedModel`): The reference model.
        generation_config (`GenerationConfig`): The generation config.
        mixture_coef (`float`, *optional* - default: 0.5): The mixture coefficient.
    r   Fc                     t         |           || _        |j                  | _        || _        || _        || _        || _        y r   )r$   r%   rr   r'   r   generation_configmixture_coefr   )r/   rr   r   r  r  r   r2   s         r3   r%   z GeometricMixtureWrapper.__init__  s>    
ll"!2(r4   c                 &     | j                   |i |S r   )forwardr   s      r3   __call__z GeometricMixtureWrapper.__call__  s    t||T,V,,r4   c                     | j                   |i |}|j                  } | j                  |i |j                  }t        j                  j
                  j                  | j                  |z  d| j                  z
  |z  z   d      |_        |S )N   r   )dim)rr   logitsr   ra   r   
functionallog_softmaxr  )r/   r   r1   model_outputsmodel_logitsref_model_logitss         r3   r  zGeometricMixtureWrapper.forward  s    "

D3F3$++)4>>4:6:AA$xx22>> 00A8I8I4I\3YY_a  ?  
 r4   c                     d|d<    | j                   j                  |i |} | j                  j                  |i |}|S )NF	use_cache)rr   r(   r   )r/   r   r1   model_inputsr   s        r3   r(   z5GeometricMixtureWrapper.prepare_inputs_for_generation  sH    #{?tzz??PP8DNN88$I&Ir4   c                 8    | j                   j                          y r   )rr   _validate_model_class)r/   s    r3   r  z-GeometricMixtureWrapper._validate_model_class  s    

((*r4   c                 8    | j                   j                  |      S r   )rr   _validate_model_kwargs)r/   model_kwargss     r3   r  z.GeometricMixtureWrapper._validate_model_kwargs  s    zz00>>r4   )g      ?N)r   r   r   r   main_input_name_supports_cache_class_supports_static_cacher%   r	  ra   inference_moder  r(   r  r  r   r   s   @r3   r  r    sS     "O!"- U	 	+?r4   r  )NN).r   rQ   rS   copyr   typingr   ra   torch.nnr   
accelerater   huggingface_hubr   huggingface_hub.utilsr   r   r	   r
   safetensors.torchr   r`   transformersr   r   r   r   transformers.utilsr   r   r   r   r   r   r   r   r   #transformers.integrations.deepspeedr   r   Moduler   intrN   r  r  r   r4   r3   <module>r+     s      	     # +  : i i 0    KRRYY Rl fjH!H6>smHU]^aUbHHV4?o 4?r4   