
import torch
import torch.nn as nn
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSeq2SeqLM,
    is_torch_npu_available,
    is_torch_xpu_available,
)

from .modeling_base import PreTrainedModelWrapper


class ValueHead(nn.Module):
    r"""
    The ValueHead class implements a head for GPT2 that returns a scalar for each output token.
    """

    def __init__(self, config, **kwargs):
        super().__init__()
        if not hasattr(config, "summary_dropout_prob"):
            summary_dropout_prob = kwargs.pop("summary_dropout_prob", 0.1)
        else:
            summary_dropout_prob = config.summary_dropout_prob

        self.dropout = nn.Dropout(summary_dropout_prob) if summary_dropout_prob else nn.Identity()

        # some models such as OPT store the hidden size under `word_embed_proj_dim`,
        # and encoder-decoder configs may keep it on the nested `decoder` config
        if hasattr(config, "hidden_size"):
            hidden_size = config.hidden_size
        if hasattr(config, "word_embed_proj_dim"):
            hidden_size = config.word_embed_proj_dim
        elif hasattr(config, "is_encoder_decoder"):
            if config.is_encoder_decoder and hasattr(config, "decoder"):
                if hasattr(config.decoder, "hidden_size"):
                    hidden_size = config.decoder.hidden_size

        self.summary = nn.Linear(hidden_size, 1)
        self.flatten = nn.Flatten()

    def forward(self, hidden_states):
        output = self.dropout(hidden_states)

        # upcast the hidden states to the dtype of the head weights if needed
        # (e.g. when the base model runs in half precision but the head is fp32)
        if output.dtype != self.summary.weight.dtype:
            output = output.to(self.summary.weight.dtype)

        output = self.summary(output)
        return output


class AutoModelForCausalLMWithValueHead(PreTrainedModelWrapper):
    r"""
    An autoregressive model with a value head in addition to the language model head. This class inherits from
    `~trl.PreTrainedModelWrapper` and wraps a `transformers.PreTrainedModel` class. The wrapper class supports classic
    functions such as `from_pretrained`, `push_to_hub` and `generate`. To call a method of the wrapped model, simply
    manipulate the `pretrained_model` attribute of this class.
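
    Example (an illustrative sketch, not an excerpt from the official docs; `"gpt2"` stands in for any
    checkpoint loadable by `AutoModelForCausalLM`):

    ```python
    from transformers import AutoTokenizer
    from trl import AutoModelForCausalLMWithValueHead

    model = AutoModelForCausalLMWithValueHead.from_pretrained("gpt2")
    tokenizer = AutoTokenizer.from_pretrained("gpt2")

    inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
    # the forward pass returns the LM logits, the LM loss (None unless labels are
    # passed through kwargs) and one scalar value estimate per token
    lm_logits, loss, value = model(**inputs)
    ```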

    Class attributes:
        - **transformers_parent_class** (`transformers.PreTrainedModel`) -- The parent class of the wrapped model. This
            should be set to `transformers.AutoModelForCausalLM` for this class.
        - **supported_args** (`tuple`) -- A tuple of strings that are used to identify the arguments that are supported
            by the `ValueHead` class. Currently, the supported args are:
            - **summary_dropout_prob** (`float`, `optional`, defaults to `None`) -- The dropout probability for the
                `ValueHead` class.
            - **v_head_initializer_range** (`float`, `optional`, defaults to `0.2`) -- The initializer range for the
                `ValueHead` if a specific initialization strategy is selected.
            - **v_head_init_strategy** (`str`, `optional`, defaults to `None`) -- The initialization strategy for the
                `ValueHead`. Currently, the supported strategies are:
                - **`None`** -- Initializes the weights of the `ValueHead` with a random distribution. This is the
                  default strategy.
                - **"normal"** -- Initializes the weights of the `ValueHead` with a normal distribution.
    """

    transformers_parent_class = AutoModelForCausalLM
    supported_args = (
        "summary_dropout_prob",
        "v_head_initializer_range",
        "v_head_init_strategy",
    )

    def __init__(self, pretrained_model, **kwargs):
        r"""
        Initializes the model.

        Args:
            pretrained_model (`transformers.PreTrainedModel`):
                The model to wrap. It should be a causal language model such as GPT2, or any model mapped inside the
                `AutoModelForCausalLM` class.
            kwargs (`dict`, `optional`):
                Additional keyword arguments, that are passed to the `ValueHead` class.
        """
        super().__init__(pretrained_model, **kwargs)
        v_head_kwargs, _, _ = self._split_kwargs(kwargs)

        self.v_head = ValueHead(self.pretrained_model.config, **v_head_kwargs)

        self._init_weights(**v_head_kwargs)

    def _init_weights(self, **kwargs):
        r"""
        Initializes the weights of the value head. The default initialization strategy is random. Users can pass a
        different initialization strategy by passing the `v_head_init_strategy` argument when calling
        `.from_pretrained`. Supported strategies are:
        - `normal`: initializes the weights with a normal distribution.
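
        For instance, an illustrative call (the keyword arguments are the supported args listed in the
        class docstring; `"gpt2"` stands in for any supported checkpoint):

        ```python
        model = AutoModelForCausalLMWithValueHead.from_pretrained(
            "gpt2",
            v_head_init_strategy="normal",
            v_head_initializer_range=0.2,
        )
        ```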

        Args:
            **kwargs (`dict`, `optional`):
                Additional keyword arguments, that are passed to the `ValueHead` class. These arguments can contain the
                `v_head_init_strategy` argument as well as the `v_head_initializer_range` argument.
        """
        initializer_range = kwargs.pop("v_head_initializer_range", 0.2)
        # random init by default
        init_strategy = kwargs.pop("v_head_init_strategy", None)
        if init_strategy is None:
            # do nothing
            pass
        elif init_strategy == "normal":
            self.v_head.summary.weight.data.normal_(mean=0.0, std=initializer_range)
            self.v_head.summary.bias.data.zero_()

    def forward(
        self,
        input_ids=None,
        past_key_values=None,
        attention_mask=None,
        return_past_key_values=False,
        **kwargs,
    ):
        r"""
        Applies a forward pass to the wrapped model and returns the logits of the value head.
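
        A sketch of how the returned tuple is typically unpacked (shapes are illustrative for a batch of
        size `b` and sequence length `s`):

        ```python
        lm_logits, loss, value = model(input_ids=input_ids, attention_mask=attention_mask)
        # lm_logits: (b, s, vocab_size); loss: None unless labels are passed via kwargs;
        # value: (b, s), one scalar value estimate per token
        ```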

        Args:
            input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
                Indices of input sequence tokens in the vocabulary.
            past_key_values (`tuple(tuple(torch.FloatTensor))`, `optional`):
                Contains pre-computed hidden-states (key and values in the attention blocks) as computed by the model
                (see `past_key_values` input) to speed up sequential decoding.
            attention_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, `optional`):
                Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``:
                - 1 for tokens that are **not masked**,
                - 0 for tokens that are **masked**.
            return_past_key_values (bool): A flag indicating if the computed hidden-states should be returned.
            kwargs (`dict`, `optional`):
                Additional keyword arguments, that are passed to the wrapped model.
        """
        kwargs["output_hidden_states"] = True  # we need the hidden states to compute the values
        kwargs["past_key_values"] = past_key_values

        if self.is_peft_model and self.pretrained_model.active_peft_config.peft_type == "PREFIX_TUNING":
            kwargs.pop("past_key_values")

        base_model_output = self.pretrained_model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            **kwargs,
        )

        last_hidden_state = base_model_output.hidden_states[-1]
        lm_logits = base_model_output.logits
        loss = base_model_output.loss

        # move the hidden states to the device of the value head if needed
        if last_hidden_state.device != self.v_head.summary.weight.device:
            last_hidden_state = last_hidden_state.to(self.v_head.summary.weight.device)

        value = self.v_head(last_hidden_state).squeeze(-1)

        # force upcast in fp32 if logits are in half-precision
        if lm_logits.dtype != torch.float32:
            lm_logits = lm_logits.float()

        if return_past_key_values:
            return (lm_logits, loss, value, base_model_output.past_key_values)
        else:
            return (lm_logits, loss, value)

    def generate(self, *args, **kwargs):
        r"""
        A simple wrapper around the `generate` method of the wrapped model. Please refer to the
        [`generate`](https://huggingface.co/docs/transformers/internal/generation_utils) method of the wrapped model
        for more information about the supported arguments.

        Args:
            *args (`list`, *optional*):
                Positional arguments passed to the `generate` method of the wrapped model.
            **kwargs (`dict`, *optional*):
                Keyword arguments passed to the `generate` method of the wrapped model.
        """
        return self.pretrained_model.generate(*args, **kwargs)

    def state_dict(self, *args, **kwargs):
        r"""
        Returns the state dictionary of the model. We add the state dictionary of the value head to the state
        dictionary of the wrapped model by prepending the key with `v_head.`.
        """
        if not self.is_peft_model:
            pretrained_model_state_dict = self.pretrained_model.state_dict(*args, **kwargs)
        else:
            # if it is a peft model, only save the v_head
            pretrained_model_state_dict = {}

        v_head_state_dict = self.v_head.state_dict(*args, **kwargs)
        for k, v in v_head_state_dict.items():
            pretrained_model_state_dict[f"v_head.{k}"] = v
        return pretrained_model_state_dict

    def push_to_hub(self, *args, **kwargs):
        self.pretrained_model.v_head = self.v_head
        return self.pretrained_model.push_to_hub(*args, **kwargs)

    def post_init(self, state_dict):
        r"""
        We add the state dictionary of the value head to the state dictionary of the wrapped model by prepending the
        key with `v_head.`. This function removes the `v_head.` prefix from the keys of the value head state
        dictionary.
        """
        for k in list(state_dict.keys()):
            if "v_head." in k:
                state_dict[k.replace("v_head.", "")] = state_dict.pop(k)
        self.v_head.load_state_dict(state_dict, strict=False)
        del state_dict

        if hasattr(self.pretrained_model, "hf_device_map"):
            if (
                "cpu" in self.pretrained_model.hf_device_map.values()
                or "disk" in self.pretrained_model.hf_device_map.values()
            ):
                raise ValueError(
                    "The model is offloaded on CPU or disk - CPU & disk offloading is not supported for ValueHead models."
                )

            first_device = list(set(self.pretrained_model.hf_device_map.values()))[0]
            if isinstance(first_device, int):
                if is_torch_npu_available():
                    first_device = f"npu:{first_device}"
                elif is_torch_xpu_available():
                    first_device = f"xpu:{first_device}"
                else:
                    first_device = f"cuda:{first_device}"
            self.v_head = self.v_head.to(first_device)

            def set_device_hook(module, input, outputs):
                new_output = ()
                for output in outputs:
                    if isinstance(output, torch.Tensor):
                        new_output += (output.to(first_device),)
                    else:
                        new_output += (output,)
                return new_output

            self.register_forward_hook(set_device_hook)
            self.is_sequential_parallel = True


class AutoModelForSeq2SeqLMWithValueHead(PreTrainedModelWrapper):
    r"""
    A seq2seq model with a value head in addition to the language model head. This class inherits from
    `~trl.PreTrainedModelWrapper` and wraps a `transformers.PreTrainedModel` class. The wrapper class supports classic
    functions such as `from_pretrained` and `push_to_hub` and also provides some additional functionalities such as
    `generate`.
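
    Example (an illustrative sketch; `"t5-small"` stands in for any checkpoint loadable by
    `AutoModelForSeq2SeqLM`):

    ```python
    from transformers import AutoTokenizer
    from trl import AutoModelForSeq2SeqLMWithValueHead

    model = AutoModelForSeq2SeqLMWithValueHead.from_pretrained("t5-small")
    tokenizer = AutoTokenizer.from_pretrained("t5-small")

    inputs = tokenizer("translate English to German: Hello", return_tensors="pt")
    labels = tokenizer("Hallo", return_tensors="pt").input_ids
    # the value estimates are computed on the decoder hidden states
    lm_logits, loss, value = model(**inputs, labels=labels)
    ```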

    Args:
        pretrained_model (`transformers.PreTrainedModel`):
            The model to wrap. It should be a sequence-to-sequence model such as T5, or any model mapped inside the
            `AutoModelForSeq2SeqLM` class.
        kwargs:
            Additional keyword arguments passed along to the `ValueHead` class.
    """

    transformers_parent_class = AutoModelForSeq2SeqLM
    lm_head_namings = ["lm_head", "embed_out", "output_projection"]
    supported_args = (
        "summary_dropout_prob",
        "v_head_initializer_range",
        "v_head_init_strategy",
    )

    def __init__(self, pretrained_model, **kwargs):
        super().__init__(pretrained_model, **kwargs)
        v_head_kwargs, _, _ = self._split_kwargs(kwargs)
        self.is_encoder_decoder = True

        if not self._has_lm_head():
            raise ValueError("The model does not have a language model head, please use a model that has one.")

        self.v_head = ValueHead(self.pretrained_model.config, **v_head_kwargs)

        self._init_weights(**v_head_kwargs)

    def _has_lm_head(self):
        # check module names of all modules inside `pretrained_model` to find the language model head
        for name, _module in self.pretrained_model.named_modules():
            if any(attribute in name for attribute in self.lm_head_namings):
                return True
        return False

    def post_init(self, state_dict):
        r"""
        We add the state dictionary of the value head to the state dictionary of the wrapped model by prepending the
        key with `v_head.`. This function removes the `v_head.` prefix from the keys of the value head state
        dictionary.
        """
        for k in list(state_dict.keys()):
            if "v_head." in k:
                state_dict[k.replace("v_head.", "")] = state_dict.pop(k)
        self.v_head.load_state_dict(state_dict, strict=False)
        del state_dict

        if hasattr(self.pretrained_model, "hf_device_map"):
            if (
                "cpu" in self.pretrained_model.hf_device_map.values()
                or "disk" in self.pretrained_model.hf_device_map.values()
            ):
                raise ValueError(
                    "The model is offloaded on CPU or disk - CPU & disk offloading is not supported for ValueHead models."
                )

            # get the lm_head device so the value head can be placed alongside it
            for name, module in self.pretrained_model.named_modules():
                if any(attribute in name for attribute in self.lm_head_namings):
                    lm_head_device = module.weight.device
                    break

            # put v_head on the same device as the lm_head to avoid issues
            self.v_head = self.v_head.to(lm_head_device)

            def set_device_hook(module, input, outputs):
                r"""
                A hook that sets the device of the output of the model to the device of the first parameter of the
                model.

                Args:
                    module (`nn.Module`):
                        The module to which the hook is attached.
                    input (`tuple`):
                        The input to the module.
                    outputs (`tuple`):
                        The output of the module.
                """
                new_output = ()
                for output in outputs:
                    if isinstance(output, torch.Tensor):
                        new_output += (output.to(lm_head_device),)
                    else:
                        new_output += (output,)
                return new_output

            self.register_forward_hook(set_device_hook)
            self.is_sequential_parallel = True

    def state_dict(self, *args, **kwargs):
        r"""
        Returns the state dictionary of the model. We add the state dictionary of the value head to the state
        dictionary of the wrapped model by prepending the key with `v_head.`.
        """
        if not self.is_peft_model:
            pretrained_model_state_dict = self.pretrained_model.state_dict(*args, **kwargs)
        else:
            # if it is a peft model, only save the v_head
            pretrained_model_state_dict = {}

        v_head_state_dict = self.v_head.state_dict(*args, **kwargs)
        for k, v in v_head_state_dict.items():
            pretrained_model_state_dict[f"v_head.{k}"] = v
        return pretrained_model_state_dict

    def push_to_hub(self, *args, **kwargs):
        self.pretrained_model.v_head = self.v_head
        return self.pretrained_model.push_to_hub(*args, **kwargs)

    def _init_weights(self, **kwargs):
        r"""
        We initialize the weights of the value head.
        """
        initializer_range = kwargs.pop("v_head_initializer_range", 0.2)
        # random init by default
        init_strategy = kwargs.pop("v_head_init_strategy", None)
        if init_strategy is None:
            # do nothing
            pass
        elif init_strategy == "normal":
            self.v_head.summary.weight.data.normal_(mean=0.0, std=initializer_range)
            self.v_head.summary.bias.data.zero_()

    def forward(
        self,
        input_ids=None,
        past_key_values=None,
        attention_mask=None,
        return_past_key_values=False,
        **kwargs,
    ):
        kwargs["past_key_values"] = past_key_values
        if self.is_peft_model and self.pretrained_model.active_peft_config.peft_type == "PREFIX_TUNING":
            kwargs.pop("past_key_values")

        base_model_output = self.pretrained_model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            output_hidden_states=True,  # we force the model to output the decoder hidden states
            **kwargs,
        )

        last_hidden_state = base_model_output.decoder_hidden_states[-1]
        lm_logits = base_model_output.logits
        loss = base_model_output.loss

        value = self.v_head(last_hidden_state).squeeze(-1)

        # force upcast in fp32 if logits are in half-precision
        if lm_logits.dtype != torch.float32:
            lm_logits = lm_logits.float()

        if return_past_key_values:
            return (lm_logits, loss, value, base_model_output.past_key_values)
        else:
            return (lm_logits, loss, value)

    def generate(self, *args, **kwargs):
        r"""
        We call `generate` on the wrapped model.
        """
        return self.pretrained_model.generate(*args, **kwargs)