from dataclasses import dataclass, field
from typing import Any, Optional

from transformers import TrainingArguments

from .sft_config import SFTConfig


@dataclass
class GKDConfig(SFTConfig):
    """
    Configuration class for [`GKDTrainer`].

    This class includes only the parameters that are specific to GKD training. For a full list of training arguments,
    please refer to the [`~transformers.TrainingArguments`] and [`SFTConfig`] documentation.

    Args:
        temperature (`float`, *optional*, defaults to `0.9`):
            Temperature for sampling. The higher the temperature, the more random the completions.
        lmbda (`float`, *optional*, defaults to `0.5`):
            Lambda parameter that controls the student data fraction (i.e., the proportion of on-policy
            student-generated outputs).
        beta (`float`, *optional*, defaults to `0.5`):
            Interpolation coefficient between `0.0` and `1.0` of the Generalized Jensen-Shannon Divergence loss. When
            beta is `0.0`, the loss is the KL divergence. When beta is `1.0`, the loss is the Inverse KL Divergence.
        max_new_tokens (`int`, *optional*, defaults to `128`):
            Maximum number of tokens to generate per completion.
        teacher_model_name_or_path (`str` or `None`, *optional*, defaults to `None`):
            Model name or path of the teacher model. If `None`, the teacher model will be the same as the model being
            trained.
        teacher_model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
            Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the teacher model
            from a string.
        disable_dropout (`bool`, *optional*, defaults to `True`):
            Whether to disable dropout in the model.
        seq_kd (`bool`, *optional*, defaults to `False`):
            Seq_kd parameter that controls whether to perform Sequence-Level KD (can be viewed as supervised FT on
            teacher-generated output).
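
    Example (an illustrative sketch; the output directory and teacher checkpoint are placeholders):

    ```python
    from trl import GKDConfig

    training_args = GKDConfig(
        output_dir="gkd-model",  # placeholder output directory (inherited from `TrainingArguments`)
        teacher_model_name_or_path="my-org/teacher-model",  # placeholder teacher checkpoint
        lmbda=0.5,  # proportion of on-policy, student-generated data
        beta=0.5,  # interpolation coefficient of the generalized JSD loss
        max_new_tokens=128,  # maximum number of tokens generated per completion
    )
    ```

    The resulting config is passed as `args` to [`GKDTrainer`].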
    """

    _VALID_DICT_FIELDS = TrainingArguments._VALID_DICT_FIELDS + ["teacher_model_init_kwargs"]

    temperature: float = field(
        default=0.9,
        metadata={"help": "Temperature for sampling. The higher the temperature, the more random the completions."},
    )
    lmbda: float = field(
        default=0.5,
        metadata={
            "help": "Lambda parameter that controls the student data fraction (i.e., the proportion of on-policy "
            "student-generated outputs)."
        },
    )
    beta: float = field(
        default=0.5,
        metadata={
            "help": "Interpolation coefficient between `0.0` and `1.0` of the Generalized Jensen-Shannon Divergence "
            "loss. When beta is `0.0`, the loss is the KL divergence. When beta is `1.0`, the loss is the Inverse KL "
            "Divergence."
        },
    )
    max_new_tokens: int = field(
        default=128,
        metadata={"help": "Maximum number of tokens to generate per completion."},
    )
    teacher_model_name_or_path: Optional[str] = field(
        default=None,
        metadata={
            "help": "Model name or path of the teacher model. If `None`, the teacher model will be the same as the "
            "model being trained."
        },
    )
    teacher_model_init_kwargs: Optional[dict[str, Any]] = field(
        default=None,
        metadata={
            "help": "Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the "
            "teacher model from a string."
        },
    )
    disable_dropout: bool = field(
        default=True,
        metadata={"help": "Whether to disable dropouts in `model`."},
    )
    seq_kd: bool = field(
        default=False,
        metadata={
            "help": "Seq_kd parameter that controls whether to perform Sequence-Level KD (can be viewed as "
            "supervised FT on teacher-generated output)."
        },
    )

    def __post_init__(self):
        super().__post_init__()
        # Check that lmbda and beta are valid mixing/interpolation coefficients in [0.0, 1.0].
        if self.lmbda < 0.0 or self.lmbda > 1.0:
            raise ValueError("lmbda must be in the range [0.0, 1.0].")
        if self.beta < 0.0 or self.beta > 1.0:
            raise ValueError("beta must be in the range [0.0, 1.0].")