
    bi[#                     P    d dl mZmZ d dlmZmZ d dlmZ e G d de             Zy)    )	dataclassfield)AnyOptional)TrainingArgumentsc                       e Zd ZU dZej
                  dgz   Z edddi      Zee	d<    eddd	i      Z
ee	d
<    edddi      Zee   e	d<    edddi      Zee   e	d<    edddi      Zee   e	d<    edddi      Zee   e	d<    edddi      Zee	d<    edddi      Zee	d<    eddg dd      Zee	d <    ed!dd"i      Zee	d#<    ed$dd%i      Zee	d&<    ed'dd(i      Zee	d)<    ed*dd+i      Zee	d,<    eddd-i      Zee   e	d.<    ed/d0d/d1gd      Zee	d2<    ed3dd4i      Zee	d5<    eddd6i      Zee   e	d7<    eddd8i      Zeeee f      e	d<    eddd9i      Z!ee   e	d:<    fd;Z" xZ#S )<	CPOConfiguo  
    Configuration class for the [`CPOTrainer`].

    This class includes only the parameters that are specific to CPO training. For a full list of training arguments,
    please refer to the [`~transformers.TrainingArguments`] documentation. Note that default values in this class may
    differ from those in [`~transformers.TrainingArguments`].

    Using [`~transformers.HfArgumentParser`] we can turn this class into
    [argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the
    command line.

    Parameters:
        max_length (`int` or `None`, *optional*, defaults to `1024`):
            Maximum length of the sequences (prompt + completion) in the batch. This argument is required if you want
            to use the default data collator.
        max_prompt_length (`int` or `None`, *optional*, defaults to `512`):
            Maximum length of the prompt. This argument is required if you want to use the default data collator.
        max_completion_length (`int` or `None`, *optional*, defaults to `None`):
            Maximum length of the completion. This argument is required if you want to use the default data collator
            and your model is an encoder-decoder.
        beta (`float`, *optional*, defaults to `0.1`):
            Parameter controlling the deviation from the reference model. Higher β means less deviation from the
            reference model. For the IPO loss (`loss_type="ipo"`), β is the regularization parameter denoted by τ in
            the [paper](https://huggingface.co/papers/2310.12036).
        label_smoothing (`float`, *optional*, defaults to `0.0`):
            Label smoothing factor. This argument is required if you want to use the default data collator.
        loss_type (`str`, *optional*, defaults to `"sigmoid"`):
            Type of loss to use. Possible values are:

                - `"sigmoid"`: sigmoid loss from the original [DPO](https://huggingface.co/papers/2305.18290) paper.
                - `"hinge"`: hinge loss on the normalized likelihood from the
                  [SLiC](https://huggingface.co/papers/2305.10425) paper.
                - `"ipo"`: IPO loss from the [IPO](https://huggingface.co/papers/2310.12036) paper.
                - `"simpo"`: SimPO loss from the [SimPO](https://huggingface.co/papers/2405.14734) paper.

        disable_dropout (`bool`, *optional*, defaults to `True`):
            Whether to disable dropout in the model.
        cpo_alpha (`float`, *optional*, defaults to `1.0`):
            Weight of the BC regularizer in CPO training.
        simpo_gamma (`float`, *optional*, defaults to `0.5`):
            Target reward margin for the SimPO loss, used only when the `loss_type="simpo"`.
        label_pad_token_id (`int`, *optional*, defaults to `-100`):
            Label pad token id. This argument is required if you want to use the default data collator.
        padding_value (`int` or `None`, *optional*, defaults to `None`):
            Padding value to use. If `None`, the padding value of the tokenizer is used.
        truncation_mode (`str`,*optional*,  defaults to `"keep_end"`):
            Truncation mode to use when the prompt is too long. Possible values are `"keep_end"` or `"keep_start"`.
            This argument is required if you want to use the default data collator.
        generate_during_eval (`bool`, *optional*, defaults to `False`):
            If `True`, generates and logs completions from the model to W&B or Comet during evaluation.
        is_encoder_decoder (`bool` or `None`, *optional*, defaults to `None`):
            When using the `model_init` argument (callable) to instantiate the model instead of the `model` argument,
            you need to specify if the model returned by the callable is an encoder-decoder model.
        model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
            Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the model from a
            string.
        dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
            Number of processes to use for processing the dataset.
    model_init_kwargsgư>helpz$The initial learning rate for AdamW.)defaultmetadatalearning_rate
   zLog every X updates steps. Should be an integer or a float in range `[0,1)`. If smaller than 1, will be interpreted as ratio of total training steps.logging_stepsNzWhether to use bf16 (mixed) precision instead of 32-bit. Requires Ampere or higher NVIDIA architecture or Intel XPU or using CPU (use_cpu) or Ascend NPU. If not set, it defaults to `True` if `fp16` is not set.bf16i   zCMaximum length of the sequences (prompt + completion) in the batch.
max_lengthi   zMaximum length of the prompt. This argument is required if you want to use the default data collator and your model is an encoder-decoder.max_prompt_lengthzMaximum length of the completion. This argument is required if you want to use the default data collator and your model is an encoder-decoder.max_completion_lengthg?uv   Parameter controlling the deviation from the reference model. Higher β means less deviation from the reference model.betag        zLabel smoothing factor.label_smoothingsigmoidzType of loss to use.)r   hingeiposimpo)r   choices	loss_typeTz(Whether to disable dropout in the model.disable_dropoutg      ?z-Weight of the BC regularizer in CPO training.	cpo_alphag      ?zPTarget reward margin for the SimPO loss, used only when the `loss_type='simpo'`.simpo_gammaizLabel pad token id.label_pad_token_idzLPadding value to use. If `None`, the padding value of the tokenizer is used.padding_valuekeep_endz3Truncation mode to use when the prompt is too long.
keep_starttruncation_modeFzRIf `True`, generates and logs completions from the model to W&B during evaluation.generate_during_evalz.Whether the model is an encoder-decoder model.is_encoder_decoderzoKeyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the model from a string.z6Number of processes to use for processing the dataset.dataset_num_procc                 v    | j                   | j                   n| j                   | _         t        |           y )N)r   fp16super__post_init__)self	__class__s    Q/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/trl/trainer/cpo_config.pyr+   zCPOConfig.__post_init__   s*    '+yy'8Odii	    )$__name__
__module____qualname____doc__r   _VALID_DICT_FIELDSr   r   float__annotations__r   r   r   boolr   intr   r   r   r   r   strr   r   r   r    r!   r$   r%   r&   r
   dictr   r'   r+   __classcell__)r-   s   @r.   r	   r	      s   :x +==AT@UU !@AM5  ! D
M5  ! !
D(4.  !&_`!J  (- =
(x}  ,1 =
,8C=   #
D%  #34OU  *;
Is  "DEOT  IJIu  lmK  $/0  $)hi$M8C=  !I"L1
OS  "'no"$  */JK*  38 
3xS#X/  ',RS'hsm 
   r/   r	   N)	dataclassesr   r   typingr   r   transformersr   r	    r/   r.   <module>r@      s/    )   * j ! j  j r/   