
    bi|                       d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZm	Z	 d dl
mZ d dlmZ d dlmZmZmZmZmZ d dlZd dlZd dlmZ d dlmc mZ d dlmZ d dlmZ d d	l m!Z!m"Z" d d
lm#Z# d dl$m%Z% d dl&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1 d dl2m3Z3 d dl4m5Z5 d dl6m7Z7 d dl8m9Z9 d dl:m;Z;m<Z< ddl=m>Z>m?Z? ddl@mAZAmBZB ddlCmDZD ddlEmFZF ddlGmHZHmIZImJZJ ddlKmLZLmMZMmNZNmOZOmPZPmQZQmRZRmSZSmTZTmUZUmVZVmWZWmXZX  e<       rd dlYmZZZm[Z[m\Z\m]Z]  e;       rd dl^m_Z_  e1       rd dl`Z`dej                  debdej                  fdZce G d  d!e3             Zd G d" d#e/      Zey)$    N)defaultdict)contextmanagernullcontext)	dataclass)Path)AnyCallableLiteralOptionalUnion)PartialState)tqdm)DatasetIterableDataset)autocast)
DataLoader)AutoModelForCausalLMAutoTokenizerBaseImageProcessorDataCollatorFeatureExtractionMixinPreTrainedModelPreTrainedTokenizerBaseProcessorMixinTraineris_comet_availableis_wandb_available)DataCollatorMixin)$MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES)TrainerCallback)EvalLoopOutput)is_liger_kernel_availableis_peft_available   )maybe_apply_chat_templatemaybe_extract_prompt)create_reference_modelprepare_deepspeed)prepare_fsdp   )SyncRefModelCallback)	DPOConfigFDivergenceConstantsFDivergenceType)RunningMomentscap_expdisable_dropout_in_modelempty_cache
flush_leftflush_rightgenerate_model_cardget_comet_experiment_urllog_table_to_comet_experimentpadpad_to_lengthpeft_module_casting_to_bf16selective_log_softmax)
PeftConfig	PeftModelget_peft_modelprepare_model_for_kbit_training)LigerFusedLinearDPOLoss	input_idsdecoder_start_token_idreturnc                     | j                  | j                        }| ddddf   j                         |ddddf<   ||dddf<   y)zAShift input ids one token to the right, and pad with pad_token_idNr*   r   )	new_zerosshapeclone)rA   rB   shifted_input_idss      R/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/trl/trainer/dpo_trainer.pyshift_tokens_rightrK   W   sM    !++IOO<(CRC0668ae4ad    c                   f    e Zd ZU dZeed<   dZeed<   dee	ee   e
eee
f   f      deee
f   fdZy)	DataCollatorForPreferencea[  
    Data collator used for preference data. Inputs are dynamically padded to the maximum length of a batch if they are
    not all of the same length.

    Args:
        pad_token_id (`int`):
            Token ID to use for padding.
        return_tensors (`str`, *optional*, defaults to `"pt"`):
            Type of Tensor to return. Only `"pt"` is currently supported.

    Examples:
    ```python
    >>> from trl import DataCollatorForPreference

    >>> collator = DataCollatorForPreference(pad_token_id=0)
    >>> examples = [
    ...     {"prompt_input_ids": [1, 2, 3], "chosen_input_ids": [4, 5], "rejected_input_ids": [6]},
    ...     {"prompt_input_ids": [7, 8], "chosen_input_ids": [9, 10], "rejected_input_ids": [11, 12, 13]},
    ... ]
    >>> collator(examples)
    {'prompt_input_ids': tensor([[1, 2, 3],
                                 [0, 7, 8]]),
     'prompt_attention_mask': tensor([[1, 1, 1],
                                      [0, 1, 1]]),
     'chosen_input_ids': tensor([[ 4,  5],
                                 [ 9, 10]]),
     'chosen_attention_mask': tensor([[1, 1],
                                      [1, 1]]),
     'rejected_input_ids': tensor([[ 6,  0,  0],
                                   [11, 12, 13]]),
     'rejected_attention_mask': tensor([[1, 0, 0],
                                        [1, 1, 1]])
    }
    ```
    pad_token_idptreturn_tensorsexamplesrC   c                 r   |D cg c]  }t        j                  |d          }}|D cg c]  }t        j                  |       }}|D cg c]  }t        j                  |d          }}|D cg c]  }t        j                  |       }}|D cg c]  }t        j                  |d          }}|D cg c]  }t        j                  |       }	}d|d   v r%|D cg c]  }t        j                  |d          }
}d|d   v r%|D cg c]  }t        j                  |d          }}d|d   v rSd|d   v rLt        j                  |D cg c]  }|d   	 c}      }t        j                  |D cg c]  }|d   	 c}      }i }t        || j                  d	
      |d<   t        |dd	
      |d<   t        || j                        |d<   t        |d      |d<   t        || j                        |d<   t        |	d      |d<   d|d   v rt        
d      |d<   d|d   v rt        d      |d<   d|d   v r)t        j                  |D cg c]  }|d   	 c}      |d<   d|d   v rd|d   v r
|d<   |d<   |S c c}w c c}w c c}w c c}w c c}w c c}w c c}w c c}w c c}w c c}w c c}w )Nprompt_input_idschosen_input_idsrejected_input_idspixel_valuesr   pixel_attention_maskref_chosen_logpsref_rejected_logpsleft)padding_valuepadding_sideprompt_attention_maskr\   chosen_attention_maskrejected_attention_mask        image_sizes)torchtensor	ones_liker8   rO   )selfrR   examplerT   rA   r^   rU   r`   rV   ra   rW   rX   rY   rZ   outputs                  rJ   
torch_callz$DataCollatorForPreference.torch_call   s   U]^'ELL1C)DE^^M] ^	!; ^ ^U]^'ELL1C)DE^^M] ^	!; ^ ^Yabgell73G+HIbbOa"b)5??9#="b"bXa[(QYZgELL)@AZLZ!Xa[0ai#jV]ELL9O1P$Q#j #j!,1ERS1T$||X`,aWW5G-H,ab!&\d.eQXw7K/L.e!f %()9IZIZio%p!"*-.CSTci*j&'%()9IZIZ%[!"*-.CST*U&''*+=TM^M^'_#$,/0GWX,Y()Xa[(%(S%IF>"!Xa[0-01EUV-WF)*HQK'$)LLX`1aW'-2H1a$bF=!!,1ERS1T)9F%&+=F'(A _ ^^ ^b"bZ#j,a.e 2bs@   JJJ1JJ8J!J J%J*5J/J4N)__name__
__module____qualname____doc__int__annotations__rQ   strlistr   r   dictrj    rL   rJ   rN   rN   ^   sW    "H NC"4d3id38n.L(M#N "SWX[]`X`Sa "rL   rN   c                       e Zd ZdZddgZ	 	 	 	 	 	 	 	 	 	 	 	 dEdeeej                  e	f   de
ee	ej                  ef      de
e   de
e   d	e
eeef      d
e
eeeeeeeef   f   f      de
eeeeef      de
eegef      de
ee      dee
ej4                  j6                     e
ej4                  j8                  j:                     f   de
eeej4                  j6                     eeef   f      de
eej@                  ej@                  gej@                  f      de
d   f fdZ!dFdedede"de	fdZ#de	de	dedede	f
dZ$de	defdZ%deeef   deeeeef   dededeeef   f
dZ&e'd        Z(e'd        Z)d Z*de+f fd Z,dGd
e
e   de+f fd!Z-e.d"        Z/d#eeej`                  f   defd$Z1e'd#eeeeej`                  f   f   d%e2deeej`                  f   fd&       Z3d'ejh                  d(ejh                  d)ejh                  d*ejh                  deejh                  ejh                  ejh                  f   f
d+Z5dej                  d#eeeeej`                  f   f   fd,Z6	 dFdej                  d#eeeeej`                  f   f   d-e"fd.Z7	 dHd#eeeeej`                  f   f   d/e8d0   fd1Z9	 	 dIdee	ej                  f   d2eeeej@                  ef   f   deej@                  eej@                  eeej@                  f   f   f   fd3Z:d#eeej`                  f   deeef   fd4Z;	 dGdee	ej                  f   d2eeeej@                  ef   f   d5e"d6e
ee      fd7Z<dHd8eee=f   d/e8d0   ddfd9Z>	 	 	 dJd:e+d;ed5e
e"   d6e
ee      d<edef fd=Z?dGd>eee=f   d?e
e=   ddf fd@Z@ fdAZA	 	 	 dKdBe
e   de
e   dCeeee   df   fdDZB xZCS )L
DPOTrainerak  
    Trainer for Direct Preference Optimization (DPO) method.

    This class is a wrapper around the [`transformers.Trainer`] class and inherits all of its attributes and methods.

    Args:
        model (`Union[str, PreTrainedModel]`):
            Model to be trained. Can be either:

            - A string, being the *model id* of a pretrained model hosted inside a model repo on huggingface.co, or a
              path to a *directory* containing model weights saved using
              [`~transformers.PreTrainedModel.save_pretrained`], e.g., `'./my_model_directory/'`. The model is loaded
              using [`~transformers.AutoModelForCausalLM.from_pretrained`] with the keyword arguments in
              `args.model_init_kwargs`.
            - A [`~transformers.PreTrainedModel`] object. Only causal language models are supported.
        ref_model (`PreTrainedModelWrapper`):
            Hugging Face transformer model with a casual language modelling head. Used for implicit reward computation
            and loss. If no reference model is provided, the trainer will create a reference model with the same
            architecture as the model to be optimized.
        args ([`DPOConfig`], *optional*, defaults to `None`):
            Configuration for this trainer. If `None`, a default configuration is used.
        data_collator (`DataCollator`, *optional*):
            Function to use to form a batch from a list of elements of the processed `train_dataset` or `eval_dataset`.
            Will default to [`DataCollatorForPreference`].
        train_dataset ([`~datasets.Dataset`] or [`~datasets.IterableDataset`]):
            Dataset to use for training. DPO supports [preference](#preference) type and. The format of the samples can
            be either:

            - [Standard](dataset_formats#standard): Each sample contains plain text.
            - [Conversational](dataset_formats#conversational): Each sample contains structured messages (e.g., role
              and content).
        eval_dataset ([`~datasets.Dataset`], [`~datasets.IterableDataset`] or `dict[str, Union[Dataset, IterableDataset]]`):
            Dataset to use for evaluation. It must meet the same requirements as `train_dataset`.
        processing_class ([`~transformers.PreTrainedTokenizerBase`], *optional*, defaults to `None`):
            Processing class used to process the data. If `None`, the processing class is loaded from the model's name
            with [`~transformers.AutoTokenizer.from_pretrained`].
        compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
            The function that will be used to compute metrics at evaluation. Must take a [`EvalPrediction`] and return
            a dictionary string to metric values. *Note* When passing TrainingArgs with `batch_eval_metrics` set to
            `True`, your compute_metrics function must take a boolean `compute_result` argument. This will be triggered
            after the last eval batch to signal that the function needs to calculate and return the global summary
            statistics rather than accumulating the batch-level statistics.
        callbacks (list of [`~transformers.TrainerCallback`], *optional*, defaults to `None`):
            List of callbacks to customize the training loop. Will add those to the list of default callbacks detailed
            in [here](https://huggingface.co/docs/transformers/main_classes/callback).

            If you want to remove one of the default callbacks used, use the [`~transformers.Trainer.remove_callback`]
            method.
        optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`, *optional*, defaults to `(None, None)`):
            A tuple containing the optimizer and the scheduler to use. Will default to an instance of [`AdamW`] on your
            model and a scheduler given by [`get_linear_schedule_with_warmup`] controlled by `args`.
        optimizer_cls_and_kwargs (`Tuple[Type[torch.optim.Optimizer], Dict[str, Any]]`, *optional*, defaults to `None`):
            A tuple containing the optimizer class and keyword arguments to use. Overrides `optim` and `optim_args` in
            `args`. Incompatible with the `optimizers` argument.
        preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`, *optional*, defaults to `None`):
            A function that preprocess the logits right before caching them at each evaluation step. Must take two
            tensors, the logits and the labels, and return the logits once processed as desired. The modifications made
            by this function will be reflected in the predictions received by `compute_metrics`.

            Note that the labels (second parameter) will be `None` if the dataset does not have them.
        peft_config ([`~peft.PeftConfig`], *optional*, defaults to `None`):
            PEFT configuration used to wrap the model. If `None`, the model is not wrapped.
    trldpoNmodel	ref_modelargsdata_collatortrain_dataseteval_datasetprocessing_classcompute_metrics	callbacks
optimizersoptimizer_cls_and_kwargspreprocess_logits_for_metricspeft_configr<   c                 0   t        |t              r|n|j                  j                  }|"|j	                  d      d   }t        | d      }|t        j                  |      }|j                  |j                  | _        nst        |d      r|j                  |j                  | _        nIt        |d      r2|j                  j                  |j                  j                  | _        nt        d      t        |t              s||u rt        d      |j                  %t        |t              st        j                  d       t        |t              r| j!                  ||      }|j"                  %t        |t              st        j                  d	       t        |t              r| j!                  ||d
      }| j%                  ||||      }|j&                  rt)               st+               st        d      |j                  j,                  | _        |j                  j.                  t1        j2                         v | _        t7               xr t        |t8              | _        |j<                  | _        |j>                  | _        |j@                  | _         |r|| _!        n0| j:                  s|jD                  rd | _!        ntG        |      | _!        |jH                  r,tK        |       | jB                  tK        | jB                         |jL                  rbtO               stQ        d      |jR                  dk7  rt        d      tU        |jV                  |jX                  |j@                   d      | _-        d
|j\                  d<   |t_        | j                        }|j&                  | _        |jV                  | _+        |j`                  | _0        |jb                  | _1        |jd                  | _2        |jf                  | _3        |jD                  | _"        |jh                  | _4        |jj                  rR|j                  jl                  dk7  rt        j                  d       |jn                  dk(  rt        j                  d       |jj                  | _5        d| _8        d| _9        |jR                  dv r7|jt                  dkD  r(t        j                  d|jR                   dtv               |jR                  dk(  rt        d      |jX                  | _,        |jt                  | _:        |jR                  | _)        ty        |j                  dd      | _=        |j|                  | _>        ty        |j                  dd       | _?        | jz                  r)| j~                  d k(  rt        j                  d!tv               t        d"       | _A        |j                  | _B        t        j                  |j                  i| _F        |j                  | _G        | j                  |||d#      }|Zt        |t              r6|j                         D ci c]  \  }}|| j                  ||||       }}}n| j                  |||d$      }t        | 1  ||||||||	|
||%       d| _M        t        | j                  d&      r%| j                  j                  | j                         t        | d'      st        d(      | j                  rD| j                  j                  j                  j                  d)k(  r| jD                  rt        d*      | jB                  :| j:                  s| jD                  st        d+      |j                  rt        d,      | j                  r&t        | jB                  | j                        | _!        n^| j                  r&t        | jB                  | j                        | _!        n,| j                  j                  | jB                  d
-      | _!        |j                  rG| jD                  rt        d.      | j                  t        | jB                  | j                  /             | jR                  d0k(  rt        | j                        | __        y y c c}}w )1N/rE   z-DPOrO   	tokenizera  `padding_value` is not specified in `DPOConfig`, and `pad_token_id` is missing in the `processing_class`. Please either set the `padding_value` argument in `DPOConfig`, or set `tokenizer.pad_token` (e.g., `tokenizer.pad_token = tokenizer.eos_token`) before instantiating the trainer.z`model` and `ref_model` cannot be the same object. If you want `ref_model` to be the same as `model`, you must mass a copy of it, or `None` if you use peft.zYou passed model_init_kwargs to the `DPOConfig`, but your model is already instantiated. The `model_init_kwargs` will be ignored.zYou passed ref_model_init_kwargs to the `DPOConfig`, but your ref_model is already instantiated. The `ref_model_init_kwargs` will be ignored.T)is_refz`generate_during_eval=True` requires Weights and Biases or Comet to be installed. Please install `wandb` or `comet-ml` to resolve.zYou set `use_liger_loss=True` but the liger kernel is not available. Please install liger-kernel first: `pip install liger-kernel`sigmoidz{You set `use_liger_loss=True` but the loss type is not `sigmoid`. Please set `loss_type='sigmoid'` to use the liger kernel.F)ignore_indexbetause_ref_modelaverage_log_probestimate_tokens)rO   flash_attention_2a  Padding-free training is enabled, but the attention implementation is not set to 'flash_attention_2'. Padding-free training flattens batches into a single sequence, and 'flash_attention_2' is the only known attention mechanism that reliably supports this. Using other implementations may lead to unexpected behavior. To ensure compatibility, set `attn_implementation='flash_attention_2'` in the model configuration, or verify that your attention mechanism can handle flattened sequences.r*   zYou are using a per_device_train_batch_size of 1 with padding-free training. Using a batch size of 1 anihilate the benefits of padding-free training. Please consider increasing the batch size to at least 2.)hingeipobco_pair	sppo_hardnca_pairapo_zeroapo_downr   zYou are using the z loss type that does not support label smoothing. The `label_smoothing` parameter will be ignored. Set `label_smoothing` to `0.0` to remove this warning.kto_pairzKSupport for kto_pair has been removed in DPOTrainer. Please use KTOTrainer.output_router_logitsrouter_aux_loss_coefrb   a-  You set `output_router_logits` to `True` in the model config, but `router_aux_loss_coef` is set to `0.0`, meaning the auxiliary loss will not be used. Either set `router_aux_loss_coef` to a value greater than `0.0`, or set `output_router_logits` to `False` if you don't want to use the auxiliary loss.c                       t        t              S N)r   rr   rt   rL   rJ   <lambda>z%DPOTrainer.__init__.<locals>.<lambda>  s    ;t3D rL   traineval)ry   r{   r|   r}   r~   r   r   r   r   r   r   add_model_tagsacceleratorzXYour `Trainer` does not have an `accelerator` object. Consider upgrading `transformers`.   zrYou cannot use `precompute_ref_log_probs=True` with Deepspeed ZeRO-3. Please set `precompute_ref_log_probs=False`.z]No reference model and model is not a Peft model. Try setting `precompute_ref_log_probs=True`zYYou currently cannot use `ref_model=None` with TR-DPO method. Please provide `ref_model`.)evaluation_modezoYou cannot use `precompute_ref_log_probs=True` with TR-DPO method. Please set `precompute_ref_log_probs=False`.)rz   r   r   )`
isinstancerq   config_name_or_pathsplitr,   r   from_pretrainedr\   hasattrrO   r   
ValueErrormodel_init_kwargswarningswarn_create_model_from_pathref_model_init_kwargs_prepare_peft_modelgenerate_during_evalr   r   is_encoder_decoder
model_typer   keysis_vision_modelr#   r=   is_peft_modelmodel_adapter_nameref_adapter_namereference_freerz   precompute_ref_log_probsr'   disable_dropoutr1   use_liger_lossr"   ImportError	loss_typer@   label_pad_token_idr   dpo_loss_fnwarnings_issuedrN   max_prompt_lengthmax_completion_length
max_lengthtruncation_modeuse_logits_to_keeppadding_free_attn_implementationper_device_train_batch_size _precomputed_train_ref_log_probs_precomputed_eval_ref_log_probslabel_smoothingUserWarninggetattraux_loss_enableduse_weightingaux_loss_coefr   _stored_metricsf_divergence_typer-   ALPHA_DIVERGENCE_COEF_KEYf_alpha_divergence_coeff_divergence_paramsdataset_num_proc_prepare_datasetrs   itemssuper__init__model_accepts_loss_kwargsry   r   
_tag_namesAttributeErroris_deepspeed_enabledr   statedeepspeed_plugin
zero_stagesync_ref_modelr(   is_fsdp_enabledr)   prepare_modeladd_callbackr+   r/   running)rg   ry   rz   r{   r|   r}   r~   r   r   r   r   r   r   r   model_id
model_namekeydataset	__class__s                     rJ   r   zDPOTrainer.__init__   s   & 'uc258R8R<!,R0J
|401D #,<<XF)!%!3!3D'8=M=Z=Z=f%5%B%B");7<L<V<V<c<c<o%5%?%?%L%L" #  %%)u*<Z 
 !!-j6LMM; eS!00=E%%1*YPS:TMM? i%44YT4RI ((	;M$$.@.BFXFZD 
 #(,,"A"A$||66:^:c:c:ee.0QZy5Q"&"9"9 $ 5 5"11&DN4#@#@!DN3E:DN $U+~~)(8 ,.!T  ~~* P   7!44YY"&"5"55!&	 D 48/0  54CUCUVM$($=$=!"&"9"9!%!7!7%)%?%?"//#33(,(E(E%"&"9"9||004GGJ //14%
 !-- 16-/4, NNkk$$q(MM$T^^$4 5v v
 >>Z'jkkII	#33 '6Le T!//$U\\3I3O  T%7%73%>MM   ++DE!%!7!7$8$R$RTXTpTp#q  $ 5 5 --m=MtU\]#,- )5(:(:(< $W ..w8H$PSTT   
  $44\CSUY[ab''%-+!%=*G 	 	
" */& 4::/0JJ%%doo6t]+ j 
 $$%%66AAQF4KhKh  I  >>!&&$*G*G s  "" o  ((!24>>4CSCS!T%%!-dnnd>N>N!O!%!1!1!?!?`d!?!e,,  F  2T^^Y]YiYijk>>Z')$*:*:;DL (G s   b
model_pathr   rC   c                 H   |s|j                   xs i }n|j                  xs i }|j                  d      }t        |t        j
                        s|dk(  s|n5t        |t              rt        t        |      }||d<   nt        d| d      t        j                  |fi |}|S )z0Creates a model from a path or model identifier.torch_dtypeautozInvalid `torch_dtype` passed to `DPOConfig`. Expected either 'auto' or a string representing a `torch.dtype` (e.g., 'float32'), but got .)r   r   getr   rd   dtyperq   r   r   r   r   )rg   r   r{   r   r   r   ry   s          rJ   r   z"DPOTrainer._create_model_from_path  s     $ 6 6 <" $ : : @b (++M:k5;;/;&3HKL_S)!%5K/:m,>>I]!M  %44ZUCTUrL   c                    d| _         t               s|t        d      t               r	|t        |t              r|j                         }||j                  st        d      t        |dd      st        |dd      rht        |d      xr. dt        t        j                  t              j                        v }d|j                  i}|r|j                  |d<   t        |fi |}n| j!                  ||      }t#        ||      }|j$                  rt        |dd      rt'        |       d| _         |S | j!                  ||      }|S )	z#Prepares a model for PEFT training.FzvPEFT is not installed and you passed a `peft_config` in the trainer's kwargs, please install it to use the PEFT modelsa8  You passed both a ref_model and a peft_config. For training PEFT adapters with DPO there is no need to pass a reference model. Please pass `ref_model=None` in case you want to train PEFT adapters, or pass a ref_model with `force_use_ref_model=True` in DPOTrainer's init. if you want to use a different ref_model.is_loaded_in_8bitis_loaded_in_4bitgradient_checkpointing_kwargsuse_gradient_checkpointingT)_peft_has_been_casted_to_bf16r#   r   r   r=   merge_and_unloadforce_use_ref_modelr   r   rr   inspect	signaturer?   
parametersgradient_checkpointingr   _prepare_gradient_checkpointingr>   bf16r:   )rg   ry   rz   r   r{   _support_gc_kwargsprepare_model_kwargss          rJ   r   zDPOTrainer._prepare_peft_model  sf    .3* "{'> I   [%<%+..0$T-E-E A  u159WUL_af=g%,9& &5%%&EFQQ:  # )EdFaFa'b$%LPLnLn()HI7VAUV <<UDI #5+6EyyWU,?G+E2592
  88EErL   c                     |j                   r@t        |d      r|j                          |S d }|j                         j	                  |       |S )z4Prepare the gradienting checkpointing for the model.enable_input_require_gradsc                 &    |j                  d       y )NT)requires_grad_)moduleinputri   s      rJ   make_inputs_require_gradzLDPOTrainer._prepare_gradient_checkpointing.<locals>.make_inputs_require_gradS  s    ))$/rL   )r   r   r  get_input_embeddingsregister_forward_hook)rg   ry   r{   r
  s       rJ   r   z*DPOTrainer._prepare_gradient_checkpointingH  sO    
 &&u:;002 0 **,BBC[\rL   r   dataset_namec           
      d   i }t        |t              r|j                  |d<   d|d<   t               j	                         5  t        |t              r	d| d|d<    |j
                  t        fi |}t        |t              r	d| d|d<    |j
                  t        fd||j                  d	i|}t        |t              r	d
| d|d<    |j
                  | j                  s| j                  n| j                  fddg||j                  |j                  ddd|}d d d        |S # 1 sw Y   |S xY w)Nnum_proc
   writer_batch_sizezExtracting prompt in z datasetdesczApplying chat template to 	fn_kwargs)r   toolszTokenizing chosenrejectedF)r   r   r   add_special_tokens)remove_columnsr  )r   r   r   r   main_process_firstmapr&   r%   r  r   tokenize_rowprocess_rowr   r   )rg   r   r   r{   r  
map_kwargss         rJ   r   zDPOTrainer._prepare_datasetZ  sW    
gw'%)%:%:Jz".0J*+^..0 	'7+'<\N(%S
6"!gkk"6E*EG '7+'A,x%X
6"!gkk)CS^b^h^h5imwG
 '7+'2<.%I
6"!gkk)-)=)=!!4CSCS (*5(8)-)?)?-1-G-G*/ G#	< =	< s   CD%%D/c                 N   |} || d   d      d   } || d   d      d   } || d   d      d   }|r8|j                   |j                   g|z   }|j                  ||j                  gz   }||j                  gz   }||j                  gz   }||| d }|
|d| }|d| }|||dS )	a  
        Tokenize a row of the dataset.

        Args:
            features (`dict[str, str]`):
                Row of the dataset, should contain the keys `"prompt"`, `"chosen"`, and `"rejected"`.
            processing_class (`PreTrainedTokenizerBase`):
                Processing class used to process the data.
            max_prompt_length (`int` or `None`):
                Maximum length of the prompt sequence. If `None`, the prompt sequence is not truncated.
            max_completion_length (`int` or `None`):
                Maximum length of the completion sequences. If `None`, the completion sequences are not truncated.
            add_special_tokens (`bool`):
                Whether to add special tokens to the sequences. Typically used for encoder-decoder models. If `True`,
                the prompt sequence will have a bos token prepended and an eos token appended. In any case, the
                completion sequences will have an eos token appended.

        Returns:
            `dict[str, list[int]]`:
                Tokenized sequences with the keys `"prompt_input_ids"`, `"chosen_input_ids"`, and
                `"rejected_input_ids".

        Example:
        ```python
        >>> from transformers import GPT2Tokenizer

        >>> tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
        >>> features = {"prompt": "The sky is", "chosen": " blue", "rejected": " green"}
        >>> DPOTrainer.tokenize_row(
        ...     features, tokenizer, max_prompt_length=3, max_completion_length=3, add_special_tokens=False
        ... )
        {'prompt_input_ids': [464, 6766, 318], 'chosen_input_ids': [4171, 50256], 'rejected_input_ids': [4077, 50256]}
        ```
        promptFr  rA   r  r  N)rT   rU   rV   )bos_token_ideos_token_id)	featuresr   r   r   r  r   rT   rU   rV   s	            rJ   r  zDPOTrainer.tokenize_row  s   H %	$Xh%7ERS^_$Xh%7ERS^_&x
';PUVWbc %%1$-$:$:#;>N#N %%1#3y7M7M6N#N +y/E/E.FF/93I3I2JJ (/1B0B0CD ,/0F1FG!34J5J!K !1 0"4
 	
rL   c                    ||j                   }} || d   | d   d      }|d   d   }|d   d   }	 || d   d	      d   }
 || d
   d	      d   }|r8|j                  |j                  g|z   }|j                  ||j                  gz   }|
|j                  gz   }
||j                  gz   }||| d }|
|
d| }
|d| }||	|
|d}d|v r|d   d   |d<   d|v r|d   d   |d<   |S )zt
        Same as `tokenize_row` but for vision models. Please refer to `tokenize_row` for more information.
        imagesr  F)r%  textr  rA   r   rW   r  r   r  N)rT   rW   rU   rV   rX   rc   )r   r!  r"  )r#  r   r   r   r  	processorr   processed_featuresrT   rW   rU   rV   ri   s                rJ   r  zDPOTrainer.process_row  s   
  01A1K1K9	&hx.@xPXGYnst-k:1=).9!<$Xh%7ERS^_&x
';PUVWbc %%1$-$:$:#;>N#N %%1#3y7M7M6N#N +y/E/E.FF/93I3I2JJ (/1B0B0CD ,/0F1FG!34J5J!K !1( 0"4	
 "%77-?@V-WXY-ZF)*..$6}$Ea$HF=!rL   c                 0    | j                   
g d| _         y y )N)rT   rU   rV   rc   rY   rZ   )_signature_columnsrg   s    rJ    _set_signature_columns_if_neededz+DPOTrainer._set_signature_columns_if_needed  s    
 ""*'D# +rL   c                 4   | j                   r| j                  s| j                  j                  xs | j                  j                  }|| j
                  | j                  j                  | j                  j                  dd}| j                  j                  t        | j                  fi |      }g }g }t        |d      D ]  }| j                  |      \  }}| j                  j                  ||f      \  }}|j                  |j!                                |j                  |j!                                t#                | j                  j%                           t'        j(                  |      j+                         j-                         }	t'        j(                  |      j+                         j-                         }
| j                  j/                  d|	      | _        | j                  j/                  d|
      | _        d| _        t0        | e         S )	z
        Returns the training [`~torch.utils.data.DataLoader`].

        Subclass of transformers.src.transformers.trainer.get_train_dataloader to precompute `ref_log_probs`.
        F
batch_size
collate_fnnum_workers
pin_memoryshufflez!Train dataset reference log probsiterabler  rY   namecolumnrZ   T)r   r   r{   precompute_ref_batch_sizer   r|   dataloader_num_workersdataloader_pin_memoryr   preparer   r}   r   compute_ref_log_probsgather_for_metricsappendcpur2   free_memoryrd   catfloatnumpy
add_columnr   get_train_dataloader)rg   r/  dataloader_paramsdata_loaderrY   rZ   padded_batchref_chosen_logpref_rejected_logpall_ref_chosen_logpsall_ref_rejected_logpsr   s              rJ   rF  zDPOTrainer.get_train_dataloader   s    ((1V1V<<e		@e@eJ("00#yy??"ii== ! **22:d>P>P3fTe3fgK!!# $k@c d 
/595O5OP\5]2!2595E5E5X5X$&7862!2 !''(;(;(=>"))*;*?*?*AB   ,,.
/ $)99-=#>#D#D#F#L#L#N %*YY/A%B%H%H%J%P%P%R"!%!3!3!>!>DV_s!>!tD!%!3!3!>!>)2H "? "D 59D1w+--rL   c                    || j                   t        d      ||n| j                   }| j                  r| j                  s| j                  j
                  xs | j                  j                  }|| j                  | j                  j                  | j                  j                  dd}| j                  j                  t        |fi |      }g }g }t        |d      D ]t  }| j                  |      \  }}	| j                  j                  ||	f      \  }}	|j!                  |j#                                |j!                  |	j#                                v t%        j&                  |      j)                         j+                         }
t%        j&                  |      j)                         j+                         }|j-                  d|
      }|j-                  d|      }| j                   || _         d	| _        t.        | a  |
      S )a  
        Returns the evaluation [`~torch.utils.data.DataLoader`].

        Subclass of transformers.src.transformers.trainer.get_eval_dataloader to precompute `ref_log_probs`.

        Args:
            eval_dataset (`torch.utils.data.Dataset`, *optional*):
                If provided, will override `self.eval_dataset`. If it is a [`~datasets.Dataset`], columns not accepted
                by the `model.forward()` method are automatically removed. It must implement `__len__`.
        z-Trainer: evaluation requires an eval_dataset.Fr.  z Eval dataset reference log probsr4  rY   r6  rZ   T)r~   )r~   r   r   r   r{   r9  per_device_eval_batch_sizer|   r:  r;  r   r<  r   r   r=  r>  r?  r@  rd   rB  rC  rD  rE  r   get_eval_dataloader)rg   r~   r/  rG  rH  rY   rZ   rI  rJ  rK  rL  rM  r   s               rJ   rP  zDPOTrainer.get_eval_dataloader.  s    D$5$5$=LMM'3'?|TEVEV((1U1U<<d		@d@dJ("00#yy??"ii== ! **22:l3`N_3`aK!!# $k@b c C595O5OP\5]2!2595E5E5X5X$&7862!2 !''(;(;(=>"))*;*?*?*ABC $)99-=#>#D#D#F#L#L#N %*YY/A%B%H%H%J%P%P%R"'228JSg2hL'228LUk2lL   ,$0!37D0w**EErL   c              #     K   | j                   r?| j                  s3| j                  j                  | j                        j                         n	t               5  | j                  r%| j                  j                  | j                         d | j                  r)| j                  j                  | j                  xs d       ddd       y# 1 sw Y   yxY ww)zWContext manager for handling null reference model (that is, peft adapter manipulation).Ndefault)	r   r   r   unwrap_modelry   disable_adapterr   set_adapterr   r+  s    rJ   null_ref_contextzDPOTrainer.null_ref_contexta  s     
 !!$*?*? ))$**5EEG		M
 $$

&&t'<'<=$$

&&t'>'>'K)L		M 		M 		Ms   ACA+C	CCCbatchc                    | j                   r)t        | j                  j                  j                        n	t               }t        j                         5  |5  | j                  8| j                         5  | j                  | j                  |d      }ddd       n| j                  | j                  |d      }ddd       ddd       d   |d   fS # 1 sw Y   #xY w# 1 sw Y   'xY w# 1 sw Y   +xY w)zfComputes log probabilities of the reference model for a single padded batch of a DPO specific dataset.NT)is_ref_modelchosen_logpsrejected_logps)r   r   r   devicetyper   rd   no_gradrz   rV  concatenated_forwardry   )rg   rW  compte_ref_context_managerref_model_outputs       rJ   r=  z DPOTrainer.compute_ref_log_probso  s     7;6X6XHT%%,,112^i^k 	# ]]_ 	g8 	g~~%**, g'+'@'@Uae'@'f$g g $(#<#<T^^Uae#<#f 	g 	g  /1ABR1SSS	g g	g 	g 	g 	gs<   C,C 4C'C :C,CC  C)	%C,,C5r\   c                    i }t        j                  | d   | d   gd      |d<   t        j                  | d   | d   gd      |d<   d| v r"t        j                  | d   | d   gd      |d<   d| v r"t        j                  | d   | d   gd      |d<   d| v r"t        j                  | d   | d   gd      |d<   t        | d   j                  d	   | d
   j                  d	         }t        j                  t	        | d   ||      t	        | d
   ||      f      |d<   t        j                  t	        | d   |d      t	        | d   |d      f      |d<   |S )a  
        Concatenate the `chosen` and `rejected` inputs from the batch into a single tensor for both the prompt and
        completion sequences.

        Args:
            batch (`dict[str, Union[list, torch.LongTensor]]`):
                A batch of input data. The batch must contain the following keys:

                - `"prompt_input_ids"`: Tensor of shape `(batch_size, prompt_length)` representing the prompt input
                  IDs.
                - `"chosen_input_ids"`: Tensor of shape `(batch_size, chosen_length)` representing the chosen
                  completion input IDs.
                - `"rejected_input_ids"`: Tensor of shape `(batch_size, rejected_length)` representing the rejected
                  completion input IDs.
                - `"prompt_pixel_values"` (optional): Tensor for pixel values, if available.
                - `"prompt_pixel_attention_mask"` (optional): Tensor for pixel attention masks, if available.

            padding_value (`int`):
                The padding value to use for the concatenated completion sequences (`chosen_input_ids` and
                `rejected_input_ids`).

        Returns:
            `dict[str, torch.LongTensor]`: A dictionary containing:

                - `"prompt_input_ids"`: Concatenated prompt input IDs of shape `(2 * batch_size, prompt_length)`.
                - `"completion_input_ids"`: Concatenated chosen and rejected completion input IDs of shape `(2 *
                  batch_size, max_completion_length)`.
                - `"prompt_attention_mask"`: Concatenated prompt attention masks of shape `(2 * batch_size,
                  prompt_length)`.
                - `"completion_attention_mask"`: Concatenated chosen and rejected attention masks of shape `(2 *
                  batch_size, max_completion_length)`.
                - `"pixel_values"` (optional): Concatenated pixel values if `"prompt_pixel_values"` are present.
                - `"pixel_attention_mask"` (optional): Concatenated pixel attention masks if
                  `"prompt_pixel_attention_mask"` are present.

        Notes:
            The completion input IDs and attention masks are padded to the maximum completion length of the chosen or
            rejected sequences.
        rT   r   dimr^   rW   rX   rc   rU   r*   rV   )	pad_valuecompletion_input_idsr`   ra   completion_attention_mask)rd   rB  maxrG   r9   )rW  r\   ri   r   s       rJ   concatenated_inputszDPOTrainer.concatenated_inputs|  s   V  &+YY6H0I5QcKd/ekl%m!"*/))*+U3J-KLRS+
&' U"%*YYn0Eu^G\/]cd%eF>"!U*-2YY-.6L0MNTU.F)* E!$)IIu]/CU=EY.Z`a$bF=! !$E*<$=$C$CA$FNbHcHiHijkHl m).e$679NZghe$89;P\ij*
%& /4iie$;<>S_`ae$=>@Uabc/
*+ rL   rZ  r[  rY   rZ   c                    | j                   j                  }|j                  |      | j                   |j                  |      z  z
  }|j                  |      | j                   |j                  |      z  z
  }| j                  t
        j                  j                  k(  rt        j                  }| j                  rBt        j                  | j                  v r&t        | j                  t        j                           }t        || z        t        || z        z
  |z  }	n||z
  }
| j                  r.t        j                  dg|
j                   |
j                        }n||z
  }|
j                  | j                   j                        }
|j                  | j                   j                        }|
|z
  }	| j                  t
        j"                  j                  k(  r.|	t%        j&                  |      t%        j&                  |      z
  z  }	| j(                  dk(  rft%        j*                  | j,                  |	z         d| j.                  z
  z  t%        j*                  | j,                   |	z        | j.                  z  z
  }nY| j(                  dk(  ryt%        j*                  | j,                  |	z         d| j.                  z
  z  t%        j*                  | j,                   |	z        | j.                  z  z   dd| j.                  z  z
  z  }n| j(                  dk(  rddl}| j.                  dk(  rd	| _        | j,                  |	z  j3                         t%        j*                  | j,                  |	z        |j5                  d| j.                  z
        z
  z  | j,                   |	z  j3                         t%        j*                  | j,                   |	z        |j5                  | j.                        z
  z  z   }n| j(                  d
k(  r't        j6                  d| j,                  |	z  z
        }n| j(                  dk(  r|	dd| j,                  z  z  z
  dz  }n| j(                  dk(  r||z
  }||z
  }| j,                  |z  }| j,                  |z  }t        j8                  ||fd      j;                         j=                         }| j>                  jA                  |       | j>                  j:                  }t%        j*                  | j,                  |z  |z
         t%        j*                  | j,                  |z  |z
         z
  }n| j(                  dk(  r7||z
  }||z
  }|d| j,                  z  z
  dz  |d| j,                  z  z   dz  z   }nX| j(                  dk(  rp||z
  | j,                  z  }||z
  | j,                  z  }t%        j*                  |       dt%        j*                  |       z  z
  dt%        j*                  |       z  z
  }n| j(                  dk(  r||z
  }||z
  }t        jB                  |d      \  }}t        jB                  |d      \  }}||z
  }t%        j*                  | j,                  |z         d| j.                  z
  z  t%        j*                  | j,                   |z        | j.                  z  z
  }n!| j(                  dk(  r||z
  }
||z
  }t        jB                  |
d      \  }}t        jB                  |d      \  }}||z
  }t%        j*                  | j,                  |z         d| j.                  z
  z  t%        j*                  | j,                   |z        | j.                  z  z
  }ni| j(                  dk(  rNdt%        j2                  | j,                  |z        z
  }t%        j2                  | j,                  |z        }||z   }n| j(                  dk(  rPt%        j2                  | j,                  |z        }dt%        j2                  | j,                  ||z
  z        z
  }||z   }n| j(                  dk(  r||z
  }
||z
  }|
|z
  }	|	| j,                  z  }	t        j2                  |	| jD                  jF                  z        }t%        j*                  |	       }t        jH                  |	       }|d|z
  z  ||z  z   }ntK        d| j(                   d      | j,                  |j                  |      |j                  |      z
  j=                         z  }| j,                  |j                  |      |j                  |      z
  j=                         z  }|||fS )a  
        Compute the DPO loss for a batch of policy and reference model log probabilities.

        Args:
            chosen_logps (`torch.FloatTensor`):
                Log probabilities of the model for the chosen responses. Shape: `(batch_size,)`.
            rejected_logps (`torch.FloatTensor`):
                Log probabilities of the model for the rejected responses. Shape: `(batch_size,)`.
            ref_chosen_logps (`torch.FloatTensor`):
                Log probabilities of the reference model for the chosen responses. Shape: `(batch_size,)`.
            ref_rejected_logps (`torch.FloatTensor`):
                Log probabilities of the reference model for the rejected responses. Shape: `(batch_size,)`.

        Returns:
            A tuple of three tensors: `(losses, chosen_rewards, rejected_rewards)`. The losses tensor contains the DPO
            loss for each example in the batch. The `chosen_rewards` and `rejected_rewards` tensors contain the rewards
            for the chosen and rejected responses, respectively.
        r   )r   r\  r   r*   robustr$   exo_pairNgMbP?r   r   r   r   g      ?r   aot_pairrc  aotr   r   discopopzUnknown loss type: z. Should be one of ['sigmoid', 'hinge', 'ipo', 'exo_pair', 'nca_pair', 'robust', 'bco_pair', 'sppo_hard', 'aot', 'aot_pair', 'discopop', 'apo_zero', 'apo_down'])&r   r\  tor   r   r.   ALPHA_DIVERGENCEvaluer-   ALPHA_DIVERGENCE_COEF_DEFAULTr   r   rC  r0   rd   re   r   JS_DIVERGENCEFsoftplusr   
logsigmoidr   r   mathr   logrelurB  meandetachr   updatesortr{   discopop_tauexpr   )rg   rZ  r[  rY   rZ   r\  chosen_logratiosrejected_logratios
alpha_coeflogits	logratiosref_logratioslossesrx  chosen_rewardsrejected_rewardsrewardsdeltaabchosen_logratios_sorted_rejected_logratios_sortedlogratios_sortedref_logratios_sortedlosses_chosenlosses_rejectedlog_ratio_modulationlogistic_componentexp_components                                 rJ   dpo_losszDPOTrainer.dpo_loss  s   2 !!(( (??62$:M:M6MQaQdQdekQl5ll+..v6d>Q>Q:QUgUjUjkqUr9rr!!_%E%E%K%KK .KKJ'',@,Z,Z^b^v^v,v"4#;#;<P<j<j#kl
0J;>?'JZ^h]hJhBiimwwF$~5I"" %aS	PYP`P` a 03E E!T%5%5%<%<=I),,T-=-=-D-DEM.F%%)F)F)L)LL !**%56DV9WWW
 >>Y&dii&011Q9M9M5MN,,		zF23d6J6JJK 
 ^^x'dii&011Q9M9M5MN,,		zF23d6J6JJKQ----/F
 ^^z)##q('+$ii&(113TYY/0488A@T@T<T3UU))f$--/1<<
V@S3TW[W_W_`d`t`tWu3uvwF ^^w&ZZDII$6 67F^^u$qA		M22q8F^^z)+.>>!/2D!D!YY)99N#yy+==ii1A BAFKKMTTVGLL(LL%%EllDII0@$@E#IJJQ\\))00589N F ^^{*
 //A!33A#		/)a/1sTYY3F12LLF^^z)*-==JN .1C CtyyPn--n_556&6%6778  ^^z)+.>>!/2D!D).4D!)L&#Q+0::6Ha+P(%q+.GGEdii%/00A8L8L4LM,,		zE12T5I5IIJ 
 ^^u$$~5I,/AAM"'**YA">a&+jjA&F# !$';;Edii%/00A8L8L4LM,,		zE12T5I5IIJ 
 ^^z) 		$))6F*F GGMii		4F(FGO"_4F^^z) IIdii2B&BCM!))DII9IL^9^,_"``O"_4F^^z) %~5I,/AAM.Fdii'F#(==$)):P:P1P#Q "#,,v"6!6!IIvg.M'1/C+CD}WkGkkF %dnn%5 6x x 
 loof&=@P@S@STZ@[&[%c%c%ee99(9(9&(ADVDYDYZ`Da(a'i'i'kk~'777rL   c           	         | j                   j                  |      }| j                  || j                        }i }| j                  rd|d<   d|v r|d   |d<   d|v r|d   |d<   d|v r|d   |d<   |d   }|d   }| j
                  r |j                         |d	   |d   d
      }t        |d   |j                  j                        }	 |j                         |	|d   |j                  |d   d      }
|
j                  }d }| j                  s| j                  || j                   j                  | j                        } |j                         |d	   |d   d
      } |j                         |	|d   |j                  |d   d      }|j                  }n{| j                  so| j                         5   |j                         |d	   |d   d
      } |j                         |	|d   |j                  |d   d      }|j                  }d d d        |d   }|j                         }n,t!        j"                  |d	   |d   fd      }t!        j"                  |d   |d   fd      }t!        j"                  t!        j$                  |      |fd      }| j&                  | j&                  |j)                  d      k  r| j*                  dk(  rQt-        |||      \  }}}|d d d | j&                  f   }|d d d | j&                  f   }|d d d | j&                  f   }n| j*                  dk(  ret/        |||      \  }}}|d d | j&                   d f   }|d d | j&                   d f   }|d d | j&                   d f   }t-        |||      \  }}}n*t1        d| j*                   d      t-        |||      \  }}}| j2                  rK|j5                  d      d   j7                         }|j8                  d   |z
  j;                         dz   }||d<   d|d<   | j<                  r~||j                            j?                  d      }||j                            j?                  d      }|jA                  d      |j                            j?                  d      dz
  }||d<   n||d<   tC        |d      r|j                         }n!tE        || jF                  jH                  |      } ||fddi|}|j                  d d d df   }d }| j                  s| j                  | j                   j                  | j                        }tC        |d      r|j                         }n!tE        || jF                  jH                  |      } ||fddi|}|j                  d d d df   }n| j                  swtC        |d      r|j                         }n!tE        || jF                  jH                  |      }| j                         5   ||fddi|}|j                  d d d df   }d d d        t!        jJ                  |dk7  || jL                        }|d d dd f   }|jO                         }d }d }| j                  s| j                  6| j                   j                  | j                        }|jO                         }n)| j                         5  |jO                         }d d d        jP                  }tC        |d      r|jR                  nd }| jU                  |jP                  ||tC        |d      r|jR                  nd | j                  s|nd | j                  s|nd | j                  s|nd       } | \  }!^}"}#}$}%}&}'|!|"|#|$|%|&|'d   |'d   d}(| j                  rjV                  |(d <   |(S # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w)!Nr_   Tr   rW   rX   rc   r^   rg  rT   )attention_maskreturn_dictrf  F)rA   r  encoder_hidden_statesencoder_attention_mask	use_cacher*   rc  
keep_startkeep_endUnknown truncation mode: '/'. Should be one of ['keep_end', 'keep_start'].as_tuplelogits_to_keepoutput_hidden_statesr   position_idsr  get_decoderr  rE   bias)r  	ref_input
ref_weightref_bias)lossrZ  r[  mean_chosen_logitsmean_rejected_logitsnll_lossr  r  aux_loss),r   rS  ri  r\   r   r   get_encoderrK   r   rB   r  last_hidden_stater   rz   rV  boolrd   rB  
zeros_liker   sizer   r3   r4   r   r   nonzerominrG   itemr   	unsqueezecumsumr   r   r{   base_model_attribute_namewherer   get_output_embeddingsweightr  r   r  ))rg   ry   rW  unwrapped_modelconcatenated_batchmodel_kwargsr^   rg  encoder_outputsdecoder_input_idsdecoder_outputshidden_statesref_hidden_statesunwrapped_ref_modelref_encoder_outputsref_decoder_outputslabels	loss_maskrA   r  first_compute_indexr  r  
base_modeloutputsref_base_modelref_outputsmasked_input_idslm_headr  r  ref_lm_headloss_outputr  rZ  r[  chosen_logits_meanrejected_logits_meanr  aux_outputsri   s)                                            rJ   _compute_loss_ligerzDPOTrainer._compute_loss_liger  s   **77>!55e4K]K]5^  37L/0 //+=n+ML(!%773EF\3]L/0..*<]*KL' 23J K$67R$S!"";o99;"#5612IJ O !3"#9:&&==!
 <o99;+12MN&5&G&G'9:Q'RO ,==M $&&4>>+E&*&6&6&C&CDNN&S#&G&9&E&E&G&'9:#56M#N $'#
 'H&9&E&E&G/#56Q#R*=*O*O+=>U+V#'# %8$I$I!((**, N*G/*E*E*G*+=>'9:Q'R$(+'
 +H/*E*E*G"3'9:U'V.A.S.S/ABY/Z"'+' )<(M(M%N ((>?F1668I 		#$679KLb9cdjkI #YY#$;<>PQl>mnN
 		!!"78:STI *tATATUVAW/W''<7 <FnV_aj;k8NIy%3A7H7H4H%IN )!->t->*> ?I )!->t->*> ?I))Z7 <G~W`bk;l8NIy )!doo-=-?*? @I%3A7G7I4I%JN )!doo-=-?*? @I;EnV_aj;k8NIy$4T5I5I4J K) )  8B.R[]f7g4	9 &&&/&7&7&7&Fq&I&M&M&O#"+//!"47J"J!P!P!RUV!V1?-.37L/0   %n&9&9&;<FFqI	%n&9&9&;<FFqI	-44Q78K8K8MNXXYZ[^__/;^,1?-. 6,88:
$_dii6Y6Y[jk
  G
 $55a"f=M !%&&4>>+E&*&6&6&C&CDNN&S#.>%8%D%D%FN%,+TYY-P-PRe&N -# #
 %0$A$A!SbS&$I!((?M:%4%@%@%BN%,_dii>a>acr%sN**, N"0!#"'# '#K
 )4(E(Ea"f(M%N  %{{9>9dF]F]^%ae,F "779 
""~~)&*&6&6&C&CDNN&S#1GGI**, J"1"G"G"IKJ$++J+2;+G{''TH &&NN!(&!9t/3/B/B')-)<)<z$%)%8%8Xd ' 
 	
l\>+=?SU]`k (,"4$8 )!n +A	
   !(!1!1F:SN NdN N,J Js%   2A^"<!^/^<"^,/^9<_rY  c                 n   |d   j                   d   }| j                  || j                        }ddi}| j                  rd|d<   d|v r|d   |d<   d	|v r|d	   |d	<   d
|v r|d
   |d
<   |d   }|d   }|d   }	|d   }
| j                  r>|	}| j
                  ||
dk(  <    |d+|||d|}|j                  }|
j                         }nt        j                  ||	fd      }t        j                  ||
fd      }t        j                  t        j                  |      |
fd      }| j                  | j                  |j                  d      k  r| j                  dk(  rQt        |||      \  }}}|ddd| j                  f   }|ddd| j                  f   }|ddd| j                  f   }n| j                  dk(  ret        |||      \  }}}|dd| j                   df   }|dd| j                   df   }|dd| j                   df   }t        |||      \  }}}n*t!        d| j                   d      t        |||      \  }}}| j"                  rK|j%                  d      d   j'                         }|j                   d   |z
  j)                         dz   }||d<   d|d<   | j*                  r~||j                            j-                  d      }||j                            j-                  d      }|j/                  d      |j                            j-                  d      dz
  }||d<   n||d<    ||fi |}|j                  }t        j0                  |dd      }t        j0                  |dd      j                         }| j"                  r|dd df   }|dd| df   }|j                   dd |j                   dd k7  r|j                   d   }|dd| df   }d|| <   t3        ||      }d|| <   t        j0                  |dd      }| j*                  rej                   \  }}t        j4                  |||j                  j6                  |j                  j8                        }|||j                         <   |}|ddddf   j;                  d      }i }| j<                  rt        j>                         5  tA        jB                  |d      }t        jD                  d|z  d      }||z
  }||z  j;                  d      |j;                  d      z  }|d| }||d }t        jF                  t        jH                  ||z         d      |d <   ddd       | jJ                  jL                  }| j                  s|d|ddf   n|d| } | j                  s|d|ddf   n|d| }!tA        jN                  t        jP                  | d!      t        jP                  |!d!      d"      |d#<   | jR                  d$k(  r||j;                  d      z  }| jJ                  jT                  $|s!|j;                  d      }"|"d| }#|"|d }$t        j&                  |#|$      }%t        j                  |%|%gd      }%|j                  d      }t        jV                  ||j6                  %      jY                  |      }||%j-                  d      k  }&||"j-                  d      k  }'|&|'z  j[                         }(|& |'z  j[                         })||(z  j;                  d      }*||)z  j;                  d      }+|*| jJ                  jT                  |+z  z   }|d| |d&<   ||d |d'<   | j*                  r^dk(  j%                  d      d   |   },|dd|,f   |dd|,f      j]                         }-|d|,df   |d|,df      j]                         }.n2|d| |d|    j]                         }-||d ||d    j]                         }.|-|d(<   |.|d)<   | j                  r|j^                  |d*<   |S # 1 sw Y   xY w),a  
        Runs the given model on the given batch of inputs, concatenating the chosen and rejected inputs together.

        We do this to avoid doing two forward passes, because it's faster for FSDP.

        Args:
            model:
                Model to run the forward pass on.
            batch:
                Batch of input data.
            is_ref_model:
                Whether this method is being called for the reference model. If `True`, length desensitization is not
                applied.
        rT   r   r_   r  FTr   rW   rX   rc   r^   rf  rg  )rA   r  r  r*   rc  Nr  r  r  r  r  r  r  r  r  rE   )shiftsdimsr$   )r\  r   )rh  policy_weights)end_dim)r   r  r   r\  rZ  r[  r  r  r  rt   )0rG   ri  r\   r   r   r   r  r  rd   rB  r  r   r  r   r3   r4   r   r   r  r  r  r   r  r  rollr;   zerosr\  r   sumr   r^  ru  log_softmax	logsumexpclampr  r{   	rpo_alphacross_entropyflattenr   ld_alphaarange	expand_asrC  r{  r  )/rg   ry   rW  rY  num_examplesr  r  rT   r^   rf  rg  r  r  r  r  rA   r  r  r  r  seq_lenper_token_logpsr/  per_token_logps_	all_logpsri   logprobsweights_adjustment_factorper_token_logps_adjustedall_weightschosen_weightsrejected_weightschosen_logitschosen_labelscompletion_lengthschosen_lengthsrejected_lengthspublic_lengthsld_maskmask
front_mask	rear_maskfront_logps
rear_logps	split_idxr  r  s/                                                  rJ   r_  zDPOTrainer.concatenated_forwardh  s	   " /066q9!55e4K]K]5^#U+  37L/0 //+=n+ML(!%773EF\3]L/0..*<]*KL'-.@A 23J K12HI$67R$S!"")F595L5LF,12 *4 	G ^^F1668I 		#35I"JPQRI"YY(=?X'Y_`aN		!!"78:STI *tATATUVAW/W''<7 <FnV_aj;k8NIy%3A7H7H4H%IN )!->t->*> ?I )!->t->*> ?I))Z7 <G~W`bk;l8NIy )!doo-=-?*? @I%3A7G7I4I%JN )!doo-=-?*? @I;EnV_aj;k8NIy$4T5I5I4J K) )  8B.R[]f7g4	9&&
 '0&7&7&7&Fq&I&M&M&O#"+//!"47J"J!P!P!RUV!V1?-.37L/0   &n&9&9&;<FFqI	%n&9&9&;<FFqI	-44Q78K8K8MNXXYZ[^__/;^,1?-.I66G^^F ZZ	"1=F

9Ra@EEGI&&  N?#3 34%a.)9&9:	<<v||BQ//ll1oGAxyL)F 	z/?&'
#**_QQG"0"6"6J${{GGNN,A,AI]I]  7F^0023.O#AqrE*..r2	 l==R8,1OOALb,Q)+:=V+V(7)CHHLy}}]_O``!,]l!;#.|}#= +0;;uyyRbAb7cij+k'(l 99*>B>U>UF=L=#2##56[aboco[pM>B>U>UF=L=#2##56[aboco[pM "#mQ7}^_9`op"F: >>U"!IMM"$55I99),!*1!5/>N1,-@"YY~7GHN"YY'GQON%**1-G <<8N8NOYYZijL"^%=%=a%@@G"4">">q"AAD!D.//1J!D//1I*Z7<<<CK)I5::q:AJ#dii&8&8:&EEI!*=L!9~#,\]#;  
 &*33T3B1ElSI!':I:!6yJYJ7O!P!U!U!W#)!YZ-#81ij=9Q#R#W#W#Y !'!6y,7O!P!U!U!W#),-#8<=9Q#R#W#W#Y '9#$)=%&  !(!1!1F:Gl ls   B`**`4
train_eval)r   r   c                 .   i }| j                   j                  r"| j                  ||      }|d   }|d   }|d   }nW| j                  ||      }d|v rd|v r|d   }	|d   }
n| j	                  |      \  }	}
| j                  |d   |d   |	|
      \  }}}||kD  j                         }| j                   j                  || j                   j                  |d   z  z   }| j                  r||d	   z  }| j                  r|| j                  |d
   z  z   }|dk(  rdnd}| j                  j                  |      j                         j                         || d<   | j                  j                  |      j                         j                         || d<   | j                  j                  |      j                         j                         || d<   | j                  j                  ||z
        j                         j                         || d<   | j                  j                  |d         j                         j                         j                         || d<   | j                  j                  |d         j                         j                         j                         || d<   | j                  j                  |d         j                         j                         j                         || d<   | j                  j                  |d         j                         j                         j                         || d<   | j                   j                  N| j                  j                  |d         j                         j                         j                         || d<   | j                  rN| j                  j                  |d
         j                         j                         j                         || d
<   |j                         |fS )zWCompute the DPO loss and other metrics for the given batch of inputs for train or test.r  r  r  rY   rZ   rZ  r[  r  r  r  r   eval_ zrewards/chosenzrewards/rejectedzrewards/accuracieszrewards/marginszlogps/chosenzlogps/rejectedr  zlogits/chosenr  zlogits/rejected)r{   r   r  r_  r=  r  rC  r  r   r   r   r   r>  r{  r  r|  )rg   ry   rW  r  metricsmodel_outputr  r  r  rY   rZ   reward_accuraciesprefixs                rJ   get_batch_loss_metricsz!DPOTrainer.get_batch_loss_metricsE  s    99##33E5AL!&)F)*:;N+,>?44UEBL "U*/Cu/L#();#< %*+?%@"7;7Q7QRW7X4 "47;}}^,l;K.LN^`r84FN$4 ,.>>EEG99*dii11L4LLLFl+;<<F  d00<
3KKKF&&0b-1-=-=-P-PQ_-`-e-e-g-l-l-n6(.)*/3/?/?/R/RSc/d/i/i/k/p/p/r6(*+,151A1A1T1TUf1g1l1l1n1s1s1u6(,-.//AQ0QRWWY^^` 	6(/*+ //^0LMTTV[[]bbd 	6(,'( //=M0NOVVX]]_ddf 	6(.)* //=Q0RSZZ\aachhj 	6(-() //=S0TU\\^ccejjl 	6(/*+ 99*  33L4LMTTV[[]bbd vhh'(     33L4LMTTV[[]bbd vhh'( {{}g%%rL   inputsc                 \   | j                   r)t        | j                  j                  j                        n	t               }|5  | j                  ||d      \  }}d d d        j                  | j                  j                        }| j                  d       |r||fS |S # 1 sw Y   IxY w)Nr   r  )
r   r   r   r\  r]  r   r  rp  r{   store_metrics)rg   ry   r  return_outputsnum_items_in_batchcompute_loss_context_managerr  r  s           rJ   compute_losszDPOTrainer.compute_loss  s     7;6X6XHT%%,,112^i^k 	% * 	[ 77vRY7ZMD'	[ wwtyy''(7w7= 	[ 	[s   B""B+c           	      n   | j                   r)t        | j                  j                  j                        n	t               }|5  |j                  |d   |d   | j                  d| j                        }d|v r|d   }n| j                  T| j                         5  | j                  j                  |d   |d   | j                  d| j                        }ddd       n:| j                  j                  |d   |d   | j                  d| j                        }ddd       t        | j                  | j                        }| j                  j                  |d      }t        | j                  | j                        }| j                  j                  |d      }||fS # 1 sw Y   xY w# 1 sw Y   xY w)zRGenerate samples from the model and reference model for the given batch of inputs.rT   r^   T)rA   r  r   	do_samplerO   
ref_outputN)skip_special_tokens)r   r   r   r\  r]  r   generater   r\   rz   rV  ry   r9   r   batch_decode)rg   ry   rW  generate_context_managerpolicy_outputr  policy_output_decodedref_output_decodeds           rJ   generate_from_model_and_refz&DPOTrainer.generate_from_model_and_ref  s    7;6X6XHT%%,,112^i^k 	! & 	!NN 23$%<=??!// + M u$"<0
>>)..0 %)ZZ%8%8&+,>&?+01H+I'+&*)-);); &9 &
  "&!8!8"'(:";',-D'E#'??"&%)%7%7 "9 "J/	> &mT__dFXFXY $ 5 5 B B=fj B k":t@R@RS
!22??
`d?e$&888/ 	 	s&   AF+;FAF+F(	$F++F4prediction_loss_onlyignore_keysc                 *   |&t        |d      rt        |j                  dg       }ng }| j                  r)t	        | j
                  j                  j                        n	t               }t        j                         5  |5  | j                  ||d      \  }}d d d        d d d        | j                  d       |rj                         d d fS |d   |d   d}|j                         D 	
cg c]  \  }	}
|	|vs|
 }}	}
t        j                  || j
                  j                        }t        j                   |j"                  d	   | j
                  j                        }j                         ||fS # 1 sw Y   xY w# 1 sw Y   xY wc c}
}	w )
Nr   keys_to_ignore_at_inferencer   r  eval_logits/choseneval_logits/rejected)r$  r%  r  r   )r   r   r   r   r   r   r\  r]  r   rd   r^  r  r  r|  r   re   r  rG   )rg   ry   r  r   r!  prediction_context_managerr  r  logits_dictkvr  r  s                rJ   prediction_stepzDPOTrainer.prediction_step  s    uh'%ell4QSUV  7;6X6XHT%%,,112^i^k 	# ]]_ 	Z8 	Z 77vRX7YMD'	Z 	Z 	7v6;;=$,, #**>"?$+,B$C
 !, 1 1 3L1q7K!LLfT-=-=-D-DEV\\!_T5E5E5L5LMvv..%	Z 	Z 	Z 	Z Ms0   <F?E7F-F:F7F 	<FFr  c                 v    |j                         D ]&  \  }}| j                  |   |   j                  |       ( y r   )r   r   r?  )rg   r  r  r   rr  s        rJ   r  zDPOTrainer.store_metrics  s;    !--/ 	@JC  ,S188?	@rL   
dataloaderdescriptionmetric_key_prefixc                 L   | j                   r{t        |j                        }t        j                  t        |      | j                  j                        }|j                  j                  |      }| j                  |      }	| j                  |	      }	| j                  | j                  |	      \  }
}t        j                  g dt        |d   |
|      D cg c]#  \  }}}||t        |      d |t        |      d g% c}}}      }d| j                  j                   v rA| j"                  j$                  r+t'        j(                  dt'        j*                  |      i       d	| j                  j                   v rt-        d
|       t.        | a  |||||      }|S c c}}}w )z
        Overriding built-in evaluation loop to store metrics for each batch. Prediction/evaluation loop, shared by
        `Trainer.evaluate()` and `Trainer.predict()`.

        Works both with or without labels.
        )r(  )PromptPolicyz	Ref Modelr  N)columnsdatawandbgame_log)r3  comet_mlzgame_log.csv)r7  table)r   lenr   randomsampleranger{   eval_batch_sizeselectr|   _prepare_inputsr  ry   pd	DataFramezip	report_tor   is_main_processr4  ry  Tabler7   r   evaluation_loop)rg   r,  r-  r   r!  r.  num_samplesrandom_indicesrandom_batch_datasetrandom_batchr  r  r  polrefr7  initial_outputr   s                    rJ   rE  zDPOTrainer.evaluation_loop  s     $$j001K#]]5+=AZAZ[N $.#5#5#<#<^#L --.BCL//=L8<8X8XY]YcYceq8r5!#5LL9 -0,X68MOa- (S SV/S[]1CDE $))---$2B2B2R2R		:u{{'>?@TYY000-' 0%9;HY
 +s   (Flogs
start_timec                     d|v rdnd}| j                   |   j                         D ]9  \  }}t        j                  |      j	                         j                         ||<   ; | j                   |= t        |   ||      S )a1  
        Log `logs` on the various objects watching training, including stored metrics.

        Args:
            logs (`dict[str, float]`):
                The values to log.
            start_time (`float` or `None`, *optional*, defaults to `None`):
                Start time of the training.
        r  r   r   )r   r   rd   re   r{  r  r   ry  )rg   rM  rN  r  r   r  r   s         rJ   ry  zDPOTrainer.log.  s|     !'$WF
 00<BBD 	<LCW-22499;DI	<  ,w{4,,rL   c                    | j                   j                  *t        | j                   j                        j                  }n(| j                   j                  j                  d      d   }| j                  |       t        | !  ||       y )Nr   rE   )r   )	r{   hub_model_idr   
output_dirr7  r   create_model_cardr   _save_checkpoint)rg   ry   trialr   r   s       rJ   rT  zDPOTrainer._save_checkpointA  sl    99!!)dii22388J//55c:2>J*5 .rL   r   tagsc                    | j                         syt        | j                  j                  d      r^t        j
                  j                  | j                  j                  j                        s!| j                  j                  j                  }nd}|t               }nt        |t              r|h}nt        |      }t        | j                  j                  d      r|j                  d       |j                  | j                         t        j                  d      }t!        ||| j"                  ||t%               r.t&        j(                  t&        j(                  j+                         ndt-               d|dd	      }|j/                  t        j
                  j1                  | j2                  j4                  d
             y)a  
        Creates a draft of a model card using the information available to the `Trainer`.

        Args:
            model_name (`str` or `None`, *optional*, defaults to `None`):
                Name of the model.
            dataset_name (`str` or `None`, *optional*, defaults to `None`):
                Name of the dataset used for training.
            tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
                Tags to be associated with the model card.
        Nr   unsloth_versionunslothaO              @inproceedings{rafailov2023direct,
                title        = {{Direct Preference Optimization: Your Language Model is Secretly a Reward Model}},
                author       = {Rafael Rafailov and Archit Sharma and Eric Mitchell and Christopher D. Manning and Stefano Ermon and Chelsea Finn},
                year         = 2023,
                booktitle    = {Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023},
                url          = {http://papers.nips.cc/paper_files/paper/2023/hash/a85b405ed65c6477a4fe8302b5e06ce7-Abstract-Conference.html},
                editor       = {Alice Oh and Tristan Naumann and Amir Globerson and Kate Saenko and Moritz Hardt and Sergey Levine},
            }DPOzNDirect Preference Optimization: Your Language Model is Secretly a Reward Modelz
2305.18290)r  r   rQ  r  rV  	wandb_url	comet_urltrainer_nametrainer_citationpaper_titlepaper_idz	README.md)is_world_process_zeror   ry   r   ospathisdirr   setr   rq   addr}  r   textwrapdedentr5   rQ  r   r4  runget_urlr6   savejoinr{   rR  )rg   r   r  rV  r  citation
model_cards          rJ   rS  zDPOTrainer.create_model_cardI  sG   " ))+4::$$o6rww}}TZZM^M^MlMl?m**88JJ <5Dc"6Dt9D4::$$&78HHYDOO$??

 )!!**%-?-AeiiF[eii'')ae.0%h!

 	TYY%9%9;GHrL   )NNNNNNNN)NNNNN)Fr   )r   )FN)NNr   )NNN)Drk   rl   rm   rn   r   r   rq   nnModuler   r   r,   r   r   r   rs   r   r   r   r   r	   r!   rr   r    tuplerd   optim	Optimizerlr_schedulerLambdaLRr]  r   Tensorr   r  r   r   r   r   staticmethodr  r  r,  r   rF  rP  r   rV  
LongTensorr=  ro   ri  FloatTensorr  r  r_  r
   r  r  r  r*  rC  r  rE  ry  rT  rS  __classcell__)r   s   @rJ   rv   rv      s   >@ J
 GK$(04CGnr FJ59jvaehl.2!G<S"))_45G< E/299c"ABCG< y!	G<
  -G<  g&> ?@G< uWotCwXgOgIhDh?i%ijkG< #)+=?UWeef
G< "(N+;T+A"BCG< D12G< (5;;#8#898EKKD\D\DeDe;ffgG< #+5ekk6K6K1LdSVX[S[n1\+]"^G< (0%,,9UW\WcWc9c0d'eG<  l+!G<R# Y PT ap 63$31@3OR3Zc3	3j_ I $+w/0+   79KMces st+ 	+
 + 
w'	(+Z <
 <
| ( (T,.j ,.\1F0A 1FZ 1Ff M MT4U5E5E0E+F T4 T JCtU%5%55667JHKJ	c5###	$J JXx8''x8 ))x8  ++	x8
 "--x8 
u  %"3"3U5F5FF	Gx8tc c4U4QVQaQaKaEb@b;c cL gl[YY['+CtU=M=M7M1N,N'O[_c[B 07	B& CtU%5%55667B& O,	B&P _bii/0 S%c 1223 
u||U5<<c5<<6G1H#HII	J..9S%BRBR=R8S .9X]^acf^fXg .9j ,0#/_bii/0#/ S%c 1223#/ #	#/
 d3i(#/J@T#u*%5 @7?C[ @jn @ 04+/!'33 3 'tn	3
 d3i(3 3 
3j-S%Z( -huo -QU -&/ %)&*,0	@ISM@I sm@I CcD()	@IrL   rv   )fr   rb  r9  rg  r   collectionsr   
contextlibr   r   dataclassesr   pathlibr   typingr   r	   r
   r   r   pandasr?  rd   torch.nnro  torch.nn.functional
functionalru  
accelerater   accelerate.utilsr   datasetsr   r   r   torch.utils.datar   transformersr   r   r   r   r   r   r   r   r   r   r   transformers.data.data_collatorr   &transformers.models.auto.modeling_autor   transformers.trainer_callbackr    transformers.trainer_utilsr!   transformers.utilsr"   r#   
data_utilsr%   r&   modelsr'   r(   models.utilsr)   r   r+   
dpo_configr,   r-   r.   utilsr/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   peftr<   r=   r>   r?   liger_kernel.chunked_lossr@   r4  rv  ro   rK   rN   rv   rt   rL   rJ   <module>r     s    	    # 2 !  : :      # ! -  '    > W 9 5 K H > ' + H H   " [[A 5%,, 5 5PUP\P\ 5 J 1 J JZ]I ]IrL   