
    bi^A             	          d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZm	Z	 d dl
mZ d dlmZ d dlmZmZmZmZmZmZ d dlZd dlZd dlZd dlmZ d dlmc mZ d dlm Z  d dl!m"Z" d d	l#m$Z$m%Z% d d
lm&Z& d dl'm(Z(m)Z) d dl*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6 d dl7m8Z8m9Z9 d dl:m;Z; ddl<m=Z=m>Z>m?Z? ddl@mAZA ddlBmCZCmDZD ddlEmFZF ddlGmHZHmIZImJZJmKZKmLZLmMZMmNZNmOZO  eA       rd dlPmQZQ  e;       r
d dlRmSZSmTZTmUZU  e6       rd dlVZVerd dl*m/Z/mWZW dZXdeYeZe[e   f   deYeZe[e   f   fdZ\deYeZe[e   f   dddeYeZe[e   f   fdZ]d&d eYeZef   d!d"deYfd#Z^ G d$ d%e2      Z_y)'    N)defaultdict)contextmanagernullcontext)
itemgetter)Path)TYPE_CHECKINGAnyCallableLiteralOptionalUnion)PartialState)tqdm)Datasetconcatenate_datasets)autocast)
DataLoaderSequentialSampler)AutoModelForCausalLMBaseImageProcessorDataCollatorFeatureExtractionMixinPreTrainedModelPreTrainedTokenizerBaseProcessorMixinTrainerTrainerCallbackTrainingArgumentsis_comet_availableis_wandb_available)EvalLoopOutput
has_length)is_peft_available   )maybe_apply_chat_templatemaybe_extract_promptmaybe_unpair_preference_dataset)is_liger_kernel_available)create_reference_modelprepare_deepspeed   )	KTOConfig)DPODataCollatorWithPaddingdisable_dropout_in_modelgenerate_model_cardget_comet_experiment_urllog_table_to_comet_experimentpad_to_lengthpeft_module_casting_to_bf16selective_log_softmax)LigerFusedLinearKTOLoss)	PeftModelget_peft_modelprepare_model_for_kbit_training)r   PreTrainedTokenizerz
running.ptbatchreturnc                 Z    | d   d   g| d   dd z   | d<   | d   d   g| d   dd z   | d<   | S )aZ  
    Creates mismatched pairs of prompts and completions for the KL dataset by adding a +1 offset to the order of
    completions. For best results, the mismatched outputs y' used to estimate the KL term for a batch should be the
    same set as the matched outputs y used to estimate the rewards in that batch, just paired with different x.
    answer_input_idsNanswer_attention_mask )r:   s    R/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/trl/trainer/kto_trainer.py_get_kl_datasetrB   U   sc     "''9!:2!> ?%HZB[\_]_B` `E
&+,C&DR&H%IERiLjknlnLo%oE
!"L    	tokenizerr9   c                 0    || d   d      }|d   }|d   }t        | d   | d         D cg c]
  \  }}||z    }}} ||d      }|d   }	|d   }
t        |	|      D cg c]  \  }}|t        |      d  }}}t        |
|      D cg c]  \  }}|t        |      d  }}}t        ||      D cg c]  \  }}t        j                  ||g       }}}|	D cg c]  }t        j                  |       }	}t        |	|      D ]'  \  }}t        |      t        |      k7  st        d       |D cg c]  }t        |       }}t        t        ||	|            D ]0  \  }\  }}}t        j                  ||d|       r$||xx   d	z  cc<   2 t        |	|      D cg c]
  \  }}|d|  }}}t        |
|      D cg c]
  \  }}|d|  }}}t        ||      D ]'  \  }}t        |      t        |      k7  st        d
       t        |	|      D cg c]
  \  }}||d  }}}t        |
|      D cg c]
  \  }}||d  }}}t        ||||      }|S c c}}w c c}}w c c}}w c c}}w c c}w c c}w c c}}w c c}}w c c}}w c c}}w )z-Tokenize a batch from a KTO specific dataset.promptF)add_special_tokens	input_idsattention_mask
completionNz`The elements in 'full_input_ids' and 'full_concat_input_ids' must have the same pairwise length.r+   z@Prompt input ids and attention mask should have the same length.prompt_input_idsprompt_attention_maskr=   r?   )	ziplennpconcatenatearray
ValueError	enumeratearray_equaldict)r:   rD   prompt_tokenizedrL   rM   rF   rJ   prompt_and_completionfull_tokenizedfull_input_idsfull_attention_maskfpr=   r?   afull_concat_input_idsfullconcatresponse_token_ids_start_idxidxrmoutputs                           rA   	_tokenizerg   `   s   
 !xUK'4,-=>KNuU]`efr`sKtu5GVZVj0uu4ON#K0N()9:03NDT0UV1#a&(VV589LNc5deTQQs1vx[ee ADDTVf@gh1R^^QF3hh+9:abhhqk:N:N,AB ft9F#r  5E#EqCF#E #E $C(8.Jf$gh 3YaA~~a2A'(-2-3 +.n>Z*[\$!Q"1\\/23FHd/eftq!QrUff$&;< a1q6SV_``a +.n>Z*[\$!Q!"\\/23FHd/eftq!QqrUff)3)3	F M_ v
 We i: $F ]f ]fs;   I"I$I*6!I0I66I;"J JJ;Jexamplemodelr   c                    | d   }| d   }|d    d||d    d||d    d| d   i}|d   st        |t              st        dt        |             t        |t              st        dt        |             | d   | d	   | d
   | d   d}|d   }|d   j                  }|d   j
                  }	t        |d         dkD  r||d   d   k7  r|dz  }t        |d
         dkD  r|	|d
   d   k7  r|dz  }t        |d         t        |d
         z   |kD  rFdD ]A  }
|d   dk(  r||
   d|d    ||
<   |d   dk(  r||
   |d    d ||
<   2t        d|d           t        |d         t        |d
         z   |kD  rdD ]  }
||
   d||d   z
   ||
<    |d   ||d    d<   |d	   ||d    d	<   |d   |d
   z   ||d    d<   |d	   |d   z   ||d    d<   ||t        |d         dk(  s||d   d   k7  r`|g||d    d   z   ||d    d<   dg||d    d	   z   ||d    d	<   |g||d    d   z   ||d    d<   dg||d    d   z   ||d    d<   t        |d
         dk(  s|	|d
   d   k7  r0||d    d   |	gz   ||d    d<   ||d    d   dgz   ||d    d<   ||d    d   dd ||d    d<   |d   gt        ||d    d         z  ||d    d   dt        ||d    d          |S  |d   |d|d   d       } |d   |d|d   d       }|d!   ||d    d<   |d"   ||d    d	<   |d!   ||d    d<   |d"   ||d    d<   |=t        |d#      r1|j                  t        j                  |d         $      ||d    d%<   |S )&a  Process tokens of a KTO specific dataset.

    At this stage, we don't convert to PyTorch tensors yet; we just handle the truncation in case the prompt +
    completion responses is/are too long. First we truncate the prompt; if we're still too long, we truncate the
    completion.

    We also create the labels for the completion responses, which are of length equal to the sum of the length of the
    prompt and the completion response, with label_pad_token_id for the prompt tokens.
    rF   rJ   prefixlabelis_encoder_decoderz prompt should be an str but got z$completion should be an str but got rL   rM   r=   r?   rK   
max_lengthrD   r   r+   r>   )rL   rM   truncation_mode
keep_startNmax_prompt_lengthkeep_endzUnknown truncation mode: )r=   r?   completion_input_idscompletion_attention_maskcompletion_labelslabel_pad_token_idTmax_completion_length)
truncationrn   rG   rH   rI   %prepare_decoder_input_ids_from_labels)labelscompletion_decoder_input_ids)
isinstancestrrS   typebos_token_ideos_token_idrO   hasattrry   torchtensor)rh   ri   kwargsrF   rJ   r:   
all_tokensrn   r   r   kcompletion_tokensprompt_tokenss                rA   _process_tokensr      sm    XF&J (
F#V(
J'(
E"GG$4E &' &#&?V~NOO*c*CDDTCUVWW !((: ;%,-D%E '(: ;%,-D%E	

 L)
k*77k*77z,-.2|zRdGefgGh7h!OJz,-.2|zRdGefhGi7i!OJ z,-.Z@R5S1TTWaaB ^+,<$.qM2OF;N4O$PJqM-.*<$.qM6:M3N2N2P$QJqM$'@HYAZ@[%\]]^ z,-.Z@R5S1TTWaaB Z *1.X
VDW=X0X Y
1Z 8BBT7U!""234<FG^<_!""789)*Z8J-KK 	!""678 ./*=T2UU 	!"";<=
 #:012a7<:VhKijkKl;l@L~PUh'((89Q @)**:;< FGC%h'((=>K E)**?@A EQ>TYh'((<=U D)**>?@ JKeh'((ABO I)**CDE z,-.!3|zRdGefhGi7i?DxHXGYYmEn?os @EVH%&&:;< EJ(#$$=>EEEVH%&&?@A 9>AQ@RRf>g8hij8k!""345'(n
)**:;<=n>!""3456iEVHEUDVVfBg<h8ij* L# 0F;/4F;R4Shl
 ,{+t7J0K`d
 8E[7Q!""234<IJZ<[!""7898I+8V!""345@QRb@c!"";<=0W!XGLGrGr||E*=$>? Hs HEVH%&&BCD LrC   c                    4    e Zd ZdZddgZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dEdeeej                  e	f   de
eeej                  e	f      dede
e   d	e
eeee	ef   f      d
e
eeeeef      de
e   de
eg ef      de
ee      deej0                  j2                  ej0                  j4                  j6                  f   de
eej8                  ej8                  gej8                  f      de
e   de
eegef      de
e	   de
e	   f fdZed        Z de!f fdZ"dFd	e
e   de!f fdZ#dedefdZ$e%	 	 	 dGdejL                  dejN                  de(de)de(dejL                  fd        Z*dej                  d!ee	eeejN                  f   f   deejL                  ejL                  ejL                  ejL                  f   fd"Z+d#ejL                  d$ejL                  d%ejL                  d&ejL                  d'ejL                  d(ejL                  deejL                  ejL                  ejL                  ejL                  f   fd)Z,d* Z-d+ Z.d!ee	eeejN                  f   f   fd,Z/	 	 dHdeeej                  f   d-ee	eej8                  e0f   f   deej8                  eej8                  ee	ej8                  f   f   f   fd.Z1dId/ee	e2f   d0e3d1   ddfd2Z4dFd3e
e   de
ejj                  jl                  jn                     fd4Z8d!ee	ejN                  f   dee	e	f   fd5Z9	 dFdeeej                  f   d-ee	eej8                  e0f   f   d6e(d7e
ee	      fd8Z:	 	 	 dJd9e!d:e	d6e
e(   d7e
ee	      d;e	def fd<Z;dFd=ee	e2f   d>e
e2   ddf fd?Z< fd@Z=	 	 	 dKdAe
e	   dBe
e	   dCee	ee	   df   fdDZ> xZ?S )L
KTOTraineraG  
    Initialize KTOTrainer.

    Args:
        model (`transformers.PreTrainedModel`):
            The model to train, preferably an `AutoModelForSequenceClassification`.
        ref_model (`PreTrainedModelWrapper`):
            Hugging Face transformer model with a casual language modelling head. Used for implicit reward computation
            and loss. If no reference model is provided, the trainer will create a reference model with the same
            architecture as the model to be optimized.
        args (`KTOConfig`):
            The arguments to use for training.
        train_dataset (`datasets.Dataset`):
            The dataset to use for training.
        eval_dataset (`datasets.Dataset`):
            The dataset to use for evaluation.
        processing_class (`PreTrainedTokenizerBase` or `BaseImageProcessor` or `FeatureExtractionMixin` or `ProcessorMixin`, *optional*):
            Processing class used to process the data. If provided, will be used to automatically process the inputs
            for the model, and it will be saved along the model to make it easier to rerun an interrupted training or
            reuse the fine-tuned model.
        data_collator (`transformers.DataCollator`, *optional*, defaults to `None`):
            The data collator to use for training. If None is specified, the default data collator
            (`DPODataCollatorWithPadding`) will be used which will pad the sequences to the maximum length of the
            sequences in the batch, given a dataset of paired sequences.
        model_init (`Callable[[], transformers.PreTrainedModel]`):
            The model initializer to use for training. If None is specified, the default model initializer will be
            used.
        callbacks (`list[transformers.TrainerCallback]`):
            The callbacks to use for training.
        optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`):
            The optimizer and scheduler to use for training.
        preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`):
            The function to use to preprocess the logits before computing the metrics.
        peft_config (`dict`, defaults to `None`):
            The PEFT configuration to use for training. If you pass a PEFT configuration, the model will be wrapped in
            a PEFT model.
        compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
            The function to use to compute the metrics. Must take a `EvalPrediction` and return a dictionary string to
            metric values.
        model_adapter_name (`str`, defaults to `None`):
            Name of the train target PEFT adapter, when using LoRA with multiple adapters.
        ref_adapter_name (`str`, defaults to `None`):
            Name of the reference PEFT adapter, when using LoRA with multiple adapters.
    trlktoNri   	ref_modelargstrain_dataseteval_datasetprocessing_classdata_collator
model_init	callbacks
optimizerspreprocess_logits_for_metricspeft_configcompute_metricsmodel_adapter_nameref_adapter_namec                 (   t        |      t        u rt        d      t        |t              s||u rt        d      |j
                  i }nt        |t              st        d      |j
                  }|j                  d      }|Xt        |t              r|dk7  rt        t        |      }|dk7  r)t        |t        j                        st        d| d      ||d<   |j                  i }nt        |t              st        d      |j                  }|j                  d      }|Xt        |t              r|dk7  rt        t        |      }|dk7  r)t        |t        j                        st        d| d      ||d<   t        |t              rt        j                  |fi |}t        |t              rt        j                  |fi |}d	| _        t               s|t        d
      t               r(|%t        |t              r|j!                         }t        |dd	      st        |dd	      rht#        |d      xr. dt%        t'        j(                  t*              j,                        v }d|j.                  i}|r|j0                  |d<   t+        |fi |}nK|j.                  r?t#        |d      r|j3                          n"d }|j5                         j7                  |       t9        ||      }|j:                  rkt        |dd	      r^t=        |       d| _        nK|j.                  r?t#        |d      r|j3                          n"d }|j5                         j7                  |       |j>                  rtA               stC               st        d      ||jD                  jF                  | _#        n(|jF                  t        d      |jF                  | _#        t               xr t        |t              | _$        || _%        || _&        |r|| _'        n0| jH                  s|jP                  rd | _'        ntS        |      | _'        |t        d      |jT                  tW        jX                  dtZ               d}|jT                  |jT                  }|j\                  tW        jX                  dtZ               d}|j\                  |j\                  }d }|j^                  (| jF                  rtW        jX                  dtZ               d}|j^                  | jF                  r|j^                  }|ata        |jb                  |jd                  | jF                        }|jf                  r!d	|_3        tW        jX                  dtZ               d| _4        nd	| _4        |jj                  r,tm        |       | jN                  tm        | jN                         |jn                  | _7        | _*        |j>                  | _        |jd                  | _2        |jp                  |jp                  n|jb                  | _8        | _.        |jr                  | _9        || _/        || _:        |jP                  | _(        d| _;        | jn                  dv rd	| _;        d	| _<        d	| _=        t}        d       | _?        |j                  | _@        |j                  | _A        |j                  | _B        t        |jD                  dd	      | _C        t        |jD                  d d!      | _D        | j                  r)| j                  d!k(  rtW        jX                  d"tZ               d|j                  d#<   t               j                         5  |j                  t        |j                  d$%      }t        ||j                  d&'      }|j                  t        d(|i|j                  d)*      }|_|j                  t        |j                  d+%      }t        ||j                  d,'      }|j                  t        d(|i|j                  d-*      }|j                  t        dd(| jt                  i|j                  d./      }d0| jF                  | jt                  | jT                  | jr                  | jd                  | j\                  | j^                  d1}|j                  t        ||j                  d2*      }|S|j                  t        d(| jt                  id|j                  d34      }|j                  t        ||j                  d5*      }| jv                  r1|j                  d6k  rt        d7      |j                  t        d|j                  |j                  d89      }d:|d;<   |j                  t        ||j                  |j                  D cg c]  }||j                  v s| c}d<=      }t        ||gd6>      }||j                  t        d|j                  |j                  d?9      }|j                  t        ||j                  |j                  D cg c]  }||j                  v s| c}d@=      }t        ||gd6>      }t        t        |dA         d6      }t        t        |dA         |z
  d6      }||k7  rt        || j                  z  |z  d6z  dB      }t        || j                  z  |z  dCz  dB      } t        || j                  z  |z  dCz  dB      }!t        || j                  z  |z  d6z  dB      }"|| j                  cxk  xr | k  nc }#|!| j                  cxk  xr |"k  nc }$|#s)|$s'tW        jX                  dD| dE|  dF|! dE|" dG	tZ               d d d        t        %| a  |||||||||	|
|H       d	| _Y        t#        | j                  dI      r%| j                  j                  | j                         t#        | dJ      st        dK      | j                  rD| j                  j                  j                  j                  dLk(  r| jP                  rt        dM      | jN                  #| jH                  su| jP                  sit        dN      | j                  r&t        | jN                  | j                        | _'        n,| j                  j                  | jN                  dO      | _'        | j                  j                  rt               st        dP      | jn                  dv rt        dQ      | jP                  rt        dR      | jH                  s| jL                  t        dS      t        | jd                  | j                  | jN                  d uT      | _j        y y c c}w c c}w # 1 sw Y   xY w)UNz1Please use `KTOConfig` instead TrainingArguments.z`model` and `ref_model` cannot be the same object. If you want `ref_model` to be the same as `model`, you must mass a copy of it, or `None` if you use peft.zRYou passed model_kwargs to the KTOTrainer. But your model is already instantiated.torch_dtypeautoznInvalid `torch_dtype` passed to the KTOConfig. Expected a string with either `torch.dtype` or 'auto', but got .zZYou passed ref_model_kwargs to the KTOTrainer. But your ref_model is already instantiated.FzPEFT is not installed and you passed a `peft_config` in the trainer's kwargs, please install it with `pip install peft` to use the PEFT modelsis_loaded_in_8bitis_loaded_in_4bitgradient_checkpointing_kwargsuse_gradient_checkpointingenable_input_require_gradsc                 &    |j                  d       y NTrequires_grad_moduleinputrf   s      rA   make_inputs_require_gradz5KTOTrainer.__init__.<locals>.make_inputs_require_grad  s    --d3rC   Tc                 &    |j                  d       y r   r   r   s      rA   r   z5KTOTrainer.__init__.<locals>.make_inputs_require_grad  s    ))$/rC   z`generate_during_eval=True` requires Weights and Biases or Comet to be installed. Please install `wandb` or `comet-ml` to resolve.zMWhen no model is provided, you need to pass the parameter is_encoder_decoder.zdmax_length or a processing_class must be specified when using the default DPODataCollatorWithPaddingzWhen using DPODataCollatorWithPadding, you should set `max_length` in the KTOTrainer's init it will be set to `512` by default, but you should do it yourself in the future.i   zWhen using DPODataCollatorWithPadding, you should set `max_prompt_length` in the KTOTrainer's init it will be set to `128` by default, but you should do it yourself in the future.   zWhen using DPODataCollatorWithPadding with an encoder decoder architecture, you should set `max_completion_length` in the KTOTrainer's init it will be set to `128` by default, but you should do it yourself in the future.)pad_token_idrv   rm   zWhen using DPODataCollatorWithPadding, you should set `remove_unused_columns=False` in your KTOConfig we have set it for you, but you should do it yourself in the future.)apo_zero_unpairedc                       t        t              S N)r   listr@   rC   rA   <lambda>z%KTOTrainer.__init__.<locals>.<lambda>1  s    ;t3D rC   output_router_logitsrouter_aux_loss_coefg        a-  You set `output_router_logits` to `True` in the model config, but `router_aux_loss_coef` is set to `0.0`, meaning the auxiliary loss will not be used. Either set `router_aux_loss_coef` to a value greater than `0.0`, or set `output_router_logits` to `False` if you don't want to use the auxiliary loss.estimate_tokensz$Extracting prompt from train dataset)num_procdesczUnpairing train dataset)r   rD   z'Applying chat template to train dataset)	fn_kwargsr   r   z#Extracting prompt from eval datasetzUnpairing eval datasetz&Applying chat template to eval datasetzTokenizing train dataset)batchedr   r   r    )rk   rm   rD   rn   ro   rv   rq   rw   z"Processing tokenized train datasetzTokenizing eval dataset)r   r   r   r   z!Processing tokenized eval datasetr+   zActual (not effective) batch size must be > 1. KTO will not work properly because the KL term will be equivalent to the implied reward.zExtracting KL train dataset)r   
batch_sizer   r   KL_rk   z%Processing tokenized train KL dataset)r   r   remove_columnsr   )axiszExtracting eval KL datasetz$Processing tokenized eval KL datasetrl   r$   gHzG?zYou have different amounts of desirable/positive and undesirable/negative examples but the weights on the desirable and undesirable losses don't seem to be in an ideal range. Based on your data, we recommend EITHER desirable_weight in [z, z] or undesirable_weight in [zN] (but NOT BOTH). See the documentation on how to optimally set these weights.)ri   r   r   r   r   r   r   r   r   r   r   add_model_tagsacceleratorzXYour `Trainer` does not have an `accelerator` object. Consider upgrading `transformers`.   zrYou cannot use `precompute_ref_log_probs=True` with Deepspeed ZeRO-3. Please set `precompute_ref_log_probs=False`.z]No reference model and model is not a Peft model. Try setting `precompute_ref_log_probs=True`)evaluation_modezYou set `use_liger_loss=True` but the liger kernel is not available. Please install liger-kernel first: `pip install liger-kernel`znYou cannot set `loss_type='apo_zero_unpaired'` with liger-kernel.Only KTO loss is supported with liger-kernel.znYou cannot use `precompute_ref_log_probs=True` with liger kernel. Please set `precompute_ref_log_probs=False`.zYYou cannot use `use_liger_loss=True` with Peft models. Please set `use_liger_loss=False`.)ignore_indexbetause_ref_model)kr~   r   rS   r|   r}   model_init_kwargsgetgetattrr   dtyperef_model_init_kwargsr   from_pretrained_peft_has_been_casted_to_bf16r#   r6   merge_and_unloadr   r   inspect	signaturer8   
parametersgradient_checkpointingr   r   get_input_embeddingsregister_forward_hookr7   bf16r3   generate_during_evalr    r   configrm   is_peft_modelr   r   r   precompute_ref_log_probsr)   rn   warningswarnUserWarningrq   rw   r-   r   rv   remove_unused_columnsuse_dpo_data_collatordisable_dropoutr.   	loss_typepadding_valuero   r   calculate_KL _precomputed_train_ref_log_probs_precomputed_eval_ref_log_probsr   _stored_metricsr   desirable_weightundesirable_weightaux_loss_enabledaux_loss_coefwarnings_issuedr   main_process_firstmapr&   dataset_num_procr'   r%   rg   r   per_device_train_batch_sizerB   column_namesr   maxsumrO   roundsuper__init__model_accepts_loss_kwargsri   r   
_tag_namesAttributeErroris_deepspeed_enabledr   statedeepspeed_plugin
zero_stager*   prepare_modelr   use_liger_lossr(   ImportErrorr5   kto_loss_fn)&selfri   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   _support_gc_kwargsprepare_model_kwargsr   rn   rq   rw   r   train_kl_datasetceval_kl_datasetnum_desirablenum_undesirabledes_weight_lower_bounddes_weight_upper_boundund_weight_lower_boundund_weight_upper_bounddes_weight_in_rangeund_weight_in_range	__class__s&                                        rA   r   zKTOTrainer.__init__D  s   ( :**PQQ%%)u*<Z 
 !!) "E3'qrr $ 6 6+//>K&k3/K64I")%"=K&(K1U$ I  JU  IV  VW  X  4?!-0%%-$&!Is+l  %)$>$>!/33MBK&k3/K64I")%"=K&(K1U$ I  JU  IV  VW  X  8C%m4eS!(88TBSTEi%,<<Y`J_`I .3* "{'> a   [%<%+..0u159WUL_af=g%,9& &5%%&EFQQ:  # )EdFaFa'b$%LPLnLn()HI7VAUV,,5">?4464 ..0FFG_` #5+6EyyWU,?G+E2592
 ((u:;0020 **,BBC[\$$.@.BFXFZD 
 &+ll&E&ED#$$,lmm&*&=&=D#.0QZy5Q"4 0&DN4#@#@!DN3E:DN#v  ??"MMd
 J??&J!!)MMd
 !$!!- $ 6 6 $%%-$2I2IMMd
 %(!%%1d6M6M$($>$>! 6-::#'#:#:#'#:#:M ))-2*\ *.D&).D& $U+~~)(8$$($=$=!"&"9"9373E3E3QT//WgWtWt!2#33%:" 0(,(E(E% !>>22 %D 16-/4,  ++DE II	 $ 5 5"&"9"9 '6Le T$U\\3I3O  T%7%73%>MM  48/0 ^..0 S	)--$t/D/DKq . M <t44;TM *--)&(89..>	 . M '+//(43H3HOt  0    ? $"7"7>V   ,//-*,<=!22A	  0   *--&(=(=>../ . M &*&=&=!22"oo#'#7#7&*&=&=%)%;%;)-)C)C	I *--#..9	 . M '+//*D,A,AB !222  0    ,//#'!22<	  0     33q8$ b  $1#4#4# #??!226 $5 $  ',	(##3#7#7#'!22/?/L/L#p!PQUbUoUoPoA#p@ $8 $  !5mEU5V]^ _+&2&6&6' $#'#C#C!%!6!69 '7 'O '6&9&9'"+!%!6!63B3O3O'raSTXdXqXqSq'rC ': 'O $88W^_#`L  M'$: ;Q?M!#mG&<"="MqQO/).$BYBY0Y\i0imn/npq)r&).$BYBY0Y\i0imq/qst)u&).@U@U0UXg0gko/oqr)s&).@U@U0UXg0gkl/lno)p&&<@U@U&oYo&o#&<@W@W&q[q&q#+/BMM0 1G/GrJ`Ia b22H1ILbKc dW	W $WS	j 	''%-!+!*G 	 	
" */& 4::/0JJ%%doo6t]+ j 
 $$%%66AAQF4KhKh  I  >>!&&$*G*G s  ((!24>>4CSCS!T!%!1!1!?!?`d!?!e 99##,.!T  ~~!66 D  ,, 8  !!T%:%:%F o   7!44499UYUcUckoUo D) $I $q* (skS	 S	s9   &H>v$u=8u=<A2v.vvD+v=
vvc              #     K   | j                   r?| j                  s3| j                  j                  | j                        j                         n	t               5  | j                  r%| j                  j                  | j                         d | j                  r)| j                  j                  | j                  xs d       ddd       y# 1 sw Y   yxY ww)zWContext manager for handling null reference model (that is, peft adapter manipulation).Ndefault)	r   r   r   unwrap_modelri   disable_adapterr   set_adapterr   )r  s    rA   null_ref_contextzKTOTrainer.null_ref_context)  s     
 !!$*?*? ))$**5EEG		M
 $$

&&t'<'<=$$

&&t'>'>'K)L		M 		M 		Ms   ACA+C	CCCr;   c                    | j                   r| j                  s| j                  j                  | j                  | j                  j
                  | j                  j                  dd}| j                  j                  t        | j                  fi |      }g }g }t        |d      D ]  }| j                  |      \  }}| j                  j                  |      }|j                  |j                                | j                   s^| j                  j                  |      }|j                  |j                                 | j                  j#                  dt%        j&                  |      j)                         j+                               | _
        | j                   rQ| j                  j#                  dt%        j&                  |      j)                         j+                               | _
        d| _        t,        | ]         S )	z
        Returns the training [`~torch.utils.data.DataLoader`].

        Subclass of transformers.src.transformers.trainer.get_train_dataloader to precompute `ref_log_probs`.
        Fr   
collate_fnnum_workers
pin_memoryshufflez!Train dataset reference log probsiterabler   reference_logpsnamecolumnreference_KL_logpsT)r   r   r   r   r   dataloader_num_workersdataloader_pin_memoryr   preparer   r   r   compute_reference_log_probsgather_for_metricsappendcpur   
add_columnr   catfloatnumpyr   get_train_dataloader)	r  dataloader_paramsdata_loaderreference_completion_logpsr"  padded_batchreference_completion_logpreference_KL_logpr  s	           rA   r.  zKTOTrainer.get_train_dataloader7  s    ((1V1V"iiCC"00#yy??"ii== ! **22:d>P>P3fTe3fgK)+&!# $k@c d G?C?_?_`l?m<)+<,0,<,<,O,OPi,j)*112K2O2O2QR$$(,(8(8(K(KL](^%&--.?.C.C.EFG "&!3!3!>!>&uyy9S/T/Z/Z/\/b/b/d "? "D   %)%7%7%B%B-eii@R6S6Y6Y6[6a6a6c &C &" 59D1w+--rC   c                 :   || j                   t        d      ||n| j                   }| j                  r| j                  s| j                  j
                  | j                  | j                  j                  | j                  j                  dd}| j                  j                  t        |fi |      }g }g }t        |d      D ]  }| j                  |      \  }}| j                  j                  |      }|j                  |j!                                | j"                  s^| j                  j                  |      }|j                  |j!                                 |j%                  dt'        j(                  |      j+                         j-                               }| j"                  rB|j%                  dt'        j(                  |      j+                         j-                               }| j                   || _         d	| _        t.        	| a  |
      S )a  
        Returns the evaluation [`~torch.utils.data.DataLoader`].

        Subclass of transformers.src.transformers.trainer.get_eval_dataloader to precompute `ref_log_probs`.

        Args:
            eval_dataset (`torch.utils.data.Dataset`, *optional*):
                If provided, will override `self.eval_dataset`. If it is a [`~datasets.Dataset`], columns not accepted
                by the `model.forward()` method are automatically removed. It must implement `__len__`.
        z-Trainer: evaluation requires an eval_dataset.Fr  z Eval dataset reference log probsr  r  r  r"  T)r   )r   rS   r   r   r   per_device_eval_batch_sizer   r#  r$  r   r%  r   r   r&  r'  r(  r)  r   r*  r   r+  r,  r-  r   get_eval_dataloader)
r  r   r/  r0  r1  r"  r2  r3  r4  r  s
            rA   r7  zKTOTrainer.get_eval_dataloaderc  s    D$5$5$=LMM'3'?|TEVEV((1U1U"iiBB"00#yy??"ii== ! **22:l3`N_3`aK)+&!# $k@b c G?C?_?_`l?m<)+<,0,<,<,O,OPi,j)*112K2O2O2QR$$(,(8(8(K(KL](^%&--.?.C.C.EFG (22&uyy9S/T/Z/Z/\/b/b/d 3 L   +66-eii@R6S6Y6Y6[6a6a6c  7  
   ,$0!37D0w**EErC   r2  c           	         t        j                         5  | j                  | j                         5  | j                  r{| j                  |d   |d   |j                  d      |d         j                  }| j                  r| j                  |d   |d   |j                  d	      |d
         j                  }nR| j                  |d   |d         j                  }| j                  r#| j                  |d   |d         j                  }ddd       n| j                  r{| j                  |d   |d   |j                  d      |d         j                  }| j                  r| j                  |d   |d   |j                  d	      |d
         j                  }nR| j                  |d   |d         j                  }| j                  r#| j                  |d   |d         j                  }ddd       | j                  |d   d| j                  | j                        }| j                  r1| j                  |d
   d| j                  | j                        }||fS d}||fS # 1 sw Y   xY w# 1 sw Y   xY w)zfComputes log probabilities of the reference model for a single padded batch of a KTO specific dataset.NrL   rM   r{   ru   )rI   decoder_input_idsrz   KL_prompt_input_idsKL_prompt_attention_maskKL_completion_decoder_input_idsKL_completion_labelsrs   rt   )rI   KL_completion_input_idsKL_completion_attention_maskFaverage_log_probrm   rv   )r   no_gradr   r  rm   ri   r   logitsr   get_batch_logpsrv   )r  r2  completion_logits	KL_logitscompletion_logpsKL_logpss         rA   r&  z&KTOTrainer.compute_reference_log_probs  s   ]]_ 6	!~~%**, %..,0JJ();<+78O+P.:.>.>?].^#/0C#D	 -7 -
 !& *  ,,(,

 ,-B C/;<V/W2>2B2BCd2e'34J'K	 )3 )
 %f & -1JJ()?@+78S+T -7 - !& *
  ,,(,

 ,-F G/;<Z/[ )3 ) %f &/% %8 **(,$%78'34K'L*6*:*:;Y*Z+,?@	 )7 )
 f & (($(NN()>?+78R+S.:.>.>?`.a#/0F#G	 %3 %
 !& " )-$%;<\ZuMv )7 )f & (($(NN()BC+78V+W %3 % !& "g6	!p  //,-"#66#66 0 
 ++34!&#'#:#:#'#:#: , H  )) H))S% %6	! 6	!s$   I2CI&C"I2&I/	+I22I;rC  rz   rA  rv   rm   c                 n   | j                   dd |j                   k7  rt        d      |s(|ddddf   j                         }| ddddddf   } n|j                         }||k7  }d|||k(  <   t        | |      }|r&||z  j	                  d      |j	                  d      z  S ||z  j	                  d      S )a  Compute the log probabilities of the given labels under the given logits.

        Args:
            logits:
                Logits of the model (unnormalized). Shape: (batch_size, sequence_length, vocab_size)
            labels:
                Labels for which to compute the log probabilities. Label tokens with a value of label_pad_token_id are
                ignored. Shape: (batch_size, sequence_length)
            average_log_prob:
                If True, return the average log probability per (non-masked) token. Otherwise, return the sum of the
                log probabilities of the (non-masked) tokens.

        Returns:
            A tensor of shape (batch_size,) containing the average/sum log probabilities of the given labels under the
            given logits.
        Nr>   zKLogits (batch and sequence length dim) and labels must have the same shape.r+   r   )shaperS   cloner4   r   )rC  rz   rA  rv   rm   	loss_maskper_token_logpss          rA   rD  zKTOTrainer.get_batch_logps  s    0 <<,jkk!AqrE]((*FAssAI&F \\^F00	 01v++,/?#i/44R89==;LLL#i/44R88rC   r:   c                    | j                  ||      }| j                  r|d   |j                  d      dni }| j                  rd|d<    ||d   fd|d   i|}|j                  }| j                  ||d   d	| j                  | j                  
      }|j                  d   t        |d         k7  rt        d      t        |j                  d         D cg c]  }|d   |   du s| }	}t        |j                  d         D cg c]  }|d   |   d	u s| }
}||	df   }||
df   }||	df   }||
df   }| j                  r||||||j                  fS |||||fS c c}w c c}w )Nru   r{   rz   r9  Tr   rs   rI   rt   Fr@  r   rl   zThere is a mismatch between the number of examples in this batch and the number of examples for which an output sequence was predicted..)_compute_kl_logpsrm   r   r   rC  rD  rv   rJ  rO   rS   rangeaux_loss)r  ri   r:   rH  model_kwargsoutputsrE  rG  i
chosen_idxrejected_idxchosen_logpsrejected_logpschosen_logitsrejected_logitss                  rA   forwardzKTOTrainer.forward  s    ))%7 &&   34%*YY/M%N
  	   37L/0()
 !<=
 

 $NN//%&"#66#66 0 
 !!!$E'N(;;G 
 "''7'='=a'@!A_AU7^TUEVZ^E^a_
_#()9)?)?)B#CbauW~VWGX\aGabb'
C8),*;<)*c/:+L#,=>   .-RZ\c\l\lmm .-RZ[[ `bs   E&EE"E"policy_chosen_logpspolicy_rejected_logpspolicy_KL_logpsreference_chosen_logpsreference_rejected_logpsr"  c                 "   | j                   r[||z
  j                         j                         }| j                  j	                  |      j                         j                  d      }n.t        j                  d      j                  |j                        }|j                  d   dk7  s|j                  d   dk7  r||z
  }| j                  dk(  r)dt        j                  | j                  ||z
  z        z
  }	n4| j                  dk(  r%dt        j                  | j                  |z        z
  }	| j                  |j                         z  }
npt        j                  g       j                  | j                  j                        }	t        j                  g       j                  | j                  j                        }
|j                  d   dk7  s|j                  d   dk7  r||z
  }| j                  dk(  r)dt        j                  | j                  ||z
  z        z
  }n1| j                  dk(  r"t        j                  | j                  |z        }| j                  |j                         z  }npt        j                  g       j                  | j                  j                        }t        j                  g       j                  | j                  j                        }t        j                   | j"                  	z  | j$                  z  fd      }||
||fS )av  Compute the KTO loss for a batch of policy and reference model log probabilities.

        Args:
            policy_chosen_logps:
                Log probabilities of the policy model for the chosen responses. Shape: (num(chosen) in batch_size,)
            policy_rejected_logps:
                Log probabilities of the policy model for the rejected responses. Shape: (num(rejected) in batch_size,)
            policy_KL_logps: Log probabilities of the policy model for the KL responses. Shape: (batch_size,)
            reference_chosen_logps:
                Log probabilities of the reference model for the chosen responses. Shape: (num(chosen) in batch_size,)
            reference_rejected_logps:
                Log probabilities of the reference model for the rejected responses. Shape: (num(rejected) in
                batch_size,)
            reference_KL_logps: Log probabilities of the reference model for the KL responses. Shape: (batch_size,)

        Returns:
            A tuple of four tensors: (losses, chosen_rewards, rejected_rewards, KL). The losses tensor contains the KTO
            loss for each example in the batch. The chosen_rewards and rejected_rewards tensors contain the rewards for
            the chosen and rejected responses, respectively. The KL tensor contains the detached KL divergence estimate
            between the policy and reference models.
        r   minr+   r   r   )r   meandetachr   r'  clampr   zerostodevicerJ  r   Fsigmoidr   Tensorr+  r   r   )r  r]  r^  r_  r`  ra  r"  klchosen_logratioschosen_losseschosen_rewardsrejected_logratiosrejected_lossesrejected_rewardslossess                  rA   kto_losszKTOTrainer.kto_lossJ  s   < !$66<<>EEGB!!44R8==?EE!ELBQ""#6#=#=>B $$Q'1,0F0L0LQ0OST0T25KK~~& !AIIdii;Kb;P.Q$R R#66 !"AIIdii:J.J$K K!YY)9)@)@)BBN "LL,//0@0@0G0GHM"\\"-001A1A1H1HIN !&&q)Q.2J2P2PQR2SWX2X!69Q!Q~~&"#aii		RBT=T0U&V"V#66"#))DII8J,J"K#yy+=+D+D+FF $ll2.11$2B2B2I2IJO$||B/2243C3C3J3JK""]2D4K4Ko4]^

 ~'7;;rC   c                 `   d}| j                   r| j                  r |d   |d   |d   |j                  d      d}n|d   |d   d	}t        j                         5   |di |j
                  }ddd       | j                  |d   d
| j                  | j                        }|S # 1 sw Y   8xY w)z/Compute KL log probabilities for a given batch.Nr:  r;  r=  r<  )rH   rI   rz   r9  r>  r?  )rH   rI   Fr@  r@   )r   rm   r   r   rB  rC  rD  rv   )r  ri   r:   rH  KL_model_kwargsrF  s         rA   rP  zKTOTrainer._compute_kl_logps  s    &&!&'<!=&+,F&G#$:;).3T)U	# "''@!A&+,J&K#
  <!4O4;;	< ++,-!&#'#:#:#'#:#: , H < <s   B$$B-c                    | j                  ||      }| j                  | j                  |      }| j                  r[||z
  j                         j	                         }| j
                  j                  |      j                         j                  d      }n8t        j                  d      j                  | j
                  j                        }| j                  r|d   |j                  d      dni }| j                  rd|d<   | j                  r |j                         |d	   f|d
   dd|} |j!                         d|d   |j"                  dd|} | j                  j                         |d	   f|d
   dd|}	 | j                  j!                         d|d   |	j"                  dd|}
nt%        |d      r|j!                         }n t'        || j(                  j*                        } ||d	   f|d
   dd|}t%        | j                  d      r| j                  j!                         }n*t'        | j                  | j(                  j*                        } ||d	   f|d
   dd|}
|j-                         }| j                  j-                         }| j/                  | j                  s|j"                  ddddf   n|j"                  |j0                  |d   ddddf   t%        |d      r|j2                  ndt        j4                  |d   t        j6                        j                  | j
                  j                        | j                  s|
j"                  ddddf   n|j"                  |j0                  t%        |d      r|j2                  nd|	      \  }\  }}}}}}||||||||d}| j                  r|j8                  |d<   |S )a!  
        Compute the KTO loss using the Liger-Kernel's LigerFusedLinearKTOLoss.

        Args:
            model:
                The policy model used for generating log probabilities and outputs. It could be an encoder-decoder
                model or a regular language model.
            batch: A dictionary containing the input data and labels for the batch.

        Returns:
            A dictionary containing the following keys:
                - "loss": The computed KTO loss for the batch.
                - "chosen_logits_sum": Sum of the logits for the chosen responses from the policy model.
                - "rejected_logits_sum": Sum of the logits for the rejected responses from the policy model.
                - "chosen_logps": Log probabilities of the chosen responses from the policy model.
                - "rejected_logps": Log probabilities of the rejected responses from the policy model.
                - "chosen_rewards": Rewards for the chosen responses.
                - "rejected_rewards": Rewards for the rejected responses.
                - "kl": The KL divergence between the policy and reference models (detached).

            If auxiliary loss is enabled, the dictionary will also include:
                - "aux_loss": The auxiliary loss from the model outputs.
        r   rc  r+   ru   r{   rO  Tr   rs   rt   )rI   return_dictr9  F)rH   encoder_hidden_states	use_cacheget_decoder)rI   r|  Nr>   biasrl   )r   )	_input
lin_weighttargetr~  preference_labels	ref_input
ref_weightref_biasrn  )losschosen_logits_sumrejected_logits_sumchosen_logps_sumrejected_logps_sumchosen_rewards_sumrejected_rewards_sumrn  rR  r@   )rP  r   r   re  rf  r   r'  rg  r   rh  ri  rj  rm   r   r   get_encoderr}  last_hidden_stater   r   r   base_model_attribute_nameget_output_embeddingsr   weightr~  r   boolrR  )r  ri   r:   r_  r"  rn  rS  encoder_outputsrT  ref_encoder_outputsref_outputs
base_modelref_base_modellm_headref_lm_headr  r  r  r  r  r  r  rf   s                          rA   _compute_loss_ligerzKTOTrainer._compute_loss_liger  s   0 00>!33DNNEJ!$66<<>EEGB!!44R8==?EE!ELBQ""4#3#3#:#:;B &&   34%*YY/M%N
  	   37L/0""1e//1,-$%@A  	O *e'') &':;&5&G&G 	G #?$.."<"<">,-#$%@A # 	# 7$..446 &':;&9&K&K 	K um,"..0
$UDII,O,OP
 ,-$%@A 	G t~~}5!%!;!;!=!(9\9\!](,-$%@A 	K --/nn::< <@<S<S7,,QV4Y`YrYr~~,-ae4!(&!9t#ll5>LOOPTP`P`PgPgh** "33AssF;**")))0&)A[%%t  

	
 "!#"$" !2#6 0"4"4$8	
   !(!1!1F:rC   c           	      
   i }|j                         D ci c]G  \  }}|t        |t        j                        r%|j	                  | j
                  j                        n|I }}}t        j                  |d         }|j                         j	                  | j
                  j                        }t        |      |z
  j	                  | j
                  j                        }| j                  j                  rN| j                  ||      }	|	d   }
|	d   }|	d   }|	d   }|	d   }|	d   }|	d   }|	d	   }| j                  rp|	d
   }ni| j                  ||      }|dd \  }}}}}| j                  r|d   }d|v rt        |d   j                   d         D cg c]  }|d   |   du s| }}t        |d   j                   d         D cg c]  }|d   |   du s| }}|d   |df   }|d   |df   }| j"                  r|d   }nd}nt        j$                         5  | j&                  ?| j)                         5  | j                  | j*                  |      dd \  }}}}}ddd       n%| j                  | j&                  |      dd \  }}}}}ddd       | j-                  |||      \  }
}}}|j/                         |d	<   | j
                  j1                  |      j                         j/                         }| j
                  j1                  |      j                         j/                         }|dkD  r| j
                  j1                  |j3                               j3                         j/                         |d<   | j
                  j1                  |j3                               j3                         j/                         |d<   | j
                  j1                  |j3                               j3                         j/                         |d<   ||d<   |dkD  r| j
                  j1                  |j3                               j3                         j/                         |d<   | j
                  j1                  |j3                               j3                         j/                         |d<   | j
                  j1                  |j3                               j3                         j/                         |d<   ||d<   |
j5                         }| j                  r|| j6                  z  z  }||fS c c}}w c c}w c c}w # 1 sw Y   xY w# 1 sw Y   xY w)zWCompute the KTO loss and other metrics for the given batch of inputs for train or test.rl   r  r  r  r  r  r  r  rn  rR  N   r  r   TF.r"  zrewards/chosen_sumzlogps/chosen_sumlogits/chosen_sumzcount/chosenzrewards/rejected_sumzlogps/rejected_sumlogits/rejected_sumzcount/rejected)itemsr|   r   rm  ri  r   rj  r   r   rO   r   r   r  r   r\  rQ  rJ  r   rB  r   r  ri   rv  itemr'  nansumnanmeanr   )r  ri   r:   metricsr   vrz   
num_chosennum_rejectedmodel_outputru  policy_chosen_logitspolicy_rejected_logitsr]  r^  rq  rt  rn  rR  forward_outputr_  rU  rV  rW  r`  ra  r"  _all_num_chosenall_num_rejectedr  s                                  rA   get_batch_loss_metricsz!KTOTrainer.get_batch_loss_metrics>  sf    fkfqfqfst^b^_abjELL6QQTT$**112WXXtteGn-ZZ\__T%5%5%<%<=
Fj044T5E5E5L5LM99##33E5AL!&)F#/0C#D %12G%H""./A"B$01E$F!)*>?N+,BCd#B$$'
3!\\%7N r"#%$&$$)!, !E)).u5F/G/M/Ma/P)QoAUZ[bUcdeUfjnUnao
o+07H1I1O1OPQ1R+SraW\]dWefgWhlqWqrr)./@)A*c/)R&+01B+CLRUDU+V($$)./C)D&)-&]]_ D~~-!224 D !%TZZ ? C 6 8 ! ! 2D D !LL?C24.!D& <@==#%&("<8FN$4b 	))<<ZHLLNSSU++>>|LPPRWWYA  33N4I4I4KLSSUZZ\ ()   334G4N4N4PQXXZ__a &'   334H4O4O4QRYY[``b '( '5GN#a  334D4K4K4MNUUW\\^ *+   334I4P4P4RSZZ\aac ()   334J4Q4Q4ST[[]bbd )* )9G$%~~  D&&11DW}O u@ prD DD DsB   AT%T+$T+T0T0U7&T5.U5T?	:UUinputsc                    | j                   r)t        | j                  j                  j                        n	t               }|5  | j                  ||      \  }}d d d        j                  | j                  j                        }| j                  j                  r| j                  d       |r|fS |S # 1 sw Y   _xY w)Ntrain
train_eval)r   r   r   rj  r~   r   r  ri  r   is_main_processstore_metrics)r  ri   r  return_outputsnum_items_in_batchcompute_loss_context_managerr  r  s           rA   compute_losszKTOTrainer.compute_loss  s     7;6X6XHT%%,,112^i^k 	% * 	G 77vFMD'	G wwtyy''(++w7;'?"	G 	Gs   B66B?r  r  )r  evalc                 v    |j                         D ]&  \  }}| j                  |   |   j                  |       ( y r   )r  r   r(  )r  r  r  keyvalues        rA   r  zKTOTrainer.store_metrics  s;    !--/ 	@JC  ,S188?	@rC   datasetc                 P    || j                   }|t        |      sy t        |      S r   )r   r"   r   )r  r  s     rA   _get_train_samplerzKTOTrainer._get_train_sampler  s,    ?((G?*W"5 ))rC   c           	         | j                   r)t        | j                  j                  j                        n	t               }|5  |j                  |d   |d   | j                  d| j                  j                        }d|v r|d   }n| j                  ^| j                         5  | j                  j                  |d   |d   | j                  d| j                  j                        }ddd       nD| j                  j                  |d   |d   | j                  d| j                  j                        }ddd       t        | j                  | j                  j                        }| j                  j                  |d      }t        | j                  | j                  j                        }| j                  j                  |d      }||fS # 1 sw Y   xY w# 1 sw Y   xY w)zRGenerate samples from the model and reference model for the given batch of inputs.rL   rM   T)rH   rI   rn   	do_sampler   reference_outputN)skip_special_tokens)r   r   r   rj  r~   r   generatern   r   r   r   r  ri   r2   batch_decode)r  ri   r:   generate_context_managerpolicy_outputr  policy_output_decodedreference_output_decodeds           rA   generate_from_model_and_refz&KTOTrainer.generate_from_model_and_ref  s    7;6X6XHT%%,,112^i^k 	! & 	!NN 23$%<=??!22?? + M "U*#();#< >>)..0 +/::+>+>&+,>&?+01H+I'+&*)-)>)>)K)K ,? ,(  (,~~'>'>"'(:";',-D'E#'??"&%)%:%:%G%G (? ($/	> &mT__dF[F[FhFhi $ 5 5 B B=fj B k()94??DLaLaLnLno#'#8#8#E#EFVlp#E#q $&>>>/ 	 	s'   A!G#AG(AGG	GG&prediction_loss_onlyignore_keysc                 t   |&t        |d      rt        |j                  dg       }ng }| j                  r)t	        | j
                  j                  j                        n	t               }t        j                         5  |5  | j                  ||      \  }}d d d        d d d        | j
                  j                  r| j                  d       |rj                         d d fS i }dv r|d   |d<   d|v r|d   |d<   |j                         D 	
cg c]  \  }	}
|	|vs|
 }}	}
t        j                   || j
                  j                  	      }t        j"                  |j$                  d
   | j
                  j                  	      }j                         ||fS # 1 sw Y   xY w# 1 sw Y   xY wc c}
}	w )Nr   keys_to_ignore_at_inferencer  r  r  zeval_logits/chosenr  zeval_logits/rejected)rj  r   )r   r   r   r   r   r   rj  r~   r   r   rB  r  r  r  rf  r  r   rh  rJ  )r  ri   r  r  r  prediction_context_managerr  r  logits_dictr   r  rC  rz   s                rA   prediction_stepzKTOTrainer.prediction_step  s    uh'%ell4QSUV  7;6X6XHT%%,,112^i^k 	# ]]_ 	G8 	G 77vFMD'	G 	G ++w6:KKM4.. ')078K0LK,- G+29:O2PK./ + 1 1 3L1q7K!LLfT-=-=-D-DEV\\!_T5E5E5L5LMvv..)	G 	G 	G 	G  Ms0   <F'?FF'F4F4F$	F''F1
dataloaderdescriptionmetric_key_prefixc                    | j                   rt        |j                        }t        j                  t        |      | j                  j                        }|j                  j                  |      }| j                  |      }	| j                  |	      }	t        t        |	d               D 
cg c]  }
|	d   |
   du s|
 }}
|	d   |   |	d   |    t        | |	d         d}| j                  | j                  |      \  }}t        j                  g dt!        |d   ||      D cg c]#  \  }}}||t        |      d	 |t        |      d	 g% c}}}
      }d| j                  j"                  v r+t%        j&                  dt%        j(                  |      i       d| j                  j"                  v rt+        d|       t,        | ]  |||||      }|S c c}
w c c}}}w )z
        Overriding built-in evaluation loop to store metrics for each batch. Prediction/evaluation loop, shared by
        `Trainer.evaluate()` and `Trainer.predict()`.

        Works both with or without labels.
        )r   rl   FrL   rM   rF   )rL   rM   rF   )PromptPolicyz	Ref ModelN)columnsdatawandbgame_log)r  comet_mlzgame_log.csv)r   table)r   rO   r  randomsamplerQ  r   eval_batch_sizeselectr   _prepare_inputsr   r  ri   pd	DataFramerN   	report_tor  logTabler1   r   evaluation_loop)r  r  r  r  r  r  num_samplesrandom_indicesrandom_batch_datasetrandom_batchrU  target_indiciestarget_batchr  ref_output_decodedrF   polrefr  initial_outputr  s                       rA   r  zKTOTrainer.evaluation_loop'  s     $$j001K#]]5+=AZAZ[N $.#5#5#<#<^#L --.BCL//=L*/L4I0J*KqQ|\cOdefOgkpOpqqOq$01C$D_$U)56M)N)_6*o6|H7MNL
 9=8X8XY]YcYceq8r5!#5LL9 -0X0FH]_q,r (S SV/S[]1CDE $))---		:u{{'>?@TYY000-' 0%9;HY
 ; rs   .G>G*(Glogs
start_timec           	      ,   d|v rdnd}|dk(  rdnd}dD ]  }d| | j                   |   v st        j                  | j                   |   d|          j                         j	                         }dD ]l  }t        j                  | j                   |   | d	| d
         j                         j	                         |z  || | d	| <   | j                   |   | d	| d
= n | j                   |   d| =  | d|v r| d|v r|| d   || d   z
  || d<   | j                   |   j                         D ]=  \  }}	t        j                  |	      j                         j	                         || | <   ? | j                   |= t        
| !  ||      S )a1  
        Log `logs` on the various objects watching training, including stored metrics.

        Args:
            logs (`dict[str, float]`):
                The values to log.
            start_time (`float` or `None`, *optional*, defaults to `None`):
                Start time of the training.
        r  r  r  eval_r   )chosenrejectedzcount/)rewardslogpsrC  /_sumzrewards/chosenzrewards/rejectedzrewards/margins)	r   r   rm  r   r  r  re  r   r  )r  r  r  r  rk   split	count_summetricr  r  r  s             rA   r  zKTOTrainer.log`  s    !'$WF
&&0b+ 
	GEw4#7#7
#CC!LL)=)=j)IFSXRYJZ)[\``bggi	< RFT%9%9*%EPQRWQXX\F]%^_ccejjl#$ F8F81UG45
 ,,Z8F81UG49PQR ((4veW5EF
	G X^$,F8;K1LPT1T/3vhn4M/NQUY_X``pVqQr/rDF8?+, 00<BBD 	ILC%*\\'%:%?%?%A%F%F%HDF8C5!"	I  ,w{4,,rC   c                    | j                   j                  *t        | j                   j                        j                  }n(| j                   j                  j                  d      d   }| j                  |       t        | !  ||       y )Nr  r>   )
model_name)	r   hub_model_idr   
output_dirr   r  create_model_cardr   _save_checkpoint)r  ri   trialr  r  s       rA   r  zKTOTrainer._save_checkpoint  sl    99!!)dii22388J//55c:2>J*5 .rC   r  dataset_nametagsc                    | j                         syt        | j                  j                  d      r^t        j
                  j                  | j                  j                  j                        s!| j                  j                  j                  }nd}|t               }nt        |t              r|h}nt        |      }t        | j                  j                  d      r|j                  d       |j                  | j                         t        j                  d      }t!        ||| j"                  ||t%               r.t&        j(                  t&        j(                  j+                         ndt-               d|dd	      }|j/                  t        j
                  j1                  | j2                  j4                  d
             y)a  
        Creates a draft of a model card using the information available to the `Trainer`.

        Args:
            model_name (`str` or `None`, *optional*, defaults to `None`):
                Name of the model.
            dataset_name (`str` or `None`, *optional*, defaults to `None`):
                Name of the dataset used for training.
            tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
                Tags to be associated with the model card.
        N_name_or_pathunsloth_versionunslothaJ          @article{ethayarajh2024kto,
            title        = {{KTO: Model Alignment as Prospect Theoretic Optimization}},
            author       = {Kawin Ethayarajh and Winnie Xu and Niklas Muennighoff and Dan Jurafsky and Douwe Kiela},
            year         = 2024,
            eprint       = {arXiv:2402.01306},
        }KTOz7KTO: Model Alignment as Prospect Theoretic Optimizationz
2402.01306)r  r  r  r  r	  	wandb_url	comet_urltrainer_nametrainer_citationpaper_titlepaper_idz	README.md)is_world_process_zeror   ri   r   ospathisdirr  setr|   r}   addupdater   textwrapdedentr/   r  r    r  runget_urlr0   savejoinr   r  )r  r  r  r	  r  citation
model_cards          rA   r  zKTOTrainer.create_model_card  sG   " ))+4::$$o6rww}}TZZM^M^MlMl?m**88JJ <5Dc"6Dt9D4::$$&78HHYDOO$?? $  )!!**%-?-AeiiF[eii'')ae.0%Q!

 	TYY%9%9;GHrC   )NNNNNNNNN)NNNNNNNr   )FiF)FN)r  )NNr  )NNN)@__name__
__module____qualname____doc__r   r   r   nnModuler}   r   r,   r   rV   r   r   r   r   r   r
   r   r   tupler   optim	Optimizerlr_schedulerLambdaLRrm  r!   r   r   r  r   r.  r7  r&  staticmethodFloatTensor
LongTensorr  intrD  r\  rv  rP  r  r  r	   r  r,  r   r  utilsr  Samplerr  r  r  r  r  r  r  __classcell__)r  s   @rA   r   r     s   +Z J 9=FJ+/EI 04>B59Vbhl&*FJ,0*.%c_bii45c E/299c"ABCc 	c
  (c uWd3<.@%@ABc #)+=?UWeef
c  -c Xb/&9:;c D12c %++//1I1I1R1RRSc (0%,,9UW\WcWc9c0d'ec d^c  "(N+;T+A"BC!c" %SM#c$ #3-%cJ M M*.j *.X5F0A 5FZ 5FnM* M* M*^  "'"&#(+9!!+9  +9 +9  	+9
 !+9 
		+9 +9Z1\YY1\'+CtU=M=M7M1N,N'O1\	u  %"3"3U5F5FHYHYY	Z1\fK<"..K<  %00K< **	K<
 !& 1 1K< #("3"3K< "--K< 
u  %"3"3U5F5FHYHYY	ZK<Z:HTn CtU%5%55667nh _bii/0 S%c 1223 
u||U5<<c5<<6G1H#HII	J0@T#u*%5 @7?C[ @jn @*(7*; *xPUP[P[P`P`PhPhGi *.?S%BRBR=R8S .?X]^acf^fXg .?j ,0$/_bii/0$/ S%c 1223$/ #	$/
 d3i($/T 04+/!'77 7 'tn	7
 d3i(7 7 
7r!-S%Z( !-huo !-QU !-H/ %)&*,0	<ISM<I sm<I CcD()	<IrC   r   r   )`r   r  r  r  r   collectionsr   
contextlibr   r   operatorr   pathlibr   typingr   r	   r
   r   r   r   r-  rP   pandasr  r   torch.nnr(  torch.nn.functional
functionalrk  
accelerater   accelerate.utilsr   datasetsr   r   r   torch.utils.datar   r   transformersr   r   r   r   r   r   r   r   r   r   r   r    transformers.trainer_utilsr!   r"   transformers.utilsr#   
data_utilsr%   r&   r'   import_utilsr(   modelsr)   r*   
kto_configr,   r3  r-   r.   r/   r0   r1   r2   r3   r4   liger_kernel.chunked_lossr5   peftr6   r7   r8   r  r9   RUNNING_NAMErV   r}   r   rB   rg   r   r   r@   rC   rA   <module>rM     sQ    	    # 2   I I       # ! 2  :    B 0 i i 4 > !	 	 	 AOO A4T#Y/ Dd3i4H 7T#Y7$7 
#tCy.7twT#s(^ w4E w[_ wttI tIrC   