
    biZ                       d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZm	Z	 d dl
mZ d dlmZ d dlmZmZmZmZmZmZ d dlZd dlZd dlZd dlmZ d dlmc mZ d dlm Z  d dl!m"Z" d d	l#m$Z$ d d
l%m&Z& d dlm'Z' d dl(m)Z)m*Z* d dl+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7 d dl8m9Z9 d dl:m;Z;m<Z< d dl=m>Z> ddl?m@Z@ ddlAmBZB ddlCmDZDmEZE ddlFmGZG ddlHmIZImJZJmKZKmLZLmMZMmNZNmOZOmPZPmQZQ  e>       r
d dlRmSZSmTZTmUZU  e7       rd dlVZV e6       rd dlWmXZX  eB       rd dlYZYerd dl+m0Z0mZZZ  e"e[      Z\dZ]dZ^	 d)de_e`eae   f   ddd ed   d!e_e`eae   f   fd"Zbd)d#e_e`ef   d$d%d!e_fd&Zc G d' d(e3      Zdy)*    N)defaultdict)contextmanagernullcontext)
itemgetter)Path)TYPE_CHECKINGAnyCallableLiteralOptionalUnion)PartialState)
get_logger)tqdm)Dataset)autocast)
DataLoaderSequentialSampler)AutoModelForCausalLMBaseImageProcessorDataCollatorFeatureExtractionMixinPreTrainedModelPreTrainedTokenizerBaseProcessorMixinTrainerTrainingArgumentsis_comet_availableis_sklearn_availableis_wandb_available)TrainerCallback)EvalLoopOutput
has_length)is_peft_available   )maybe_apply_chat_template)is_joblib_available)create_reference_modelprepare_deepspeed   )	BCOConfig)	DPODataCollatorWithPaddingRunningMomentsdisable_dropout_in_modelgenerate_model_cardget_comet_experiment_urllog_table_to_comet_experimentpad_to_lengthpeft_module_casting_to_bf16selective_log_softmax)	PeftModelget_peft_modelprepare_model_for_kbit_training)LogisticRegression)r   PreTrainedTokenizerzrunning.jsonzclf.pklbatch	tokenizerr9   embedding_tokenizerreturnc                     || d   d      }|d   }|d   }t        | d   | d         D cg c]
  \  }}||z    }}} ||d      }	|	d   }
|	d   }t        |
|      D cg c]  \  }}|t        |      d  }}}t        ||      D cg c]  \  }}|t        |      d  }}}t        ||      D cg c]  \  }}t        j                  ||g       }}}|
D cg c]  }t        j                  |       }
}t        |
|      D ]'  \  }}t        |      t        |      k7  st        d       |D cg c]  }t        |       }}t        t        ||
|            D ]0  \  }\  }}}t        j                  ||d|       r$||xx   d	z  cc<   2 t        |
|      D cg c]
  \  }}|d|  }}}t        ||      D cg c]
  \  }}|d|  }}}t        ||      D ]'  \  }}t        |      t        |      k7  st        d
       t        |
|      D cg c]
  \  }}||d  }}}t        ||      D cg c]
  \  }}||d  }}}t        ||||      }|( || d   dd      }|j                  |d   |d   d       |S c c}}w c c}}w c c}}w c c}}w c c}w c c}w c c}}w c c}}w c c}}w c c}}w )z-Tokenize a batch from a BCO specific dataset.promptF)add_special_tokens	input_idsattention_mask
completionNz`The elements in 'full_input_ids' and 'full_concat_input_ids' must have the same pairwise length.r*   z@Prompt input ids and attention mask should have the same length.prompt_input_idsprompt_attention_maskanswer_input_idsanswer_attention_maskT)
truncationr@   )embedding_input_idsembedding_attention_mask)
ziplennpconcatenatearray
ValueError	enumeratearray_equaldictupdate)r:   r;   r<   prompt_tokenizedrE   rF   r?   rC   prompt_and_completionfull_tokenizedfull_input_idsfull_attention_maskfprG   rH   afull_concat_input_idsfullconcatresponse_token_ids_start_idxidxrmoutputembedding_tokenizeds                             R/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/trl/trainer/bco_trainer.py	_tokenizerh   ]   s+    !xUK'4,-=>KNuU]`efr`sKtu5GVZVj0uu4ON#K0N()9:03NDT0UV1#a&(VV589LNc5deTQQs1vx[ee ADDTVf@gh1R^^QF3hh+9:abhhqk:N:N,AB ft9F#r  5E#EqCF#E #E $C(8.Jf$gh 3YaA~~a2A'(-2-3 +.n>Z*[\$!Q"1\\/23FHd/eftq!QrUff$&;< a1q6SV_``a +.n>Z*[\$!Q!"\\/23FHd/eftq!QqrUff)3)3	F &1%/dglm':;'G,?@P,Q	
 Ms v
 We i: $F ]f ]fs;   J"JJ6!JJ 6J%"J*J0J6;J<examplemodelr   c                    | d   }| d   }|d    d||d    d||d    d| d   i}|d   st        |t              st        dt        |             t        |t              st        dt        |             | d   | d	   | d
   | d   d}|d   }|d   j                  }|d   j
                  }	||d   d   k7  r|dz  }|	|d
   d   k7  r|dz  }t        |d         t        |d
         z   |kD  rFdD ]A  }
|d   dk(  r||
   d|d    ||
<   |d   dk(  r||
   |d    d ||
<   2t        d|d           t        |d         t        |d
         z   |kD  rdD ]  }
||
   d||d   z
   ||
<    |d   ||d    d<   |d	   ||d    d	<   |d   |d
   z   ||d    d<   |d	   |d   z   ||d    d<   ||t        |d         dk(  s||d   d   k7  r`|g||d    d   z   ||d    d<   dg||d    d	   z   ||d    d	<   |g||d    d   z   ||d    d<   dg||d    d   z   ||d    d<   t        |d
         dk(  s|	|d
   d   k7  r0||d    d   |	gz   ||d    d<   ||d    d   dgz   ||d    d<   ||d    d   dd ||d    d<   |d   gt        ||d    d         z  ||d    d   dt        ||d    d          |S  |d   |d|d   d       } |d   |d|d   d       }|d!   ||d    d<   |d"   ||d    d	<   |d!   ||d    d<   |d"   ||d    d<   |=t        |d#      r1|j                  t        j                  |d         $      ||d    d%<   |S )&a  Process tokens of a BCO specific dataset.

    At this stage, we don't convert to PyTorch tensors yet; we just handle the truncation in case the prompt +
    completion responses is/are too long. First we truncate the prompt; if we're still too long, we truncate the
    completion.

    We also create the labels for the completion responses, which are of length equal to the sum of the length of the
    prompt and the completion response, with label_pad_token_id for the prompt tokens.
    r?   rC   prefixlabelis_encoder_decoderz prompt should be an str but got z$completion should be an str but got rE   rF   rG   rH   rD   
max_lengthr;   r   r*   )rE   rF   truncation_mode
keep_startNmax_prompt_lengthkeep_endzUnknown truncation mode: )rG   rH   completion_input_idscompletion_attention_maskcompletion_labelslabel_pad_token_idTmax_completion_length)rI   ro   r@   rA   rB   %prepare_decoder_input_ids_from_labels)labelscompletion_decoder_input_ids)
isinstancestrrQ   typebos_token_ideos_token_idrM   hasattrrz   torchtensor)ri   rj   kwargsr?   rC   r:   
all_tokensro   r   r   kcompletion_tokensprompt_tokenss                rg   _process_tokensr      sG    XF&J (
F#V(
J'(
E"GG$4E &' &#&?V~NOO*c*CDDTCUVWW !((: ;%,-D%E '(: ;%,-D%E	

 L)
k*77k*77:&89!<<!OJ:&89"==!OJ z,-.Z@R5S1TTWaaB ^+,<$.qM2OF;N4O$PJqM-.*<$.qM6:M3N2N2P$QJqM$'@HYAZ@[%\]]^ z,-.Z@R5S1TTWaaB Z *1.X
VDW=X0X Y
1Z 8BBT7U!""234<FG^<_!""789)*Z8J-KK 	!""678 ./*=T2UU 	!"";<=
 #:012a7<:VhKijkKl;l@L~PUh'((89Q @)**:;< FGC%h'((=>K E)**?@A EQ>TYh'((<=U D)**>?@ JKeh'((ABO I)**CDE z,-.!3|zRdGefhGi7i?DxHXGYYmEn?os @EVH%&&:;< EJ(#$$=>EEEVH%&&?@A 9>AQ@RRf>g8hij8k!""345'(n
)**:;<=n>!""3456iEVHEUDVVfBg<h8ij* L# 0F;/4F;R4Shl
 ,{+t7J0K`d
 8E[7Q!""234<IJZ<[!""7898I+8V!""345@QRb@c!"";<=0W!XGLGrGr||E*=$>? Hs HEVH%&&BCD L    c            $           e Zd ZdZddgZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dRdeeej                  e	f   de
eeej                  e	f      dede
e   d	e
eeee	ef   f      d
e
eeeeef      de
e   de
eg ef      de
ee      deej0                  j2                  ej0                  j4                  j6                  f   de
eej8                  ej8                  gej8                  f      de
e   de
eegef      de
e	   de
e	   de
e   de
e   f" fdZed        Z dejB                  dejB                  fdZ"dejF                  dejF                  dejB                  fdZ$dee	eeejF                  f   f   deejB                  ejB                  f   fdZ%dSd ed!e&dejB                  fd"Z' fd#Z( fd$Z)e*d%        Z+de,f fd&Z-dTd	e
e   de,f fd'Z.d(edefd)Z/e0	 	 	 dUd*ejB                  d+ejF                  d,e1d-e&d.e1dejB                  fd/       Z2dej                  dee	eeejF                  f   f   deejB                  ejB                  ejB                  ejB                  f   fd0Z3d1ejB                  dejB                  fd2Z4	 dVd3ejB                  d4ejB                  d5ejB                  d6ejB                  d7e
ejB                     d1e
ejB                     d8e1deejB                  ejB                  ejB                  ejB                  f   fd9Z5	 dVdee	eeejF                  f   f   d8e1fd:Z6	 	 dWdeeej                  f   d;ee	eej8                  e7f   f   deej8                  eej8                  ee	ej8                  f   f   f   fd<Z8dXd=ee	e9f   d>e:d?   ddfd@Z;dTd e
e   de
ejx                  jz                  j|                     fdAZ?dee	ejF                  f   dee	e	f   fdBZ@	 dTdeeej                  f   d;ee	eej8                  e7f   f   dCe1dDe
ee	      fdEZA	 	 	 dYdFe,dGe	dCe
e1   dDe
ee	      dHe	def fdIZBdTdJee	e9f   dKe
e9   ddf fdLZC fdMZD	 	 	 dZdNe
e	   dOe
e	   dPee	ee	   df   fdQZE xZFS )[
BCOTrainera  
    Initialize BCOTrainer from [BCO](https://huggingface.co/papers/2404.04656) paper.

    Args:
        model (`transformers.PreTrainedModel`):
            The model to train, preferably an `AutoModelForSequenceClassification`.
        ref_model (`PreTrainedModelWrapper`):
            Hugging Face transformer model with a casual language modelling head. Used for implicit reward computation
            and loss. If no reference model is provided, the trainer will create a reference model with the same
            architecture as the model to be optimized.
        args (`BCOConfig`):
            The arguments to use for training.
        train_dataset (`datasets.Dataset`):
            The dataset to use for training.
        eval_dataset (`datasets.Dataset`):
            The dataset to use for evaluation.
        processing_class (`PreTrainedTokenizerBase` or `BaseImageProcessor` or `FeatureExtractionMixin` or `ProcessorMixin`, *optional*):
            Processing class used to process the data. If provided, will be used to automatically process the inputs
            for the model, and it will be saved along the model to make it easier to rerun an interrupted training or
            reuse the fine-tuned model.
        data_collator (`transformers.DataCollator`, *optional*, defaults to `None`):
            The data collator to use for training. If None is specified, the default data collator
            (`DPODataCollatorWithPadding`) will be used which will pad the sequences to the maximum length of the
            sequences in the batch, given a dataset of paired sequences.
        model_init (`Callable[[], transformers.PreTrainedModel]`):
            The model initializer to use for training. If None is specified, the default model initializer will be
            used.
        callbacks (`list[transformers.TrainerCallback]`):
            The callbacks to use for training.
        optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`):
            The optimizer and scheduler to use for training.
        preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`):
            The function to use to preprocess the logits before computing the metrics.
        peft_config (`dict`, defaults to `None`):
            The PEFT configuration to use for training. If you pass a PEFT configuration, the model will be wrapped in
            a PEFT model.
        compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
            The function to use to compute the metrics. Must take a `EvalPrediction` and return a dictionary string to
            metric values.
        model_adapter_name (`str`, defaults to `None`):
            Name of the train target PEFT adapter, when using LoRA with multiple adapters.
        ref_adapter_name (`str`, defaults to `None`):
            Name of the reference PEFT adapter, when using LoRA with multiple adapters.
    trlbcoNrj   	ref_modelargstrain_dataseteval_datasetprocessing_classdata_collator
model_init	callbacks
optimizerspreprocess_logits_for_metricspeft_configcompute_metricsmodel_adapter_nameref_adapter_nameembedding_funcr<   c                    |t               r
t               st        d      t        |      t        u rt        d      t        |t              s|||u rt        d      |j                  i }nt        |t              st        d      |j                  }|j                  d      }|Xt        |t              r|dk7  rt        t        |      }|dk7  r)t        |t        j                        st        d| d      ||d<   |j                  i }nt        |t              st        d	      |j                  }|j                  d      }|Xt        |t              r|dk7  rt        t        |      }|dk7  r)t        |t        j                        st        d| d      ||d<   t        |t              rt        j                  |fi |}t        |t              rt        j                  |fi |}d
| _        t#               s|t        d      t#               r(|%t        |t$              r|j'                         }t        |dd
      st        |dd
      rht)        |d      xr. dt+        t-        j.                  t0              j2                        v }d|j4                  i}|r|j6                  |d<   t1        |fi |}nK|j4                  r?t)        |d      r|j9                          n"d }|j;                         j=                  |       t?        ||      }|j@                  rkt        |dd
      r^tC        |       d| _        nK|j4                  r?t)        |d      r|j9                          n"d }|j;                         j=                  |       |jD                  rtG               stI               st        d      ||jJ                  jL                  | _&        n(|jL                  t        d      |jL                  | _&        t#               xr t        |t$              | _'        || _(        || _)        |r|| _*        n0| jN                  s|jV                  rd | _*        ntY        |      | _*        |t        d      |jZ                  t]        j^                  dt`               d}|jZ                  |jZ                  }|jb                  t]        j^                  dt`               d}|jb                  |jb                  }d }|jd                  (| jL                  rt]        j^                  dt`               d}|jd                  | jL                  r|jd                  }|atg        |jh                  |jj                  | jL                        }|jl                  r!d
|_6        t]        j^                  dt`               d| _7        nd
| _7        |jp                  r,ts        |       | jT                  ts        | jT                         | _-        |jD                  | _"        |jj                  | _5        |jt                  |jt                  n|jh                  | _:        | _1        |jv                  | _;        || _2        |jV                  | _+        d
| _<        d
| _=        t}        d       | _?        |j                  | _@        t        |jJ                  dd
      | _A        t        |jJ                  d d!      | _B        | j                  r)| j                  d!k(  rt]        j^                  d"t`               || _C        || _D        d|j                  d#<   t               j                         5  |j                  t        d$|i|j                  %      }|$|j                  t        d$|i|j                  %      }|j                  t        d|| j                  d&|j                  d'(      }d)| jL                  || jZ                  | jv                  | jj                  | jb                  | jd                  d*}|j                  t        ||j                  d+,      }||j                  t        || j                  d&d|j                  d-.      }d)| jL                  || jZ                  | jv                  | jj                  | jb                  | jd                  d*}|j                  t        ||j                  d/,      }|j                  d0 |j                  d12      }|j                  d3 |j                  d42      }d d d        t        $| =  |||||||||	|
|5       d
| _P        t)        | j                  d6      r%| j                  j                  | j                         t)        | d7      st        d8      | j                  rD| j                  j                  j                  j                  d9k(  r| jV                  rt        d:      | jT                  #| jN                  su| jV                  sit        d;      | j                  r&t        | jT                  | j                        | _*        n,| j                  j                  | jT                  d<      | _*        t        | j                  =      | _]        | j                  |j                  ry | j                  | j                  j                  >      }| j                  | j                  j                  >      }t        j                  ||fd?@      } t        j                  t        j                  |d d d?f         t        j                  |d d d?f         fd?@      }!t        dAB      j                  | j                         j                         j                         |!j                         j                               | _j        | j                  j                  |j                         j                         j                         t        j                  |d d d?f         j                         j                               }"| j                  j                  |j                         j                         j                         t        j                  |d d d?f         j                         j                               }#t        j                  dC|" dD|#        y # 1 sw Y   xY w)ENz}BCOTrainer with UDM requires the scikit-learn and joblib libraries. Please install it with `pip install scikit-learn joblib`.z3Please use `BCOConfig` instead `TrainingArguments`.z`model` and `ref_model` cannot be the same object. If you want `ref_model` to be the same as `model`, you must mass a copy of it, or `None` if you use peft.zRYou passed model_kwargs to the BCOTrainer. But your model is already instantiated.torch_dtypeautoznInvalid `torch_dtype` passed to the BCOConfig. Expected a string with either `torch.dtype` or 'auto', but got .zZYou passed ref_model_kwargs to the BCOTrainer. But your ref_model is already instantiated.FzPEFT is not installed and you passed a `peft_config` in the trainer's kwargs, please install it with `pip install peft` to use the PEFT modelsis_loaded_in_8bitis_loaded_in_4bitgradient_checkpointing_kwargsuse_gradient_checkpointingenable_input_require_gradsc                 &    |j                  d       y NTrequires_grad_moduleinputre   s      rg   make_inputs_require_gradz5BCOTrainer.__init__.<locals>.make_inputs_require_grad  s    --d3r   Tc                 &    |j                  d       y r   r   r   s      rg   r   z5BCOTrainer.__init__.<locals>.make_inputs_require_grad  s    ))$/r   z`generate_during_eval=True` requires Weights and Biases or Comet to be installed. Please install `wandb` or `comet-ml` to resolve.zMWhen no model is provided, you need to pass the parameter is_encoder_decoder.zdmax_length or a processing_class must be specified when using the default DPODataCollatorWithPaddingzWhen using DPODataCollatorWithPadding, you should set `max_length` in the `BCOConfig`. It will be set to `512` by default, but you should do it yourself in the future.   zWhen using DPODataCollatorWithPadding, you should set `max_prompt_length` in the `BCOConfig`. It will be set to `128` by default, but you should do it yourself in the future.   zWhen using DPODataCollatorWithPadding with an encoder decoder architecture, you should set `max_completion_length` in the BCOTrainer's init it will be set to `128` by default, but you should do it yourself in the future.)pad_token_idrx   rn   zWhen using DPODataCollatorWithPadding, you should set `remove_unused_columns=False` in your BCOConfig we have set it for you, but you should do it yourself in the future.c                       t        t              S N)r   list r   rg   <lambda>z%BCOTrainer.__init__.<locals>.<lambda>9  s    ;t3D r   output_router_logitsrouter_aux_loss_coefg        a-  You set `output_router_logits` to `True` in the model config, but `router_aux_loss_coef` is set to `0.0`, meaning the auxiliary loss will not be used. Either set `router_aux_loss_coef` to a value greater than `0.0`, or set `output_router_logits` to `False` if you don't want to use the auxiliary loss.estimate_tokensr;   )	fn_kwargsnum_proc)r;   r<   zTokenizing train dataset)batchedr   r   desc )rl   rn   r;   ro   rq   rx   rs   ry   z"Processing tokenized train dataset)r   r   r   zTokenizing eval dataset)r   r   r   r   z!Processing tokenized eval datasetc                     | d   S Nrm   r   xs    rg   r   z%BCOTrainer.__init__.<locals>.<lambda>  s
    !G* r   zFiltering desirable examples)r   r   c                     | d    S r   r   r   s    rg   r   z%BCOTrainer.__init__.<locals>.<lambda>  s    aj. r   zFiltering undesirable examples)rj   r   r   r   r   r   r   r   r   r   r   add_model_tagsacceleratorzXYour `Trainer` does not have an `accelerator` object. Consider upgrading `transformers`.   zrYou cannot use `precompute_ref_log_probs=True` with Deepspeed ZeRO-3. Please set `precompute_ref_log_probs=False`.z]No reference model and model is not a Peft model. Try setting `precompute_ref_log_probs=True`)evaluation_mode)r   )sample_sizer   dimbalanced)class_weightz(UDM classifier training scores: chosen: z, rejected: )nr   r'   ImportErrorr   r   rQ   r}   r~   model_init_kwargsgetgetattrr   dtyperef_model_init_kwargsr   from_pretrained_peft_has_been_casted_to_bf16r$   r5   merge_and_unloadr   r   inspect	signaturer7   
parametersgradient_checkpointingr   r   get_input_embeddingsregister_forward_hookr6   bf16r3   generate_during_evalr    r   configrn   is_peft_modelr   r   r   precompute_ref_log_probsr(   ro   warningswarnUserWarningrs   ry   r,   r   rx   remove_unused_columnsuse_dpo_data_collatordisable_dropoutr.   padding_valuerq    _precomputed_train_ref_log_probs_precomputed_eval_ref_log_probsr   _stored_metricsbetaaux_loss_enabledaux_loss_coefr   r<   warnings_issuedr   main_process_firstmapr&   dataset_num_procrh   r   filtersuper__init__model_accepts_loss_kwargsrj   r   
_tag_namesAttributeErroris_deepspeed_enabledr   statedeepspeed_plugin
zero_stager)   prepare_modelr-   runningresume_from_checkpoint_get_sample_prompt_embeddingsr   prompt_sample_sizecat	ones_like
zeros_liker8   fitcpufloatnumpyclfscoreloggerinfo)%selfrj   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r<   r   r   r   _support_gc_kwargsprepare_model_kwargsr   ro   rs   ry   r   	desirableundesirablechosen_embeddingsrejected_embeddings
embeddingsr{   chosen_meanrejected_mean	__class__s%                                       rg   r   zBCOTrainer.__init__L  s@   , %/C/EJ]J_ P  :**RSS%%%*;	U@RZ 
 !!) "E3'qrr $ 6 6+//>K&k3/K64I")%"=K&(K1U$ I  JU  IV  VW  X  4?!-0%%-$&!Is+l  %)$>$>!/33MBK&k3/K64I")%"=K&(K1U$ I  JU  IV  VW  X  8C%m4eS!(88TBSTEi%,<<Y`J_`I .3* "{'> a   [%<%+..0u159WUL_af=g%,9& &5%%&EFQQ:  # )EdFaFa'b$%LPLnLn()HI7VAUV,,5">?4464 ..0FFG_` #5+6EyyWU,?G+E2592
 ((u:;0020 **,BBC[\$$.@.BFXFZD 
 &+ll&E&ED#$$,lmm&*&=&=D#.0QZy5Q"4 0&DN4#@#@!DN3E:DN#v  ??"MMc
 J??&J!!)MMc
 !$!!- $ 6 6 $%%-$2I2IMMd
 %(!%%1d6M6M$($>$>! 6-::#'#:#:#'#:#:M ))-2*\ *.D&).D& $U+~~)(8$$($=$=!"&"9"9373E3E3QT//WgWtWt!2#33%:"(,(E(E% 16-/4,  ++DE II	 '6Le T$U\\3I3O  T%7%73%>MM  -#6  48/0^..0 H	)--)kCS5T_c_t_t . M '+//-*,<=!22  0   *--(8QUQiQij../ . M &*&=&=-"oo#'#7#7&*&=&=%)%;%;)-)C)C	I *--#..9	 . M '+//,<UYUmUmn !222  0   !*.*A*A!1"&//'+';';*.*A*A)-)?)?-1-G-G		  ,//#'!22<	  0   &,,$t/D/DKi - I (..(43H3HOo / KMH	T 	''%-!+!*G 	 	
" */& 4::/0JJ%%doo6t]+ j 
 $$%%66AAQF4KhKh  I  >>!&&$*G*G s  ((!24>>4CSCS!T!%!1!1!?!?`d!?!e%$2B2BC&$*E*E >>yVZV_V_VrVr>s"@@Z^ZcZcZvZv@wYY 13FGQO
__.q!t45u7G7GH[\]_`\`Ha7bcij
 &:>BBNN""$**,fjjl.@.@.B
 hhnn!!#))+113U__EVWXZ[W[E\5]5a5a5c5i5i5k
 ##%++-335u7G7GH[\]_`\`Ha7b7f7f7h7n7n7p
 	>{m<XeWfgh]H	 H	s   Gr>>sc                 >    | j                   d uxr | j                  d uS r   )r   r<   r  s    rg   match_underlying_distributionz(BCOTrainer.match_underlying_distribution  s$    ""$.W43K3KSW3WWr   prompt_embeddingsr=   c                    |j                   }|j                  }| j                  j                  }| j                  j	                  || j
                  j                        }|j                  d   }|j                  d      | j
                  j                  k7  }| j                  j                  |      }|j                  d   dk(  rt        j                  g ||      S | j                  j                  |j                         j                         j!                               dddf   }t        j"                  |||      }| j                  j%                  |d	      }|||z  ||dz   z   }||   }|S )
z
        Calculates the probability if the given prompt embedding is from desirable dataset. This function calculates
        the probability in the process and ensemble across processes.
        )	pad_indexr   r*   r   )devicer   N)r   r  mean)	reduction)r   r  r   process_indexpad_across_processesr<   r   shaper   gatherr   r   r
  predict_probar  r  r	  	as_tensorreduce)	r  r  r   r  rankpadded_prompt_embeddingsr   nonzeroprobs	            rg   _get_chosen_probzBCOTrainer._get_chosen_prob  s^   
 "''"))--#'#3#3#H#H)A)A)N)N $I $
  /44Q7*//A/6$:R:R:_:__ ,,334LM ""1%*<<6??xx%%&7&;&;&=&C&C&E&K&K&MNqRStTt5@&&tv&>K$&q)ABG}r   rA   rB   c                    t        j                  || j                  j                  k(  | j                  j                  |      }t        j
                         5  | j                  ||      }ddd       |S # 1 sw Y   S xY w)z|
        Replaces processing_class.pad_token_id to embedding_tokenizer.pad_token_id and applies self.embedding_func
        rA   rB   N)r   wherer   r   r<   no_gradr   )r  rA   rB   r  s       rg   _vectorize_promptzBCOTrainer._vectorize_prompt  s~     KK..;;;$$11
	 ]]_ 	,,#- - J	 	 s   A55A?r:   c                 B   | j                   sy| j                  |d   |d         }t        t        |d               D cg c]  }|d   |   du s| }}t        t        |d               D cg c]  }|d   |   du s| }}||df   }||df   }||fS c c}w c c}w )	z.Extract embeddings from frozen embedding modelNNrJ   rK   r/  rm   TF.)r  r2  rangerM   )r  r:   r  i
chosen_idxrejected_idxr  r  s           rg   _get_prompt_embeddingsz!BCOTrainer._get_prompt_embeddings  s    
 11++12 !;< , 


 "'s5>':!;YAuW~a?PTX?XaY
Y#(U7^)<#=\awPQARV[A[\\&z37(s):;!#677 Z\s    BB/B?Bdatasetr   c                    t        t        |      |      }t        j                  j	                  t        |      |f      }|j                  |      }| j                  j                  | j                  | j                  j                  | j                  j                  dd}| j                  j                  t        |fi |      }t        j                         5  t        j                   d      }t#        |d      D ][  }	| j%                  |	d   |	d   	      }
| j                  j'                  |
      }
t        j(                  ||
j+                         f      }] 	 d
d
d
       |S # 1 sw Y   S xY w)zv
        Sample instances from dataset and get prompt embeddings. Used for density ratio classifier training.
        )sizeF
batch_size
collate_fnnum_workers
pin_memoryshuffler   z!Building sample prompt embeddingsiterabler   rJ   rK   r/  N)minrM   rN   randomchoiceselectr   per_device_train_batch_sizer   dataloader_num_workersdataloader_pin_memoryr   preparer   r   r1  emptyr   r2  gather_for_metricsr  r  )r  r:  r   	n_samplesrand_indicesembedding_datasetdataloader_paramsdata_loaderall_embeddingspadded_batchr  s              rg   r  z(BCOTrainer._get_sample_prompt_embeddings-  sN    Gk2	yy''GI<'H#NN<8 ))??,,99;;))99
 &&..z:K/aO`/ab]]_ 	O"[[^N $k@c d O!33*+@A#/0J#K 4 
 "--@@L
!&NJNN<L+M!NO	O 	O s   BE%%E/c                    ||n| j                   j                  }t        |   |       | j                  j
                  r| j                  j                  t        j                  j                  |t                     | j                  rEt        j                  | j                  t        j                  j                  |t               d       y y y )NT)compress)r   
output_dirr   _save_optimizer_and_schedulerr   is_main_processr   save_to_jsonospathjoinRUNNING_NAMEr  joblibdumpr
  CLF_NAME)r  rX  r  s     rg   rY  z(BCOTrainer._save_optimizer_and_schedulerM  s    #-#9Ztyy?S?S
-j9++LL%%bggll:|&LM11DHHbggll:x&HSWX 2	 ,r   c                     |t         j                  d|        y t        |   |       t        j
                  j                  |t              }t        j
                  j                  |      r%t        j                  | j                  |      | _        | j                  r_t        j
                  j                  |t              }t        j
                  j                  |      rt        j                   |      | _        y y y )NzMissing Checkpoint )r  warning_oncer   _load_optimizer_and_schedulerr\  r]  r^  r_  isfiler-   load_from_jsonr   r   r  rb  r`  loadr
  )r  
checkpointrunning_fileclf_filer  s       rg   re  z(BCOTrainer._load_optimizer_and_schedulerX  s    "5j\ BC-j9 ww||J=77>>,')889I9I<XDL--ww||J9Hww~~h'!;;x0 ( .r   c              #     K   | j                   r?| j                  s3| j                  j                  | j                        j                         n	t               5  | j                  r%| j                  j                  | j                         d | j                  r)| j                  j                  | j                  xs d       ddd       y# 1 sw Y   yxY ww)zWContext manager for handling null reference model (that is, peft adapter manipulation).Ndefault)	r   r   r   unwrap_modelrj   disable_adapterr   set_adapterr   r  s    rg   null_ref_contextzBCOTrainer.null_ref_contexti  s     
 !!$*?*? ))$**5EEG		M
 $$

&&t'<'<=$$

&&t'>'>'K)L		M 		M 		Ms   ACA+C	CCCc                    | j                   rA| j                  s4| j                  j                  | j                  | j                  j
                  | j                  j                  dd}| j                  j                  t        | j                  fi |      }g }t        |d      D ]M  }| j                  |      }| j                  j                  |      }|j                  |j                                O | j                  j!                  dt#        j$                  |      j'                         j)                               | _
        d| _        t*        | Y         S )z
        Returns the training [`~torch.utils.data.DataLoader`].

        Subclass of transformers.src.transformers.trainer.get_train_dataloader to precompute `ref_log_probs`.
        Fr=  z!Train dataset reference log probsrC  reference_logpsnamecolumnT)r   r   r   rI  r   rJ  rK  r   rL  r   r   r   compute_reference_log_probsrN  appendr  
add_columnr   r  r  r	  r   get_train_dataloader)r  rR  rS  reference_completion_logpsrU  reference_completion_logpr  s         rg   rz  zBCOTrainer.get_train_dataloaderw  s3    ((1V1V"iiCC"00#yy??"ii== ! **22:d>P>P3fTe3fgK)+& $k@c d S,0,L,L\,Z),0,<,<,O,OPi,j)*112K2O2O2QR	S "&!3!3!>!>&uyy9S/T/Z/Z/\/b/b/d "? "D 59D1w+--r   c                    || j                   t        d      ||n| j                   }| j                  r;| j                  s.| j                  j
                  | j                  | j                  j                  | j                  j                  dd}| j                  j                  t        |fi |      }g }t        |d      D ]M  }| j                  |      }| j                  j                  |      }|j                  |j!                                O |j#                  dt%        j&                  |      j)                         j+                               }| j                   || _         d| _        t,        | ]  |	      S )
a  
        Returns the evaluation [`~torch.utils.data.DataLoader`].

        Subclass of transformers.src.transformers.trainer.get_eval_dataloader to precompute `ref_log_probs`.

        Args:
            eval_dataset (`torch.utils.data.Dataset`, *optional*):
                If provided, will override `self.eval_dataset`. If it is a [`~datasets.Dataset`], columns not accepted
                by the `model.forward()` method are automatically removed. It must implement `__len__`.
        z-Trainer: evaluation requires an eval_dataset.Fr=  z Eval dataset reference log probsrC  rs  rt  T)r   )r   rQ   r   r   r   per_device_eval_batch_sizer   rJ  rK  r   rL  r   r   rw  rN  rx  r  ry  r   r  r  r	  r   get_eval_dataloader)r  r   rR  rS  r{  rU  r|  r  s          rg   r  zBCOTrainer.get_eval_dataloader  sl    D$5$5$=LMM'3'?|TEVEV((1U1U"iiBB"00#yy??"ii== ! **22:l3`N_3`aK)+& $k@b c S,0,L,L\,Z),0,<,<,O,OPi,j)*112K2O2O2QR	S (22&uyy9S/T/Z/Z/\/b/b/d 3 L
   ,$0!37D0w**EEr   rU  c           	         t        j                         5  | j                  | j                         5  | j                  r8| j                  |d   |d   |j                  d      |d         j                  }n#| j                  |d   |d   	      j                  }ddd       ng| j                  r8| j                  |d   |d   |j                  d      |d         j                  }n#| j                  |d   |d   	      j                  }ddd       | j                  |d   d
| j                  | j                        }|S # 1 sw Y   @xY w# 1 sw Y   DxY w)zfComputes log probabilities of the reference model for a single padded batch of a BCO specific dataset.NrE   rF   r|   rw   )rB   decoder_input_idsr{   ru   rv   )rB   Faverage_log_probrn   rx   )
r   r1  r   rq  rn   rj   r   logitsget_batch_logpsrx   )r  rU  completion_logitscompletion_logpss       rg   rw  z&BCOTrainer.compute_reference_log_probs  s   ]]_ 	~~%**, !..,0JJ();<+78O+P.:.>.>?].^#/0C#D	 -7 -
 !& * -1JJ()?@+78S+T -7 - !& *! !  **(,$%78'34K'L*6*:*:;Y*Z+,?@	 )7 )
 f & )-$%;<\ZuMv )7 )f &7	>  //,-"#66#66 0 
  K! !	 	s$   EA(EA0EE
	EEr  r{   r  rx   rn   c                 n   | j                   dd |j                   k7  rt        d      |s(|ddddf   j                         }| ddddddf   } n|j                         }||k7  }d|||k(  <   t        | |      }|r&||z  j	                  d      |j	                  d      z  S ||z  j	                  d      S )a  Compute the log probabilities of the given labels under the given logits.

        Args:
            logits: Logits of the model (unnormalized). Shape: (batch_size, sequence_length, vocab_size)
            labels:
                Labels for which to compute the log probabilities. Label tokens with a value of label_pad_token_id are
                ignored. Shape: (batch_size, sequence_length)
            average_log_prob:
                If True, return the average log probability per (non-masked) token. Otherwise, return the sum of the
                log probabilities of the (non-masked) tokens.

        Returns:
            A tensor of shape (batch_size,) containing the average/sum log probabilities of the given labels under the
            given logits.
        Nrp   zKLogits (batch and sequence length dim) and labels must have the same shape.r*   r   )r$  rQ   cloner4   sum)r  r{   r  rx   rn   	loss_maskper_token_logpss          rg   r  zBCOTrainer.get_batch_logps  s    . <<,jkk!AqrE]((*FAssAI&F \\^F00	 01v++,/?#i/44R89==;LLL#i/44R88r   c                    | j                   r|d   |j                  d      dni }| j                  rd|d<    ||d   fd|d   i|}|j                  }| j	                  ||d   d	| j                   | j
                  
      }|j                  d   t        |d         k7  rt        d      t        |j                  d         D cg c]  }|d   |   du s| }}t        |j                  d         D cg c]  }|d   |   d	u s| }	}||df   }
||	df   }||df   }||	df   }| j                  r|
||||j                  fS |
|||fS c c}w c c}w )Nrw   r|   )r{   r  Tr   ru   rB   rv   Fr  r   rm   zThere is a mismatch between the number of examples in this batch and the number of examples for which an output sequence was predicted..)rn   r   r   r  r  rx   r$  rM   rQ   r5  aux_loss)r  rj   r:   model_kwargsoutputsr  r  r6  r7  r8  chosen_logpsrejected_logpschosen_logitsrejected_logitss                 rg   forwardzBCOTrainer.forward  s    &&   34%*YY/M%N
  	   37L/0()
 !<=
 

 $NN//%&"#66#66 0 
 !!!$E'N(;;G 
 "''7'='=a'@!A_AU7^TUEVZ^E^a_
_#()9)?)?)B#CbauW~VWGX\aGabb'
C8),*;<)*c/:+L#,=>   .-RYRbRbcc .-QQ `bs   E	E	4EEr  c                     | j                  |      }| j                  j                  }| j                  j                  }|d|z
  dz   z  j	                  ||      }|S )Nr*   g:0yE>)rE  max)r-  r   min_density_ratiomax_density_ratioclamp)r  r  prob_desirable	min_ratio	max_ratioweights         rg   _get_udm_weightzBCOTrainer._get_udm_weightP  s[    ../BCII//	II//	 A$6$=>EE)YbEcr   policy_chosen_logpspolicy_rejected_logpsreference_chosen_logpsreference_rejected_logpsr  do_trainc                    ||z
  }| j                   |z  }	||z
  }
| j                   |
z  }|r?| j                  j                  t        j                  |	|fd      j                                t        j                  | j                  j                  |	j                        }t        j                  |	|z
         }t        j                  ||z
          }| j                  rFt        j                  |      }| j                  |      }t        j                  ||z  ||z  fd      }nt        j                  ||fd      }||	||fS )a  Compute the BCO loss for a batch of policy and reference model log probabilities.

        Args:
            policy_chosen_logps:
                Log probabilities of the policy model for the chosen responses. Shape: (num(chosen) in batch_size,)
            policy_rejected_logps:
                Log probabilities of the policy model for the rejected responses. Shape: (num(rejected) in batch_size,)
            reference_chosen_logps:
                Log probabilities of the reference model for the chosen responses. Shape: (num(chosen) in batch_size,)
            reference_rejected_logps:
                Log probabilities of the reference model for the rejected responses. Shape: (num(rejected) in
                batch_size,)
            chosen_embeddings: embeddings of desirable prompts
            rejected_embeddings: embeddings of undesirable prompts

        Returns:
            A tuple of four tensors: (losses, chosen_rewards, rejected_rewards, delta). The losses tensor contains the
            BCO loss for each example in the batch. The chosen_rewards and rejected_rewards tensors contain the rewards
            for the chosen and rejected responses, respectively. The delta value contains the moving average of all
            implicit rewards.
        r   r  r   )r   r   rU   r   r  detachr'  r   r  F
logsigmoidr  r  r  )r  r  r  r  r  r  r  r  chosen_logratioschosen_rewardsrejected_logratiosrejected_rewardsdeltachosen_lossesrejected_losseschosen_weightrejected_weightlossess                     rg   bco_losszBCOTrainer.bco_lossY  s)   @ /1GG%5525MM99'99LL		>;K*La P W W YZ 1 1.:O:OPnu&<==<<*:U*B(CDD--!OOM:M"223FGOYY =Q`?`aghiFYY?QGF~'7>>r   c           	      2
   i }|j                         D ci c]G  \  }}|t        |t        j                        r%|j	                  | j
                  j                        n|I }}}| j                  ||      }|dd \  }}	}
}| j                  r|d   }d|v r{t        |d   j                  d         D cg c]  }|d   |   du s| }}t        |d   j                  d         D cg c]  }|d   |   du s| }}|d   |df   }|d   |df   }nt        j                         5  | j                  >| j                         5  | j                  | j                  |      dd \  }}}}ddd       n$| j                  | j                  |      dd \  }}}}ddd       | j                  |      \  }}| j!                  ||	|||	      \  }}}}| j
                  j#                  |      j%                         j'                         |d
<   t        j                  t)        |      g      j	                  | j
                  j                        }t        j                  t)        |      g      j	                  | j
                  j                        }| j
                  j#                  |      j+                         j'                         }| j
                  j#                  |      j+                         j'                         }|dkD  r| j
                  j#                  |j-                               j-                         j'                         |d<   | j
                  j#                  |j-                               j-                         j'                         |d<   | j
                  j#                  |
j-                               j-                         j'                         |d<   ||d<   |dkD  r| j
                  j#                  |j-                               j-                         j'                         |d<   | j
                  j#                  |	j-                               j-                         j'                         |d<   | j
                  j#                  |j-                               j-                         j'                         |d<   ||d<   |j/                         }| j                  r|| j0                  z  z  }||fS c c}}w c c}w c c}w # 1 sw Y   uxY w# 1 sw Y   zxY w)zWCompute the BCO loss and other metrics for the given batch of inputs for train or test.N   rs  r   rm   TF.r  r  zrewards/chosen_sumzlogps/chosen_sumlogits/chosen_sumzcount/chosenzrewards/rejected_sumzlogps/rejected_sumlogits/rejected_sumzcount/rejected)itemsr}   r   Tensortor   r  r  r   r5  r$  r1  r   rq  rj   r9  r  rN  r   itemrM   r  nansumnanmeanr   )r  rj   r:   r  metricsr   vforward_outputr  r  policy_chosen_logitspolicy_rejected_logitsr  r6  r7  r8  r  r  _r  r  r  r  r  r  
num_chosennum_rejectedall_num_chosenall_num_rejectedlosss                                 rg   get_batch_loss_metricsz!BCOTrainer.get_batch_loss_metrics  s    fkfqfqfst^b^_abjELL6QQTT$**112WXXtteU3 2A	
! "  %a(H %%*51B+C+I+I!+L%MkQVW^Q_`aQbfjQj!kJk',U3D-E-K-KA-N'On!SXY`SabcSdhmSmAnLn%*+<%=j#o%N"',->'?c@Q'R$ @>>)..0 @ !LLU;BQ?24@ @ T^^U;BQ?.0@" 261L1LU1S..:>--!"$ ;H ;
7 0%  ++>>uEJJLQQS\\3~#6"78;;D<L<L<S<ST
||S)9%:$;<??@P@P@W@WX))<<ZHLLNSSU++>>|LPPRWWYA  33N4I4I4KLSSUZZ\ ()   334G4N4N4PQXXZ__a &'   334H4O4O4QRYY[``b '( '5GN#a  334D4K4K4MNUUW\\^ *+   334I4P4P4RSZZ\aac ()   334J4Q4Q4ST[[]bbd )* )9G$%~~  D&&11DW}i u ln@ @@ @sB   AS/3S5S5&S:6S:%T%S?'-T?T		TTinputsc                    | j                   r)t        | j                  j                  j                        n	t               }|5  | j                  ||      \  }}d d d        j                  | j                  j                        }| j                  j                  r| j                  d       |r|fS |S # 1 sw Y   _xY w)Ntrain
train_eval)r   r   r   r  r   r   r  r  r   rZ  store_metrics)r  rj   r  return_outputsnum_items_in_batchcompute_loss_context_managerr  r  s           rg   compute_losszBCOTrainer.compute_loss  s     7;6X6XHT%%,,112^i^k 	% * 	G 77vFMD'	G wwtyy''(++w7;'?"	G 	Gs   B66B?r  r  )r  evalc                 v    |j                         D ]&  \  }}| j                  |   |   j                  |       ( y r   )r  r   rx  )r  r  r  keyvalues        rg   r  zBCOTrainer.store_metrics  s;    !--/ 	@JC  ,S188?	@r   c                 P    || j                   }|t        |      sy t        |      S r   )r   r#   r   )r  r:  s     rg   _get_train_samplerzBCOTrainer._get_train_sampler
  s,    ?((G?*W"5 ))r   c           	         | j                   r)t        | j                  j                  j                        n	t               }|5  |j                  |d   |d   | j                  d| j                  j                        }d|v r|d   }n| j                  ^| j                         5  | j                  j                  |d   |d   | j                  d| j                  j                        }ddd       nD| j                  j                  |d   |d   | j                  d| j                  j                        }ddd       t        | j                  | j                  j                        }| j                  j                  |d      }t        | j                  | j                  j                        }| j                  j                  |d      }||fS # 1 sw Y   xY w# 1 sw Y   xY w)zRGenerate samples from the model and reference model for the given batch of inputs.rE   rF   T)rA   rB   ro   	do_sampler   reference_outputN)skip_special_tokens)r   r   r   r  r   r   generatero   r   r   r   rq  rj   r2   batch_decode)r  rj   r:   generate_context_managerpolicy_outputr  policy_output_decodedreference_output_decodeds           rg   generate_from_model_and_refz&BCOTrainer.generate_from_model_and_ref  s    7;6X6XHT%%,,112^i^k 	! & 	!NN 23$%<=??!22?? + M "U*#();#< >>)..0 +/::+>+>&+,>&?+01H+I'+&*)-)>)>)K)K ,? ,(  (,~~'>'>"'(:";',-D'E#'??"&%)%:%:%G%G (? ($/	> &mT__dF[F[FhFhi $ 5 5 B B=fj B k()94??DLaLaLnLno#'#8#8#E#EFVlp#E#q $&>>>/ 	 	s'   A!G#AG(AGG	GG&prediction_loss_onlyignore_keysc                 x   |&t        |d      rt        |j                  dg       }ng }| j                  r)t	        | j
                  j                  j                        n	t               }t        j                         5  |5  | j                  ||d      \  }}d d d        d d d        | j
                  j                  r| j                  d       |rj                         d d fS i }dv r|d   |d<   d	|v r|d	   |d
<   |j                         D 	
cg c]  \  }	}
|	|vs|
 }}	}
t        j                   || j
                  j                        }t        j"                  |j$                  d   | j
                  j                        }j                         ||fS # 1 sw Y   xY w# 1 sw Y   xY wc c}
}	w )Nr   keys_to_ignore_at_inferenceFr  r  r  r  zeval_logits/chosenr  zeval_logits/rejectedr  r   )r   r   r   r   r   r   r  r   r   r   r1  r  rZ  r  r  r  r   zerosr$  )r  rj   r  r  r  prediction_context_managerr  r  logits_dictr   r  r  r{   s                rg   prediction_stepzBCOTrainer.prediction_step@  s    uh'%ell4QSUV  7;6X6XHT%%,,112^i^k 	# ]]_ 	W8 	W 77vPU7VMD'	W 	W ++w6:KKM4.. ')078K0LK,- G+29:O2PK./ + 1 1 3L1q7K!LLfT-=-=-D-DEV\\!_T5E5E5L5LMvv..)	W 	W 	W 	W  Ms0   <F)?FF)F6F6F&	!F))F3
dataloaderdescriptionmetric_key_prefixc                    | j                   rt        |j                        }t        j                  t        |      | j                  j                        }|j                  j                  |      }| j                  |      }	| j                  |	      }	t        t        |	d               D 
cg c]  }
|	d   |
   du s|
 }}
|	d   |   |	d   |    t        | |	d         d}| j                  | j                  |      \  }}t        j                  g dt!        |d   ||      D cg c]#  \  }}}||t        |      d	 |t        |      d	 g% c}}}
      }d| j                  j"                  v r+t%        j&                  dt%        j(                  |      i       d| j                  j"                  v rt+        d|       t,        | ]  |||||      }|S c c}
w c c}}}w )z
        Overriding built-in evaluation loop to store metrics for each batch. Prediction/evaluation loop, shared by
        `Trainer.evaluate()` and `Trainer.predict()`.

        Works both with or without labels.
        )r   rm   FrE   rF   r?   )rE   rF   r?   )PromptPolicyz	Ref ModelN)columnsdatawandbgame_log)r  comet_mlzgame_log.csv)ru  table)r   rM   r:  rF  sampler5  r   eval_batch_sizerH  r   _prepare_inputsr   r  rj   pd	DataFramerL   	report_tor  logTabler1   r   evaluation_loop)r  r  r  r  r  r  num_samplesrandom_indicesrandom_batch_datasetrandom_batchr6  target_indiciestarget_batchr  ref_output_decodedr?   polrefr  initial_outputr  s                       rg   r  zBCOTrainer.evaluation_loopf  s     $$j001K#]]5+=AZAZ[N $.#5#5#<#<^#L --.BCL//=L*/L4I0J*KqQ|\cOdefOgkpOpqqOq$01C$D_$U)56M)N)_6*o6|H7MNL
 9=8X8XY]YcYceq8r5!#5LL9 -0X0FH]_q,r (S SV/S[]1CDE $))---		:u{{'>?@TYY000-' 0%9;HY
 ; rs   .G>G*(Glogs
start_timec           	      ,   d|v rdnd}|dk(  rdnd}dD ]  }d| | j                   |   v st        j                  | j                   |   d|          j                         j	                         }dD ]l  }t        j                  | j                   |   | d	| d
         j                         j	                         |z  || | d	| <   | j                   |   | d	| d
= n | j                   |   d| =  | d|v r| d|v r|| d   || d   z
  || d<   | j                   |   j                         D ]=  \  }}	t        j                  |	      j                         j	                         || | <   ? | j                   |= t        
| !  ||      S )a1  
        Log `logs` on the various objects watching training, including stored metrics.

        Args:
            logs (`dict[str, float]`):
                The values to log.
            start_time (`float` or `None`, *optional*, defaults to `None`):
                Start time of the training.
        r  r  r  eval_r   )chosenrejectedzcount/)rewardslogpsr  /_sumzrewards/chosenzrewards/rejectedzrewards/margins)	r   r   r  r  r  r  r   r   r  )r  r  r  r  rl   split	count_summetricr  r  r  s             rg   r  zBCOTrainer.log  s    !'$WF
&&0b+ 
	GEw4#7#7
#CC!LL)=)=j)IFSXRYJZ)[\``bggi	< RFT%9%9*%EPQRWQXX\F]%^_ccejjl#$ F8F81UG45
 ,,Z8F81UG49PQR ((4veW5EF
	G X^$,F8;K1LPT1T/3vhn4M/NQUY_X``pVqQr/rDF8?+, 00<BBD 	ILC%*\\'%:%?%?%A%F%F%HDF8C5!"	I  ,w{4,,r   c                    | j                   j                  *t        | j                   j                        j                  }n(| j                   j                  j                  d      d   }| j                  |       t        | !  ||       y )Nr  rp   )
model_name)	r   hub_model_idr   rX  ru  r  create_model_cardr   _save_checkpoint)r  rj   trialr  r  s       rg   r  zBCOTrainer._save_checkpoint  sl    99!!)dii22388J//55c:2>J*5 .r   r  dataset_nametagsc                    | j                         syt        | j                  j                  d      r^t        j
                  j                  | j                  j                  j                        s!| j                  j                  j                  }nd}|t               }nt        |t              r|h}nt        |      }t        | j                  j                  d      r|j                  d       |j                  | j                         t        j                  d      }t!        ||| j"                  ||t%               r.t&        j(                  t&        j(                  j+                         ndt-               d|dd	      }|j/                  t        j
                  j1                  | j2                  j4                  d
             y)a  
        Creates a draft of a model card using the information available to the `Trainer`.

        Args:
            model_name (`str` or `None`, *optional*, defaults to `None`):
                Name of the model.
            dataset_name (`str` or `None`, *optional*, defaults to `None`):
                Name of the dataset used for training.
            tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
                Tags to be associated with the model card.
        N_name_or_pathunsloth_versionunslothaA          @article{jung2024binary,
            title        = {{Binary Classifier Optimization for Large Language Model Alignment}},
            author       = {Seungjae Jung and Gunsoo Han and Daniel Wontae Nam and Kyoung{-}Woon On},
            year         = 2024,
            eprint       = {arXiv:2404.04656}
        }BCOzABinary Classifier Optimization for Large Language Model Alignmentz
2404.04656)
base_modelr  r  r!  r"  	wandb_url	comet_urltrainer_nametrainer_citationpaper_titlepaper_idz	README.md)is_world_process_zeror   rj   r   r\  r]  isdirr$  setr}   r~   addrU   r   textwrapdedentr/   r  r    r  runget_urlr0   saver^  r   rX  )r  r  r!  r"  r(  citation
model_cards          rg   r  zBCOTrainer.create_model_card  sG   " ))+4::$$o6rww}}TZZM^M^MlMl?m**88JJ <5Dc"6Dt9D4::$$&78HHYDOO$?? $  )!!**%-?-AeiiF[eii'')ae.0%[!

 	TYY%9%9;GHr   )NNNNNNNNNr4  NNNNNNN)r   r   )FiF)T)FN)r  )NNr  )NNN)G__name__
__module____qualname____doc__r   r   r   nnModuler~   r   r+   r   rT   r   r   r   r   r   r
   r   r!   tupler   optim	Optimizerlr_schedulerLambdaLRr  r"   r   propertyr  FloatTensorr-  
LongTensorr2  r9  intr  rY  re  r   rq  r   rz  r  rw  staticmethodboolr  r  r  r  r  r	   r  r  r   r  utilsr  Samplerr  r  r  r  r  r  r  __classcell__)r  s   @rg   r   r     s;   +Z J 9=FJ+/EI 04>B59Vbhl&*FJ,0*.-1AE)Wi_bii45Wi E/299c"ABCWi 	Wi
  (Wi uWd3<.@%@ABWi #)+=?UWeef
Wi  -Wi Xb/&9:;Wi D12Wi %++//1I1I1R1RRSWi (0%,,9UW\WcWc9c0d'eWi d^Wi  "(N+;T+A"BC!Wi" %SM#Wi$ #3-%Wi& !*'Wi( &&=>)Wir X X%2C2C HYHY :5+;+; UM]M] bgbsbs $8#uT5+;+;%;<<=8	u  %"3"33	48*W 3 Y^YjYj @	Y1" M M .j  .D,F0A ,FZ ,F\)  )  ) V  "'"&#(*9!!*9  *9 *9  	*9
 !*9 
		*9 *9X/RYY/R'+CtU=M=M7M1N,N'O/R	u  %"3"3U5F5FHYHYY	Z/Rb53D3D IZIZ " 5?"..5?  %005? !& 1 1	5?
 #("3"35? $E$5$565? &e&7&785? 5? 
u  %"3"3U5F5FHYHYY	Z5?v 	\ CtU%5%55667\ 	\D _bii/0 S%c 1223 
u||U5<<c5<<6G1H#HII	J0@T#u*%5 @7?C[ @jn @*(7*; *xPUP[P[P`P`PhPhGi *-?S%BRBR=R8S -?X]^acf^fXg -?h ,0$/_bii/0$/ S%c 1223$/ #	$/
 d3i($/T 04+/!'77 7 'tn	7
 d3i(7 7 
7r!-S%Z( !-huo !-QU !-H/ %)&*,0	<ISM<I sm<I CcD()	<Ir   r   r   )er   r\  rF  r3  r   collectionsr   
contextlibr   r   operatorr   pathlibr   typingr   r	   r
   r   r   r   r	  rN   pandasr  r   torch.nnr>  torch.nn.functional
functionalr  
accelerater   accelerate.loggingr   accelerate.utilsr   datasetsr   r   torch.utils.datar   r   transformersr   r   r   r   r   r   r   r   r   r   r   r    transformers.trainer_callbackr!   transformers.trainer_utilsr"   r#   transformers.utilsr$   
data_utilsr&   import_utilsr'   modelsr(   r)   
bco_configr+   rK  r,   r-   r.   r/   r0   r1   r2   r3   r4   peftr5   r6   r7   r  sklearn.linear_modelr8   r`  r9   r:  r  r_  rb  rT   r~   r   rh   r   r   r   r   rg   <module>rf     sW    	    # 2   I I       # ) !   :    : A 0 2 . > !
 
 
 OO7A	H	 <@BT#YB$B ""78B 
#tCy.	BJwT#s(^ w4E w[_ wtkI kIr   