
    bi_                     &   d dl Z d dlmZmZ d dlZd dlZd dlmZ d dl	m
Z
 d dlmZmZ d dlmZmZmZmZmZmZmZmZ d dlmZ d dlmZ d	d
lmZ d	dlmZ d	dlm Z m!Z!m"Z" d	dl#m$Z$ ddl%m&Z& ddl'm(Z(  e       rd dl)m*Z*m+Z+ d dl,m-Z- d dl.m/Z/ d dl0m1Z1  e       rd dl2Z2	 d+de3e4   dedededee   de5de3e4   fdZ6 G d de      Z7 G d d e      Z8d!ede3e4   d"e3e4   d#e3e4   dejr                  f
d$Z: G d% d&e      Z; G d' d(e      Z< G d) d*e      Z=y),    N)OptionalUnion)Accelerator)AcceleratorState)gather_objectis_wandb_available)GenerationConfigPreTrainedModelPreTrainedTokenizerBaseTrainerTrainerCallbackTrainerControlTrainerStateTrainingArguments)
has_length)is_rich_available   )maybe_apply_chat_template)is_mergekit_available)MergeConfigmerge_modelsupload_model_to_hf)unwrap_model_for_generation   )BasePairwiseJudge)log_table_to_comet_experiment)ConsoleGroup)Live)Panel)Progresspromptsmodel	tokenizeracceleratorgeneration_config
batch_sizereturnc                    g }t        ||      5 }t        dt        |       |      D ]  }| |||z    }	 ||	ddd      j                  |j                        }
 |j
                  di |
d|i}t        |
j                  |      D ]7  \  }}|t        |      d }|j                  |d      }|j                  |       9  	 ddd       |S # 1 sw Y   |S xY w)	a  
    Generates completions for a list of pre-formatted prompts from the given model.

    Args:
        prompts (list[str]): A list of input prompts for which completions are to be generated.
        model (PreTrainedModel): The pre-trained model to be used for generation.
        tokenizer (PreTrainedTokenizerBase): The tokenizer to be used for encoding and decoding.
        accelerator (Accelerator): The accelerator to be used for model execution.
        generation_config (GenerationConfig): Configuration for text generation.
        batch_size (int, optional): The number of prompts to process in each batch. Default is 1.

    Returns:
        list[str]: A list of generated text completions corresponding to the input prompts.
    r   ptT)return_tensorspadding
truncationr&   N)skip_special_tokens )
r   rangelentodevicegeneratezip	input_idsdecodeappend)r"   r#   r$   r%   r&   r'   completionsunwrapped_modelidxbatchtokenized_batchgenerationsprompt
generation
completions                  P/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/trl/trainer/callbacks.py_generate_completionsrC   6   s    , K	$UK	8 /OCL*5 	/CC#
"23E'dD]abeefkfrfrsO2/22 !"3K '*/*C*C[&Q /"
'F6
&--jd-S
"":.	/	// / s   B/C		Cc                   ~    e Zd ZdZdeeej                  j                  f   de	e
   fdZed        Zed        Zd Zy)	SyncRefModelCallbackzC
    Callback to synchronize the model with a reference model.
    	ref_modelr%   c                      || _         || _        y N)r%   rF   )selfrF   r%   s      rB   __init__zSyncRefModelCallback.__init__b   s    
 '"    c                     t        |j                         | j                               D ]>  \  }}|j                  j                  d|z
        j	                  |j                  |       @ y )N      ?)alpha)r5   
parametersdatamul_add_)r#   target_modelrN   target_param
copy_params        rB   _sync_target_modelz'SyncRefModelCallback._sync_target_modelj   s]    (+L,C,C,EuGWGWGY(Z 	S$L*""3;/44Z__E4R	SrK   c                    t               j                  }||j                  dk(  rdd l}|j                  j                  t        | j                               t        |j                               z   d      5  |j                  j                         dk(  rt        j                  | ||       d d d        y t        j                  | ||       y # 1 sw Y   y xY w)N   r   )modifier_rank)r   deepspeed_plugin
zero_stage	deepspeedzeroGatheredParameterslistrO   commget_rankrE   rV   )r#   rS   rN   rZ   r\   s        rB   sync_target_modelz&SyncRefModelCallback.sync_target_modelo   s    +->>',<,G,G1,L22U%%'(40G0G0I+JJZ[ 3  X >>**,1(;;E<QVW	X X !33E<OX Xs   85CCc                     |d   }| j                   l|j                  |j                  z  dk(  rO| j                  r| j                  j	                  |      }| j                  || j                   |j                         y y y )Nr#   r   )rF   global_stepref_model_sync_stepsr%   unwrap_modelrb   ref_model_mixup_alpha)rI   argsstatecontrolkwargsr#   s         rB   on_step_endz SyncRefModelCallback.on_step_end}   sr    !'>>%%*;*;d>W>W*W[\*\((55e<""5$..$:T:TU +]%rK   N)__name__
__module____qualname____doc__r   r
   torchnnModuler   r   rJ   staticmethodrV   rb   rl   r/   rK   rB   rE   rE   ]   sg    #%((//9:# k*# S S P PVrK   rE   c                   D    e Zd ZdZd Zd Zd ZddZd Zd Z	dd	Z
d
 Zy)RichProgressCallbackz`
    A [`TrainerCallback`] that displays the progress of training or evaluation using Rich.
    c                     t               st        d      d | _        d | _        d | _        d | _        d | _        d | _        d | _        d | _	        y )NzSRichProgressCallback requires the `rich` extra. To install, run `pip install rich`.)
r   ImportErrortraining_barprediction_bartraining_task_idprediction_task_id
rich_grouprich_consoletraining_statuscurrent_step)rI   s    rB   rJ   zRichProgressCallback.__init__   sS     "stt " $"& # rK   c           	         |j                   rt               | _        t               | _        t	               | _        | j
                  j                  d      | _        t        t        t        | j                  | j                  | j                                    | _        | j                  j                          | j                  j                  d|j                        | _        d| _        y y )NzNothing to log yet ...z[blue]Training the modeltotalr   )is_world_process_zeror!   ry   rz   r   r~   statusr   r   r    r   r}   startadd_task	max_stepsr{   r   rI   rh   ri   rj   rk   s        rB   on_train_beginz#RichProgressCallback.on_train_begin   s    && (
D"**D '	D#'#4#4#;#;<T#UD "5t/@/@$BUBUW[WkWk)l#mnDOOO!!#$($5$5$>$>?Yafapap$>$qD! !D 'rK   c                     |j                   rQ| j                  j                  | j                  |j                  | j
                  z
  d       |j                  | _        y y )NTadvanceupdate)r   ry   r   r{   rd   r   r   s        rB   rl   z RichProgressCallback.on_step_end   sR    &&$$T%:%:EDUDUX\XiXiDirv$w % 1 1D 'rK   Nc                     |j                   rlt        |      r`| j                  +| j                  j	                  dt        |            | _        | j                  j                  | j                  dd       y y y )Nz*[blue]Predicting on the evaluation datasetr   r   Tr   )r   r   r|   rz   r   r1   r   )rI   rh   ri   rj   eval_dataloaderrk   s         rB   on_prediction_stepz'RichProgressCallback.on_prediction_step   st    &&:o+F&&.*.*=*=*F*F@OH\ +G +' &&t'>'>RV&W ,G&rK   c                     |j                   r:| j                  -| j                  j                  | j                         d | _        y y y rH   r   r|   rz   remove_taskr   s        rB   on_evaluatez RichProgressCallback.on_evaluate   C    &&&&2##//0G0GH*.' 3 'rK   c                     |j                   r:| j                  -| j                  j                  | j                         d | _        y y y rH   r   r   s        rB   
on_predictzRichProgressCallback.on_predict   r   rK   c                     |j                   rG| j                  :|j                  dd       }| j                  j	                  dt        |              y y y )N
total_flosz[bold green]Status = )r   ry   popr   r   str)rI   rh   ri   rj   logsrk   _s          rB   on_logzRichProgressCallback.on_log   sO    &&4+<+<+Ht,A  ''*?D	{(KL ,I&rK   c                     |j                   rS| j                  j                          d | _        d | _        d | _        d | _        d | _        d | _        d | _        d | _	        y y rH   )
r   r}   stopry   rz   r{   r|   r~   r   r   r   s        rB   on_train_endz!RichProgressCallback.on_train_end   s_    &&OO  " $D"&D$(D!&*D#"DO $D#'D  $D 'rK   rH   )rm   rn   ro   rp   rJ   r   rl   r   r   r   r   r   r/   rK   rB   rv   rv      s1    ! "2
X//M
%rK   rv   ri   r9   winner_indicesc           	         t        | j                        gt        |      z  }t        t	        ||||            }|D cg c]  }|d   |d   |d   d   |d   d   |d   f  }}t        j                  |g d      S c c}w )Nr   r   r   rX   )stepr?   reference_modelpolicywinner_index)columns)r   rd   r1   r_   r5   pd	DataFrame)ri   r"   r9   r   rd   rP   item
split_datas           rB   _win_rate_completions_dfr      s     u(()*S\9KK+~FGDRVW$47DGT!WQZaT!WEWJW<<
,kll Xs   #A<c                   n    e Zd ZdZ	 	 	 	 ddededee   dee   de	de	fd	Z
d
ededefdZd
ededefdZy)WinRateCallbacka  
    A [`~transformers.TrainerCallback`] that computes the win rate of a model based on a reference.

    It generates completions using prompts from the evaluation dataset and compares the trained model's outputs against
    a reference. The reference is either the initial version of the model (before training) or the reference model, if
    available in the trainer. During each evaluation step, a judge determines how often the trained model's completions
    win against the reference using a judge. The win rate is then logged in the trainer's logs under the key
    `"eval_win_rate"`.

    Usage:
    ```python
    trainer = DPOTrainer(...)
    judge = PairRMJudge()
    win_rate_callback = WinRateCallback(judge=judge, trainer=trainer)
    trainer.add_callback(win_rate_callback)
    ```

    Args:
        judge (`BasePairwiseJudge`):
            The judge to use for comparing completions.
        trainer (`Trainer`):
            Trainer to which the callback will be attached. The trainer's evaluation dataset must include a `"prompt"`
            column containing the prompts for generating completions. If the `Trainer` has a reference model (via the
            `ref_model` attribute), it will use this reference model for generating the reference completions;
            otherwise, it defaults to using the initial model.
        generation_config (`GenerationConfig`, *optional*):
            The generation config to use for generating completions.
        num_prompts (`int` or `None`, *optional*, defaults to `None`):
            The number of prompts to generate completions for. If not provided, defaults to the number of examples in
            the evaluation dataset.
        shuffle_order (`bool`, *optional*, defaults to `True`):
            Whether to shuffle the order of the completions before judging.
        use_soft_judge (`bool`, *optional*, defaults to `False`):
            Whether to use a soft judge that returns a win probability between 0 and 1 for the first completion vs the
            second.
    Njudgetrainerr&   num_promptsshuffle_orderuse_soft_judgec                 (   || _         || _        || _        || _        g | _        || _        | j                  j                  t        d      | j                  j                  | _        |*| j                  j                  t        |            | _        y y )NzCTrainer must have an evaluation dataset to use the WinRateCallback.)
r   r   r   r&   ref_completionsr   eval_dataset
ValueErrorselectr0   )rI   r   r   r&   r   r   r   s          rB   rJ   zWinRateCallback.__init__  s     
*!2!,<<$$,bcc $ 9 9D" $ 1 1 8 8{9K LD #rK   rh   ri   rj   c           	      8   |d   }d|_         | j                  j                  }t        | j                  dd       }|| j                  j                  }|j                  | j                  d         5 }t        ||||| j                  |j                        | _
        t        t        | j                  | j                              }	| j                  rK| j                  j                  ||	| j                  d      }
|
D cg c]  }|dkD  rd	nd
 }}t!        |
      }
n'| j                  j                  ||	| j                        }t!        |      }t!        |	      }	t!        |      }d d d        | j                  j                  j"                  rt%        d D              t'        |      z  }| j                  r9dt%        
      t'        |
      z  z
  }| j                  j)                  ||d       n| j                  j)                  d|i       d|j*                  v rDd	d l}|j.                  4t1        |	|      } |j(                  d |j2                  |      i       d|j*                  v rt1        |	|      }t5        d|       y y y c c}w # 1 sw Y   .xY w)Nprocessing_classleftrF   r?   r#   r$   r%   r&   r'   Treturn_scores      ?r   r   c              3   &   K   | ]	  }|d k(    ywr   Nr/   .0
winner_idxs     rB   	<genexpr>z1WinRateCallback.on_train_begin.<locals>.<genexpr>E       Lz:?L   rM   eval_avg_win_probeval_win_rater   wandbri   r"   r9   r   win_rate_completions	dataframecomet_mlwin_rate_completions.csvnametable)padding_sider   r%   getattrmodel_wrappedsplit_between_processesr   rC   r&   per_device_eval_batch_sizer   r_   r5   r   r   r   r   is_main_processsumr1   log	report_tor   runr   Tabler   rI   rh   ri   rj   rk   r$   r%   r#   r"   r9   ref_win_probsscorer   win_rateavg_win_probr   dfs                    rB   r   zWinRateCallback.on_train_begin  sk   -.	!'	ll..k48 =LL..E001B1B81LM 	;QX#8#'"&"8"8::$D  s4#7#79M9MNOK"" $

 0 0+tGYGYim 0 nGT!Ueus{!"9!U!U -m <!%!1!1';HZHZ![#G,G'4K*>:N'	;, <<##33L^LLsSaObbH"""S%7#m:L%LL  |V^!_`  /8!<=$..(99(1# '$/'5	B EII5{u{{R7PQRT^^+-# +#1	 .3 ,) 4 "V	; 	;s    0BJ=J
AJ
JJc           	         |d   }d|_         | j                  j                  }| j                  j                  }|j	                  | j
                  d         5 }t        ||||| j                  |j                        }	t        t        | j                  |	            }	| j                  rK| j                  j                  ||	| j                  d      }
|
D cg c]  }|dkD  rdnd	 }}t        |
      }
n'| j                  j                  ||	| j                        }t        |      }t        |	      }	t        |      }d d d        | j                  j                  j                   rt#        d
 D              t%        |      z  }| j                  r9dt#        
      t%        |
      z  z
  }| j                  j'                  ||d       n| j                  j'                  d|i       d|j(                  v rDdd l}|j,                  4t/        |	|      } |j&                  d |j0                  |      i       d|j(                  v rt/        |	|      }t3        d|       y y y c c}w # 1 sw Y   .xY w)Nr   r   r?   r   Tr   r   r   r   c              3   &   K   | ]	  }|d k(    ywr   r/   r   s     rB   r   z.WinRateCallback.on_evaluate.<locals>.<genexpr>  r   r   rM   r   r   r   r   r   r   r   r   r   )r   r   r%   r   r   r   rC   r&   r   r_   r5   r   r   r   r   r   r   r   r1   r   r   r   r   r   r   r   r   s                    rB   r   zWinRateCallback.on_evaluated  sK    -.	!'	ll..**001B1B81LM 	;QX/#'"&"8"8::K s4#7#7EFK"" $

 0 0+tGYGYim 0 nGT!Ueus{!"9!U!U -m <!%!1!1';HZHZ![#G,G'4K*>:N)	;. <<##33L^LLsSaObbH"""S%7#m:L%LL  |V^!_`  /8!<=$..(99(1# '$/'5	B EII5{u{{R7PQRT^^+-# +#1	 .3 ,) 4 "V	; 	;s    A>I'I"%AI'"I''I1)NNTF)rm   rn   ro   rp   r   r   r   r	   intboolrJ   r   r   r   r   r   r/   rK   rB   r   r      s    #R 9=%)"$M M M $$45	M
 c]M M M0D#4 D\ DTb DL= 1 =, =Q_ =rK   r   c            
       H    e Zd ZdZ	 	 	 d	dedee   dee   dee   fdZd Z	y)
LogCompletionsCallbacka  
    A [`~transformers.TrainerCallback`] that logs completions to Weights & Biases and/or Comet.

    Usage:
    ```python
    trainer = DPOTrainer(...)
    completions_callback = LogCompletionsCallback(trainer=trainer)
    trainer.add_callback(completions_callback)
    ```

    Args:
        trainer (`Trainer`):
            Trainer to which the callback will be attached. The trainer's evaluation dataset must include a `"prompt"`
            column containing the prompts for generating completions.
        generation_config (`GenerationConfig`, *optional*):
            The generation config to use for generating completions.
        num_prompts (`int` or `None`, *optional*):
            The number of prompts to generate completions for. If not provided, defaults to the number of examples in
            the evaluation dataset.
        freq (`int` or `None`, *optional*):
            The frequency at which to log completions. If not provided, defaults to the trainer's `eval_steps`.
    Nr   r&   r   freqc                    || _         || _        || _        g | _        d| _        | j                   j
                  t        d      | j                   j
                  | _        |*| j
                  j                  t        |            | _        y y )NzJTrainer must have an evaluation dataset to use the LogCompletionsCallback.)	r   r&   r   r   _last_logged_stepr   r   r   r0   )rI   r   r&   r   r   s        rB   rJ   zLogCompletionsCallback.__init__  s     !2	
!#<<$$,ijj $ 9 9D" $ 1 1 8 8{9K LD #rK   c           	         |j                   | j                  k(  ry | j                  xs |j                  }|j                   |z  dk7  ry |d   }d|_        | j
                  j                  }| j
                  j                  }|j                  | j                  d         5 }	|	D 
cg c]  }
t        d|
i|      d    }	}
t        |	|||| j                  |j                        }t        |      }t        |	      }	d d d        | j
                  j                  j                  rt!        |j                         gt#        	      z  }t%        t'        ||	            }| j(                  j+                  |       t-        j.                  g d| j(                        }d|j0                  v rt3        j4                  d	|i       d
|j0                  v rt7        d|       |j                   | _        y c c}
w # 1 sw Y   xY w)Nr   r   r   r?   r   )r   r?   rA   )r   rP   r   r9   r   zcompletions.csvr   )rd   r   r   
eval_stepsr   r   r%   r   r   r   r   rC   r&   r   r   r   r   r1   r_   r5   r   extendr   r   r   r   r   r   )rI   rh   ri   rj   rk   r   r$   r%   r#   r"   r?   r9   rd   rP   r   s                  rB   rl   z"LogCompletionsCallback.on_step_end  s    6 66 yy,E,,t#q(-.	!'	ll..**001B1B81LM 	-QXhop^d0(F1CYOPXYpGp/#'"&"8"8::K (4K#G,G	- <<##33u0012S\AKK+>?DJJd#LL)IPTPZPZ[E$..(		=%01T^^+-* "'!2!2; q	- 	-s   G-#G(;=G-(G--G6)NNN)
rm   rn   ro   rp   r   r   r	   r   rJ   rl   r/   rK   rB   r   r     sU    4 9=%)"MM $$45M c]	M
 smM*,3rK   r   c                   H    e Zd ZdZ	 	 	 dded   dedefdZd Zdd	Zdd
Z	y)MergeModelCallbacka  
    A [`~transformers.TrainerCallback`] that merges the policy model (the model being trained) with another model based
    on a merge configuration.

    Args:
        merge_config ([`MergeConfig`], *optional*, defaults to `None`):
            Configuration used for the merging process. If not provided, the default [`MergeConfig`] is used.
        merge_at_every_checkpoint (`bool`, *optional*, defaults to `False`):
            Whether to merge the model at every checkpoint.
        push_to_hub (`bool`, *optional*, defaults to `False`):
            Whether to push the merged model to the Hub after merging.

    Example:

    ```python
    # pip install mergekit

    from trl.mergekit_utils import MergeConfig
    from trl import MergeModelCallback

    config = MergeConfig()
    merge_callback = MergeModelCallback(config)
    trainer = DPOTrainer(..., callbacks=[merge_callback])
    ```
    Nmerge_configr   merge_at_every_checkpointpush_to_hubc                 p    t               st        d      |xs
 t               | _        || _        || _        y )NzYMergeModelCallback requires the `mergekit` extra. To install, run `pip install mergekit`.)r   rx   r   r   r   r   )rI   r   r   r   s       rB   rJ   zMergeModelCallback.__init__  s;     %&k  )9KM)B&&rK   c                    t         j                  j                  |d|       }|| j                  _        | j                  j
                  %|j                  j                  | j                  _        t         j                  j                  |d      }t        | j                  j                         |       | j                  r| d| d}t        ||       y y )Nzcheckpoint-mergedz_checkpoint-_merged)ospathjoinr   policy_model_pathtarget_model_pathconfig_name_or_pathr   creater   r   )rI   
output_dirrd   r#   checkpoint_path
merge_path	repo_names          rB   _merge_and_maybe_pushz(MergeModelCallback._merge_and_maybe_push)  s    '',,z[3NO.=+..627,,2L2LD/WW\\/8<
T&&--/<%,l;-wGIz95 rK   c                 l    | j                   r(| j                  |j                  |j                  |       y y rH   r   r
  r  rd   rI   rh   ri   rj   r#   rk   s         rB   on_savezMergeModelCallback.on_save6  s,    ))&&t8I8I5Q *rK   c                 l    | j                   s(| j                  |j                  |j                  |       y y rH   r  r  s         rB   r   zMergeModelCallback.on_train_end:  s,    --&&t8I8I5Q .rK   )NFFrH   )
rm   rn   ro   rp   r   r   rJ   r
  r  r   r/   rK   rB   r   r      sI    8 15*/!	'}-' $(' 	'6RRrK   r   )r   )>r   typingr   r   pandasr   rq   
accelerater   accelerate.stater   accelerate.utilsr   r   transformersr	   r
   r   r   r   r   r   r   transformers.trainer_utilsr   transformers.utilsr   
data_utilsr   import_utilsr   mergekit_utilsr   r   r   models.utilsr   judgesr   utilsr   rich.consoler   r   	rich.liver   
rich.panelr    rich.progressr!   r   r_   r   r   rC   rE   rv   r   r   r   r   r   r/   rK   rB   <module>r"     sO   
 "   " - >	 	 	 2 0 2 0 J J 6 % 0 + & $#Y$$ '$ 	$
   01$ $ 
#Y$N&V? &VRM%? M%`mm"&s)m:>s)mUYZ]U^m\\mAo AHY3_ Y3x<R <RrK   