
    bi                     $   d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZm	Z	 d dl
mZ d dlmZmZ d dlZd dlZd dlZd dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z) d dl*m+Z+ d dl,m-Z-m.Z. d dl/m0Z0m1Z1m2Z2 d dl3m4Z4m5Z5 ddl6m7Z7m8Z8 ddl9m:Z: ddl;m<Z< ddl=m>Z> ddl?m@Z@mAZAmBZBmCZCmDZDmEZEmFZFmGZGmHZHmIZImJZJmKZKmLZLmMZMmNZNmOZO  e4       r
d dlPmQZQmRZRmSZS  e)       rd dlTZTdZU G d dej                        ZW G d de&      ZXy)    N)defaultdict)contextmanagernullcontext)Path)OptionalUnion)Accelerator)	broadcastgather_object)Dataset)
DataLoader)
BaseImageProcessorDataCollatorWithPaddingFeatureExtractionMixinGenerationConfigPreTrainedTokenizerBaseProcessorMixinTrainerTrainerCallbackTrainerControlis_wandb_available)#get_reporting_integration_callbacks)DEFAULT_CALLBACKSDEFAULT_PROGRESS_CALLBACK)CallbackHandlerExportableStatePrinterCallback)is_peft_availableis_rich_available   )masked_meanmasked_whiten)create_reference_model)unwrap_model_for_generation   )	PPOConfig)OnlineTrainerStatebatch_generationdisable_dropout_in_modelempty_cache	exact_divfirst_true_indicesforwardgenerate_model_cardget_comet_experiment_url
get_rewardlog_table_to_comet_experimentpeft_module_casting_to_bf16prepare_deepspeedprint_rich_tableselective_log_softmaxtruncate_response)
PeftConfig	PeftModelget_peft_model      ?c                   &     e Zd Zd fdZd Z xZS )PolicyAndValueWrapperc                 t    t         |           || _        || _        t	        ||j
                        | _        y N)super__init__policyvalue_modelgetattrbase_model_prefixcritic_backbone)selfrA   rB   	__class__s      R/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/trl/trainer/ppo_trainer.pyr@   zPolicyAndValueWrapper.__init__W   s2    &&{K4Q4QR    c                      | j                   di |}| j                  j                  |j                  d         } | j                  di ||fS )N )rE   rB   scorehidden_statesrA   )rF   kwargsoutputlogitss       rH   r-   zPolicyAndValueWrapper.forward]   sQ    %%%//!!''(<(<R(@At{{$V$f,,rI   )returnN)__name__
__module____qualname__r@   r-   __classcell__rG   s   @rH   r<   r<   V   s    S-rI   r<   c                   
    e Zd ZddgZ	 	 	 	 	 d!dedeeeee	e
f      dej                  deej                     dej                  d	ed
ej                  dee   deeeeeef   f      deej&                  j(                  ej&                  j*                  j,                  f   deee      ded   ddfdZdefdZdefdZed        Zd"dee   def fdZ d Z!d#defdZ" fdZ#	 	 	 d$dee   dee   deeee   df   fd Z$ xZ%S )%
PPOTrainertrlppoNargsprocessing_classmodel	ref_modelreward_modeltrain_datasetrB   data_collatoreval_dataset
optimizers	callbackspeft_configr7   rR   c                    ||u rt        d      || _        || _        || _        |t	        | j                        }|j
                  r|j                  rt        d      |j
                  rU|j
                  dk(  r-|j                  x| j                  j                  _        | _        nEt        d|j
                   d      |j                  x| j                  j                  _        | _        | j                  j                  dvrt        d      t               s|t        d      t               r|t        | j                  t              r| j                  j                         | _        t        | j                  |      | _        |j                   r,t#        | j                  d	d
      rt%        | j                         t               xr t        | j                  t              | _        |j(                  | _        |j*                  | _        |r|| _        n.| j&                  rd | _        nt/        | j                        | _        || _        || _        t5        |      | _        || _        || _        |	| _        |
\  | _        | _         d | _!        |jD                  'tG        |jH                  | j6                  z        |_"        tK        |jL                        }|| _'        |jP                  |_)        |jT                  |jL                  z  |_+        tG        |jT                  |jR                  z        |_,        tG        |jV                  |jR                  z        |_-        t]        |jZ                  |j^                  d      |_0        t]        |jV                  |j^                  d      |_1        |jd                  r$|jb                  dk\  sJ d|jb                   d       tg        jh                  |jD                  |jZ                  z        |_5        tm        jn                  tG        tq        jp                               |jr                        }tu        |d      jw                         }|jx                   d|jz                   d| |_>        |jz                  |j~                  dz  z   | _@        |j                  dkD  r(t        d|jj                  |j                  z        | _C        |jV                  | _D        | j                  | j,                  | j8                  | j0                  fD ]  }|t        |        t        | j                  | j8                        | _G        | j                  j                  | j                  _H        | j                  |jj                         t        t        | j                  j                        z   }||n||z   | _M        t        | j                  | j                  | j                  | j>                  | j@                        | _O        | j                  | j                  j                  rt        nt               t               | _U        t        | j                         | j                         | j                  j                  | j                  gz   D cg c]  }t        |t              s| c}      | _Z        d| _[        d | _\        t#        | jN                  j                  dd       d u| _]        t#        | jN                  j                  dd       d u| _^        d | __        | j                  j                  r| j                          | j                  j                  r+t        j                  | j                  j                  d       t        | j                  d      r%| j                  j                  | j                         t        | j2                  | j                  d| j:                  d      | _j        tm        j                  |jz                         |j                  | j                  | j>                  | j                        \  | _G        | _        | _j        tm        j                  | j                         t        | j<                  |j                  | j:                  d      | _n        |j                  | j                        | _n        | j                  rt        | j0                  |jT                  |j                  |j                         | _        | j,                  | j&                  sGt        d      t        | j,                  |jT                  |j                  |j                         | _        y y | j,                  | j&                  s?t        d      | j,                  j                  | jN                  jr                        | _        | j0                  j                  | jN                  jr                        | _        y c c}w ) Nz`model` and `ref_model` cannot be the same object. If you want `ref_model` to be the same as `model`, you must make a copy of it, or `None` if you use peft.z5You cannot set both `stop_token` and `stop_token_id`.eoszUnknown `stop_token` z9. Allowed values are: `'eos'` and `None` (no stop token).>   k1k3zkl_estimator must be either 'k1' (straightforward, unbiased) or 'k3' (lower variance, unbiased, appears to be a strictly better estimator). See [Approximating KL Divergence](http://joschu.net/blog/kl-approx.html) for details.zvPEFT is not installed and you passed a `peft_config` in the trainer's kwargs, please install it to use the PEFT modelsis_loaded_in_4bitF)gradient_accumulation_stepsz5`batch_size` must be a multiple of `num_mini_batches`z;`local_batch_size` must be a multiple of `num_mini_batches`   zPer-rank minibatch size z is insufficient for whiteningdevicer   __i r%   )num_training_steps)is_local_process_zerois_world_process_zerostateful_callbacksdeepspeed_pluginfsdp_pluginT)exist_okadd_model_tags)
batch_sizeshuffle
collate_fn	drop_last)ry   r{   r|   z1No reference model and model is not a Peft model.)r
ValueErrorr\   r]   policy_modelr   
stop_tokenstop_token_ideos_token_idgeneration_configkl_estimatorr   ImportError
isinstancer8   merge_and_unloadr9   bf16rC   r2   is_peft_modelmodel_adapter_nameref_adapter_namer_   r#   r`   ra   lentrain_dataset_lenrB   rb   rc   	optimizerlr_scheduleroptimizer_cls_and_kwargstotal_episodesintnum_train_epochsr	   rl   acceleratornum_processes
world_sizeper_device_train_batch_sizelocal_batch_sizemicro_batch_sizery   r+   num_mini_batchesmini_batch_sizelocal_mini_batch_sizewhiten_rewardsmathceilnum_total_batchestorchtensortimero   r
   itemexp_nameseedrun_nameprocess_index
local_seednum_sample_generationsmaxsample_generations_freqlocal_dataloader_batch_sizer)   r<   r^   configcreate_optimizer_and_schedulerr   r   	report_tore   r   callback_handleradd_callbackdisable_tqdmr   r   r   controlr'   rr   rs   r   statecurrent_floshp_search_backendis_deepspeed_enabledis_fsdp_enabledhub_model_idpush_to_hubinit_hf_reposhould_saveosmakedirs
output_dirhasattrrx   
_tag_namesr   
dataloadermanual_seedprepareper_device_eval_batch_sizeeval_dataloaderr3   fp16to)rF   r\   r]   r^   r_   r`   ra   rB   rb   rc   rd   re   rf   r   time_tensortime_intmoduledefault_callbackscbs                      rH   r@   zPPOTrainer.__init__f   sC   $ Z 
 	 0!  3D4I4IJM ??t11TUU__%'XhXuXuu!!33@4CU +DOO+<<uv  UYTfTffD//<t?Q 99!!5d  !"{'> I   [%<$++Y7$($5$5$F$F$H! !/t/@/@+ NDyyWT%6%68KUS+D,=,=>.0]Z@Q@QS\5]"&"9"9 $ 5 5&DN!DN3D4E4EFDN(*!$]!3&*(,6))(,%
 &"%d&;&;d>T>T&T"UD!d>^>^_&%33 $ @ @4CcCc c #D$D$Dt$V Wd33dooEF(OOT224k 
 &/!!4#8#8:w&
" --2 *4+E+E*FFde2
 "&$//1"
 ll3tyy{#3K<N<NO[!,113==/DII;b
C))k&?&?&&HH&&*+.q$2H2HDLgLg2g+hD(+/+@+@(
 (($..$:J:JDL]L]^ 	1F!(0	1 +4+<+<d>N>NO
 --44

++#55 	, 	
 .0STXT]T]TgTg0hh.7.?*EVYbEb /NNDJJ(=(=t~~tO`O`!
 	TYY-C-C/Ibc%''"&"<"<">"&"<"<">!22<<~M Q[\^`oQp 

 !%$+D,<,<,B,BDVX\$]ei$i!&t'7'7'='=}dS[__ 99  99  KK		,,t< 4::/0JJ%%doo6
 %77))
 	$))$6A6I6I$**VZVdVdfjfufu6v3
DNDO$//*)66))	 
  +2243G3GH$$ 1!!4#C#CTYYPTPYPY!D ~~%))$%XYY!2NND$D$DdiiQUQZQZ" * ~~%))$%XYY!%!2!243C3C3J3J!K $ 1 1 4 4T5E5E5L5L MD{ s   g:(g:c                     | j                   S r>   r   rF   s    rH   get_train_dataloaderzPPOTrainer.get_train_dataloader8  s    rI   c                     | j                   S r>   )r   r   s    rH   get_eval_dataloaderzPPOTrainer.get_eval_dataloader;  s    ###rI   c              #     K   | j                   rI| j                  s=| j                  j                  | j                  j
                        j                         n	t               5  | j                  r/| j                  j
                  j                  | j                         d | j                  r3| j                  j
                  j                  | j                  xs d       ddd       y# 1 sw Y   yxY ww)zWContext manager for handling null reference model (that is, peft adapter manipulation).Ndefault)
r   r   r   unwrap_modelr^   rA   disable_adapterr   set_adapterr   r   s    rH   null_ref_contextzPPOTrainer.null_ref_context>  s     
 !!$*?*? ))$***;*;<LLN		T
 $$

!!--d.C.CD$$

!!--d.E.E.RS		T 		T 		Ts   A C6"A?C*!	C6*C3/C6r   _internal_callc                     | j                   }| j                   j                  | _         | j                  r| j                  }| j                   | _        t        |   ||       || _         | j                  r| _        y y r>   )r^   rA   r   	deepspeedr?   
save_model)rF   r   r   backup_modelbackup_deepspeedrG   s        rH   r   zPPOTrainer.save_modelL  sf    zzZZ&&
$$#~~!ZZDN:~6!
$$-DN %rI   c                 ,  r | j                   }| j                  }| j                  }| j                  }| j                  }| j
                  }| j                  }| j                  r|j                  }rfd}	t         |	             }
t        |j                  |j                  dz   ddd      }|j                  d       t        j                         }|j                  |j                   |j"                  f}t%        j&                  ||      }t%        j&                  ||      }t%        j&                  ||      }t%        j&                  ||      }t%        j&                  ||      }t%        j&                  ||      }t%        j&                  ||      }|j)                          d	| j*                  _        d	| j*                  _        |j0                  | j*                  _        |j4                  | j6                  z  | j*                  _        |j:                  p|j:                  d
k  rFt=        j>                  | j*                  j2                  |j:                  z        | j*                  _        n|j:                  | j*                  _        |j@                  p|j@                  d
k  rFt=        j>                  | j*                  j2                  |j@                  z        | j*                  _         n|j@                  | j*                  _         |jB                  p|jB                  d
k  rFt=        j>                  | j*                  j2                  |jB                  z        | j*                  _!        n|jB                  | j*                  _!        | jD                  jG                  || j*                  | jH                        | _$        | jJ                  r"| j                  | _&        | j                  | _'        tQ        d
|j0                  d
z         D ]  }| j*                  xj.                  d
|jR                  z  z  c_        tU        |
      }t%        jV                         5  |d   jY                  |      }|jZ                  d
   }g }g }g }g }g }g }g }t]        | j                  | j                  | j                   j^                        5 } ta        | jb                  ||jd                  |jf                  |      \  }!}"d d d        tQ        d	|jZ                  d	   |jd                        D ]:  }#||#|#|jd                  z    }$!|#|#|jd                  z    }%|%d d |d f   }&"|#|#|jd                  z    }'ti        |'|&      }(~'tk                |;| jm                         5  to        |jb                  |%|jf                        })d d d        nto        ||%|jf                        }))jp                  d d |d
z
  df   }*|*|j                  dz   z  }*ti        |*|&      }+~)~*tk                |&},| jr                  !tu        | jr                  |jf                  |&      },t%        jv                  |$|,fd
      }-ty        |,|jf                  k(        d
z
  }.|j{                  |      j|                  }/t        |/|%|jf                  |      \  }0}1}1|0d d |d
z
  df   j                  d      }2t        ||-|jf                  |      \  }1}3}1|j                  |&       |j                  |,       |j                  |(       |j                  |+       |j                  |.       |j                  |3       |j                  |2       = t%        jv                  |d	      }t%        jv                  |d	      }t%        jv                  |d	      }t%        jv                  |d	      }t%        jv                  |d	      }t%        jv                  |d	      }t%        jv                  |d	      }~(~+~0~2~3~ tk                t        j                          t%        j                  || j                  j                  k(  d      }4| j                   j                  "||4 xx   | j                   j                  z  cc<   t%        j                  |jZ                  d
   |j                        j                  |jZ                  d	   d
      }5|5|j                  d
      kD  }6t%        j                  ||6t              }t%        j                  ||6t              }|d
z   }7|5|7j                  d
      kD  }8t%        j                  ||8d	      }||z
  }9|j                  dk(  r|9 n|9j                         d
z
  |9z
  }:|j                   |:z  };|;j                         }<t%        j                  |<j                  d	      |<j                        }=t%        j                  |7|<j                  d
      k  |7|      }>|<|=|>gxx   |z  cc<   |j                  r&t        |<|8 d      }<t%        j                  |<|8d	      }<d	}?g }@|jZ                  d
   }At        tQ        |A            D ]k  }B|BAd
z
  k  r|d d Bd
z   f   nd}C|<d d Bf   |j                  |Cz  z   |d d |Bf   z
  }D|D|j                  |j                  z  |?z  z   }?@j                  |?       m t%        j                  @d d d   d
      }E|E|z   }Ft        |E|6       }Et%        j                  |E|6d	      }Etk                d d d        tQ        |j                        D ]"  }Gt        j                  j                  |j                        }Hd	}ItQ        d	|j                  |j                        D ]  }J|J|j                  z   }KH|J|K }Ld	}MtQ        d	|j                  |j                        D ]g  }N|j                  |      5  N|j                  z   }OL|N|O }PE|P   }Q|P   }R!|P   }S|P   }TF|P   }U|P   }Vto        ||S|jf                        \  }W}X|Wjp                  d d d
z
  df   }'|'|j                  dz   z  }'ti        |'|R      }Yt%        j                  |Y6|P   t              }Y|Xd d |d
z
  df   j                  d      }Zt%        j                  |Z8|P   d	      }Zt%        j                  |Z|V|j                  z
  |V|j                  z         }[t%        j                  |Z|Uz
        }\t%        j                  |[|Uz
        }]t%        j                  |\|]      }^dt        |^|8|P          z  }_t        |]|\kD  j                         |8|P          }`|Y|Tz
  }at%        j                  |a      }b|Q |bz  }c|Q t%        j                  |bd|j                  z
  d|j                  z         z  }dt%        j                  |c|d      }et        |e|6|P          }f|f|j                  |_z  z   }g|j                  |g       |j                          |j                          t%        jV                         5  t        dckD  j                         |6P          }ht$        j                  j                  j                  |'d      }it%        j                  |'d      t%        j                  |i|'z  d      z
  }jdadz  j                         z  }k|k|GIMf<   |h||G|I|Mf<   f||G|I|Mf<   _||G|I|Mf<   `||G|I|Mf<   |jj                         ||G|I|Mf<   bj                         ||G|I|Mf<   d d d        d d d        Md
z  }Mj Id
z  }I~W~X~'~Y~Z~[~\~]~_~`~a~b~c~d~e~f~g~h~i~j~k~U~Q~V~R~S~Ttk                 % t%        jV                         5  :j                  d
      j                         }l j                  d
      j                         }m;j                  d
      j                         }n|nj                         z   }ot        | j*                  j.                  t        j                         |z
  z        }pi }q|p|qd<   | j                  j                  |l      j                         j                         |qd<   | j                  j                  |m      j                         j                         |qd<   | j                  j                  |n      j                         j                         |qd<   | j                  j                  |o      j                         j                         |qd<   | j                  j                  |j                               j                         j                         |qd<   | j                  j                  |      j                         j                         |qd<   | j                  j                  |      j                         j                         |qd<   | j                  j                  |      j                         j                         |qd<   | j                  j                  |      j                         j                         |qd<   | j                  j                  |      j                         j                         |qd<   | j                  j                  |      j                         j                         |qd <   | j                  j                  |      j                         j                         |qd!<   | j                  j                  |      j                         j                         |qd"<   |j                  k(  j                         j                         |qd#<   | j                  j                         d	   |qd$<   | j*                  j.                  |qd%<   | j*                  j.                  | j6                  z  | j*                  _v        | j*                  xj,                  d
z  c_        | j                  |q       d d d        | j                  j                          | jD                  j                  || j*                  | jH                        | _$        | jH                  j                  rS| j                  |d &       | jD                  j                  | j                   | j*                  | jH                        | _$        ~:~l~m~n~~q~;tk                t        j                          |j                  d	kD  r1|d
z
  | j                  z  d	k(  r| j                  d'       tk                ~!~~~~~~~4~7~5~6~8~<~=~>~E~Ftk                 | jD                  j                  || j*                  | jH                        | _$        | jH                  j                  rU| j                  |d d (       | jD                  j                  | j                   | j*                  | jH                        | _$        y y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   
xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   )xY w))Nc               3   (   K   	  E d {    7 wr>   rL   r   s   rH   repeat_generatorz*PPOTrainer.train.<locals>.repeat_generatorf  s     %%% %s   gHz>        r:   Tmax_new_tokenstemperaturetop_ktop_p	do_samplez===training policy===rn   r   r%   	input_idsgather_deepspeed3_paramsrK   )dimri   F)mask
shift_mean)axisg      ?r    epszobjective/klzobjective/entropyzobjective/non_score_rewardzobjective/rlhf_rewardzobjective/scoreszpolicy/approxkl_avgzpolicy/clipfrac_avgzloss/policy_avgzloss/value_avgzval/clipfrac_avgzpolicy/entropy_avgz	val/ratiozval/ratio_varzval/num_eos_tokenslrepisode)trial)sampling)r   metrics)r\   r   r   r^   r_   r`   r]   r   ro   iterr   response_lengthr   printr   num_ppo_epochsr   rl   r   zerostrainr   global_stepr   r   	max_stepsr   r   r   logging_stepsr   r   
eval_steps
save_stepsr   on_train_beginr   r   r   model_wrappedrangery   nextno_gradr   shaper$   ds3_gather_for_generationr(   rA    local_rollout_forward_batch_sizepad_token_idr5   r*   r   r-   rQ   r   r6   catr,   r   rB   r0   squeezeappendgccollectanyr   missing_eos_penaltyarangerepeat	unsqueezemasked_fillINVALID_LOGPROBr   expkl_coefclonesizewherer   r"   reversedgammalamstacknprandompermutationr   r   r   
accumulateclampcliprange_valuesquarer   r!   float	cliprangevf_coefbackwardstep	zero_gradnn
functionalsoftmax	logsumexpsummeanr   gather_for_metricsr   varr   get_last_lrepochlogon_step_endr   _save_checkpointon_saver   r   generate_completionson_train_end)srF   r\   r   r   r^   
ref_policyr`   r]   ro   r   iter_dataloaderr   
start_timestats_shapeapproxkl_statspg_clipfrac_statspg_loss_statsvf_loss_statsvf_clipfrac_statsentropy_statsratio_statsupdatedataqueriescontext_length	responsespostprocessed_responseslogprobsref_logprobsscoressequence_lengthsvaluesunwrapped_modelquery_responseslogitssiqueryquery_responseresponserQ   logprob
ref_output
ref_logitsref_logprobpostprocessed_responsepostprocessed_query_responsesequence_lengthunwrapped_value_model
full_value_valuerM   contain_eos_tokenresponse_idxspadding_masksequence_lengths_p1padding_mask_p1logrklnon_score_rewardrewardsactual_start
actual_end
lastgaelamadvantages_reversed
gen_lengtht
nextvaluesdelta
advantagesreturnsppo_epoch_idxb_indsminibatch_idxmini_batch_startmini_batch_endmini_batch_indsgradient_accumulation_idxmicro_batch_startmicro_batch_endmicro_batch_indsmb_advantagemb_responsesmb_query_responsesmb_logprobs	mb_return	mb_valuesrP   
vpred_tempnew_logprobsvpredvpredclipped
vf_losses1
vf_losses2vf_loss_maxvf_lossvf_clipfraclogprobs_diffratio	pg_losses
pg_losses2pg_loss_maxpg_losslosspg_clipfrac	prob_distentropyapproxklmean_klmean_entropymean_non_score_rewardrlhf_rewardr   r   r   ss                                                                                                                     @rH   r   zPPOTrainer.train[  s   yy&&NN	

^^
((00__
##	& /12,//))D0
 	12YY[
**D,A,A4CcCcd[@!KKFCK?K?!KKFCK?kk+f= "#



#55

&*&9&9D<R<R&R

#)!!A%+/99TZZ5I5IDL^L^5^+_

(+/+=+=

(??&"(,		$**2F2F2X(Y

%(,

%??&"(,		$**2F2F2X(Y

%(,

%,,;;D$**dll[ $$!ZZDN!%DAt559: L	FJJ!doo"55(D z{+..v6!(q!1	*,'!#% 0JJ 0 0499KnKn 	$/?'..==(55)0,OW	 q'--"2D4Y4YZ -)A#AD,Q,Q(QRE%4QT=b=b9b%cN-a.@AH$QT-R-R)RSF3FHEGM!)!224 n)0~O_OlOl)mJn n &-ZIYIfIf%g
!+!2!21nq6H26M3M!NJ$"2"2T"99J"7
H"MK"JM .6*))51B ..0@0M0Mx2.
 4999eE[=\^_3`0&89OScSpSp9p&qtu&uO,7,D,DU,K,W,W)'1-~?O?\?\^l($J1 'q.1*<r*A'ABJJ2NE",$&BDTDaDacq#KAua $$X.+223IJOOG, ''4$++O<MM%(MM%([-)\ "IIi3	*/))4KQ*O' 99Xq1$yyq9#(99-=q#A 61-61-k:ue_

 %*II.EI^I^IkIk.kqs$t!9900<--.$))2O2OO. !&Y__Q-?	HXHX Y ` `ajapapqrasuv w,/?/I/I!/LL ,,X|_U$00|_]&6&:#"/3F3P3PQR3S"T**6?AF $h."//47dUdhhj1nPT=T$(LL=2#5 *002$||GLLOGNNS"[[)<w||A)NPceuv
z23v=3 &&+G?:JW\]G#//!LG 
&(#&__Q/
!%
"34 ;A56a5G1q5!1SJ#AqDMDJJ,CCfQPQTlRE!&dhh)>)K!KJ'..z:	;
 #[[)<TrT)BK
$v-*:}E
"..z<K
uzz "'t':':!; N"..t/D/DE !(-a1F1FHbHb(c K"$%58R8R%RN&,-=n&MO01--21d6P6PRVRrRr-s <7)(33E: :t.?$BbBb.bO/>?PQ`/a,+56F+GL+45E+FL1@AQ1R.*23C*DK(/0@(AI(./?(@I18@RTdTqTq1r.FJ%+]]1nq6H26M3M%NF"d&6&6&==F+@+VL+0+<+< ,l;K.Lo,L %/q.12Dr2I/I$J$R$RSU$VE$)$5$5e_M]=^`a$bE+0;; % )D,@,@ @ )D,@,@ @,L
 */ei6G)HJ).lY6N)OJ*/))J
*KK&)KoVfFgEg,h&hG*5!+j!8 ? ? AOTdDeCe+K -9;,FM$)IIm$<E)5(=I*6UCRVR`R`L`behlhvhvbv9w)wJ*/))Iz*JK&1+M]@^?^&_G#*T\\G-C#CD'006%NN,%//1!& t.9%/)%;$B$B$D|TdGeFe/" -2HH,?,?,G,GTV,G,W	*///&b*IEIIV_bhVhnpLq*q+.-2B1H1H1J+Jjr}mMf/f g$/ !2-Pi2i j jqm]Le.e fipm]Le.e f$/ !2-Pi2i j jqiuiuiwm]Le.e fglgqgqgsM=Jc,c d#tS:tv 2Q61y<7z "Q&M 
FL%"JmUZ\egqs~{IwR[$i?QS^  MWK"N"^  "&&)..*!)	q1668(8(<(<Q(?(D(D(F%3fkkmC$**,,		j0HIJ!$*.*:*:*M*Mg*V*[*[*]*b*b*d'/3/?/?/R/RS_/`/e/e/g/l/l/n+,$$778MNSSUZZ\ 45 483C3C3V3VWb3c3h3h3j3o3o3q/0.2.>.>.Q.QRXR]R]R_.`.e.e.g.l.l.n*+151A1A1T1TUc1d1i1i1k1p1p1r-.151A1A1T1TUf1g1l1l1n1s1s1u-.-1-=-=-P-PQ^-_-d-d-f-k-k-m)*,0,<,<,O,OP],^,c,c,e,j,j,l().2.>.>.Q.QRc.d.i.i.k.p.p.r*+040@0@0S0STa0b0g0g0i0n0n0p,-'+'7'7'J'J;'W'\'\'^'c'c'e$+/+;+;+N+N{+[+_+_+a+f+f+h(1:>N>[>[1[0`0`0b0g0g0i,- $ 1 1 = = ? B%)ZZ%7%7	"#'::#5#58N8N#N

 

&&!+&!9"< ""$00<<T4::t||\DL||''%%e4%8#44<<TYY

TXT`T`aG\+@&'ScMJJL**Q.FQJ$B^B^3^bc3c))4)8' !#MYL	^ ,,99$

DLLY<<##!!%tT!B0088DJJPTP\P\]DL $G	 	*n n?z z\t tS:t :tN" "s   A(AW01AV6!BAW;"AW	U#AW=IAW*C)AW;AW*Q9AW7V6AW V;AWWAWWAWWAW	WAW'W"AW*W*AW4W7AX	r   c                    | j                   }| j                  }t        | j                   j                  dddd      }t	        t
              }t        | j                  | j                  | j                   j                        5 }| j                  D ]  }|d   }t        j                         5  |j                  d   }	t        |j                  ||j                  d	   |j                   |      \  }
}|
d d |	d f   }|}| j"                  !t%        | j"                  |j                   |      }|d
   j'                  t)        |j+                  |d                   |d   j'                  t)        |j+                  |                   t        j,                  ||fd      }t/        | j0                  ||j                   |	      \  }}}|d   j'                  | j                  j3                  |      j5                         j7                         j9                                d d d        |s n d d d        t;        j<                  |      }| j                  j>                  rtA               rtC        |jD                  d	d        d|jF                  v r5d	d l$}|jJ                  % |jL                  d |jN                  |      i       d|jF                  v rtQ        d|       y y y # 1 sw Y   xY w# 1 sw Y   xY w)Ngaz?r   r:   Tr   r   r   r%   r   rW  )skip_special_tokenszmodel responserM      wandbcompletions)	dataframecomet_mlzcompletions.csv)nametable))r\   r]   r   r   r   listr$   r^   r   r  r   r   r  r  r(   rA   r
  r   r6   extendr   batch_decoder  r0   r`   r3  r'  cpunumpypd	DataFrameis_main_processr   r4   ilocr   r  runr7  Tabler1   )rF   r   r\   r]   r   r  rS  batchrW  rK  rX  rc  rY  r^  r_  rM   dfr  s                     rH   r;  zPPOTrainer.generate_completions  s   yy00,9944$
 D!(JJ((499CfCf
 "	-- k*]]_ l%*[[^N(8'..A(55))%NA  .a.@AH-5*))51B ..0@0M0Mx2. 'N))%&6&C&CE_c&C&de *+22%&6&C&CDZ&[\ 4999eE[=\^_3`0",))+GIYIfIfhv#KAua 'N))$*:*:*M*Me*T*Z*Z*\*`*`*b*h*h*jk5l8 ?"	F \\% ++ " U!34$..(99(EII}kekkB.GHIT^^+-* , ,Al l"	 "	s+   *K),E,K
K)$K)K&"K))K2c                    | j                   j                  *t        | j                   j                        j                  }n(| j                   j                  j                  d      d   }| j                  |       t        | !  ||       y )N/rK   )
model_name)	r\   r   r   r   r  splitcreate_model_cardr?   r9  )rF   r^   r   r  rG   s       rH   r9  zPPOTrainer._save_checkpoint  sl    99!!)dii22388J//55c:2>J*5 .rI   r  dataset_nametagsc                    | j                         syt        | j                  j                  d      r^t        j
                  j                  | j                  j                  j                        s!| j                  j                  j                  }nd}|t               }nt        |t              r|h}nt        |      }t        | j                  j                  d      r|j                  d       |j                  | j                         t        j                  d      }t!        ||| j"                  ||t%               r.t&        j(                  t&        j(                  j+                         ndt-               d|dd	      }|j/                  t        j
                  j1                  | j2                  j4                  d
             y)a  
        Creates a draft of a model card using the information available to the `Trainer`.

        Args:
            model_name (`str` or `None`, *optional*, defaults to `None`):
                Name of the model.
            dataset_name (`str` or `None`, *optional*, defaults to `None`):
                Name of the dataset used for training.
            tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
                Tags to be associated with the model card.
        N_name_or_pathunsloth_versionunslotha          @article{mziegler2019fine-tuning,
            title        = {{Fine-Tuning Language Models from Human Preferences}},
            author       = {Daniel M. Ziegler and Nisan Stiennon and Jeffrey Wu and Tom B. Brown and Alec Radford and Dario Amodei and Paul F. Christiano and Geoffrey Irving},
            year         = 2019,
            eprint       = {arXiv:1909.08593}
        }PPOz2Fine-Tuning Language Models from Human Preferencesz
1909.08593)
base_modelr  r   r  r  	wandb_url	comet_urltrainer_nametrainer_citationpaper_titlepaper_idz	README.md)rs   r   r^   r   r   pathisdirr  setr   straddrH  r   textwrapdedentr.   r   r   r  r  get_urlr/   savejoinr\   r   )rF   r  r  r  r  citation
model_cards          rH   r  zPPOTrainer.create_model_card  sG   " ))+4::$$o6rww}}TZZM^M^MlMl?m**88JJ <5Dc"6Dt9D4::$$&78HHYDOO$?? $  )!!**%-?-AeiiF[eii'')ae.0%L!

 	TYY%9%9;GHrI   )NN)NNNN)NF)F)NNN)&rS   rT   rU   r   r&   r   r   r   r   r   r   r-  Moduler   r   dictr  tupler   optim	Optimizerr   LambdaLRr  r   r@   r   r   r   r   r   boolr   r   r;  r9  r  rV   rW   s   @rH   rY   rY   c   s   J <@EIVb59.2!PNPN #)+=?UWeef
PN yyPN BII&PN iiPN PN YYPN   78PN uWd3<.@%@ABPN %++//1I1I1R1RRSPN D12PN  l+!PN" 
#PNdj $Z $ T T.Xc] .4 .R^h
>T >B/ %)&*,0	<ISM<I sm<I CcD()	<IrI   rY   )Yr  r   r   r  r   collectionsr   
contextlibr   r   pathlibr   typingr   r   r  r   pandasr  r   torch.nnr-  
accelerater	   accelerate.utilsr
   r   datasetsr   torch.utils.datar   transformersr   r   r   r   r   r   r   r   r   r   transformers.integrationsr   transformers.trainerr   r   transformers.trainer_callbackr   r   r   transformers.utilsr   r   corer!   r"   modelsr#   models.utilsr$   
ppo_configr&   utilsr'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   peftr7   r8   r9   r  r  r  r<   rY   rL   rI   rH   <module>r     s    
  	   # 2  "     " 5  '   J M [ [ C - + 6 !    ( :: 

-BII 
-QI QIrI   