
    bi4                     @    d dl mZmZ d dlmZ e G d de             Zy)    )	dataclassfield)OnlineDPOConfigc                   N     e Zd ZU dZ ed ddi      Zee   ed<    fdZ	 xZ
S )NashMDConfiga  
    Configuration class for the [`NashMDTrainer`].

    Subclass of [`OnlineDPOConfig`] we can use all its arguments and add the following:

    Parameters:
        mixture_coef (`float` or `list[float]`, *optional*, defaults to `0.5`):
            Logit mixture coefficient for the model and reference model. If a list of floats is provided then the
            mixture coefficient is selected for each new epoch and the last coefficient is used for the rest of the
            epochs.
    c                      dgS )Ng      ? r	       U/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/trl/trainer/nash_md_config.py<lambda>zNashMDConfig.<lambda>#   s     r
   helpzLogit mixture coefficient for the model and reference model. If a list of floats is provided then the mixture coefficient is selected for each new epoch and the last coefficient is used for the rest of the epochs.)default_factorymetadatamixture_coefc                     t         |           t        | j                  d      r.t	        | j                        dk(  r| j                  d   | _        y y y )N__len__   r   )super__post_init__hasattrr   len)self	__class__s    r   r   zNashMDConfig.__post_init__+   sL    4$$i0S9J9J5Kq5P $ 1 1! 4D 6Q0r
   )__name__
__module____qualname____doc__r   r   listfloat__annotations__r   __classcell__)r   s   @r   r   r      s9    
 !&% "
!L$u+ 5 5r
   r   N)dataclassesr   r   trl.trainer.online_dpo_configr   r   r	   r
   r   <module>r$      s(    ) 9 5? 5 5r
   