
    bi                     @    d dl mZmZ d dlmZ e G d de             Zy)    )	dataclassfield)OnlineDPOConfigc                   N     e Zd ZU dZ ed ddi      Zee   ed<    fdZ	 xZ
S )	XPOConfiga  
    Configuration class for the [`XPOTrainer`].

    Subclass of [`OnlineDPOConfig`] we can use all its arguments and add the following:

    Parameters:
        alpha (`float` or `list[float]`, *optional*, defaults to `1e-5`):
            Weight of the XPO loss term. If a list of floats is provided then the alpha is selected for each new epoch
            and the last alpha is used for the rest of the epochs.
    c                      dgS )Ngh㈵> r	       Q/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/trl/trainer/xpo_config.py<lambda>zXPOConfig.<lambda>"   s     r
   helpzWeight of the XPO loss term. If a list of floats is provided then the alpha is selected for each new epoch and the last alpha is used for the rest of the epochs.)default_factorymetadataalphac                     t         |           t        | j                  d      r.t	        | j                        dk(  r| j                  d   | _        y y y )N__len__   r   )super__post_init__hasattrr   len)self	__class__s    r   r   zXPOConfig.__post_init__)   sC    4::y)c$**o.BADJ /C)r
   )__name__
__module____qualname____doc__r   r   listfloat__annotations__r   __classcell__)r   s   @r   r   r      s:    	 & O
E4; ' 'r
   r   N)dataclassesr   r   trl.trainer.online_dpo_configr   r   r	   r
   r   <module>r$      s(    ) 9 ' ' 'r
   