
    bie5                    
   d dl Z d dlmZ d dlZd dlZd dlZd dlmZ d dl m	Z	m
Z
 d dlmZ d dlmZmZmZmZ d dlZd dlZd dlZd dlZd dlmc mZ d dlZd dlmZmZ d dl m!Z! d dl"m#Z#m$Z$ d d	l%m&Z& d d
lm'Z' d dl(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0 d dl1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7 ddl8m9Z9  e4       rd dl:m;Z; d dl<m=Z= d dl>m?Z? d dl@mAZA  e0       rd dlBZB e3       rd dlCmDZDmEZE  G d de*      ZFe	 G d d             ZGe	 G d d             ZH	 	 	 ddeIej                     deKdeLdeeK   dej                  f
dZMe	 G d  d!             ZN G d" d#e'      ZOe	 G d$ d%             ZP ej                         	 dd&ej                  deRej                  ej                  eKf   fd'       ZSd(e+deTeLeUf   fd)ZVdd*ej                  d+eKd,eeKeUf   d-eKdej                  f
d.ZWd/ej2                  j                  ddfd0ZYdd1ZZ G d2 d3      Z[d4 Z\d5e9dee)   fd6Z]deeTeLeKf      fd7Z^d5e9dd8fd9Z_dd:Z`dd;Zad<ej                  ddfd=Zcd>Zdd?Zee	 G d@ dAe.             Zfe	 G dB dCe/             Zgej                  fdDej                  dej                  fdEZid/ej2                  j                  dFej                  dGeKdHeKdeRej                  ej                  ej                  f   f
dIZjd/ej2                  j                  dFej                  dGeKde2fdJZk	 dd/ej2                  j                  dKeKdLeldMeldej2                  j                  f
dNZmdOeKdGeKdPej                  dej                  fdQZndRej2                  j                  dSej                  dGeKdTe,deRej                  ej                  f   f
dUZo ej                         d/ej2                  j                  dSej                  dVeKdGeKdTe,f
dW       ZpdXeeK   dYeKdZeTeLeIeK   f   d[eKd\eTeLeIeK   f   d]eKd^eTeLeIeK   f   fd_Zqd`eKd\eTeLeIeK   f   d^eTeLeIeK   f   fdaZrdbej                  dOeKdGeKdeRej                  ej                  f   fdcZsdddZtdeej                  dfe-deIeL   fdgZu	 	 	 	 ddheeL   dieLdjeLdkeeL   dleIeL   dmeeL   dneLdoeeL   dpeeL   dqeeL   dreeL   de#fdsZvdeeL   fdtZwdueLdvej                  ddfdwZxdxej                  dej                  deej                  eRej                  dyf   f   fdzZydxej                  dej                  deej                  eRej                  dyf   f   fd{Zzdej                  fd|Z{	 dd}eIeL   d~eIeL   deTeLeIeU   f   deIeU   deKdeKddfdZ|y)    N)deque)	dataclassfield)version)AnyLiteralOptionalUnion)AcceleratorPartialState)AcceleratorState)	ModelCardModelCardData)pad_sequence)IterableDataset)BitsAndBytesConfigDataCollatorForLanguageModelingEvalPredictionGenerationConfigPreTrainedTokenizerBaseTrainerStateTrainingArgumentsis_comet_available)ModelOutputis_peft_availableis_rich_availableis_torch_mlu_availableis_torch_npu_availableis_torch_xpu_available   )ModelConfig)Console)Panel)Table)Text)
LoraConfig
PeftConfigc                        e Zd ZdZ	 ddddddeeee   f   deeeee   f      de	ded	e	f
 fd
Z
deeee   eeeef   f      deeef   f fdZ xZS )DataCollatorForCompletionOnlyLMa  
    Data collator used for completion tasks. It ensures that all the tokens of the labels are set to an 'ignore_index'
    when they do not come from the assistant. This ensure that the loss is only calculated on the completion made by
    the assistant.

    Args:
        response_template (`Union[str, list[int]]`):
            the template form that indicates the start of the response, typically something like '### Response:
'. It
            can also be passed as tokenized ids, which can be useful when using a tokenizer that encodes the response
            differently if it does not have proper context.
        instruction_template (`Union[str, list[int]]`):
            the template form that indicates the start of the human instruction, typically something like '###
            Human:
'. Useful for assistant-style conversation datasets. It can also be passed as tokenized ids.
        mlm (`bool`, *optional*, defaults to `False`): Whether to use masked language modeling in the underlying
            `DataCollatorForLanguageModeling` class. Note that this option currently has no effect but is present
             for flexibility and backwards-compatibility.
        ignore_index (`int`, *optional*, defaults to `-100`):
            The index to use to ignore the initial tokens with
    F)mlmignore_indexpadding_freeresponse_templateinstruction_templater+   r,   r-   c                b   t        |   |d|i| t        j                  dt               || _        t        |t              r-| j                  j                  | j
                  d      | _
        n|| _
        || _        t        |t              r-| j                  j                  | j                  d      | _        n|| _        | j                  sS| j
                  rG| j                  j                  | j                  j                  k(  rt        j                  dt                || _        || _        y )Nr+   zThis class is deprecated and will be removed in version 0.20.0. To train on completion only, please use the parameter `completion_only_loss` of `SFTConfig` instead.F)add_special_tokensa  The pad_token_id and eos_token_id values of this tokenizer are identical. If you are planning for multi-turn training, it can result in the model continuously generating questions and answers without eos token. To avoid this, set the pad_token_id to a different value.)super__init__warningswarnDeprecationWarningr/   
isinstancestr	tokenizerencodeinstruction_token_idsr.   response_token_idsr+   pad_token_ideos_token_idUserWarningr,   r-   )	selfr.   r/   r+   r,   r-   argskwargs	__class__s	           L/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/trl/trainer/utils.pyr3   z(DataCollatorForCompletionOnlyLM.__init__\   s	    	$2C262K	
 %9!*C0)-)>)>t?X?Xmr)>)sD& *>D&!2'-&*nn&;&;D<R<Rgl&;&mD# '8D#xxD55$..:U:UY]YgYgYtYt:tMML  )(    examplesreturnc           
      l   t         |   |      }| j                  "t        t	        |            D ]  }d }t        j                  |d   |   | j                  d   k(        d   D ]A  }| j                  |d   |   ||t	        | j                        z    j                         k(  s@|}C |at        j                  d| j                   d| j                  j                  |d   |          dt               | j                  |d   |d d f<   |t	        | j                        z   }| j                  |d   |d |f<    nt        t	        |            D ]  }g }g }t        j                  |d   |   | j                  d   k(        d   D ]f  }	| j                  |d   |   |	|	t	        | j                        z    j                         k(  s@|j!                  |	t	        | j                        z          h t	        |      dk(  r`t        j                  d| j                   d| j                  j                  |d   |          dt               | j                  |d   |d d f<   | j"                  }
t        j                  |d   |   |
d   k(        d   D ]<  }|
|d   |   ||t	        |
      z    j                         k(  s,|j!                  |       > t	        |      dk(  r`t        j                  d| j                   d| j                  j                  |d   |          dt               | j                  |d   |d d f<   t	        |      dkD  rt	        |      dkD  r|d   |d   kD  rdg|z   }t%        t'        ||            D ]:  \  }\  }}|dk7  r| j                  |d   |||f<   %| j                  |d   |d |f<   < t	        |      t	        |      k  s| j                  |d   ||d   d f<    | j(                  r|j+                  d	      }|d   |j-                            j/                  d      |d<   |j1                  d
      |j-                            j/                  d      d
z
  |d<   |d   |j-                            j/                  d      |d<   | j                  |d   |d   dk(  <   |d   j3                         }t5        j6                  |j9                  d      |j:                  t4        j<                        }t5        j>                  ||dk(     t5        j@                  |j9                         |j:                  t4        j<                        f      j/                  d      |d<   |d   |d<   t5        j@                  |jC                         jE                         d
z   g      |d<   |d   |d<   |S )Nlabelsr   zCould not find response key `z` in the following instance: 	input_idszw. This instance will be ignored in loss calculation. Note, if this happens often, consider increasing the `max_length`.z Could not find instruction key `attention_mask   position_ids)devicedtypecu_seq_lens_qcu_seq_lens_kmax_length_kmax_length_q)#r2   
torch_callr/   rangelennpwherer<   tolistr4   r5   r.   r9   decoder?   r,   appendr;   	enumeratezipr-   popbool	unsqueezecumsumflattentorcharangesizerO   int32cattensormaxitem)r@   rF   batchiresponse_token_ids_start_idxidxresponse_token_ids_end_idxresponse_token_ids_idxshuman_token_ids_idxsassistant_idxhuman_token_ids	human_idxstartend	attn_maskflattened_position_ids	indices_qrC   s                    rD   rU   z*DataCollatorForCompletionOnlyLM.torch_call   s   "8,$$,3x=) X/3,88E(OA$6$:Q:QRS:T$TUVWX ;C // ?1-cC#d>U>U:V4VW^^`a 8;4; 07MM78N8N7OOl>>00{1CA1FGH Ijj $	 -1,=,=E(OAqD)1MPSTXTkTkPl1l. GKFWFWE(OA'B(B'B$BC/X4 3x=) 3W*,'')$%'XXeHoa.@DD[D[\]D^.^%_`a%b eM // ?1-mmcRVRiRiNj>jkrrtu 066}s4KbKbGc7cde ./14MM78N8N7OOl>>00{1CA1FGH Ijj $	 -1,=,=E(OAqD)"&"<"<!#%/!*<PQ@R*R!STU!V ?I&%/!*<YUXYhUiIi*j*q*q*ss,33I>?
 +,1MM:4;T;T:UUr>>00{1CA1FGH Ijj $	 -1,=,=E(OAqD) ,-134q8,Q/2I!2LL,-31E+E()237KMd3e)f E%C%ax8<8I8Ih595373D3Dh4C40E ./#6J2KKEIEVEVE(OA';B'?'A$ABg3Wj 		"23I!&{!3INN4D!E!O!OPQ!RE+$-$4$4Q$7	8H$I$S$STU$VYZ$ZE.!#Hoinn.>?II!LE(O:>:K:KE(OE.1Q67 &+>%:%B%B%D"&++A.7M7T7T\a\g\gI &+YY49:LL.335>T>[>[chcncn& il /" &+?%;E/" %*LL2H2L2L2N2S2S2UXY2Y1Z$[E.!$).$9E.!rE   N)__name__
__module____qualname____doc__r
   r8   listintr	   r`   r3   r   dictrU   __classcell__rC   s   @rD   r)   r)   G   s    . AE+)
  "+) d3i0+) 'uS$s)^'<=+)
 +) +) +)Zn4d3id38n.L(M#N nSWX[]`X`Sa n nrE   r)   c                       e Zd ZU dZeed<   dZeed<   dZeed<   dZ	e
ed<   d	Ze
ed
<   d Zdeee
ef      dee
ej"                  f   fdZy)DataCollatorForChatMLz3
    Data collator for ChatML format datasets.
    r9   r*   r,   N
max_lengthprompt
prompt_keymessagesmessages_keyc                     | j                   j                  t        d      | j                  &t	        | j                   j
                  d      | _        y y )NzTThe tokenizer does not have a pad token. Please set `pad_token_id` in the tokenizer.   )r9   r=   
ValueErrorr   minmodel_max_lengthr@   s    rD   __post_init__z#DataCollatorForChatML.__post_init__  sE    >>&&.stt??"!$.."A"A4HDO #rE   rF   rG   c           	         g }g }g }g }g }|D ]  }|j                  | j                  d       }|0|| j                     d d }	| j                  j	                  |	dd      }d|vr|| j                     }
| j                  j	                  |
dd      }| j                  |d| j
                  dd d      }|j                  |d          d|v r|j                  |d          np|j                  dgt        |d         z         nN|j                  |d          d|v r|j                  |d          n!|j                  dgt        |d         z         | j                  |dt        |d         dd d      }|j                  |d          |j                  |d          | j                  gt        |d         z  }t        |d         }|d   |d  ||d  |j                  |        |D cg c]'  }t        j                  |t        j                  	      ) }}|D cg c]'  }t        j                  |t        j                  	      ) }}|D cg c]'  }t        j                  |t        j                  	      ) }}t        |d
| j                  j                        }t        |d
d      }t        |d
| j                        }|D cg c]'  }t        j                  |t        j                  	      ) }}|D cg c]'  }t        j                  |t        j                  	      ) }}t        |d
| j                  j                        }t        |d
d      }|||||dS c c}w c c}w c c}w c c}w c c}w )NrK   FT)tokenizeadd_generation_promptrJ   )
truncationr   paddingreturn_tensorsr1   rL   rM   rP   left)padding_sidepadding_valuer   )rJ   rL   rI   promptsprompt_attention_mask)getr   r   r9   apply_chat_templater   r\   rW   r,   rd   ri   longpadr=   )r@   rF   rJ   rL   prompts_input_idsr   rI   exampleformatted_promptr   messageformatted_messagetokenized_messagetokenized_promptlabelcompletion_start_idxidsmasks                     rD   __call__zDataCollatorForChatML.__call__  s   	 " 1	!G&{{4??DA' !2!23CR8#'>>#E#EU$ $F $  ')!$"3"34$(NN$F$Fe5 %G %! %)NN%##!#'', %3 %!   !2;!?@#w."))*;<L*MN"))1#4Ek4R0S*ST  !56#w."))'2B*CD"))1#GK4H0I*IJ#~~ y}-##(  .   $$%5k%BC!(()9:J)KL &&'#im*<<E#&'7'D#E +4R=9M9N+OE&'(MM% c1	!h ENNSU\\#UZZ8N	NKYZ4%,,t5::>ZZEKLE%,,uEJJ7LL	dnnFaFab	^&PQRV&@Q@QRL]^SU\\#UZZ@^^Rg h$d%**!E h h 1VZVdVdVqVqr #$9^_ ` #,(%:
 	
 OZL
 _ hs   &,M,M!
,M&,M+6,M0)r|   r}   r~   r   r   __annotations__r,   r   r   r   r8   r   r   r   r   r   rd   Tensorr    rE   rD   r   r      sn     '&L#JJ"L#"IM
d38n!5 M
$sELL?P:Q M
rE   r   c                       e Zd ZU dZeed<   dZeee	f   ed<   dZ
ee   ed<   dZe	ed<   d	eee	ef      d
ee	ef   fdZy)RewardDataCollatorWithPaddinga\  
    Reward DataCollator class that pads the inputs to the maximum length of the batch.

    Args:
        tokenizer (`PreTrainedTokenizerBase`):
            The tokenizer used for encoding the data.
        padding (`Union[bool, str, `PaddingStrategy`]`, `optional`, defaults to `True`):
            padding_strategy to pass to the tokenizer.
        pad_to_multiple_of (`int` or `None`, `optional`, defaults to `None`):
            If set will pad the sequence to a multiple of the provided value.
        return_tensors (`str`, `optional`, defaults to `"pt"`):
            The tensor type to use.
    r9   Tr   Npad_to_multiple_ofptr   featuresrG   c                 n   g }g }g }d|d   v }|D ]h  }d|vsd|vsd|vsd|vrt        d      |j                  |d   |d   d       |j                  |d   |d   d       |sU|j                  |d          j | j                  j                  || j                  | j
                  | j                  	      }| j                  j                  || j                  | j
                  | j                  	      }|d
   |d   |d
   |d   dd}	|r*t        j                  |t        j                        }||	d<   |	S )Nmarginr   input_ids_choseninput_ids_rejectedattention_mask_chosenattention_mask_rejectedz{The features should include `input_ids_chosen`, `attention_mask_chosen`, `input_ids_rejected` and `attention_mask_rejected`)rJ   rL   )r   r   r   rJ   rL   T)r   r   r   r   return_lossr   )
r   r\   r9   r   r   r   r   rd   ri   float)
r@   r   features_chosenfeatures_rejectedr   
has_marginfeaturebatch_chosenbatch_rejectedrl   s
             rD   r   z&RewardDataCollatorWithPadding.__call__r  s   !,
 	1G #'1'w6*'9,G;  R  ""!();!<&-.E&F $$!()=!>&-.G&H gh/03	14 ~~))LL#66..	 * 
 ++LL#66..	 , 
 !-[ 9%12B%C"0"='56F'G
 \\&<F$E(OrE   )r|   r}   r~   r   r   r   r   r
   r`   r8   r   r	   r   r   r   r   r   r   r   rE   rD   r   r   ]  sc     '& $GU49$(,,NC6d38n!5 6$sCx. 6rE   r   tensorsr   r   r   rG   c                 V   t        j                  | D cg c]  }|j                   c}d      j                         }||d   |z  }|dk7  r|dxx   ||z
  z  cc<   t	        j
                  t        |       g||| d   j                  | d   j                        }t        |       D ]z  \  }}|dk(  r|d   |j                  d   z
  }	n|dk(  rd}	nt        d      t        |	|	|j                  d   z         }
|
ft        d |j                  dd D              z   }|||   |<   | |S c c}w )	a  
    Pads a list of tensors to the same shape along the first dimension.

    Args:
        tensors (`list[torch.Tensor]`):
            List of input tensors to pad.
        padding_value (`int`):
            Value to use for padding. Default is 0.
        padding_side (`str`):
            Side on which to add padding. Must be 'left' or 'right'. Default is 'right'.
        pad_to_multiple_of (`int`, *optional*, defaults to `None`):
            If set will pad the sequence to a multiple of the provided value.

    Returns:
        `torch.Tensor`:
            A single tensor containing the padded tensors.

    Examples:
    ```python
    >>> import torch

    >>> pad([torch.tensor([1, 2, 3]), torch.tensor([4, 5])])
    tensor([[1, 2, 3],
            [4, 5, 0]])

    >>> pad([torch.tensor([[1, 2], [3, 4]]), torch.tensor([[5, 6]])])
    tensor([[[1, 2],
            [3, 4]],
            [[5, 6],
            [0, 0]]])
    ```
    r   NrP   rO   r   rightz&padding_side must be 'left' or 'right'c              3   4   K   | ]  }t        d |        yw)r   N)slice).0ss     rD   	<genexpr>zpad.<locals>.<genexpr>  s     %GaeAqk%Gs   rM   )rX   rj   shaperZ   rd   fullrW   rP   rO   r]   r   r   tuple)r   r   r   r   toutput_shape	remainderoutputrm   	seq_start	seq_sliceslicess               rD   r   r     s>   N 66G4q1774a8??AL % O&88	>O1I==O ZZW55}GTUJL\L\elmneoevevwF'" 16!$Q!''!*4IW$IEFF )Y%;<	%G17712;%G GGq	& M1 5s   D&c                   l    e Zd ZU dZdZeed<   dZeed<   dZe	e
   ed<   deeeef      d	eeef   fd
Zy)DPODataCollatorWithPaddinga  
    DPO DataCollator class that pads the tokenized inputs to the maximum length of the batch.

    Args:
        pad_token_id (`int` defaults to 0):
            The tokenizer's pad_token_id.
        label_pad_token_id (`int`, defaults to -100):
            The label used for masking.
        is_encoder_decoder (`bool` or `None`, `optional`, defaults to `None`):
            Whether you model has an encoder_decoder architecture.
    r   r=   r*   label_pad_token_idFis_encoder_decoderr   rG   c                    i }|d   j                         D ]4  }|j                  d      r| j                  r|D cg c]  }t        j                  ||          }}|j                  d      r5|j                  d      r$| j                  t        d      | j                  }nE|j                  d      rd}n1|j                  d      sd|v r| j                  }nt        d	| d
      t        |d|      ||<   |j                  d      r$| j                  t        d      | j                  }nU|j                  d      r| j                  }n7|j                  d      rd}n#|j                  d      rd}nt        d	| d
      |dv rd}nd}|j                  d      rt        j                  }nt        j                  }|D cg c]  }t        j                  ||   |       }}t        |||      ||<   |j                  d      r+t        j                  |D cg c]  }||   	 c}      ||<   |D cg c]  }||   	 c}||<   7 |S c c}w c c}w c c}w c c}w )Nr   )
_input_ids_attention_mask_labels_pixel_valuesr   rJ   zPadding is enabled, but the tokenizer is not configured with a padding token. Explicitly set `tokenizer.pad_token` (e.g. `tokenizer.pad_token = tokenizer.eos_token`) before calling the trainer.r   )chosenrejected
completiondecoderzUnexpected key in batch ''T)batch_firstr   r   r   r   )prompt_input_idsr   r   r   r   r   r   _logps)keysendswithr   rd   
LongTensor
startswithr=   r   r   r   float32int64ri   r   )	r@   r   padded_batchkexto_padr   r   rP   s	            rD   r   z#DPODataCollatorWithPadding.__call__  sT   !!!# :	=AzzWX**@HI"e..r!u5IFIX.QZZ5L,,4",!?# 
 )-(9(9$56()&JKPY]^P^(,(?(?(+DQCq)IJJ&26t[h&iLO zz,/,,4",!?# 
 )-(9(9I.(,(?(?$56()O4()(+DQCq)IJJ II'-'. zz/2 % % JRR2ell2a5>RFR&)&\h&iLOH%"',,/I"1/I"JQ3;"<R2a5"<Qu:	=x s Jb S 0J"<s   I!II
9IN)r|   r}   r~   r   r=   r   r   r   r   r	   r`   r   r   r8   r   r   r   rE   rD   r   r     sU    
 L#"")..?d38n!5 ?$sCx. ?rE   r   c                   8    e Zd ZdZ	 	 	 	 	 	 	 	 	 	 ddZd Zd Zy)ConstantLengthDataseta  
    Iterable dataset that returns constant length chunks of tokens from stream of text files. The dataset also formats
    the text before tokenization with a specific format that is provided by the user.

    Args:
        tokenizer (`transformers.PreTrainedTokenizer`):
            The processor used for processing the data.
        dataset (`dataset.Dataset`):
            Dataset with text files.
        dataset_text_field (`str` or `None`, *optional*, defaults to `None`):
            Name of the field in the dataset that contains the text. Only one of `dataset_text_field` and
            `formatting_func` should be provided.
        formatting_func (`Callable`, *optional*):
            Function that formats the text before tokenization. Usually it is recommended to follow a certain pattern
            such as `"### Question: {question} ### Answer: {answer}"`. Only one of `dataset_text_field` and
            `formatting_func` should be provided.
        infinite (`bool`, *optional*, defaults to `False`):
            If True the iterator is reset after dataset reaches end else stops.
        seq_length (`int`, *optional*, defaults to `1024`):
            Length of token sequences to return.
        num_of_sequences (`int`, *optional*, defaults to `1024`):
            Number of token sequences to keep in buffer.
        chars_per_token (`int`, *optional*, defaults to `3.6`):
            Number of characters per token used to estimate number of tokens in text buffer.
        eos_token_id (`int`, *optional*, defaults to `0`):
            Id of the end of sequence token if the passed tokenizer does not have an EOS token.
        shuffle (`bool`, *optional*, defaults to `True`)
            Shuffle the examples before they are returned
        append_concat_token (`bool`, *optional*, defaults to `True`)
            If true, appends `eos_token_id` at the end of each sample being packed.
        add_special_tokens (`bool`, *optional*, defaults to `True`)
            If true, tokenizers adds special tokens to each sample being packed.
    Nc                 2   t        j                  dt               || _        |j                  r|j                  n|	| _        || _        || _        || _        d| _	        ||z  |z  | _
        |
| _        || _        || _        |t        j                  dt               ||| _        nfd| _        nt!        d      d| _        t%        |t&        j(                  t&        j*                  f      r|j,                  nd }|d|v rd| _        ||z  | _
        y y y )	NzThis class is deprecated and will be removed in version 0.20.0. To use packing, use the argument `packing` of `SFTConfig` instead.r   zOnly one of `dataset_text_field` and `formatting_func` should be provided. Ignoring `dataset_text_field` and using `formatting_func`.c                     |    S r{   r   )xdataset_text_fields    rD   <lambda>z0ConstantLengthDataset.__init__.<locals>.<lambda>  s    Q/A-B rE   zDEither `dataset_text_field` or `formatting_func` should be provided.FrJ   T)r4   r5   r6   r9   r>   concat_token_iddataset
seq_lengthinfinitecurrent_sizemax_buffer_sizeshuffleappend_concat_tokenr1   r?   formatting_funcr   pretokenizedr7   datasetsDatasetr   column_names)r@   r9   r   r   r  r   r   num_of_sequenceschars_per_tokenr>   r  r  r1   r  s      `          rD   r3   zConstantLengthDataset.__init__d  s-    	0	

 #9B9O9Oy55Ua$ )O;>NN#6 "4)o.IMMM &#2D +#BD cdd!$.w9I9I8KcKc8d$eG  ko 	 #|(C $D#-0@#@D  )D#rE   c                 ,    t        | j                        S r{   )rW   r   r   s    rD   __len__zConstantLengthDataset.__len__  s    4<<  rE   c              #     K   t        | j                        }d}|rg d}}	 || j                  k\  rn<	 |j                  | j	                  t        |                   |t        |d         z  }L| j                  rt        j                  |       | j                  r|}n!| j                  || j                  d      d   }g }|D ]/  }| j                  r|| j                  gz   }|j!                  |       1 g }t#        dt        |      | j$                        D ]>  }	||	|	| j$                  z    }
t        |
      | j$                  k(  s.|j                  |
       @ | j                  rt        j                  |       |D ]D  }| xj&                  dz  c_        t)        j*                  |      t)        j*                  |      d F |ry y # t        $ r+ | j                  rt        | j                        }nd}Y Y w xY ww)	NTr   rK   F)r1   r   rJ   rM   )rJ   rI   )iterr   r   r\   r  nextrW   StopIterationr   r  randomr  r9   r1   r  r   extendrV   r   r   rd   r   )r@   iteratormore_examplesbuffer
buffer_lentokenized_inputsall_token_idstokenized_inputrF   rm   rJ   r   s               rD   __iter__zConstantLengthDataset.__iter__  s    %!#QJF!5!55MM$"6"6tH~"FG#fRj/1J  ||v&  #) #'>>t/F/FSX $2 $$  M#3 6++&59M9M8N&NO$$_56 H1c-0$//B /)!a$//.AB	y>T__4OOI./ ||x(# !!Q&!!&!1!1'!:#..w7 G  % }}#'#5(- !s6   0G?:G -CG?A?G?G?-G<5G?;G<<G?)
NNFr   r   g@r   TTT)r|   r}   r~   r   r3   r  r  r   rE   rD   r   r   A  s9     L   4Al!+rE   r   c                       e Zd ZU dZeed<   dZeed<   dZeed<   dZ	eed<   dZ
eed	<    ej                         d
ej                  deeef   fd       ZdefdZededefd       Zy)RunningMomentsz
    Calculates the running mean and standard deviation of a data stream. Reference:
    https://github.com/OpenLMLab/MOSS-RLHF/blob/40b91eb2f2b71b16919addede0341d2bef70825d/utils.py#L75
    acceleratorr   meanrM   stdvargW:countxsrG   c                 >   | j                   j                  rt        | j                   |      \  }}}n*|j                         }t	        j
                  |d      \  }}|j                         |j                         }}|| j                  z
  }| j                  |z   }||z  }| j                  | j                  z  |dz  | j                  z  |z  |z  z   }||z   }	| xj                  ||z  |z  j                         z  c_        |	|z  }
|
|z  |dz
  z  j                         j                         j                         | _        |
j                         | _	        || _        |j                         ||z  |dz
  z  j                         j                         j                         fS )zT
        Updates running moments from batch's moments computed across ranks
        F)unbiasedr    rM   )r  use_distributedget_global_statisticsnumelrd   var_meanr   r  r   r  rk   sqrtr  )r@   r!  xs_meanxs_varxs_countdelta	tot_countnew_sumold_sumtot_sumnew_vars              rD   updatezRunningMoments.update  sh   
 ++(=d>N>NPR(S%GVXxxzH#nnR%@OFG!--/6<<>$))#JJ)	8#((TZZ'%(TZZ*?(*JY*VVG#		eh&288::	I%i'9q=9@@BGGINNP<<>
||~ 1X\ BIIKPPRWWYYYrE   	json_pathc                     | j                   j                  r\t        j                  | d       }t	        j
                  |dd      dz   }t        |dd	      5 }|j                  |       d
d
d
       y
y
# 1 sw Y   y
xY w)zDSave the content of this instance in JSON format inside `json_path`.c                 F    | D ci c]  \  }}|dk7  s|| c}}S c c}}w )Nr  r   )r   r   vs      rD   r   z-RunningMoments.save_to_json.<locals>.<lambda>  s)    \]HtRXSTVWabfsasAHt Hts   )dict_factoryr    T)indent	sort_keys
wutf-8encodingN)r  is_main_processdataclassesasdictjsondumpsopenwrite)r@   r3  	save_dictjson_stringfs        rD   save_to_jsonzRunningMoments.save_to_json  sv     ++#**4>tuI**YqDIDPKiw7 %1$% % ,% %s   A44A=c                     t        |d      5 }|j                         }ddd        | dd|it        j                        S # 1 sw Y   &xY w)z3Create an instance from the content of `json_path`.r<  r=  Nr  r   )rD  readrB  loads)clsr  r3  rH  texts        rD   load_from_jsonzRunningMoments.load_from_json  sL     )g. 	!668D	?{?djj.>??	 	s   AAN)r|   r}   r~   r   r   r   r  r   r  r  r   rd   no_gradr   r   r2  r8   rI  classmethodrO  r   rE   rD   r  r    s    
 D%OCNCNE5U]]_Z Z%u*= Z Z6%c % @ @ @ @rE   r  r!  c                    |j                  | j                        }t        j                  |j	                         ||j                         n|j	                         g|j                        }| j                  |      }|\  }}||z  }t        j                  ||z
  dz  j                  |dn|            }| j                  |      }||z  }	|j                  |      |	j                  |      |j                         fS )z
    Computes element-wise mean and variance of the tensor across processes. Reference:
    https://github.com/OpenLMLab/MOSS-RLHF/blob/40b91eb2f2b71b16919addede0341d2bef70825d/utils.py#L57C1-L73C75
    rO   r    rM   )	torO   rd   ri   sumr&  reducemulrk   )
r  r!  r   rO   sum_and_count
global_sumr   global_meansum_var
global_vars
             rD   r%  r%    s     
{!!	"BLL"&&(4<RXXZTXXZ!YbdbkbklM&&}5M%Ju$Kii"{*q0554<aTRSG  )G5J>>&!:==#8%**,FFrE   	eval_predc                     | \  }}|j                   dk(  rt        j                  |d      }t        j                  t	        ||      D cg c]!  \  }}t	        ||      D ]  \  }}|dk7  s| # c}}}}      }t        j                  |D cg c]  }|D ]
  }|dk7  s	|  c}}      }n|d d df   |d d df   k(  }t        |j                               }|dkD  r1t        j                  d| dt        |d d df          d	t               ||    }||    }t        j                  |d      }t        j                  ||k(  t        
      j                         j                         }	d|	iS c c}}}}w c c}}w )N   r    )axisr*   r   rM   z
There are z out of zu instances where the predictions for both options are equal. These instances are ignored in the accuracy computation.r   accuracy)ndimrX   argmaxarrayr^   r   rU  r4   r5   rW   r?   r   r  rk   )
r]  predictionsrI   
predictionr   plbl
equal_maskequal_predictions_countra  s
             rD   compute_accuracyrk    s   #K1 ii!4 hh(+K(@ww$:uQTU_afQgwXaknrvkvQwQw
 FS5eSssd{3S3ST
 !A&+ad*;;
"%jnn&6"7"Q&MM45Xc+aQRdBS>T=U Vg g ":+.$ ii!4xxv-U;@@BGGIH!!3 xSs   E2,E2E:
 E:
ri   length	pad_valuedimc           
         | j                  |      |k\  r| S t        | j                        }|| j                  |      z
  ||<   t        j                  | |t        j
                  || j                  | j                  dz  g|      S )Nr   rn  )rf   r   r   rd   rh   onesrP   rO   )ri   rl  rm  rn  pad_sizes        rD   pad_to_lengthrs  @  sx    {{36!%S!11yyEJJV]][[ 
 	
rE   modelc                     | j                         D ].  }t        |t        j                  j                        s(d|_        0 y )Nr   )modulesr7   rd   nnDropoutrg  )rt  modules     rD   disable_dropout_in_modelrz  O  s2    --/ fehh../FHrE   c           
      R    | |z  }| ||z  k7  rt        | d|  d| d| |z         |S )Nz, inexact division: z / z = )r   )abcustom_error_messageqs       rD   	exact_divr  U  sH    	QAAEz011EaSA3cRSVWRWQXYZZHrE   c                   "    e Zd ZdZd Zd Zd Zy)PerPromptStatTrackeraI  
    Class for tracking statistics per prompt. Mainly used to calculate advantage for the DPPO algorithm

    Args:
        buffer_size (`int`):
            Size of the buffer to keep for each prompt.
        min_count (`int`):
            Minimum number of samples to keep in the buffer before calculating the mean and std.
    c                 .    || _         || _        i | _        y r{   )buffer_size	min_countstats)r@   r  r  s      rD   r3   zPerPromptStatTracker.__init__h  s    &"
rE   c                    t        j                  |      }t        j                  |      }t        j                  |      }t        j                  |      }|D ]  }|||k(     }|| j                  vr#t        | j                        | j                  |<   | j                  |   j                  |       t        | j                  |         | j                  k  r.t        j                  |      }t        j                  |      dz   }nGt        j                  | j                  |         }t        j                  | j                  |         dz   }||z
  |z  |||k(  <    |S )N)maxlengư>)rX   rd  unique
empty_liker  r   r  r  rW   r  r  r  )	r@   r   rewardsr  
advantagesr   prompt_rewardsr  r  s	            rD   r2  zPerPromptStatTracker.updatem  s$   ((7#((7#7#]]7+
 	JF$W%67NTZZ'%*$2B2B%C

6"JJv%%n54::f%&7www'ffWo,wwtzz&12ffTZZ/047-;d-Bc,IJw&()	J rE   c           
          | j                   j                         D ci c];  \  }}|t        j                  |      t        j                  |      t        |      d= c}}S c c}}w )N)r  r  r   )r  itemsrX   r  r  rW   )r@   r   r6  s      rD   	get_statszPerPromptStatTracker.get_stats  sJ    W[WaWaWgWgWijtqRSBGGAJrvvay3q6JJjjjs   A A"N)r|   r}   r~   r   r3   r2  r  r   rE   rD   r  r  ]  s    
*krE   r  c                    | j                         D ]  \  }t        |t        j                  j                        sdv r |j                  t        j                        }Nt        fddD              sct        |d      sp|j                  j                  t        j                  k(  s|j                  t        j                        } y )Nnormc              3   &   K   | ]  }|v  
 y wr{   r   )r   r   names     rD   r   z.peft_module_casting_to_bf16.<locals>.<genexpr>  s     NqdNs   )lm_headembed_tokenswtewpeweight)named_modulesr7   rd   rw  	LayerNormrT  r   anyhasattrr  rP   bfloat16)rt  ry  r  s     @rD   peft_module_casting_to_bf16r    s    ++- 7ffehh001Vt^YYu}}-FN$MNNvx(==&&%--7#YYu~~6F7rE   
model_argsc                     | j                   r:t        d| j                  | j                  | j                  | j                        }|S | j
                  rt        d      }|S d }|S )NT)load_in_4bitbnb_4bit_compute_dtypebnb_4bit_quant_typebnb_4bit_use_double_quantbnb_4bit_quant_storage)load_in_8bit)r  r   torch_dtyper  use_bnb_nested_quantr  )r  quantization_configs     rD   get_quantization_configr    st    0#-#9#9 * > >&0&E&E#-#9#9
  
	 	 0
  #rE   c                      t         j                  j                         s
t               rdt	               j
                  iS y )N )rd   cudais_availabler   r   local_process_indexr   rE   rD   get_kbit_device_mapr    s.    zz $:$<LN6677rE   zOptional[PeftConfig]c                    | j                   du ry t               st        d      t        | j                  | j
                  | j                  | j                  | j                  d| j                  | j                  | j                  	      }|S )NFzYou need to have PEFT library installed in your environment, make sure to install `peft`. Make sure to run `pip install -U peft`.none)		task_typertarget_modules
lora_alphalora_dropoutbias
use_rslorause_doramodules_to_save)use_peftr   r   r&   lora_task_typelora_rlora_target_modulesr  r  r  r  lora_modules_to_save)r  peft_configs     rD   get_peft_configr    s    e#6
 	

 ++


!55((,,(($$"77
K rE   c                 `   t        j                  dg      j                  | j                        t        j                  | j                        j
                  z   }t        j                  |      j                  | j                        }|dkD  r!t        j                  |d|z  z        d|z  z  S |S )a1  
    Get the exponent cap of a value. This is used to cap the exponent of a value to avoid overflow. The formula is :
    log(value.dtype.max) E.g.
      For float32 data type, the maximum exponent value is 88.7228 to 4 decimal points.

    Args:
        value (`torch.Tensor`):
            The input tensor to obtain the data type
        decimal (`int`):
            The number of decimal points of the output exponent cap. eg: direct calling exp(log(torch.float32.max))
            will result in inf so we cap the exponent to 88.7228 to avoid overflow.
    rM   r   
   )	rd   zerosrT  rP   finforj   logrO   floor)valuedecimal
vdtype_maxvdtype_log_maxs       rD   get_exp_capr    s     aS!$$U[[1EKK4L4P4PPJYYz*--ell;NFMPQk5;;~G34r7{BeWeerE   c                 z    |dk  rt        |       n|}t        j                  t        j                  | |            S )Nr   )rj   )r  rd   expclamp)r  caps     rD   cap_expr    s.     #a+e
SC99U[[C011rE   dfc                 P   t               st        d      t               }t        d      }| j                  D ]  }|j                  |        | j                         D ]5  \  }} |j                  |j                  t              j                           7 |j                  |       y )NzgThe function `print_rich_table` requires the `rich` library. Please install it with `pip install rich`.T)
show_lines)r   ImportErrorr"   r$   columns
add_columniterrowsadd_rowastyper8   rZ   print)r  consoletablecolumn_rows         rD   print_rich_tabler    s    u
 	
 iGT"E** ! !++- 13szz#--/01MM%rE   zT{% for message in messages %}{{' ' + message['content']}}{% endfor %}{{ eos_token }}z{% for message in messages %}{{message['role'].capitalize() + ': ' + message['content'] + '

'}}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}c                       e Zd ZU dZeed<   y)OnlineTrainerStater   episodeN)r|   r}   r~   r  r   r   r   rE   rD   r  r    s    GSrE   r  c                   j    e Zd ZU dZ edddi      Zeed<    edddi      Ze	e
   ed	<    eddd
i      Ze	e   ed<    edddi      Ze	e   ed<    edddi      Zeed<    edddi      Ze	e   ed<    edddi      Zeed<    edddi      Zeed<    edddi      Zeed<    edddi      Ze	ed      ed<    edddi      Ze	e   ed<    ed dd!i      Zeed"<    eddd#i      Ze	e   ed$<    ed%dd&i      Zeed'<    eddd(i      Ze	e   ed)<    eddd*i      Ze	e   ed+<    eddd,i      Ze	e   ed-<    eddd.i      Ze	e   ed/<    eddd0i      Ze	e   ed1<    eddd2i      Ze	e   ed3<    eddd4i      Z e	e   ed5<    ed6dd7i      Z!e
ed8<    fd9Z" xZ#S ):OnPolicyConfiga  
    Base configuration class for on-policy trainers.

    This class includes only the parameters that are specific to some on-policy training. For a full list of training
    arguments, please refer to the [`~transformers.TrainingArguments`] documentation. Note that default values in this
    class may differ from those in [`~transformers.TrainingArguments`].

    Using [`~transformers.HfArgumentParser`] we can turn this class into
    [argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the
    command line.

    Parameters:
        run_name (`str` or `None`, *optional*, defaults to `None`):
            Name of the run.
        dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
            Number of processes to use for processing the dataset.
        num_mini_batches (`int`, *optional*, defaults to `1`):
            Number of minibatches to split a batch into.
        total_episodes (`int` or `None`, *optional*, defaults to `None`):
            Total number of episodes in the dataset.
        local_rollout_forward_batch_size (`int`, *optional*, defaults to `64`):
            Per rank no grad forward pass in the rollout phase.
        num_sample_generations (`int`, *optional*, defaults to `10`):
            Number of debugging samples generations (i.e., `generate_completions` calls) throughout training.
        response_length (`int`, *optional*, defaults to `53`):
            Length of the response.
        stop_token (`str` or `None`, *optional*, defaults to `None`):
            Specifies the stop token to use for text generation. This parameter is mutually exclusive with
            `stop_token_id`.

            - `None`: No stop token is applied, unless `stop_token_id` is specified.
            - `'eos'`: Uses the tokenizer's `eos_token`.

        stop_token_id (`int` or `None`, *optional*, defaults to `None`):
            Specifies the ID of the stop token to use for text generation. If `None`, no stop token ID is applied,
            unless `stop_token` is specified. This parameter is mutually exclusive with `stop_token`.
        temperature (`float`, *optional*, defaults to `0.7`):
            Sampling temperature.
        missing_eos_penalty (`float` or `None`, *optional*, defaults to `None`):
            Penalty applied to the score when the model fails to generate an EOS token. This is useful to encourage to
            generate completions shorter than the maximum length (`max_new_tokens`). The penalty must be a positive
            value.
        sft_model_path (`str`, *optional*, defaults to `"EleutherAI/pythia-160m"`):
            Path to the SFT model.
        world_size (`int` or `None`, *optional*, defaults to `None`):
            Number of processes (GPUs) to use for the training.
        num_total_batches (`int` or `None`, *optional*, defaults to `None`):
            Number of total batches to train.
        micro_batch_size (`int` or `None`, *optional*, defaults to `None`):
            Micro batch size across devices (HF's `per_device_train_batch_size` * `world_size`).
        local_batch_size (`int` or `None`, *optional*, defaults to `None`):
            Batch size per GPU (HF's `per_device_train_batch_size` * `gradient_accumulation_steps`).
        batch_size (`int` or `None`, *optional*, defaults to `None`):
            Batch size across devices (HF's `per_device_train_batch_size` * `world_size` *
            `gradient_accumulation_steps`).
        local_mini_batch_size (`int` or `None`, *optional*, defaults to `None`):
            Mini batch size per GPU.
        mini_batch_size (`int` or `None`, *optional*, defaults to `None`):
            Mini batch size across GPUs.
        push_to_hub (`bool`, *optional*, defaults to `False`):
            Whether to push the model to the Hub after training.
    r  helpzLog every X updates steps. Should be an integer or a float in range `[0,1)`. If smaller than 1, will be interpreted as ratio of total training steps.)defaultmetadatalogging_stepsNzWhether to use bf16 (mixed) precision instead of 32-bit. Requires Ampere or higher NVIDIA architecture or Intel XPU or using CPU (use_cpu) or Ascend NPU. If not set, it defaults to `True` if `fp16` is not set.bf16zName of the run.run_namez6Number of processes to use for processing the dataset.dataset_num_procrM   z,Number of minibatches to split a batch into.num_mini_batchesz(Total number of episodes in the dataset.total_episodes@   z3Per rank no grad forward pass in the rollout phase. local_rollout_forward_batch_sizezaNumber of debugging samples generations (i.e., `generate_completions` calls) throughout training.num_sample_generations5   zLength of the response.response_lengthzoSpecifies the stop token to use for text generation. This parameter is mutually exclusive with `stop_token_id`.eos
stop_tokenzSpecifies the ID of the stop token to use for text generation. If `None`, no stop token ID is applied, unless `stop_token` is specified. This parameter is mutually exclusive with `stop_token`.stop_token_idgffffff?zSampling temperature.temperaturezPenalty applied to the score when the model fails to generate an EOS token. This is useful to encourage to generate completions shorter than the maximum length (`max_new_tokens`). The penalty must be a positive value.missing_eos_penaltyzEleutherAI/pythia-160mzPath to the SFT model.sft_model_pathz3Number of processes (GPUs) to use for the training.
world_sizez!Number of total batches to train.num_total_batcheszTMicro batch size across devices (HF's `per_device_train_batch_size` * `world_size`).micro_batch_sizezXBatch size per GPU (HF's `per_device_train_batch_size` * `gradient_accumulation_steps`).local_batch_sizeznBatch size across devices (HF's `per_device_train_batch_size` * `world_size` * `gradient_accumulation_steps`).
batch_sizezMini batch size per GPU.local_mini_batch_sizezMini batch size across GPUs.mini_batch_sizeFz4Whether to push the model to the Hub after training.push_to_hubc                 v    | j                   | j                   n| j                   | _         t        |           y r{   )r  fp16r2   r   )r@   rC   s    rD   r   zOnPolicyConfig.__post_init__  s*    '+yy'8Odii	rE   )$r|   r}   r~   r   r   r  r   r   r  r	   r`   r  r8   r  r   r  r  r  r  r  r  r   r  r  r  r  r  r  r  r   r  r  r  r  r   r   r   s   @rD   r  r    s   =@ ! D
M5  ! !
D(4.  $,-Hhsm  ',RS'hsm  "HIc  %*DE%NHSM  -2OP-$c  #(w
#C  !34OS  ,1 
,J(  $) q
$M8C=  12K  ,1  
,%   (23NC  !&OP!J  (-=>(x}  ',pq'hsm  ',tu'hsm  !& .
!J  ,145,8C=  &+89&OXc]  PQK 
   rE   r  boolsc                     | j                  d      }||  j                  |      z  t        j                  ||| j                        z   }t        j
                  |d      j                  S )a  
    Takes an N-dimensional bool tensor and returns an (N-1)-dimensional tensor of integers giving the position of the
    first True in each "row".

    Returns the length of the rows (bools.size(-1)) if no element is True in a given row.

    Args:
        bools (`torch.Tensor`):
            An N-dimensional boolean tensor.
        dtype (`torch.dtype`, optional):
            The desired data type of the output tensor. Defaults to `torch.long`.

    Returns:
        `torch.Tensor`:
            An (N-1)-dimensional tensor of integers indicating the position of the first True in each row. If no True
            value is found in a row, returns the length of the row.
    rK   r   rp  )rf   typerd   re   rO   r   values)r  rP   row_lenzero_or_indexs       rD   first_true_indicesr    sW    $ jjnGvmmE22U\\'QV_d_k_k5llM99]+222rE   query_responsesr=   context_lengthc                    ||k7  }|j                  d      |j                         z
  }t        | | j                        }t	        j
                  || d      } ||||ddd      }| j                  |j                  d         }	t        |dd|df   |k(        dz
  |z   }
|	|	t	        j                  |	j                  d      |	j                        |
f   j                  d      |
fS )	a5  
    Computes the reward logits and the rewards for a given model and query responses.

    Args:
        model (`torch.nn.Module`):
            The model used to compute the reward logits.
        query_responses (`torch.Tensor`):
            The tensor containing the query responses.
        pad_token_id (`int`):
            The token ID representing the pad token.
        context_length (`int`):
            The length of the context in the query responses.

    Returns:
        tuple:
            - `reward_logits` (`torch.Tensor`):
                The logits for the reward model.
            - `final_rewards` (`torch.Tensor`):
                The final rewards for each query response.
            - `sequence_lengths` (`torch.Tensor`):
                The lengths of the sequences in the query responses.
    rM   r   TF)rJ   rL   rN   return_dictoutput_hidden_states	use_cacherK   NrS  )rb   r   getattrbase_model_prefixrd   masked_fillscorehidden_statesr  re   rf   rO   squeeze)rt  r  r=   r  rL   rN   lm_backbonerJ   r   reward_logitssequence_lengthss              rD   
get_rewardr    s    2 %4N!((+n.A.A.CCL%!8!89K!!/N?AFI%!!F KK 4 4R 89M)/!^_:L*MQ]*]^abbess 	LL++A.}7K7KL	
 '"+ rE   c                     ||k7  }|j                  d      |j                         z
  }t        j                  || d      } | |||dd      S )a  
    Performs a forward pass through the model with the given query responses and pad token ID.

    Args:
        model (`torch.nn.Module`):
            The model to perform the forward pass.
        query_responses (`torch.Tensor`):
            The tensor containing the query responses.
        pad_token_id (`int`):
            The token ID representing the pad token.

    Returns:
        `ModelOutput`:
            The output of the model, including hidden states.
    rM   r   T)rJ   rL   rN   r  r  )rb   r   rd   r  )rt  r  r=   rL   rN   rJ   s         rD   forwardr    s^    ( %4N!((+n.A.A.CCL!!/N?AFI%!! rE   per_device_train_batch_sizer  r  c                    ddl }t               j                  }|j                  }|d   d   dk7  r"||d<   |d   ddd}|rd	d
i|d<   n|rd	d
i|d<   nt	        | d      rut        | j                  dd      rt        | j                  j                        nt        | j                  dd      }|&|d   d   dk(  r|j                  ||z  d|z  dd       |j                  | |      ^} }| j                          | S )a  
    Prepares the model for training with DeepSpeed (both for stage 2 and 3), configuring the appropriate settings based
    on the model and batch size.

    Args:
        model (`torch.nn.Module`):
            The model to be prepared for DeepSpeed training.
        per_device_train_batch_size (`int`):
            The training batch size per device.

    Returns:
        `torch.nn.Module`:
            The model initialized and configured with DeepSpeed for training.
    r   Nzero_optimizationstager_  train_micro_batch_size_per_gpuF)r$  prescale_gradientswall_clock_breakdownenabledTr  r  confighidden_sizeshidden_sizer  )z$zero_optimization.reduce_bucket_sizez4zero_optimization.stage3_param_persistence_thresholdz-zero_optimization.stage3_prefetch_bucket_size)rt  r(  )	deepspeedr   deepspeed_plugindeepspeed_configr  r  r(  rj   r)  r2  
initializeeval)	rt  r   r  r  r+  r,  config_kwargsr*  r  s	            rD   prepare_deepspeedr1    s-   " ')::$55M()'2a7:U67.;<\.]"'$)

 %.$5M&!%.$5M&!5(# 5<<> ELL--.U\\=$? 
 &=9L+Mg+VZ[+[ $$@Kk@YPRU`P`IJ $$5$GIEA	JJLLrE   r  	responsesc                 L   t        || k(        j                  d      }dgt        |j                               dz
  z  |j                  d   gz   } t        j                  |j                  d   |j                        j                  | }t        j                  |||kD  |      }|S )aA  
    Truncates the responses at the first occurrence of the stop token, filling the rest with pad tokens.

    Args:
        stop_token_id (`int`):
            The token ID representing the stop token where truncation occurs.
        pad_token_id (`int`):
            The token ID representing the pad token used to fill the truncated responses.
        responses (`torch.Tensor`):
            The tensor containing the responses to be truncated.

    Returns:
        `torch.Tensor`:
            The truncated responses tensor with pad tokens filled after the stop token.
    rK   rM   rS  )
r  ra   rW   rf   r   rd   re   rO   viewr  )r  r=   r2  
trunc_idxsnew_sizeidxspostprocessed_responsess          rD   truncate_responser9  K  s      $I$>?II"MJsc)..*+a/0IOOA4F3GGHI5<<	*93C3CDII8TD#//	4*;Ll[""rE   r  queriesgeneration_configc                 "   |j                   d   }||k7  }t        j                  || d      }| j                  |||dd      }t        j                  |j
                  d      }t        j                  ||j                  dd|df   fd      |fS )a  
    Generates sequences from the language model backbone in a way that does not affect padding tokens.

    Args:
        lm_backbone (`torch.nn.Module`):
            The language model backbone used for generation.
        queries (`torch.Tensor`):
            The tensor containing the input queries.
        pad_token_id (`int`):
            The token ID representing the pad token.
        generation_config (`GenerationConfig`):
            The configuration for the generation process.

    Returns:
        tuple:
            - `generated_sequences` (`torch.Tensor`):
                The concatenated tensor of input queries and generated sequences.
            - `logits` (`torch.Tensor`):
                The logits output from the generation process.
    rM   r   T)rJ   rL   r;  return_dict_in_generateoutput_scoresNrp  )r   rd   r  generatestackscoresrh   	sequences)	r  r:  r=   r;  r  rL   rJ   r   logitss	            rD   r?  r?  b  s    . ]]1%N,N!!'N?A>I!!% , $ " F [[*F99gv//>?0BCD!LfTTrE   r  c                    g }g }|j                   d   }t        d||      D ]=  }||||z    }	t        | |	||      \  }
}|j                  |
       |j                  |       ? t	        ||d      }t	        |dd      }|j                  d|j                   d         d | } |j
                  dg|j                   dd   d | }||fS )Nr   r   r   rK   r    )r   rV   r?  r\   r   r4  )rt  r:  r  r=   r;  r  logitssr  rm   queryquery_responserC  padded_query_responsespadded_logitsss                 rD   batch_generationrJ    s     OGq!J1j"BC 	A @@A!)	"
 	~.v	 ![bcHN 488=S=Y=YZ\=]^_j`jk(^((Gn.B.B12.FGTN!>11rE   bos_token_idprompt_len_input_idsprompt_tokenschosen_prompt_len_input_idschosen_tokensrejected_prompt_len_input_idsrejected_tokensc                     | x|dk(  s| |d   d   k7  r| g|d   z   |d<   dg|d   z   |d<   |dk(  s| |d   d   k7  r| g|d   z   |d<   dg|d   z   |d<   |dk(  s| |d   d   k7  r| g|d   z   |d<   dg|d   z   |d<   |||fS )Nr   r   rM   r   r   )rK  rL  rM  rN  rO  rP  rQ  s          rD   add_bos_token_if_neededrS    s     1$FX8YZ[8\(\1=OaAb0bM,-67S=I`;a5aM12&!+|}M_?`ab?c/c1=OaAb0bM,-67S=I`;a5aM12(A-QcAdefAg1g3?.?SeCf2fO./89s_Md=e7eO34-88rE   r>   c                    t        |d         dk(  s| |d   d   k7  r(|d   j                  |        |d   j                  d       t        |d         dk(  s| |d   d   k7  r(|d   j                  |        |d   j                  d       ||fS )NrJ   r   rK   rL   rM   )rW   r\   )r>   rO  rQ  s      rD   add_eos_token_if_neededrU    s     =%&!+|}[?YZ\?]/]k")),7&'..q1
?;'(A-Q\A]^`Aa1a$++L9()003/))rE   rJ   c                    t        | |k(        j                  d      }dgt        | j                               dz
  z  | j                  d   gz   } t        j                  | j                  d   | j                        j                  | }t        j                  | ||kD  |      }t        j                  t        j                  |       ||kD  d      }||fS )a  
    Truncates the input tensor from the right side after the first occurrence of the stop token.

    Args:
        input_ids (`torch.Tensor`):
            The tensor containing the responses to be truncated
        stop_token_id (`int`):
            The token ID representing the stop token where truncation occurs
        pad_token_id (`int`):
            The token ID representing the pad token used to fill the truncated responses

    Returns:
        tuple:
            - `output_ids` (`torch.Tensor`):
                The truncated responses tensor with pad tokens filled after the stop token
            - `mask` (`torch.Tensor`):
                The mask tensor to indicate the padding tokens
    rK   rM   rS  r   )r  ra   rW   rf   r   rd   re   rO   r4  r  	ones_like)rJ   r  r=   r5  r6  r7  
output_idsr   s           rD   truncate_rightrY    s    * $I$>?II"MJsc)..*+a/0IOOA4F3GGHI5<<	*93C3CDII8TD""9dZ.?NJU__Y7
9JANDtrE   c                  6   t               rt        j                  j                          yt	               rt        j
                  j                          yt               rt        j                  j                          yt        j                  j                          y)a3  Empties the cache of the available torch device.

    This function checks for the availability of different torch devices (XPU, MLU, NPU, CUDA) and empties the cache of
    the first available device it finds.

    If none of the specific devices are available, it defaults to emptying the CUDA cache.
    N)	r   rd   xpuempty_cacher   mlur   npur  r   rE   rD   r\  r\    sV     				!				!		

 rE   inputsr9   c                     |j                  | d      }|D cg c]  }|j                  |j                  d        c}S c c}w )ay  
    Decodes the input tensor and strips the padding tokens.

    Args:
        inputs (`torch.Tensor`):
            The input tensor to be decoded.
        tokenizer (`transformers.PreTrainedTokenizerBase`):
            The tokenizer used to decode the input tensor.

    Returns:
        `list[str]`:
            The list of decoded strings with padding tokens stripped.
    F)skip_special_tokensr  )batch_decodereplace	pad_token)r_  r9   decodedds       rD   decode_and_strip_paddingrg    s>     $$V$GG8?@1AIIi))2.@@@s   #>
base_model
model_namehub_model_iddataset_nametags	wandb_urltrainer_nametrainer_citationpaper_titlepaper_id	comet_urlc           
      f   t        | |dd|dg|      }t        j                  |fi dt        t	        j
                  d      j                  d            d| d	|d
|d|d|d|
d|d|d|d|	dt        d      dt        d      dt        d      dt        d      dt        d      }|S )a  
    Generate a `ModelCard` from a template.

    Args:
        base_model (`str` or `None`):
            Base model name.
        model_name (`str`):
            Model name.
        hub_model_id (`str`):
            Hub model ID as `username/model_id`.
        dataset_name (`str` or `None`):
            Dataset name.
        tags (`list[str]`):
            Tags.
        wandb_url (`str` or `None`):
            Weights & Biases run URL.
        comet_url (`str` or `None`):
            Comet experiment URL.
        trainer_name (`str`):
            Trainer name.
        trainer_citation (`str` or `None`, defaults to `None`):
            Trainer citation as a BibTeX entry.
        paper_title (`str` or `None`, defaults to `None`):
            Paper title.
        paper_id (`str` or `None`, defaults to `None`):
            ArXiv paper ID as `YYMM.NNNNN`.

    Returns:
        `ModelCard`:
            A ModelCard object.
    transformerslicensegenerated_from_trainer)rh  r  library_namelicenceri  rl  template_pathtrlztemplates/lm_model_card.mdrh  ri  rj  rk  rm  rr  rn  ro  rp  rq  trl_versiontransformers_versionpytorch_versionrd   datasets_versionr  tokenizers_version
tokenizers)r   r   from_templater8   pkg_resourcesfilesjoinpathr   )rh  ri  rj  rk  rl  rm  rn  ro  rp  rq  rr  	card_datacards                rD   generate_model_cardr    s   X #&..I ""---e4==>Z[\  	
 " "   " *    EN %^4  (  !,!" #<0#D& KrE   c                  ~    t               syt        j                         t        j                         j                  S y)zt
    If Comet integration is enabled, return the URL of the current Comet experiment; otherwise, return `None`.
    N)r   comet_mlget_running_experimenturlr   rE   rD   get_comet_experiment_urlr  W  s4     &&(4..0444rE   r  r  c                     t               st        d      t        j                         }||j	                  ||        yy)a  
    If Comet integration is enabled logs a table to the Comet experiment if it is currently running.

    Args:
        name (`str`):
            Table name.
        table (`pd.DataFrame`):
            The Pandas DataFrame containing the table to log.
    zLThe comet-ml is not installed. Please install it first: pip install comet-mlN)tabular_datafilename)r   ModuleNotFoundErrorr  r  	log_table)r  r  
experiments      rD   log_table_to_comet_experimentr  d  sB     !"pqq002J%$? rE   r   .c                    | j                   \  }}| j                         }|D cg c]  }|j                          }}|j                  d      }t        j                  ||j
                        j                  d      }||j                  d      z   |z  }|j                  d|      }	|D cg c]  }|j                  d|       }
}|	j                  d      }|dk(  }|j                         r6t        |j                  t        j                        j                               n|}|	ddd|f   }|
D cg c]  }|ddd|f    }}|s|S |g|S c c}w c c}w c c}w )a  
    Shift non-zero elements in the mask and corresponding tensors to the left.

    This function operates on a binary mask and any number of additional tensors with the same dimensions as the mask.
    For each row, non-zero values are shifted to the leftmost positions. Then, columns that contain only zeros across
    all rows are truncated from the mask and tensors. Visually, this operation can be represented as follows:

    ```
    [[0, 0, x, x, x, x],  ->  [[x, x, x, x],
     [0, x, x, x, 0, 0]]       [x, x, x, 0]]
    ```

    Args:
        mask (`torch.Tensor`):
            2D tensor (binary mask) with shape `(N, M)`.
        *tensors (`torch.Tensor`)
            One or more 2D tensors with the same shape as `mask`. These tensors will be processed alongside `mask`,
            with non-zero values shifted and excess zero columns truncated in the same manner.

    Returns:
        `torch.Tensor`:
            Updated binary mask with non-zero values flushed to the left and trailing zero columns removed.
        `*torch.Tensor`
            Updated tensors, processed in the same way as the mask.

    Example:
    ```python
    >>> mask = torch.tensor([[0, 0, 1, 1, 1], [0, 1, 1, 0, 0]])
    >>> tensor = torch.tensor([[9, 9, 2, 3, 4], [9, 5, 6, 9, 9]])
    >>> new_mask, new_tensor = flush_left(mask, tensor)
    >>> print(new_mask)
    tensor([[1, 1, 1],
            [1, 1, 0]])

    >>> print(new_tensor)
    tensor([[2, 3, 4],
            [5, 6, 0]])
    ```
    rM   rp  rS  r   N)r   clonerc  rd   re   rO   ra   gatherrU  r  r   rT  int8)r   r   r  M	mask_copyr   first_non_zeroposidx_roll	mask_rollrolled_tensorscol_sums
empty_colsfirst_empty_colflushed_maskflushed_tensorss                   rD   
flush_leftr  v  s^   P ::DAq 

I")*Qqwwy*G* %%!%,N
,,q!1!1
2
<
<Q
?Cn..q11Q6H  H-I5<=ahhq(+=N= }}}#HQJAKAQc*--

3::<=WXOQ 0 001L7EF!q,_,,-FOF)/))% + > Gs   E,E5Ec                    | j                   \  }}| j                         }|D cg c]  }|j                          }}t        j                  |      }|j	                  d      }t        j
                  ||j                        j                  d      }||j                  d      z
  |z  }	|j                  d|	      }
|D cg c]  }|j                  d|	       }}|
j                  d      }|dk7  }|j                         r6t        |j                  t        j                        j	                               n|}|
dd|df   }|D cg c]  }|dd|df    }}|s|S |g|S c c}w c c}w c c}w )zs
    Shift non-zero elements in the mask and corresponding tensors to the right. See `flush_left` for details.
    rM   rp  rS  r   N)r   r  rd   fliplrrc  re   rO   ra   r  rU  r  r   rT  r  )r   r   r  r  r  r   flipped_maskr  r  r  r  r  r  non_empty_colsfirst_non_empty_colr  r  s                    rD   flush_rightr    sl    ::DAq 

I")*Qqwwy*G* <<	*L!((Q(/N
,,q!1!1
2
<
<Q
?Cn..q11Q6H  H-I5<=ahhq(+=N= }}}#H]NIWI[I[I]#n//

;BBDEcdQ 3 445L;IJaq/001JOJ)/))' + > Ks   E(E-
E2c           	      Z   | j                   t        j                  t        j                  fv rut        j                  | d|j                  d            j                  d      }t        j                  | D cg c]  }t        j                  |d       c}      }||z
  }|S g }t        | |      D ]^  \  }}t        j                  |d      }|j	                  d|j                  d            j                  d      }	|j                  |	       ` t        j                  |      }|S c c}w )aw  
    A memory-efficient implementation of the common `log_softmax -> gather` operation.

    This function is equivalent to the following naive implementation:
    ```python
    logps = torch.gather(logits.log_softmax(-1), dim=-1, index=index.unsqueeze(-1)).squeeze(-1)
    ```

    Args:
        logits (`torch.Tensor`):
            Logits tensor of shape `(..., num_classes)`.
        index (`torch.Tensor`):
            Index tensor of shape `(...)`, specifying the positions to gather from the log-softmax output.

    Returns:
        `torch.Tensor`:
            Gathered log probabilities with the same shape as `index`.
    rK   )rn  indexrp  )rP   rd   r   float64r  ra   r  r@  	logsumexpr^   Flog_softmaxr\   )
rC  r  selected_logitslglogsumexp_valuesper_token_logps
row_logits
row_labels	row_logpsrow_per_token_logpss
             rD   selective_log_softmaxr    s   & ||u}}55,,v2U__R=PQYYZ\] ;;f'U(C'UV),<<  &)&%&8 	8"J
jb9I"+"2"2rAUAUVXAY"2"Z"b"bce"f""#67	8  ++o6 (Vs   6D(r   completionsr  r  stepnum_samplesc                 
   t               st        d      t               }t        ddd      }|j	                  dd       |j	                  dd	       |j                         D ]  }|j	                  |d
d        |j	                  ddd       ||t        |       k\  rd}n|dk  ry|t        j                  t        t        |             |      }	|	D 
cg c]  }
| |
   	 } }
|	D 
cg c]  }
||
   	 }}
|j                         D 
ci c]  \  }}||	D 
cg c]  }
||
   	 c}
 }}}}
|	D 
cg c]  }
||
   	 }}
t        t        |             D ]j  }
|j                         D cg c]  }||   |
   d }} |j                  t        | |
         t        ||
         g|||
   d  |j                          l t        |dd| d      }|j                  |       yc c}
w c c}
w c c}
w c c}
}}w c c}
w c c}w )u  
    Print out a sample of model completions to the console with multiple reward metrics.

    This function creates a nicely formatted table showing prompt-completion pairs, useful for monitoring model outputs
    during training. It requires the `rich` library to be installed.

    Args:
        prompts (`list[str]`):
            List of prompts.
        completions (`list[str]`):
            List of completions corresponding to the prompts.
        rewards (`dict[str, list[float]]`):
            Dictionary where keys are reward names and values are lists of rewards.
        advantages (`list[float]`):
            List of advantages corresponding to the prompts and completions.
        step (`int`):
            Current training step number, used in the output title.
        num_samples (`int` or `None`, *optional*, defaults to `None`):
            Number of random samples to display. If `None` (default), all items will be displayed.

    Example:
    ```python
    >>> from trl.trainer.utils import print_prompt_completions_sample

    >>> prompts = ["The sky is", "The sun is"]
    >>> completions = [" blue.", " in the sky."]
    >>> rewards = {"Correctness": [0.123, 0.456], "Format": [0.789, 0.101]}
    >>> advantages = [0.987, 0.654]
    >>> print_prompt_completions_sample(prompts, completions, rewards, advantages, 42)
    ╭──────────────────────────── Step 42 ─────────────────────────────╮
    │ ┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━┓ │
    │ ┃ Prompt     ┃ Completion   ┃ Correctness ┃ Format ┃ Advantage ┃ │
    │ ┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━┩ │
    │ │ The sky is │  blue.       │        0.12 │   0.79 │      0.99 │ │
    │ ├────────────┼──────────────┼─────────────┼────────┼───────────┤ │
    │ │ The sun is │  in the sky. │        0.46 │   0.10 │      0.65 │ │
    │ └────────────┴──────────────┴─────────────┴────────┴───────────┘ │
    ╰──────────────────────────────────────────────────────────────────╯
    ```
    zvThe function `print_prompt_completions_sample` requires the `rich` library. Please install it with `pip install rich`.Tz
bold white)show_headerheader_styleexpandPromptbright_yellow)style
Completionbright_greenz	bold cyanr   )r  justify	Advantagezbold magentaNr   z.2fFzStep )r  titleborder_style)r   r  r"   r$   r  r   rW   r  samplerV   r  r  r%   add_sectionr#   r  )r   r  r  r  r  r  r  r  reward_nameindicesrm   keyvalreward_valuespanels                  rD   print_prompt_completions_sampler    s   ` "
 	
 iGddKE 
X_5	\8||~ JKIJ	[H #g,&KA --c'l 3[A'./!71:///67!{1~77BI--/RRhc331AQ11RR-45jm5
53w<  =D\\^LcGCLOC01LLd71:&[^(<e}eQ[\]Q^_bPce
 %uTF^,WEMM% 071R5 Ms0   G%*G*G4G/%G42G;)H /G4)r   r   N)Ncpu)rK   )r  )   )FF)rG   N)NNNNr{   )}r@  importlib.resources	resourcesr  rB  r  r4   collectionsr   r   r   importlib.metadatar   typingr   r   r	   r
   r  numpyrX   pandaspdrd   torch.nn.functionalrw  
functionalr  torch.utils.data
accelerater   r   accelerate.stater   huggingface_hubr   r   torch.nn.utils.rnnr   r   rt  r   r   r   r   r   r   r   r   transformers.utilsr   r   r   r   r   r   trainer.model_configr!   rich.consoler"   
rich.panelr#   
rich.tabler$   	rich.textr%   r  peftr&   r'   r)   r   r   r   r   r   r8   r   r   r   r  rP  r   r%  r   r   rk  rs  Modulerz  r  r  r  r  r  r  r  r  	DataFramer  SIMPLE_SFT_CHAT_TEMPLATESIMPLE_CHAT_TEMPLATEr  r  r   r  r  r  r`   r1  r9  r?  rJ  rS  rU  rY  r\  rg  r  r  r  r  r  r  r  r   rE   rD   <module>r     s    +     ( & 0 0        0 - 4 + ,	 	 	  / $  +p&E pf _
 _
 _
D J J J^ (,	?%,,?? ? !	?
 \\?D P P PfGO GT 7@ 7@ 7@t 5:G\\G
5<<s*+G G("" ""4U
3C ""J
%,, 
 
c5j@Q 
X[ 
ejeqeq 
EHHOO  &k &kR7 AS8T &Xd38n5  0F 2f$2 $  r  C     t & t  t n 38** 3ell 3 3./88??/-2\\/IL/^a/
5<<u||34/d88??\\  	B `e388??39<3DH3X\3
XX__3l#S # # #Y^YeYe #.$U$U+0<<$UGJ$U_o$U
5<<%&$UN 288??2\\2 '*2 	2
 (2 2@93-99 T#Y'9 "%	9
 T#Y'9 $'9 #tCy.)9,	*	*&*3S	>&:	*MQRUW[\_W`R`Ma	*||,/?B
5<<%&:!$AU\\ A>U AZ^_bZc A4 '+!%"#GGG G 3-	G
 s)G }G G smG #G smG }G GT
(3- 
@ @BLL @T @$>*U\\ >*U\\ >*eELLRWX]XdXdfiXiRjDj>k >*B*ell *ell *uU\\SXY^YeYegjYjSkEk?l *< ELL  R T#YTcT #tE{"#T U	T
 T T 
TrE   