
    biA                     >   d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	m
Z
mZ d dlZd dlmZ d dlmZ d dlmZmZ d dlmZmZmZmZmZmZmZmZmZ d dlmZ d d	l m!Z! d d
l"m#Z# ddl$m%Z% ddl&m'Z'm(Z(m)Z)  e#       r
d dl*m+Z+m,Z,m-Z-  e       rd dl.Z. G d de      Z/y)    N)chain)Path)CallableOptionalUnion)PartialState)Datasetfeatures)	BaseImageProcessorDataCollator"DataCollatorForTokenClassificationFeatureExtractionMixinPreTrainedModelPreTrainedTokenizerBaseProcessorMixinTraineris_wandb_available)TrainerCallback)EvalPrediction)is_peft_available   )	PRMConfig)compute_accuracydisable_dropout_in_modelgenerate_model_card)	PeftModelget_peft_modelprepare_model_for_kbit_trainingc                        e Zd ZdZddgZ	 	 	 	 	 	 	 	 	 	 	 	 ddeeeej                  f      dee
   dee   dee   d	eeeeeef   f      d
eeeeeef      deeg ef      deeegef      deee      deej2                  j4                  ej2                  j6                  j8                  f   deeej:                  ej:                  gej:                  f      dee   f fdZed        Z  fdZ!	 	 	 ddee   dee   deeee   df   fdZ" xZ#S )
PRMTrainera  
    Initialize PRMTrainer.

    Args:
        model (`transformers.PreTrainedModel`):
            The model to train, preferably an `AutoModelForTokenClassification`.
        args (`PRMConfig`):
            The arguments to use for training.
        data_collator (`transformers.DataCollator`):
            The data collator to use for training. If None is specified, the default data collator
            (`DataCollatorForTokenClassification`) will be used which will pad the sequences to the maximum length of
            the sequences in the batch, given a dataset of paired sequences.
        train_dataset (`datasets.Dataset`):
            The dataset to use for training.
        eval_dataset (`datasets.Dataset`):
            The dataset to use for evaluation.
        processing_class (`PreTrainedTokenizerBase` or `BaseImageProcessor` or `FeatureExtractionMixin` or `ProcessorMixin`, *optional*):
            Processing class used to process the data. If provided, will be used to automatically process the inputs
            for the model, and it will be saved along the model to make it easier to rerun an interrupted training or
            reuse the fine-tuned model.
        model_init (`Callable[[], transformers.PreTrainedModel]`):
            The model initializer to use for training. If None is specified, the default model initializer will be
            used.
        compute_metrics (`Callable[[transformers.EvalPrediction], dict]`, *optional* defaults to `compute_accuracy`):
            The metrics to use for evaluation. If no metrics are specified, the default metric (`compute_accuracy`)
            will be used.
        callbacks (`list[transformers.TrainerCallback]`):
            The callbacks to use for training.
        optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`):
            The optimizer and scheduler to use for training.
        preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`):
            The function to use to preprocess the logits before computing the metrics.
        peft_config (`dict`, defaults to `None`):
            The PEFT configuration to use for training. If you pass a PEFT configuration, the model will be wrapped in
            a PEFT model.
    trlprmNmodelargsdata_collatortrain_dataseteval_datasetprocessing_class
model_initcompute_metrics	callbacks
optimizerspreprocess_logits_for_metricspeft_configc                 n   t               s|t        d      t               r|t        |t              st	        |dd      st	        |dd      rdt        t        j                  t              j                        v }d|j                  i}|s"|j                  t        j                  d       n|r|j                  |j                  |d<   t        |fi |}t        ||      }|j                  rt!        |       |t"        }|$|t        d      t%        ||j&                  	      }d
|j(                  vrt+               j-                         5  ||j.                  |j&                  |j0                  |j2                  |j4                  d}i |ddi}|j7                  | j8                  ||j:                  |j<                  dt=        j>                  t=        j@                  t=        jB                  d            t=        j@                  t=        jB                  d            d            }i |ddi}||j7                  | j8                  ||j:                  |j<                  dt=        j>                  t=        j@                  t=        jB                  d            t=        j@                  t=        jB                  d            d            }d d d        tD        |   |||||||||	|
|       tI        | jJ                  d      r&| jJ                  jM                  | jN                         y y # 1 sw Y   `xY w)NzvPEFT is not installed and you passed a `peft_config` in the trainer's kwargs, please install it to use the PEFT modelsis_loaded_in_8bitFis_quantizedgradient_checkpointing_kwargsuse_gradient_checkpointingzYou passed `gradient_checkpointing_kwargs` in the trainer's kwargs, but your peft version does not support it. please update to the latest version of peft to use `gradient_checkpointing_kwargs`.z^A processing_class must be specified when using the default DataCollatorForTokenClassification)
max_length	input_ids)	tokenizerstep_separatorr4   max_prompt_lengthmax_completion_lengthtrain_on_last_step_onlyis_evalzTokenizing train datasetint64)labelsr5   )	fn_kwargsnum_procremove_columnsdescr
   TzTokenizing eval dataset)r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   add_model_tags)(r   
ValueError
isinstancer   getattrlistinspect	signaturer   
parametersgradient_checkpointingr2   warningswarnr   disable_dropoutr   r   r   r4   column_namesr   main_process_firstr7   r8   r9   r:   maptokenize_rowdataset_num_procr
   FeaturesSequenceValuesuper__init__hasattrr#   rB   
_tag_names)selfr#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   _supports_gc_kwargsprepare_model_kwargsr>   train_fn_kwargseval_fn_kwargs	__class__s                     R/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/trl/trainer/prm_trainer.pyrW   zPRMTrainer.__init__]   s#   ( !"{'> I   [%<eY/5"5u=P^`eAf*IT))*IJUUN +' -I$JeJe+f(.43U3U3a r -1S1S1_PTPrPr,-LM;EZEYZE&uk: $U+".O ' t  ??O\`\k\klMm888224 &!1&*&9&9"&//)-)?)?-1-G-G/3/K/K	 #BY"A	5"A - 1 1%%-!22#0#9#93%..&.&7&7w8O&P)1):):8>>';R)S !2 ! "@I!?y$!?+#/#3#3))"0!%!6!6'3'<'<6!)!2!2*2*;*;HNN7<S*T-5->->x~~g?V-W" $4 $L5&P 	''%-!+!*G 	 	
 4::/0JJ%%doo6 1o& &s   E<L++L4c                     || d   d      d   }| d   D 	cg c]  }	 ||	d      d    }
}	|r+|s)dgt        | d         dz
  z  t        | d   d	         gz   }n| d   D cg c]  }t        |       }}|j                  |d      }|
D 	cg c]  }	|	|z   	 }
}	t        |
|      D 	cg c]  \  }	}dgt        |	      dz
  z  |gz    }}	}t	        t        |
       }t	        t        |       }|j                  |j                  g|z   }||| d
 }|
|d
| }|d
| }||z   }dgt        |      z  |z   }|
|d
| }|d
| }||dS c c}	w c c}w c c}	w c c}}	w )a/	  
        Tokenize a row of the dataset.

        Args:
            features (`dict[str, str]`):
                Row of the dataset, should contain the keys `"prompt"`, `"completions"`, and `"labels"`.
            tokenizer (`PreTrainedTokenizerBase`):
                Tokenizer used to process the data.
            step_separator (`str`):
                Separator between steps in the completion.
            max_length (`int` or `None`):
               Maximum length of the sequences (prompt + completion). If `None`, the sequences are not truncated.
            max_prompt_length (`int` or `None`):
                Maximum length of the prompt. If `None`, the prompt is not truncated.
            max_completion_length (`int` or `None`):
                Maximum length of the completion sequences. If `None`, the completion sequences are not truncated.
            train_on_last_step_only (`bool`):
                Whether to train only on the last step. If `True`, the labels are `-100` for all tokens except the last
                token of the completion.
            is_eval (`bool`):
                Whether the function is used to tokenize samples from a training or an evaluation dataset. Used only if
                `train_on_last_step_only` is set to `True`.

        Returns:
            `dict[str, list[int]]`:
                Tokenized sequences with the keys `"input_ids"`, and `"labels".

        Example:
        ```python
        >>> from transformers import AutoTokenizer

        >>> tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B")
        >>> features = {
        ...     "prompt": "Which number is larger, 9.8 or 9.11?",
        ...     "completions": ["11 is greater than 8.", "Hence, 9.11 > 9.8."],
        ...     "labels": [True, False],
        ... }
        >>> PRMTrainer.tokenize_row(
        ...     features, tokenizer, "\n", max_completion_length=None, train_on_last_step_only=False, is_eval=False
        ... )
        {'input_ids': [23085, 1372, 374, 8131, 11, 220, 24, 13, 23, 476, 220, 24, 13, 16, 16, 30, 16, 16, 374, 7046, 1091, 220, 23, 13, 198, 39, 763, 11, 220, 24, 13, 16, 16, 861, 220, 24, 13, 23, 13, 198],
         'labels': [-100, -100, -100, -100, -100, -100, -100, -100, 1, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 0]}
        ```
        promptF)add_special_tokensr5   completionsir=   r   N)r5   r=   )lenintencodeziprF   r   bos_token_id)r
   r6   r7   r4   r8   r9   r:   r;   
prompt_ids
completioncompletions_idsr=   labelseparator_idscompletion_idsr5   s                   r`   rQ   zPRMTrainer.tokenize_row   s   p x1eL[Y
[cdq[r
MWIjU;KH
 
 #7Vs8H#56:;s8HCUVXCY?Z>[[F.6x.@AUc%jAFA "((E(RHWX*:5XX UXXgioTpq?Pz54&C
Oa/0E7:qq e_56eVn%!!-#001J>J (#%6$6$78J ,+,B-BCN223F/	#j/)F2!!+:.IKZ(F&&99I
 B Y rs   E"EE* Ec                    | j                   j                  *t        | j                   j                        j                  }n(| j                   j                  j                  d      d   }| j                  |       t        | !  ||       y )N/re   )
model_name)	r$   hub_model_idr   
output_dirnamesplitcreate_model_cardrV   _save_checkpoint)rZ   r#   trialrs   r_   s       r`   ry   zPRMTrainer._save_checkpoint3  sl    99!!)dii22388J//55c:2>J*5 .    rs   dataset_nametagsc                 r   | j                         syt        | j                  j                  d      r^t        j
                  j                  | j                  j                  j                        s!| j                  j                  j                  }nd}|t               }nt        |t              r|h}nt        |      }t        | j                  j                  d      r|j                  d       |j                  | j                         t        j                  d      }t!        ||| j"                  ||t%               r.t&        j(                  t&        j(                  j+                         ndd|d	      }|j-                  t        j
                  j/                  | j0                  j2                  d	             y)
a  
        Creates a draft of a model card using the information available to the `Trainer`.

        Args:
            model_name (`str` or `None`, *optional*, defaults to `None`):
                Name of the model.
            dataset_name (`str` or `None`, *optional*, defaults to `None`):
                Name of the dataset used for training.
            tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
                Tags to be associated with the model card.
        N_name_or_pathunsloth_versionunslotha          @article{uesato2022solving,
            title        = {{Solving Math Word Problems With Process- and Outcome-Based Feedback}},
            author       = {Uesato, Jonathan and Kushman, Nate and Kumar, Ramana and Song, Francis and Siegel, Noah and Wang, Lisa and Creswell, Antonia and Irving, Geoffrey and Higgins, Irina},
            year         = 2022,
            journal      = {arXiv preprint arXiv:2211.14275}
        }PRMzBSolving math word problems with process-and outcome-based feedback)	
base_modelrs   rt   r|   r}   	wandb_urltrainer_nametrainer_citationpaper_titlez	README.md)is_world_process_zerorX   r#   configospathisdirr   setrD   straddupdaterY   textwrapdedentr   rt   r   wandbrunget_urlsavejoinr$   ru   )rZ   rs   r|   r}   r   citation
model_cards          r`   rx   zPRMTrainer.create_model_card;  s>   " ))+4::$$o6rww}}TZZM^M^MlMl?m**88JJ <5Dc"6Dt9D4::$$&78HHYDOO$?? $  )!!**%-?-AeiiF[eii'')ae%\


 	TYY%9%9;GHr{   )NNNNNNNNN)NNNN)NNN)$__name__
__module____qualname____doc__rY   r   r   r   nnModuler   r   r	   dictr   r   r   r   r   r   r   rF   r   tupletorchoptim	Optimizerlr_schedulerLambdaLRTensorrW   staticmethodrQ   ry   rx   __classcell__)r_   s   @r`   r    r    5   s   #J J >B$(04+/EI >BFJ59W
 im&*%t7oryy89:t7 y!t7  -	t7
  (t7 uWd3<.@%@ABt7 #)+=?UWeef
t7 Xb/&9:;t7 "(N+;T+A"BCt7 D12t7 %++//1I1I1R1RRSt7" (0%,,9UW\WcWc9c0d'e#t7$ d^%t7l \: \:~/ %)&*,0	:ISM:I sm:I CcD()	:Ir{   r    )0rG   r   r   rK   	itertoolsr   pathlibr   typingr   r   r   r   torch.nnr   
accelerater   datasetsr	   r
   transformersr   r   r   r   r   r   r   r   r   transformers.trainer_callbackr   transformers.trainer_utilsr   transformers.utilsr   
prm_configr   utilsr   r   r   peftr   r   r   r   r     r{   r`   <module>r      su     	     , ,   # &
 
 
 : 5 0 ! R R OO@I @Ir{   