
    biX                        d dl Z d dlZd dlmZ d dlmZmZmZ d dlZd dl	m
Z
 d dlmZ d dlmZmZmZmZmZmZmZmZmZmZmZmZmZ d dlmZ d dlmZ d	d
lm Z  ddl!m"Z" ddl#m$Z$m%Z%  e       rd dl&m'Z'  e       rd dl(Z( G d de      Z)y)    N)Path)CallableOptionalUnion)Dataset)
DataLoader)AutoModelForCausalLMAutoTokenizerBaseImageProcessorDataCollatorDataCollatorForLanguageModelingDataCollatorForSeq2SeqFeatureExtractionMixinPreTrainedModelPreTrainedTokenizerBaseProcessorMixinTrainerTrainingArgumentsis_wandb_available)EvalLoopOutput)is_peft_available   )PPODecorators   )IterativeSFTConfig)generate_model_cardget_comet_experiment_url)	PeftModelc                       e Zd ZdZddgZ	 	 	 	 	 	 	 	 	 	 d"deeef   deee	e
f      dee   deeeeeef   f      d	eeeeeef      d
eej(                  j*                  ej(                  j,                  j.                  f   deeej2                  ej2                  gej2                  f      deeegef      dee   dee   dee   f fdZdede	defdZdej2                  dej2                  dej2                  fdZe de!ejD                     de!ejD                     de!ejD                     de!e   de!e   f
d       Z# e$jJ                         	 	 	 	 	 d#dee!ejD                        dee!ejD                        dee!ejD                        dee!e      dee!e      f
d       Z&d Z' fdZ(	 	 	 d$dee   dee   d eee!e   df   fd!Z) xZ*S )%IterativeSFTTrainera
  
    The IterativeSFTTrainer can be used to finetune models with methods that requires some steps between optimization.

    Args:
        model (`Union[str, PreTrainedModel]`):
            Model to be trained. Can be either:

            - A string, being the *model id* of a pretrained model hosted inside a model repo on huggingface.co, or a
              path to a *directory* containing model weights saved using
              [`~transformers.PreTrainedModel.save_pretrained`], e.g., `'./my_model_directory/'`. The model is loaded
              using [`~transformers.AutoModelForCausalLM.from_pretrained`] with the keyword arguments in
              `args.model_init_kwargs`.
            - A [`~transformers.PreTrainedModel`] object. Only causal language models are supported.
        args ([`IterativeSFTConfig`], *optional*, defaults to `None`):
            Configuration for this trainer. If `None`, a default configuration is used.
        data_collator (`DataCollator`, *optional*):
            Function to use to form a batch from a list of elements of the processed `train_dataset` or `eval_dataset`.
            Will default to [`~transformers.default_data_collator`] if no `processing_class` is provided, an instance
            of [`~transformers.DataCollatorWithPadding`] otherwise if the processing_class is a feature extractor or
            tokenizer.
        eval_dataset (`datasets.Dataset`):
            The dataset to use for evaluation.
        processing_class ([`~transformers.PreTrainedTokenizerBase`], *optional*, defaults to `None`):
            Processing class used to process the data. If `None`, the processing class is loaded from the model's name
            with [`~transformers.AutoTokenizer.from_pretrained`].
        optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`):
            The optimizer and scheduler to use for training.
        preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`):
            The function to use to preprocess the logits before computing the metrics.
        compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
            The function to use to compute the metrics. Must take a `EvalPrediction` and return a dictionary string to
            metric values.
        max_length (`int`, *optional*, deprecated):
            Maximum length of the tokenized sequence. Use `args.max_length` instead.
        truncation_mode (`str`, *optional*, deprecated):
            The truncation mode to use. Use `args.truncation_mode` instead.
        optimize_device_cache (`bool`, *optional*, deprecated):
            Whether to optimize accelerator cache. Use `args.optimize_device_cache` instead.
    trlziterative-sftNmodelargsdata_collatoreval_datasetprocessing_class
optimizerspreprocess_logits_for_metricscompute_metrics
max_lengthtruncation_modeoptimize_device_cachec           
         i }|	|	|d<   t        j                  dt               |
|
|d<   t        j                  dt               |||d<   t        j                  dt               t        |t              r|n|j
                  j                  }|#|j                  d      d   }t        | d	      }n[t        |t              rKt        |t              s;|j                         }|j                  |d
<   |j                  d       t        di |}|r%|j                         D ]  \  }}t        |||        |t        j                   |      }|j"                  %t        |t              st        j                  d       t        |t              r| j%                  ||      }t'               rt        |t(              rd| _        nd| _        || _        t/        |j
                  dd      | _        |=| j0                  rt3        |dd      | _        n$t7        | j,                  d      | _        n|| _        |j8                  | _        |j:                  | _        |j<                  | _        t>        |   ||| j4                  |||||       tC        | jD                  d      r%| jD                  jG                  | jH                         | jK                  | jL                  jN                         | jP                  jS                  | jD                  | jT                  | jV                        \  | _"        | _*        | _+        | j:                  dk(  rdnd| j,                  _,        tC        | d      st[        d      | j<                  t\        _        y )Nr*   zThe `max_length` parameter is deprecated and will be removed in version 0.20. Pass it through the `args` parameter using `IterativeSFTConfig(max_length=...)` instead.r+   zThe `truncation_mode` parameter is deprecated and will be removed in version 0.20. Pass it through the `args` parameter using `IterativeSFTConfig(truncation_mode=...)` instead.r,   zThe `optimize_device_cache` parameter is deprecated and will be removed in version 0.20  Pass it through the `args` parameter using `IterativeSFTConfig(optimize_device_cache=...)` instead./z-IterativeSFT	hub_tokenpush_to_hub_tokenzYou passed model_init_kwargs to the `IterativeSFTConfig`, but your model is already instantiated. The `model_init_kwargs` will be ignored.TFis_encoder_decoder   )label_pad_token_idpad_to_multiple_of)mlm)r"   r#   r$   r%   r&   r)   r'   r(   add_model_tagskeep_endleftrightacceleratorzXYour `Trainer` does not have an `accelerator` object. Consider upgrading `transformers`. )/warningswarnDeprecationWarning
isinstancestrconfig_name_or_pathsplitr   r   to_dictr0   popitemssetattrr
   from_pretrainedmodel_init_kwargs_create_model_from_pathr   r   is_peft_modelr&   getattrr2   r   r$   r   r*   r+   r,   super__init__hasattrr"   r8   
_tag_namescreate_optimizer_and_schedulerr#   	max_stepsr<   prepare	optimizerlr_schedulertruncation_sideAttributeErrorr   )selfr"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   deprecated_paramsmodel_id
model_name	dict_argskeyvalue	__class__s                     \/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/trl/trainer/iterative_sft_trainer.pyrP   zIterativeSFTTrainer.__init__a   s<   * !.8l+MMk"
 &3B/0MMp"
 !,9N56MMv" 'uc258R8R<!,R0J%M&BCD/0DJ\9]I%)^^Ik"MM-.%2	2D /557 *
Uc5)* #,<<XF !!-j6LMM; eS!00=E :eY#?!%D!&D 0")%,,8Le"T &&%;$RS&" &ETEZEZ`e%f"!.D//#33%)%?%?",,%-+!*G 	 		
 4::/0JJ%%doo6++DII,?,?@ 9=8H8H8P8PJJ(9(99
5
DND$5 ;?:N:NR\:\bi-t]+ j  /3.H.H+    
model_pathreturnc                 N    |j                   xs i }t        j                  |fi |S )z0Creates a model from a path or model identifier.)rK   r	   rJ   )rZ   rd   r#   rK   s       rb   rL   z+IterativeSFTTrainer._create_model_from_path   s*     228b#33JTBSTTrc   	input_idsattention_masklabelsc                    |"|D cg c]  }t        j                  |       }}| j                  r| j                  t	        |||      D cg c]  \  }}}|||d c}}}      j                  | j                  j                        }|j                  dd        d|d   |d   | j                  j                  k(  <   nT| j                  t	        ||      D cg c]
  \  }}||d c}}      j                  | j                  j                        }| j                  | j                  dk(  r2|j                         D 	ci c]  \  }}	||	d | j                    }}}	|S | j                  dk(  r3|j                         D 	ci c]  \  }}	||	| j                   d   }}}	|S t        d| j                         |S c c}w c c}}}w c c}}w c c}	}w c c}	}w )	Nrg   rh   ri   decoder_input_idsr3   ri   )rg   rh   
keep_startr9   zUnknown truncation mode: )torch	ones_liker2   r$   ziptor"   devicerG   r&   pad_token_idr*   r+   rH   
ValueError)
rZ   rg   rh   ri   idsattlab
input_datakvs
             rb   prepare_model_inputsz(IterativeSFTTrainer.prepare_model_inputs   s   !>GHseooc2HNH""++ *-Y)O %S# #&L
 b""#  NN.5_cJx H!59N9N9[9[![\ ++KNyZhKijxsCsc:jb""# 
 ??&##|3BLBRBRBTU$!Qa#4T__!55U
U  %%3CMCSCSCUV41aaDOO#3#5!66V
V  !#<T=Q=Q<R!STT9 I k VVs   F<GG
GGtextstexts_labelsc           
      `   ||}t        ddg| |g      D ]h  \  }}t        |t              st        | dt	        |             t        |d   t
        j                        rMt        d| dt	        |d                 n't        g d| ||g      D ]h  \  }}t        |t              st        | dt	        |             t        |d   t
        j                        rMt        d| dt	        |d                 nt        |t              st        dt	        |             t        |d   t              st        d	t	        |d                |Tt        |t              st        d
t	        |             t        |d   t              st        dt	        |d                | ||||fS )a  
        Check if the input data is valid for training.

        Args:
            input_ids (list[`torch.LongTensor`]):
                List of tensors containing the input_ids
            attention_mask (list[`torch.LongTensor`]):
                List of tensors containing the attention_mask
            labels (list[`torch.FloatTensor`]):
                List of tensors containing the labels
            texts (list[`str`]):
                List of string containing the text input.
            texts_labels (list[`str`]):
                List of string containing the text labels.

        Returns:
            `tuple`: The input data.
        rg   ri   z! must be a list of tensors - got r   zElements in z must be tensors - got rk   z''text' must be a list of strings - got z)Elements in 'text' must be strings - got z.'text_labels' must be a list of strings - got z0Elements in 'text_labels' must be strings - got )rp   rA   listrt   typern   TensorrB   )rg   rh   ri   r|   r}   nametensor_lists          rb   _step_safety_checkerz(IterativeSFTTrainer._step_safety_checker  s   4 =%),k8-DyRXFY)Z m%D+%k48(D61RSWXcSdRe)fgg%k!nellC(<v=TUYZefgZhUiTj)kll	m *-=	>[a?b* m%D+ &k48(D61RSWXcSdRe)fgg%k!nellC(<v=TUYZefgZhUiTj)kllm eT* #J4PU;-!XYYeAh, #LTRWXYRZ^L\!]^^'!,5$'UVZ[gVhUi%jkk!,q/37$'WX\]ijk]lXmWn%opp.&%EErc   c                      j                   j                           j                  j                  dk(  rXt	        j
                  d      j                   j                  j                         _	         j                  j                   _
        ||t        d      ||t        j                  dt               || j                  rt        d      ||dd nd}||dd nd}||dd nd}||dd nd}||dd nd} j!                  |||||      \  }}}}}|* j#                  | j$                  ddd	      }|d
   |d   }}|# j#                  | j$                  ddd	      d
   }||} j'                  |||      }t)        |j+                               }i }|j-                  |        fd}	t/        j0                  |      }
|
j3                  d       t5        |
 j                  j6                  d|	      }t9        |      D ]  \  }} j:                  j=                   j                         5  |D ci c]  }|||   
 }} j?                   j                   |      } j                  j@                  dkD  r|jC                         }|jE                         } j:                  jG                  |        j:                  jH                  r^ j                  jJ                  H j:                  jM                   j                   jO                          j                  jJ                          jP                  jS                           jP                  jU                           jV                   jV                  jS                           j                  xj                  dz  c_         xj                  |z  c_	         jY                          ddd        yc c}w # 1 sw Y   xY w)a  
        Run an optimisation step given a list of input_ids, attention_mask, and labels or a list of text and
        text_labels.

        Args:
            input_ids (list[`torch.LongTensor`]):
                List of tensors containing the input_ids (if not provided, text will be used)
            attention_mask (list[`torch.LongTensor`], , *optional*):
                List of tensors containing the attention_mask
            labels (list[`torch.FloatTensor`], *optional*):
                List of tensors containing the labels (if set to None, will default to input_ids)
            texts (list[`str`], *optional*):
                List of strings containing the text input (if not provided, input_ids will directly be used)
            texts_labels (list[`str`], *optional*):
                List of strings containing the text labels (if set to None, will default to text)

        Returns:
            `dict[str, Any]`: A summary of the training statistics
        r   g        Nz@Step should include `input_ids` or `texts` as keyword arguments.ztBoth `input_ids` and `texts` argument are provided. `input_ids` will be ignored. Please provide only one of the two.zNo 'labels' or 'text_labels' are provided. When using an encoder-decoder architecture, 'labels' or 'text_labels' must be passed.Tpt)r*   
truncationpaddingreturn_tensorsrg   rh   c                     t               }| d   D ]S  }|dv st        j                  | D cg c]  }||   	 c}      j                  j                  j
                        ||<   U |S c c}w )Nr   rk   )dictrn   stackrq   r"   rr   )datareturn_dictr_   drZ   s       rb   collatorz*IterativeSFTTrainer.step.<locals>.collator  sm    &KAw aCC',{{D3IqAcF3I'J'M'MdjjN_N_'`K$a  4Js   A)
rn   )
batch_sizeshuffle
collate_fnr   )-r"   trainstateglobal_steprn   tensorrq   r#   rr   tr_loss_globalstep_last_loggedrt   r>   r?   UserWarningr2   r   r&   r*   r{   r   keysupdater   	from_dict
set_formatr   per_device_train_batch_size	enumerater<   
accumulatecompute_lossn_gpumeandetachbackwardsync_gradientsmax_grad_normclip_grad_norm_
parametersrV   step	zero_gradrW   _maybe_log_save_evaluate)rZ   rg   rh   ri   r|   r}   model_inputsmodel_inputs_names
batch_dictr   
batch_datastep_dataloader_batchry   losstr_loss_steps   `                rb   r   zIterativeSFTTrainer.step;  s   8 	

::!!Q& <<,//		0@0@ADL+/::+A+AD(_``"u'8MM6 >l2t7N7N S 
 %.$9IaLt	.<.H*d$0d!-a4*6*B|AAEAZAZ~vulB
>	>65, 00$//dDae 1 L )5[(A<P`Ca~I#**$//dDae + F >F00NFS!,"3"3"56
,'	 &&z2
g&$yy<<	
 "/2 	0HAu!!,,TZZ8 05GH58HH((\B99??Q&99;D#{{}  ))$/##22tyy7N7N7Z$$44

--/		//
 ##%((*$$0%%**,

&&!+& ,--/70 0	0H0 0s   6O;OFOOO'	c                 R   | j                   j                  d| j                  j                  | j                   j                  z  dk(  r4| j                  j                  dk7  r| j	                  | j
                         | j                   j                  | j                  j                  | j                   j                  z  dk(  r| j                  j                  dk7  ri }| j                  | j                        j                         j                         }| xj                  | j                  z  c_        t        || j                  j                  | j                  z
  z  d      |d<   | j                         |d<   | j                  j                  | _        | j                  |       y y y y )Nr      r   learning_rate)r#   
eval_stepsr   r   evaluater%   logging_steps_nested_gatherr   r   itemroundr   _get_learning_ratelog)rZ   logstr_loss_scalars      rb   r   z,IterativeSFTTrainer._maybe_log_save_evaluate  sF   99+zz%%		(<(<<AdjjF\F\`aFad//0 99"".zz%%		(?(??1DI_I_cdId)+!%!4!4T\\!B!G!G!I!N!N!P ,$^tzz7M7MPTPlPl7l%mopqV(,(?(?(A_%/3zz/E/E, JeD /rc   c                    | j                   j                  *t        | j                   j                        j                  }n(| j                   j                  j                  d      d   }| j                  |       t        | !  ||       y )Nr.   r/   )r]   )	r#   hub_model_idr   
output_dirr   rE   create_model_cardrO   _save_checkpoint)rZ   r"   trialr]   ra   s       rb   r   z$IterativeSFTTrainer._save_checkpoint  sl    99!!)dii22388J//55c:2>J*5 .rc   r]   dataset_nametagsc           
      V   | j                         syt        | j                  j                  d      r^t        j
                  j                  | j                  j                  j                        s!| j                  j                  j                  }nd}|t               }nt        |t              r|h}nt        |      }t        | j                  j                  d      r|j                  d       |j                  | j                         t        ||| j                  ||t!               r.t"        j$                  t"        j$                  j'                         ndt)               d      }|j+                  t        j
                  j-                  | j.                  j0                  d             y)a  
        Creates a draft of a model card using the information available to the `Trainer`.

        Args:
            model_name (`str` or `None`, *optional*, defaults to `None`):
                Name of the model.
            dataset_name (`str` or `None`, *optional*, defaults to `None`):
                Name of the dataset used for training.
            tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
                Tags to be associated with the model card.
        NrD   unsloth_versionunslothzIterative SFT)
base_modelr]   r   r   r   	wandb_url	comet_urltrainer_namez	README.md)is_world_process_zerorQ   r"   rC   ospathisdirrD   setrA   rB   addr   rR   r   r   r   wandbrunget_urlr   savejoinr#   r   )rZ   r]   r   r   r   
model_cards         rb   r   z%IterativeSFTTrainer.create_model_card  s)   " ))+4::$$o6rww}}TZZM^M^MlMl?m**88JJ <5Dc"6Dt9D4::$$&78HHYDOO$(!!**%-?-AeiiF[eii'')ae.0(	

 	TYY%9%9;GHrc   )
NNNN)NNNNNNN)NNNNN)NNN)+__name__
__module____qualname____doc__rR   r   rB   r   r   r   r   r   r   r   r   r   r   r   tuplern   optim	OptimizerrW   LambdaLRr   r   r   intboolrP   rL   r{   staticmethodr   
LongTensorr   r   empty_device_cacher   r   r   r   __classcell__)ra   s   @rb   r    r    6   s:   &P )J
 HL04EI W
 imFJ$()-04%}IS/)*}I u/1BBCD}I  -	}I
 uWd3<.@%@AB}I #)+=?UWeef
}I %++//1I1I1R1RRS}I (0%,,9UW\WcWc9c0d'e}I "(N+;T+A"BC}I  SM!}I" "##}I$  (~%}I~U# U=O UTc U
ell ELL bgbnbn @ 3F(()3FU--.3F U%%&3F Cy	3F
 3i3F 3Fj &]%%' 7;;?37%),0}0D!1!123}0 !e&6&6!78}0 e../0	}0
 S	"}0 tCy)}0 (}0~0/ %)&*,0	1ISM1I sm1I CcD()	1Irc   r    )*r   r>   pathlibr   typingr   r   r   rn   datasetsr   torch.utils.datar   transformersr	   r
   r   r   r   r   r   r   r   r   r   r   r   transformers.trainer_utilsr   transformers.utilsr   corer   iterative_sft_configr   utilsr   r   peftr   r   r    r=   rc   rb   <module>r      sj    
   , ,   '    6 0   4 @  VI' VIrc   