
    biO                         d Z ddlZddlZddlmZ ddlmZmZ ddlm	Z	m
Z
mZmZmZmZmZmZ ddlmZ d Zddej(                  fd	Zed
k(  r& e       Zej1                         \  ZZZ eeee       yy)a  
# Full training
```bash
python trl/scripts/dpo.py     --dataset_name trl-lib/ultrafeedback_binarized     --dataset_streaming     --model_name_or_path Qwen/Qwen2-0.5B-Instruct     --learning_rate 5.0e-7     --num_train_epochs 1     --per_device_train_batch_size 2     --gradient_accumulation_steps 8     --gradient_checkpointing     --eval_strategy steps     --eval_steps 50     --output_dir Qwen2-0.5B-DPO     --no_remove_unused_columns
    --report_to wandb
```

# LoRA:
```bash
python trl/scripts/dpo.py     --dataset_name trl-lib/ultrafeedback_binarized     --dataset_streaming     --model_name_or_path Qwen/Qwen2-0.5B-Instruct     --learning_rate 5.0e-6     --num_train_epochs 1     --per_device_train_batch_size 2     --gradient_accumulation_steps 8     --gradient_checkpointing     --eval_strategy steps     --eval_steps 50     --output_dir Qwen2-0.5B-DPO     --no_remove_unused_columns     --use_peft     --lora_r 32     --lora_alpha 16
    --report_to wandb
```
    N)load_dataset)AutoModelForCausalLMAutoTokenizer)	DPOConfig
DPOTrainerModelConfigScriptArguments	TrlParserget_kbit_device_mapget_peft_configget_quantization_config)SIMPLE_CHAT_TEMPLATEc           	      >   |j                   dv r|j                   nt        t        |j                         }t        |      }t	        |j
                  |j                  ||j                  rdnd|
t               nd |      }t        j                  |j                  fd|j                  i|}t        |      }|-t        j                  |j                  fd|j                  i|}nd }t        j                  |j                  |j                        }	|	j                  |	j                   |	_        |	j"                  t$        |	_        | j&                  rF|j)                         D 
cg c]%  \  }
}|j*                  t        j,                  k(  s$|
' c}}
|_        t1        | j2                  | j4                  | j6                        }t9        ||||| j:                     |j<                  dk7  r|| j>                     nd |	|	      }|jA                          |j<                  dk7  r4|jC                         }|jE                  d
|       |jG                  d
|       |jI                  |jJ                         |jL                  r|jM                  | j2                         y y c c}}
w )N)autoNFT)revisionattn_implementationtorch_dtype	use_cache
device_mapquantization_configtrust_remote_code)r   )name	streamingno)argstrain_dataseteval_datasetprocessing_classpeft_configeval)dataset_name)'r   getattrtorchr   dictmodel_revisionr   gradient_checkpointingr   r   from_pretrainedmodel_name_or_pathr   r   r   	pad_token	eos_tokenchat_templater   ignore_bias_buffersnamed_buffersdtypebool!_ddp_params_and_buffers_to_ignorer   r!   dataset_configdataset_streamingr   dataset_train_spliteval_strategydataset_test_splittrainevaluatelog_metricssave_metrics
save_model
output_dirpush_to_hub)script_argstraining_args
model_argsr   r   model_kwargsmodelr   	ref_model	tokenizerr   bufferdatasettrainermetricss                  J/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/trl/scripts/dpo.pymainrI   K   s|   
 #-"8"8N"J
PWX]_i_u_uPv  2*=**&::(??%T,?,K&(QU/L !00%%9C9U9UYeE "*-K(88))
=G=Y=Y
]i
	 	--%%9U9UI "'11	&"6	&& &+%8%8%:3
!T6fllejj>XD3
/   ''//G k==>@M@[@[_c@cW[;;<im"G MMO""d*""$FG,VW- }//0  )A)AB !G3
s   %%JJ
subparsersc                 n    t         t        t        f}| | j                  dd|      }|S t	        |      }|S )NdpozRun the DPO training script)helpdataclass_types)r	   r   r   
add_parserr
   )rJ   rN   parsers      rH   make_parserrQ      sE    &	;?O&&u3Pbq&r M ?+M    __main__)N)__doc__argparser#   datasetsr   transformersr   r   trlr   r   r   r	   r
   r   r   r   trl.trainer.utilsr   rI   _SubParsersActionrQ   __name__rP   parse_args_and_configr=   r>   r?    rR   rH   <module>r^      s~   'R   ! <	 	 	 3GCTH66  z]F-3-I-I-K*K
mZ0 rR   