
    bi                         d Z ddlZddlmZ ddlmZmZ ddlmZm	Z	m
Z
mZmZmZmZ d Zd
dej                   fdZed	k(  r& e       Zej)                         \  ZZZ eeee       yy)a\  
Run the KTO training script with the commands below. In general, the optimal configuration for KTO will be similar to
that of DPO.

# Full training:
```bash
python trl/scripts/kto.py     --dataset_name trl-lib/kto-mix-14k     --model_name_or_path=trl-lib/qwen1.5-1.8b-sft     --per_device_train_batch_size 16     --num_train_epochs 1     --learning_rate 5e-7     --lr_scheduler_type=cosine     --gradient_accumulation_steps 1     --eval_steps 500     --output_dir=kto-aligned-model     --warmup_ratio 0.1     --report_to wandb     --bf16     --logging_first_step
```

# QLoRA:
```bash
# QLoRA:
python trl/scripts/kto.py     --dataset_name trl-lib/kto-mix-14k     --model_name_or_path=trl-lib/qwen1.5-1.8b-sft     --per_device_train_batch_size 8     --num_train_epochs 1     --learning_rate 5e-7     --lr_scheduler_type=cosine     --gradient_accumulation_steps 1     --eval_steps 500     --output_dir=kto-aligned-model-lora     --warmup_ratio 0.1     --report_to wandb     --bf16     --logging_first_step     --use_peft     --load_in_4bit     --lora_target_modules=all-linear     --lora_r=16     --lora_alpha=16
```
    N)load_dataset)AutoModelForCausalLMAutoTokenizer)	KTOConfig
KTOTrainerModelConfigScriptArguments	TrlParserget_peft_configsetup_chat_formatc                    t        j                  |j                  |j                        }t        j                  |j                  |j                        }t	        j                  |j                  |j                        }|j
                  |j                  |_        |j                  t        ||      \  }}t        | j                  | j                        }t        ||||| j                     |j                  dk7  r|| j                     nd |t!        |            }|j#                          |j%                  |j&                         |j(                  r|j)                  | j                         y y )N)trust_remote_code)nameno)argstrain_dataseteval_datasetprocessing_classpeft_config)dataset_name)r   from_pretrainedmodel_name_or_pathr   r   	pad_token	eos_tokenchat_templater   r   r   dataset_configr   dataset_train_spliteval_strategydataset_test_splitr   train
save_model
output_dirpush_to_hub)script_argstraining_args
model_argsmodel	ref_model	tokenizerdatasettrainers           J/home/cdr/jupyterlab/.venv/lib/python3.12/site-packages/trl/scripts/kto.pymainr-   N   sQ    00%%9U9UE %44%%9U9UI --%%9U9UI "'11	 &,UI>y ;33+:T:TUG k==>@M@[@[_c@cW[;;<im"#J/G MMO }//0  )A)AB !    
subparsersc                 n    t         t        t        f}| | j                  dd|      }|S t	        |      }|S )NktozRun the KTO training script)helpdataclass_types)r	   r   r   
add_parserr
   )r/   r3   parsers      r,   make_parserr6   x   sE    &	;?O&&u3Pbq&r M ?+Mr.   __main__)N)__doc__argparsedatasetsr   transformersr   r   trlr   r   r   r	   r
   r   r   r-   _SubParsersActionr6   __name__r5   parse_args_and_configr$   r%   r&    r.   r,   <module>rA      su   -^  ! <  'CTH66  z]F-3-I-I-K*K
mZ0 r.   