# LLaMA-Factory DPO (Direct Preference Optimization) training config.
# Continues from a prior SFT LoRA adapter and trains a new DPO adapter
# on 4-bit quantized Qwen3-8B (QLoRA) with minimal memory settings.

### model
model_name_or_path: F:\微调实验\llamafactory\models\Qwen\Qwen3-8B
# Start from the existing SFT adapter so DPO refines it rather than training from scratch.
adapter_name_or_path: ./saves/qwen3_8b_soviet_v2

### method
stage: dpo
do_train: true
finetuning_type: lora
quantization_bit: 4  # QLoRA: load base model in 4-bit to fit consumer VRAM

### dataset
dataset: dpo_soviet_jokes_v3
template: qwen
cutoff_len: 256  # extreme compression (原注: 极限压缩) — keeps sequence memory minimal

### output
output_dir: saves/qwen3_8b_soviet_dpo_v1
overwrite_cache: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 1  # effective batch size is 1; consider raising if training is unstable
learning_rate: 5e-5
num_train_epochs: 5
pref_beta: 0.1       # DPO beta: strength of the KL penalty toward the reference policy
pref_loss: sigmoid   # standard DPO sigmoid loss
gradient_checkpointing: true
lora_rank: 8         # minimal rank (原注: 最小了)
# NOTE(review): lora_alpha below rank is unusual (alpha is often rank or 2x rank);
# presumably intentional to keep DPO updates conservative — confirm with the author.
lora_alpha: 4