# Source: llm_learn/配置文件/qwen3-8b/配置/qwen3_8b_dpo.yaml
# Exported: 2025-10-16 08:46:13 +08:00 (viewer metadata: 29 lines, 619 B, YAML)
# LLaMA-Factory DPO (preference-tuning) run: continues from an existing SFT
# LoRA adapter on a 4-bit-quantized Qwen3-8B base (QLoRA), on very tight VRAM.

### model
# Local base-model weights; the adapter below is the SFT LoRA being refined.
model_name_or_path: F:\微调实验\llamafactory\models\Qwen\Qwen3-8B
adapter_name_or_path: ./saves/qwen3_8b_soviet_v2

### method
stage: dpo
do_train: true
finetuning_type: lora
quantization_bit: 4  # QLoRA: load the base model in 4-bit to fit in memory

### dataset
dataset: dpo_soviet_jokes_v3
# NOTE(review): LLaMA-Factory also ships a dedicated `qwen3` chat template for
# Qwen3 models — confirm `qwen` matches this tokenizer's chat format.
template: qwen
cutoff_len: 256  # aggressively truncated to minimize memory use

### output
output_dir: saves/qwen3_8b_soviet_dpo_v1
overwrite_cache: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 1
# Written with a decimal point so YAML 1.1 loaders (e.g. PyYAML) resolve it as
# a float; the bare form `5e-5` is loaded as the *string* "5e-5".
learning_rate: 5.0e-5
num_train_epochs: 5
pref_beta: 0.1  # DPO beta: strength of the KL pull toward the reference policy
pref_loss: sigmoid  # standard DPO sigmoid loss
gradient_checkpointing: true
lora_rank: 8  # minimum usable rank (memory-constrained)
# Alpha below rank gives a LoRA scale of 0.5 — presumably intentional damping
# for a cautious DPO pass; verify against the SFT run's settings.
lora_alpha: 4