### model
model_name_or_path: F:\微调实验\llamafactory\models\Qwen\Qwen3-8B
adapter_name_or_path: ./saves/qwen3_8b_soviet_v2

### method
stage: dpo
do_train: true
finetuning_type: lora
quantization_bit: 4
lora_rank: 8  # already at the minimum
lora_alpha: 4
pref_beta: 0.1
pref_loss: sigmoid

### dataset
dataset: dpo_soviet_jokes_v3
template: qwen
cutoff_len: 256  # compressed to the limit
overwrite_cache: true

### output
output_dir: saves/qwen3_8b_soviet_dpo_v1

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 1
learning_rate: 5e-5
num_train_epochs: 5
gradient_checkpointing: true
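
# Usage sketch: with LLaMA-Factory installed, save this config and launch
# training via the CLI (the filename below is illustrative, not from the source):
#
#   llamafactory-cli train qwen3_8b_soviet_dpo_v1.yaml
#
# Notes on this setup: quantization_bit: 4 combined with finetuning_type: lora
# runs QLoRA, i.e. the base model is loaded in 4-bit and only the LoRA adapter
# weights are trained; adapter_name_or_path warm-starts DPO from the existing
# SFT adapter. pref_beta is the DPO beta controlling how far the policy may
# drift from the reference model, and pref_loss: sigmoid selects the standard
# DPO objective.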