```yaml
### model
model_name_or_path: F:\微调实验\llamafactory\models\Qwen\Qwen3-8B  # or a specific local path

### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
quantization_bit: 4  # key setting: 4-bit quantization
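
# Combined with finetuning_type: lora, quantization_bit: 4 makes LLaMA-Factory
# load the frozen base weights in 4-bit (bitsandbytes) while the LoRA adapters
# train in higher precision, i.e. QLoRA-style training. This is what lets an
# 8B model fit in consumer-GPU VRAM.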

### dataset
dataset: sft_complete_v3  # use your earlier 60-example dataset
template: qwen
cutoff_len: 2048
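
# The template must match the model's chat format; `qwen` is the Qwen-family
# template. Newer LLaMA-Factory releases also ship a dedicated `qwen3`
# template, so check which one your installed version expects for Qwen3.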

### output
output_dir: saves/qwen3_8b_soviet_v2
overwrite_cache: true
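
# overwrite_cache: true rebuilds the tokenized-dataset cache on every run,
# which you want whenever the dataset file has changed.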

### train
per_device_train_batch_size: 1  # 8B model, so be conservative
gradient_accumulation_steps: 4
learning_rate: 3e-4
num_train_epochs: 30  # an 8B model doesn't overfit as easily; fewer epochs would also do
lora_rank: 64  # an 8B model can get away with a smaller rank
lora_alpha: 32

warmup_ratio: 0.1  # add a warmup phase
gradient_checkpointing: true  # saves VRAM, leaving room for a larger batch
```
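
Before launching (typically `llamafactory-cli train <this-file>.yaml` once LLaMA-Factory is installed), it helps to sanity-check the schedule these numbers imply. The sketch below only restates arithmetic already in the config; the 60-example dataset size is an assumption taken from the dataset comment above:

```python
import math

# All figures come from the YAML above; the dataset size is an assumption
# based on the "60-example" comment next to sft_complete_v3.
num_examples = 60
per_device_batch = 1
grad_accum = 4
epochs = 30
lora_rank, lora_alpha = 64, 32
warmup_ratio = 0.1

effective_batch = per_device_batch * grad_accum              # 4 examples per optimizer step
steps_per_epoch = math.ceil(num_examples / effective_batch)  # 15
total_steps = steps_per_epoch * epochs                       # 450
warmup_steps = round(warmup_ratio * total_steps)             # 45

# Standard LoRA scaling: the adapter's contribution is multiplied by alpha / rank.
scaling = lora_alpha / lora_rank                             # 0.5

print(f"effective batch: {effective_batch}, steps/epoch: {steps_per_epoch}")
print(f"total optimizer steps: {total_steps} (warmup: {warmup_steps})")
print(f"LoRA scaling (alpha/rank): {scaling}")
```

Note that `lora_alpha / lora_rank = 0.5` here, so the adapter update is scaled down; a common heuristic sets alpha at 1-2x the rank, which makes this a deliberately conservative choice.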