DAPO_E2H-gsm8k-gaussian_0p2…/.hydra/config.yaml
ModelHub XC f7ae09a148 Initialize project; model provided by the ModelHub XC community
Model: divelab/DAPO_E2H-gsm8k-gaussian_0p25_0p75
Source: Original Platform
2026-04-29 01:01:06 +08:00

99 lines · 2.7 KiB · YAML

mode: train
experiment:
  dataset_size: 6000
  dataset_seed: 1234
  test_size: 0.1
  hf_token: ${oc.env:HF_TOKEN,null}
output:
  root_path: ${oc.env:ROOT_PATH}
  run_name: ${model.trim}_${task.name}_${algorithm.name}_${algorithm.training.curriculum_schedule}_${algorithm.training.scheduler_params.mu_exp}_${algorithm.training.scheduler_params.sigma}_SEC${algorithm.training.scheduler_params.vrex_adds.sec}DRO${algorithm.training.scheduler_params.vrex_adds.groupdro}G${algorithm.training.scheduler_params.vrex_adds.gaussian}_minp${algorithm.training.scheduler_params.min_prob}${ckpt2short:${algorithm.training.resume_from_checkpoint}}_${algorithm.training.max_steps}
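
The run_name is assembled entirely through OmegaConf interpolation, plus one custom resolver, ckpt2short, that is not defined in this file. A minimal sketch of how such a resolver could be registered before Hydra composes the config; the body of ckpt2short here is an assumption (it shortens a checkpoint path into a tag, contributing nothing when resume_from_checkpoint is null):

```python
from omegaconf import OmegaConf

def ckpt2short(ckpt):
    # Hypothetical behavior: turn ".../checkpoint-1600" into "_ckpt1600",
    # and return "" so run_name is unchanged when no checkpoint is resumed.
    if ckpt is None:
        return ""
    return "_ckpt" + str(ckpt).rstrip("/").rsplit("-", 1)[-1]

# Custom resolvers must be registered before the config is resolved.
OmegaConf.register_new_resolver("ckpt2short", ckpt2short)
```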
lora:
  r: 32
  alpha: 64
  dropout: 0.1
  target_modules:
  - q_proj
  - v_proj
  task_type: CAUSAL_LM
occupy_gpu_memory: false
occupy_gpu_memory_gb: 50
gpu_device: cuda:0
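
The lora block maps one-to-one onto a PEFT LoraConfig; alpha/r = 64/32 gives an effective scaling of 2.0, and only the attention query/value projections are adapted. A sketch of that mapping, assuming the repo consumes the block via peft:

```python
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=32,                                 # rank of the low-rank update
    lora_alpha=64,                        # effective scale = alpha / r = 2.0
    lora_dropout=0.1,
    target_modules=["q_proj", "v_proj"],  # attention query/value projections only
    task_type="CAUSAL_LM",
)
# model = get_peft_model(base_model, lora_config)
```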
model:
  family: Qwen
  trim: Qwen2.5-1.5B-Instruct
  name: ${model.family}/${model.trim}
  trust_remote_code: true
  torch_dtype: bfloat16
  attn_implementation: flash_attention_2
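
The model block corresponds to a standard transformers load; name resolves to Qwen/Qwen2.5-1.5B-Instruct via the two interpolations above. A sketch of the equivalent call (flash_attention_2 requires the flash-attn package and a supported GPU):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "Qwen/Qwen2.5-1.5B-Instruct"  # ${model.family}/${model.trim}
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
```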
task:
  name: gsm8k
  data_files:
  - datasets/gsm8k/trivial
  - datasets/gsm8k/easy
  - datasets/gsm8k/medium
  - datasets/gsm8k/hard
  training:
    data_files:
    - datasets/gsm8k/trivial
    - datasets/gsm8k/easy
    - datasets/gsm8k/medium
    - datasets/gsm8k/hard
    max_prompt_length: 1600
    max_completion_length: 1024
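
The four data_files entries are GSM8K splits pre-bucketed by difficulty (the E2H, easy-to-hard, setup in the model name). Their on-disk format is not visible from this config; a sketch assuming each tier is a directory saved with datasets.Dataset.save_to_disk, with the 6000-example cap and 0.1 test split coming from the experiment block above:

```python
from datasets import concatenate_datasets, load_from_disk

tiers = ["trivial", "easy", "medium", "hard"]
# Assumption about format: if the tiers are JSON/Parquet files instead,
# load_dataset("json", data_files=...) would be the entry point.
data = concatenate_datasets([load_from_disk(f"datasets/gsm8k/{t}") for t in tiers])

data = data.shuffle(seed=1234).select(range(min(6000, len(data))))  # experiment.dataset_size
splits = data.train_test_split(test_size=0.1, seed=1234)            # experiment.test_size
```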
  inference:
    max_model_len: 3200
    checkpoint: outputs/Qwen2.5-1.5B-Instruct_gsm8k_grpo_gaussian_0.25_0.75_SEC0.3DRO1.0G0.0_minpTrue_1600/checkpoint-1600
    data_files:
    - datasets/gsm8k/trivial
    - datasets/gsm8k/easy
    - datasets/gsm8k/medium
    - datasets/gsm8k/hard
    max_prompt_length: 1600
    max_completion_length: 512
    temperature: 0
    'n': 1
    sc_num: 1
    use_icl: false
    max_new_tokens: 1600
    max_tokens: 1600
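
temperature: 0 with 'n': 1 and sc_num: 1 means greedy, single-sample decoding with no self-consistency voting; max_model_len of 3200 matches the 1600-token prompt budget plus 1600 max_tokens. A sketch of the equivalent vLLM call, assuming the checkpoint directory holds merged weights that vLLM can load directly:

```python
from vllm import LLM, SamplingParams

llm = LLM(
    model="outputs/Qwen2.5-1.5B-Instruct_gsm8k_grpo_gaussian_0.25_0.75_SEC0.3DRO1.0G0.0_minpTrue_1600/checkpoint-1600",
    max_model_len=3200,
)
params = SamplingParams(temperature=0.0, n=1, max_tokens=1600)  # greedy decoding
outputs = llm.generate(["<gsm8k prompt here>"], params)
```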
algorithm:
  name: grpo
  training:
    resume_from_checkpoint: null
    learning_rate: 1.0e-06
    lr_scheduler_type: cosine
    logging_steps: 10
    max_steps: 1600
    per_device_train_batch_size: 16
    generation_batch_size: null
    steps_per_generation: 1
    gradient_accumulation_steps: 4
    gradient_checkpointing: true
    bf16: true
    report_to:
    - wandb
    push_to_hub: true
    save_strategy: steps
    save_steps: ${algorithm.training.max_steps}
    tf32: true
    num_generations: 8
    beta: 0.001
    use_vllm: true
    vllm_mode: colocate
    vllm_gpu_memory_utilization: 0.3
    vllm_server_port: 8000
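
These keys mirror TRL's GRPOConfig almost one-for-one; with vllm_mode: colocate the vLLM engine shares the training GPUs (capped at 30% of memory), so vllm_server_port only matters in server mode. Per device, 16 x 4 gradient-accumulation steps gives 64 completions per optimizer step, i.e. 8 prompt groups of num_generations = 8. A sketch of the mapping, assuming a recent TRL release that supports colocate mode:

```python
from trl import GRPOConfig

training_args = GRPOConfig(
    output_dir="outputs/run",        # placeholder; really ${output.root_path}/${output.run_name}
    learning_rate=1.0e-6,
    lr_scheduler_type="cosine",
    logging_steps=10,
    max_steps=1600,
    per_device_train_batch_size=16,  # 16 completions / 8 generations = 2 prompts per device batch
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
    bf16=True,
    tf32=True,
    report_to=["wandb"],
    push_to_hub=True,
    save_strategy="steps",
    save_steps=1600,                 # == max_steps: only the final checkpoint is saved
    num_generations=8,               # GRPO group size per prompt
    beta=0.001,                      # KL penalty toward the reference model
    use_vllm=True,
    vllm_mode="colocate",
    vllm_gpu_memory_utilization=0.3,
)
```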
    curriculum: false
    curriculum_schedule: gaussian
    scheduler_params:
      mu_exp: 0.25
      sigma: 0.75
      vrex_adds:
        groupdro: 1.0
        gaussian: 0.0
        sec: 0.3
      beta: 1.0
      min_prob: true
      td_alpha: 0.5
      sec_temperature: 0.3
    max_dapo_iter: 4
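
curriculum: false indicates the gaussian schedule is configured but switched off for this run (consistent with the four tiers being mixed directly in data_files), and the vrex_adds weights suggest GroupDRO/V-REx-style robust-loss terms across difficulty groups, though their formula is not visible here. When enabled, a gaussian schedule of this shape typically samples tiers with weights from a Gaussian whose mean sweeps from the easiest to the hardest tier over training; mu_exp shapes the sweep and sigma its width. The exact formula is not recoverable from the config, so the following is an assumed interpretation only:

```python
import numpy as np

def tier_probs(step, max_steps, mu_exp=0.25, sigma=0.75, min_prob=True, n_tiers=4):
    """Assumed gaussian curriculum over tiers 0..n_tiers-1 (trivial..hard)."""
    progress = (step / max_steps) ** mu_exp   # mu_exp < 1 front-loads the sweep
    mu = progress * (n_tiers - 1)             # mean difficulty index at this step
    levels = np.arange(n_tiers)
    weights = np.exp(-((levels - mu) ** 2) / (2 * sigma ** 2))
    probs = weights / weights.sum()
    if min_prob:
        # Assumed meaning of min_prob: floor each tier so none is starved.
        probs = np.maximum(probs, 0.05)
        probs /= probs.sum()
    return probs

# e.g. tier_probs(400, 1600) -> mu ~= 2.1, so sampling centers on "medium"
```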