初始化项目,由ModelHub XC社区提供模型
Model: divelab/DAPO_E2H-math-gaussian_0p5_0p5 Source: Original Platform
This commit is contained in:
88
.hydra/config.yaml
Normal file
88
.hydra/config.yaml
Normal file
@@ -0,0 +1,88 @@
---
# Hydra-resolved run configuration for GRPO curriculum training on MATH.
# NOTE(review): the original dump lost all indentation during extraction.
# Nesting below is reconstructed from the interpolation paths the file itself
# uses (e.g. ${algorithm.training.scheduler_params.vrex_adds.sec},
# ${model.family}/${model.trim}); levels not pinned by any interpolation are
# flagged inline — confirm against the generating Hydra defaults tree.

mode: train

experiment:
  dataset_size: 6000
  dataset_seed: 1234
  test_size: 0.1

# NOTE(review): no interpolation pins the level of the keys from hf_token
# through gpu_device; they may instead belong under `experiment:` — verify
# with the consuming code before relying on this layout.
hf_token: ${oc.env:HF_TOKEN,null}

output:
  root_path: ${oc.env:ROOT_PATH}
  # Run name is assembled entirely from other config values plus the custom
  # `ckpt2short` resolver (registered by the application, not by OmegaConf).
  run_name: ${model.trim}_${task.name}_${algorithm.name}_${algorithm.training.curriculum_schedule}_${algorithm.training.scheduler_params.mu_exp}_${algorithm.training.scheduler_params.sigma}_SEC${algorithm.training.scheduler_params.vrex_adds.sec}DRO${algorithm.training.scheduler_params.vrex_adds.groupdro}G${algorithm.training.scheduler_params.vrex_adds.gaussian}_minp${algorithm.training.scheduler_params.min_prob}${ckpt2short:${algorithm.training.resume_from_checkpoint}}_${algorithm.training.max_steps}

lora:
  r: 32
  alpha: 64
  dropout: 0.1
  target_modules:
    - q_proj
    - v_proj
  task_type: CAUSAL_LM

occupy_gpu_memory: false
occupy_gpu_memory_gb: 50
gpu_device: cuda:0

model:
  family: Qwen
  trim: Qwen2.5-1.5B-Instruct
  name: ${model.family}/${model.trim}
  trust_remote_code: true
  torch_dtype: bfloat16
  attn_implementation: flash_attention_2

task:
  name: math
  # NOTE(review): training paths use `level_1` (underscore) while inference
  # paths use `level1` (no underscore) — presumably two distinct data layouts;
  # confirm both directories exist.
  data_files:
    - data/math/level_1
    - data/math/level_2
    - data/math/level_3
    - data/math/level_4
  training:
    max_prompt_length: 1600
    max_completion_length: 1600
  inference:
    data_files:
      - data/math/level1
      - data/math/level2
      - data/math/level3
      - data/math/level4
      - data/math/level5
    max_prompt_length: 1600
    max_completion_length: 1600
    temperature: 0.0
    # Key quoted deliberately: bare `n` is a YAML 1.1 boolean (`false`).
    'n': 1

algorithm:
  name: grpo
  training:
    resume_from_checkpoint: null
    learning_rate: 1.0e-06
    lr_scheduler_type: cosine
    logging_steps: 10
    max_steps: 1600
    per_device_train_batch_size: 16
    generation_batch_size: null
    steps_per_generation: 1
    gradient_accumulation_steps: 4
    gradient_checkpointing: true
    bf16: true
    report_to:
      - wandb
    push_to_hub: true
    save_strategy: steps
    save_steps: ${algorithm.training.max_steps}
    tf32: true
    num_generations: 8
    beta: 0.001
    use_vllm: true
    vllm_mode: colocate
    vllm_gpu_memory_utilization: 0.25
    vllm_server_port: 8000
    curriculum: false
    curriculum_schedule: gaussian
    scheduler_params:
      mu_exp: 0.5
      sigma: 0.5
      vrex_adds:
        groupdro: 1.0
        gaussian: 0.0
        sec: 0.3
        beta: 1.0
      min_prob: true
      td_alpha: 0.5
      sec_temperature: 0.3
    # NOTE(review): level ambiguous — may belong under scheduler_params;
    # confirm against the scheduler implementation.
    max_dapo_iter: 2
Reference in New Issue
Block a user