Initialize project; model provided by the ModelHub XC community

Model: divelab/DAPO_E2H-math-gaussian_0p5_0p5
Source: Original Platform
Author: ModelHub XC
Date: 2026-04-26 00:29:08 +08:00
Commit: 7856869d2f
15 changed files with 152136 additions and 0 deletions

.hydra/config.yaml (new file, 88 lines)

@@ -0,0 +1,88 @@
mode: train
experiment:
  dataset_size: 6000
  dataset_seed: 1234
  test_size: 0.1
  hf_token: ${oc.env:HF_TOKEN,null}
output:
  root_path: ${oc.env:ROOT_PATH}
  run_name: ${model.trim}_${task.name}_${algorithm.name}_${algorithm.training.curriculum_schedule}_${algorithm.training.scheduler_params.mu_exp}_${algorithm.training.scheduler_params.sigma}_SEC${algorithm.training.scheduler_params.vrex_adds.sec}DRO${algorithm.training.scheduler_params.vrex_adds.groupdro}G${algorithm.training.scheduler_params.vrex_adds.gaussian}_minp${algorithm.training.scheduler_params.min_prob}${ckpt2short:${algorithm.training.resume_from_checkpoint}}_${algorithm.training.max_steps}
lora:
  r: 32
  alpha: 64
  dropout: 0.1
  target_modules:
  - q_proj
  - v_proj
  task_type: CAUSAL_LM
occupy_gpu_memory: false
occupy_gpu_memory_gb: 50
gpu_device: cuda:0
model:
  family: Qwen
  trim: Qwen2.5-1.5B-Instruct
  name: ${model.family}/${model.trim}
  trust_remote_code: true
  torch_dtype: bfloat16
  attn_implementation: flash_attention_2
task:
  name: math
  data_files:
  - data/math/level_1
  - data/math/level_2
  - data/math/level_3
  - data/math/level_4
  training:
    max_prompt_length: 1600
    max_completion_length: 1600
  inference:
    data_files:
    - data/math/level1
    - data/math/level2
    - data/math/level3
    - data/math/level4
    - data/math/level5
    max_prompt_length: 1600
    max_completion_length: 1600
    temperature: 0.0
    'n': 1
algorithm:
  name: grpo
  training:
    resume_from_checkpoint: null
    learning_rate: 1.0e-06
    lr_scheduler_type: cosine
    logging_steps: 10
    max_steps: 1600
    per_device_train_batch_size: 16
    generation_batch_size: null
    steps_per_generation: 1
    gradient_accumulation_steps: 4
    gradient_checkpointing: true
    bf16: true
    report_to:
    - wandb
    push_to_hub: true
    save_strategy: steps
    save_steps: ${algorithm.training.max_steps}
    tf32: true
    num_generations: 8
    beta: 0.001
    use_vllm: true
    vllm_mode: colocate
    vllm_gpu_memory_utilization: 0.25
    vllm_server_port: 8000
    curriculum: false
    curriculum_schedule: gaussian
    scheduler_params:
      mu_exp: 0.5
      sigma: 0.5
      vrex_adds:
        groupdro: 1.0
        gaussian: 0.0
        sec: 0.3
      beta: 1.0
      min_prob: true
      td_alpha: 0.5
      sec_temperature: 0.3
    max_dapo_iter: 2

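Note: output.run_name above and hydra.run.dir below rely on two custom OmegaConf resolvers, ckpt2short and mode2name. Neither ships with OmegaConf, so the training entry point has to register them before the config is resolved. A minimal sketch of such registrations; only the resolver names and argument patterns are taken from the config, the function bodies are guesses:

# Sketch: register the custom resolvers referenced by the config above.
# ASSUMPTION: the bodies are illustrative guesses, not this repo's code.
from typing import Optional
from omegaconf import OmegaConf

def mode2name(mode: str, run_name: str, model_trim: str) -> str:
    # Train runs get the full descriptive run name; other modes (e.g. a
    # pure inference pass) could fall back to just the model name.
    return run_name if mode == 'train' else model_trim

def ckpt2short(ckpt: Optional[str]) -> str:
    # Collapse a checkpoint path into a short tag for run_name; empty when
    # resume_from_checkpoint is null (as in this run, which is why the
    # output_dir in hydra.yaml below carries no checkpoint tag).
    if ckpt is None:
        return ''
    return '_' + str(ckpt).rstrip('/').split('/')[-1]

OmegaConf.register_new_resolver('mode2name', mode2name)
OmegaConf.register_new_resolver('ckpt2short', ckpt2short)

The algorithm.training block reads like the argument set of TRL's GRPO trainer. The field names below exist in recent TRL releases; the exact wiring inside this repo is an assumption:

# Sketch: plausible mapping of algorithm.training onto trl.GRPOConfig.
from trl import GRPOConfig

args = GRPOConfig(
    output_dir='outputs/...',          # resolved from output.root_path and run_name
    learning_rate=1.0e-06,
    lr_scheduler_type='cosine',
    logging_steps=10,
    max_steps=1600,
    per_device_train_batch_size=16,
    steps_per_generation=1,
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
    bf16=True,
    tf32=True,
    report_to=['wandb'],
    push_to_hub=True,
    save_strategy='steps',
    save_steps=1600,                   # save once, at max_steps
    num_generations=8,                 # GRPO group size per prompt
    beta=0.001,                        # KL coefficient toward the reference policy
    max_prompt_length=1600,
    max_completion_length=1600,
    use_vllm=True,
    vllm_mode='colocate',              # vLLM shares the training GPUs
    vllm_gpu_memory_utilization=0.25,
)
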
.hydra/hydra.yaml (new file, 165 lines)

@@ -0,0 +1,165 @@
hydra:
  run:
    dir: ${output.root_path}/outputs/${mode2name:${mode},${output.run_name},${model.trim}}
  sweep:
    dir: ${output.root_path}/multirun/${now:%Y%m%d}
    subdir: ${hydra.job.override_dirname}
  launcher:
    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
  sweeper:
    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
    max_batch_size: null
    params: null
  help:
    app_name: ${hydra.job.name}
    header: '${hydra.help.app_name} is powered by Hydra.

      '
    footer: 'Powered by Hydra (https://hydra.cc)

      Use --hydra-help to view Hydra specific help

      '
    template: '${hydra.help.header}

      == Configuration groups ==

      Compose your configuration from those groups (group=option)


      $APP_CONFIG_GROUPS


      == Config ==

      Override anything in the config (foo.bar=value)


      $CONFIG


      ${hydra.help.footer}

      '
  hydra_help:
    template: 'Hydra (${hydra.runtime.version})

      See https://hydra.cc for more info.


      == Flags ==

      $FLAGS_HELP


      == Configuration groups ==

      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
      to command line)


      $HYDRA_CONFIG_GROUPS


      Use ''--cfg hydra'' to Show the Hydra config.

      '
    hydra_help: ???
  hydra_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][HYDRA] %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
    root:
      level: INFO
      handlers:
      - console
    loggers:
      logging_example:
        level: DEBUG
    disable_existing_loggers: false
  job_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
      file:
        class: logging.FileHandler
        formatter: simple
        filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
    root:
      level: INFO
      handlers:
      - console
      - file
    disable_existing_loggers: false
  env: {}
  mode: RUN
  searchpath: []
  callbacks: {}
  output_subdir: .hydra
  overrides:
    hydra:
    - hydra.mode=RUN
    task:
    - mode=train
    - task=math
    - algorithm=grpo
    - algorithm.training.curriculum_schedule=gaussian
    - model=qwen15
    - algorithm.training.max_steps=1600
    - algorithm.training.vllm_mode=colocate
    - algorithm.training.vllm_gpu_memory_utilization=0.25
  job:
    name: main
    chdir: false
    override_dirname: algorithm.training.curriculum_schedule=gaussian,algorithm.training.max_steps=1600,algorithm.training.vllm_gpu_memory_utilization=0.25,algorithm.training.vllm_mode=colocate,algorithm=grpo,mode=train,model=qwen15,task=math
    id: ???
    num: ???
    config_name: config
    env_set: {}
    env_copy: []
    config:
      override_dirname:
        kv_sep: '='
        item_sep: ','
        exclude_keys: []
  runtime:
    version: 1.3.2
    version_base: '1.3'
    cwd: /mnt/data/shared/shparashar/Sys2Bench
    config_sources:
    - path: hydra.conf
      schema: pkg
      provider: hydra
    - path: /mnt/data/shared/shparashar/Sys2Bench/methods/RL/conf
      schema: file
      provider: main
    - path: ''
      schema: structured
      provider: schema
    output_dir: /mnt/data/shared/shparashar/Sys2Bench/outputs/Qwen2.5-1.5B-Instruct_math_grpo_gaussian_0.5_0.5_SEC0.3DRO1.0G0.0_minpTrue_1600
    choices:
      algorithm: grpo
      task: math
      model: qwen15
      hydra/env: default
      hydra/callbacks: null
      hydra/job_logging: default
      hydra/hydra_logging: default
      hydra/hydra_help: default
      hydra/help: default
      hydra/sweeper: basic
      hydra/launcher: basic
      hydra/output: default
  verbose: false

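For reference, hydra.job.name is main and overrides.task above records the exact command-line arguments, so the run was launched with something like the following (assuming the entry point is a main.py next to the methods/RL/conf directory listed in config_sources):

python methods/RL/main.py mode=train task=math algorithm=grpo \
    algorithm.training.curriculum_schedule=gaussian model=qwen15 \
    algorithm.training.max_steps=1600 algorithm.training.vllm_mode=colocate \
    algorithm.training.vllm_gpu_memory_utilization=0.25
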
.hydra/overrides.yaml (new file, 8 lines)

@@ -0,0 +1,8 @@
- mode=train
- task=math
- algorithm=grpo
- algorithm.training.curriculum_schedule=gaussian
- model=qwen15
- algorithm.training.max_steps=1600
- algorithm.training.vllm_mode=colocate
- algorithm.training.vllm_gpu_memory_utilization=0.25
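
The gaussian curriculum_schedule with mu_exp=0.5 and sigma=0.5 (the 0p5_0p5 suffix in the model name) is configured here but not defined in these files. One plausible reading, consistent with the E2H (easy-to-hard) tag in the model name: at training progress t in [0, 1], difficulty levels are sampled under a Gaussian whose mean sweeps from the easiest to the hardest level as t**mu_exp. A sketch under that assumption; min_prob is true in the config, presumably toggling a probability floor, shown here as an explicit value:

# Sketch of an easy-to-hard Gaussian curriculum over the four training levels.
# ASSUMPTION: an illustrative reading of curriculum_schedule=gaussian and
# scheduler_params (mu_exp, sigma, min_prob), not this repo's actual scheduler.
import numpy as np

def level_probs(step: int, max_steps: int = 1600, num_levels: int = 4,
                mu_exp: float = 0.5, sigma: float = 0.5,
                min_prob: float = 0.02) -> np.ndarray:
    t = step / max_steps                         # training progress in [0, 1]
    mu = t ** mu_exp                             # mean difficulty; mu_exp < 1 shifts hard levels earlier
    levels = np.linspace(0.0, 1.0, num_levels)   # normalized difficulty of level_1..level_4
    p = np.exp(-0.5 * ((levels - mu) / sigma) ** 2)
    p = np.maximum(p / p.sum(), min_prob)        # floor so no level's probability vanishes
    return p / p.sum()

print(level_probs(step=0))      # early in the run: mass on easy levels
print(level_probs(step=1600))   # end of the run: mass on hard levels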