初始化项目,由ModelHub XC社区提供模型
Model: divelab/DAPO_E2H-countdown-gaussian_0p5_0p5 Source: Original Platform
This commit is contained in:
89
.hydra/config.yaml
Normal file
89
.hydra/config.yaml
Normal file
@@ -0,0 +1,89 @@
|
||||
mode: train
|
||||
experiment:
|
||||
dataset_size: 6000
|
||||
dataset_seed: 1234
|
||||
test_size: 0.1
|
||||
hf_token: ${oc.env:HF_TOKEN,null}
|
||||
output:
|
||||
root_path: ${oc.env:ROOT_PATH}
|
||||
run_name: ${model.trim}_${task.name}_${algorithm.name}_${algorithm.training.curriculum_schedule}_${algorithm.training.scheduler_params.mu_exp}_${algorithm.training.scheduler_params.sigma}_SEC${algorithm.training.scheduler_params.vrex_adds.sec}DRO${algorithm.training.scheduler_params.vrex_adds.groupdro}G${algorithm.training.scheduler_params.vrex_adds.gaussian}_minp${algorithm.training.scheduler_params.min_prob}${ckpt2short:${algorithm.training.resume_from_checkpoint}}_${algorithm.training.max_steps}
|
||||
lora:
|
||||
r: 32
|
||||
alpha: 64
|
||||
dropout: 0.1
|
||||
target_modules:
|
||||
- q_proj
|
||||
- v_proj
|
||||
task_type: CAUSAL_LM
|
||||
occupy_gpu_memory: false
|
||||
occupy_gpu_memory_gb: 50
|
||||
gpu_device: cuda:0
|
||||
model:
|
||||
family: Qwen
|
||||
trim: Qwen2.5-1.5B-Instruct
|
||||
name: ${model.family}/${model.trim}
|
||||
trust_remote_code: true
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
task:
|
||||
name: countdown2345
|
||||
data_files:
|
||||
- citrinegui/countdown_n2t100_1-100
|
||||
- citrinegui/countdown_n3t100_1-100
|
||||
- citrinegui/countdown_n4t100_1-100
|
||||
- citrinegui/countdown_n5t100_1-100
|
||||
test_file: citrinegui/countdown_n6t100_1-100
|
||||
force_redownload: false
|
||||
train_size: 327680
|
||||
test_size: 1024
|
||||
training:
|
||||
max_prompt_length: 1000
|
||||
max_completion_length: 512
|
||||
inference:
|
||||
checkpoint: outputs/Qwen2.5-1.5B-Instruct_countdown2345_grpo_balanced_0.5_0.5_SEC0.3DRO1.0G0.0_minpTrue_1600/checkpoint-1600/
|
||||
temperature: 0.0
|
||||
sc_num: 1
|
||||
pass_at_k: 1
|
||||
resume: 0
|
||||
max_new_tokens: 512
|
||||
batch_size: 32
|
||||
algorithm:
|
||||
name: grpo
|
||||
training:
|
||||
resume_from_checkpoint: null
|
||||
learning_rate: 1.0e-06
|
||||
lr_scheduler_type: cosine
|
||||
logging_steps: 10
|
||||
max_steps: 1600
|
||||
per_device_train_batch_size: 16
|
||||
generation_batch_size: null
|
||||
steps_per_generation: 1
|
||||
gradient_accumulation_steps: 4
|
||||
gradient_checkpointing: true
|
||||
bf16: true
|
||||
report_to:
|
||||
- wandb
|
||||
push_to_hub: true
|
||||
save_strategy: steps
|
||||
save_steps: ${algorithm.training.max_steps}
|
||||
tf32: true
|
||||
num_generations: 8
|
||||
beta: 0.001
|
||||
use_vllm: true
|
||||
vllm_mode: colocate
|
||||
vllm_gpu_memory_utilization: 0.3
|
||||
vllm_server_port: 8000
|
||||
curriculum: false
|
||||
curriculum_schedule: gaussian
|
||||
scheduler_params:
|
||||
mu_exp: 0.5
|
||||
sigma: 0.5
|
||||
vrex_adds:
|
||||
groupdro: 1.0
|
||||
gaussian: 0.0
|
||||
sec: 0.3
|
||||
beta: 1.0
|
||||
min_prob: true
|
||||
td_alpha: 0.5
|
||||
sec_temperature: 0.3
|
||||
max_dapo_iter: 2
|
||||
164
.hydra/hydra.yaml
Normal file
164
.hydra/hydra.yaml
Normal file
@@ -0,0 +1,164 @@
|
||||
hydra:
|
||||
run:
|
||||
dir: ${output.root_path}/outputs/${mode2name:${mode},${output.run_name},${model.trim}}
|
||||
sweep:
|
||||
dir: ${output.root_path}/multirun/${now:%Y%m%d}
|
||||
subdir: ${hydra.job.override_dirname}
|
||||
launcher:
|
||||
_target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
|
||||
sweeper:
|
||||
_target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
|
||||
max_batch_size: null
|
||||
params: null
|
||||
help:
|
||||
app_name: ${hydra.job.name}
|
||||
header: '${hydra.help.app_name} is powered by Hydra.
|
||||
|
||||
'
|
||||
footer: 'Powered by Hydra (https://hydra.cc)
|
||||
|
||||
Use --hydra-help to view Hydra specific help
|
||||
|
||||
'
|
||||
template: '${hydra.help.header}
|
||||
|
||||
== Configuration groups ==
|
||||
|
||||
Compose your configuration from those groups (group=option)
|
||||
|
||||
|
||||
$APP_CONFIG_GROUPS
|
||||
|
||||
|
||||
== Config ==
|
||||
|
||||
Override anything in the config (foo.bar=value)
|
||||
|
||||
|
||||
$CONFIG
|
||||
|
||||
|
||||
${hydra.help.footer}
|
||||
|
||||
'
|
||||
hydra_help:
|
||||
template: 'Hydra (${hydra.runtime.version})
|
||||
|
||||
See https://hydra.cc for more info.
|
||||
|
||||
|
||||
== Flags ==
|
||||
|
||||
$FLAGS_HELP
|
||||
|
||||
|
||||
== Configuration groups ==
|
||||
|
||||
Compose your configuration from those groups (For example, append hydra/job_logging=disabled
|
||||
to command line)
|
||||
|
||||
|
||||
$HYDRA_CONFIG_GROUPS
|
||||
|
||||
|
||||
Use ''--cfg hydra'' to Show the Hydra config.
|
||||
|
||||
'
|
||||
hydra_help: ???
|
||||
hydra_logging:
|
||||
version: 1
|
||||
formatters:
|
||||
simple:
|
||||
format: '[%(asctime)s][HYDRA] %(message)s'
|
||||
handlers:
|
||||
console:
|
||||
class: logging.StreamHandler
|
||||
formatter: simple
|
||||
stream: ext://sys.stdout
|
||||
root:
|
||||
level: INFO
|
||||
handlers:
|
||||
- console
|
||||
loggers:
|
||||
logging_example:
|
||||
level: DEBUG
|
||||
disable_existing_loggers: false
|
||||
job_logging:
|
||||
version: 1
|
||||
formatters:
|
||||
simple:
|
||||
format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
|
||||
handlers:
|
||||
console:
|
||||
class: logging.StreamHandler
|
||||
formatter: simple
|
||||
stream: ext://sys.stdout
|
||||
file:
|
||||
class: logging.FileHandler
|
||||
formatter: simple
|
||||
filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
|
||||
root:
|
||||
level: INFO
|
||||
handlers:
|
||||
- console
|
||||
- file
|
||||
disable_existing_loggers: false
|
||||
env: {}
|
||||
mode: RUN
|
||||
searchpath: []
|
||||
callbacks: {}
|
||||
output_subdir: .hydra
|
||||
overrides:
|
||||
hydra:
|
||||
- hydra.mode=RUN
|
||||
task:
|
||||
- mode=train
|
||||
- task=countdown2345
|
||||
- algorithm=grpo
|
||||
- algorithm.training.curriculum_schedule=gaussian
|
||||
- model=qwen15
|
||||
- algorithm.training.max_steps=1600
|
||||
- algorithm.training.vllm_mode=colocate
|
||||
job:
|
||||
name: main
|
||||
chdir: false
|
||||
override_dirname: algorithm.training.curriculum_schedule=gaussian,algorithm.training.max_steps=1600,algorithm.training.vllm_mode=colocate,algorithm=grpo,mode=train,model=qwen15,task=countdown2345
|
||||
id: ???
|
||||
num: ???
|
||||
config_name: config
|
||||
env_set: {}
|
||||
env_copy: []
|
||||
config:
|
||||
override_dirname:
|
||||
kv_sep: '='
|
||||
item_sep: ','
|
||||
exclude_keys: []
|
||||
runtime:
|
||||
version: 1.3.2
|
||||
version_base: '1.3'
|
||||
cwd: /mnt/data/shared/shparashar/Sys2Bench
|
||||
config_sources:
|
||||
- path: hydra.conf
|
||||
schema: pkg
|
||||
provider: hydra
|
||||
- path: /mnt/data/shared/shparashar/Sys2Bench/methods/RL/conf
|
||||
schema: file
|
||||
provider: main
|
||||
- path: ''
|
||||
schema: structured
|
||||
provider: schema
|
||||
output_dir: /mnt/data/shared/shparashar/Sys2Bench/outputs/Qwen2.5-1.5B-Instruct_countdown2345_grpo_gaussian_0.5_0.5_SEC0.3DRO1.0G0.0_minpTrue_1600
|
||||
choices:
|
||||
algorithm: grpo
|
||||
task: countdown2345
|
||||
model: qwen15
|
||||
hydra/env: default
|
||||
hydra/callbacks: null
|
||||
hydra/job_logging: default
|
||||
hydra/hydra_logging: default
|
||||
hydra/hydra_help: default
|
||||
hydra/help: default
|
||||
hydra/sweeper: basic
|
||||
hydra/launcher: basic
|
||||
hydra/output: default
|
||||
verbose: false
|
||||
7
.hydra/overrides.yaml
Normal file
7
.hydra/overrides.yaml
Normal file
@@ -0,0 +1,7 @@
|
||||
- mode=train
|
||||
- task=countdown2345
|
||||
- algorithm=grpo
|
||||
- algorithm.training.curriculum_schedule=gaussian
|
||||
- model=qwen15
|
||||
- algorithm.training.max_steps=1600
|
||||
- algorithm.training.vllm_mode=colocate
|
||||
Reference in New Issue
Block a user