11 lines
337 B
YAML
11 lines
337 B
YAML
|
|
- mode=train
|
||
|
|
- task=gsm8k
|
||
|
|
- algorithm=grpo
|
||
|
|
- algorithm.training.curriculum_schedule=gaussian
|
||
|
|
- model=qwen15
|
||
|
|
- algorithm.training.max_steps=1600
|
||
|
|
- algorithm.training.vllm_mode=colocate
|
||
|
|
- algorithm.training.scheduler_params.mu_exp=0.25
|
||
|
|
- algorithm.training.scheduler_params.sigma=0.75
|
||
|
|
- algorithm.training.scheduler_params.max_dapo_iter=4
|