- mode=train - task=gsm8k - algorithm=grpo - algorithm.training.curriculum_schedule=gaussian - model=qwen15 - algorithm.training.max_steps=1600 - algorithm.training.vllm_mode=colocate - algorithm.training.scheduler_params.mu_exp=0.25 - algorithm.training.scheduler_params.sigma=0.75 - algorithm.training.scheduler_params.max_dapo_iter=4