- mode=train - task=math - algorithm=grpo - algorithm.training.curriculum_schedule=gaussian - model=qwen15 - algorithm.training.max_steps=1600 - algorithm.training.vllm_mode=colocate - algorithm.training.vllm_gpu_memory_utilization=0.25