- mode=train
- task=math
- algorithm=grpo
- algorithm.training.curriculum_schedule=cosine
- model=qwen15
- algorithm.training.max_steps=1600
- algorithm.training.vllm_mode=colocate