- mode=train - task=math - algorithm=grpo - algorithm.training.curriculum_schedule=cosine - model=qwen15 - algorithm.training.max_steps=1600 - algorithm.training.vllm_mode=colocate