# YAML, 9 lines, 237 B
- mode=train
- task=math
- algorithm=grpo
- algorithm.training.curriculum_schedule=gaussian
- model=qwen15
- algorithm.training.max_steps=1600
- algorithm.training.vllm_mode=colocate
- algorithm.training.vllm_gpu_memory_utilization=0.25