# Model-merge configuration: combines eleven input checkpoints into one
# using the `karcher` merge method.
#
# Every entry under `models` is written as `<left>+<right>`, where <left> is
# always ../rlvr-envs/Qwen3-4B-Base-Continued-GRPO-Wave and <right> is a
# per-domain `grpo-*-360m` directory.
# NOTE(review): the `+` is presumably the merge tool's "base model + adapter"
# path syntax rather than part of a directory name -- confirm against the
# tool's documentation before renaming or quoting these paths.
models:
  # Fiction / news / encyclopedia domains.
  - model: ../rlvr-envs/Qwen3-4B-Base-Continued-GRPO-Wave+../rlvr-envs/grpo-ao3_english-mara-360m
  # arXiv domains.
  - model: ../rlvr-envs/Qwen3-4B-Base-Continued-GRPO-Wave+../rlvr-envs/grpo-arxiv_cs-mara-360m
  - model: ../rlvr-envs/Qwen3-4B-Base-Continued-GRPO-Wave+../rlvr-envs/grpo-arxiv_math-mara-360m
  - model: ../rlvr-envs/Qwen3-4B-Base-Continued-GRPO-Wave+../rlvr-envs/grpo-arxiv_physics-mara-360m
  - model: ../rlvr-envs/Qwen3-4B-Base-Continued-GRPO-Wave+../rlvr-envs/grpo-bbc_news-mara-360m
  # GitHub domains.
  - model: ../rlvr-envs/Qwen3-4B-Base-Continued-GRPO-Wave+../rlvr-envs/grpo-github_cpp-mara-360m
  - model: ../rlvr-envs/Qwen3-4B-Base-Continued-GRPO-Wave+../rlvr-envs/grpo-github_javascript-mara-360m
  - model: ../rlvr-envs/Qwen3-4B-Base-Continued-GRPO-Wave+../rlvr-envs/grpo-github_markdown-mara-360m
  - model: ../rlvr-envs/Qwen3-4B-Base-Continued-GRPO-Wave+../rlvr-envs/grpo-github_python-mara-360m
  - model: ../rlvr-envs/Qwen3-4B-Base-Continued-GRPO-Wave+../rlvr-envs/grpo-wikipedia_english-mara-360m
  # The only entry without the `-mara-` infix in its name.
  - model: ../rlvr-envs/Qwen3-4B-Base-Continued-GRPO-Wave+../rlvr-envs/grpo-general-ao3style-360m

# No per-model weights or method parameters are set, so the merge tool's
# defaults apply.
# NOTE(review): `karcher` presumably selects a Karcher-mean merge -- confirm
# the name and its default parameters in the tool's merge-method docs.
merge_method: karcher

# Data type named for the merge.
dtype: bfloat16

# Tokenizer is taken from the same path that appears on the left-hand side of
# every `model` entry above.
tokenizer_source: ../rlvr-envs/Qwen3-4B-Base-Continued-GRPO-Wave