初始化项目,由ModelHub XC社区提供模型

Model: Lambent/Qwen3-4B-Base-Continued-GRPO-Style-Karcher
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-05-29 16:56:43 +08:00
commit 38cee162c3
15 changed files with 152433 additions and 0 deletions

17
mergekit_config.yml Normal file
View File

@@ -0,0 +1,17 @@
---
# mergekit configuration: combines the eleven entries listed under `models`
# using the `karcher` merge method.
merge_method: karcher

# Every entry has the form `<left>+<right>`. The left-hand path is identical
# for all of them (Qwen3-4B-Base-Continued-GRPO-Wave); only the right-hand
# path differs from entry to entry.
# NOTE(review): this looks like mergekit's `model+lora` notation, i.e. a
# checkpoint with an adapter applied on top -- confirm against mergekit docs.
models:
  - model: ../rlvr-envs/Qwen3-4B-Base-Continued-GRPO-Wave+../rlvr-envs/grpo-ao3_english-mara-360m
  - model: ../rlvr-envs/Qwen3-4B-Base-Continued-GRPO-Wave+../rlvr-envs/grpo-arxiv_cs-mara-360m
  - model: ../rlvr-envs/Qwen3-4B-Base-Continued-GRPO-Wave+../rlvr-envs/grpo-arxiv_math-mara-360m
  - model: ../rlvr-envs/Qwen3-4B-Base-Continued-GRPO-Wave+../rlvr-envs/grpo-arxiv_physics-mara-360m
  - model: ../rlvr-envs/Qwen3-4B-Base-Continued-GRPO-Wave+../rlvr-envs/grpo-bbc_news-mara-360m
  - model: ../rlvr-envs/Qwen3-4B-Base-Continued-GRPO-Wave+../rlvr-envs/grpo-github_cpp-mara-360m
  - model: ../rlvr-envs/Qwen3-4B-Base-Continued-GRPO-Wave+../rlvr-envs/grpo-github_javascript-mara-360m
  - model: ../rlvr-envs/Qwen3-4B-Base-Continued-GRPO-Wave+../rlvr-envs/grpo-github_markdown-mara-360m
  - model: ../rlvr-envs/Qwen3-4B-Base-Continued-GRPO-Wave+../rlvr-envs/grpo-github_python-mara-360m
  - model: ../rlvr-envs/Qwen3-4B-Base-Continued-GRPO-Wave+../rlvr-envs/grpo-wikipedia_english-mara-360m
  - model: ../rlvr-envs/Qwen3-4B-Base-Continued-GRPO-Wave+../rlvr-envs/grpo-general-ao3style-360m

# dtype setting passed to the merge.
dtype: bfloat16

# Same path as the left-hand side of every `models` entry above.
tokenizer_source: ../rlvr-envs/Qwen3-4B-Base-Continued-GRPO-Wave