初始化项目,由ModelHub XC社区提供模型

Model: InosLihka/rhythm-env-meta-trained-iter2
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-05-02 19:31:52 +08:00
commit 771a882fff
51 changed files with 493311 additions and 0 deletions

15
training_config.json Normal file
View File

@@ -0,0 +1,15 @@
{
"model_name": "unsloth/Qwen2.5-3B-Instruct",
"max_steps": 400,
"num_episodes": 120,
"max_samples": 1200,
"num_generations": 4,
"learning_rate": 5e-05,
"beta": 0.04,
"lora_rank": 8,
"hint_fraction": 0.0,
"profile_mode": "continuous",
"output_dir": "/tmp/rhythm_env/outputs/rhythmenv_meta_trained",
"use_simple_reward": false,
"report_to": "none"
}