初始化项目,由ModelHub XC社区提供模型

Model: InosLihka/rhythm-env-meta-trained-iter1
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-05-16 23:06:03 +08:00
commit 02f436bc80
36 changed files with 319837 additions and 0 deletions

15
training_config.json Normal file
View File

@@ -0,0 +1,15 @@
{
"model_name": "unsloth/Qwen2.5-3B-Instruct",
"max_steps": 200,
"num_episodes": 80,
"max_samples": 800,
"num_generations": 4,
"learning_rate": 5e-05,
"beta": 0.04,
"lora_rank": 8,
"hint_fraction": 0.15,
"profile_mode": "continuous",
"output_dir": "/tmp/rhythm_env/outputs/rhythmenv_meta_trained",
"use_simple_reward": false,
"report_to": "none"
}