15 lines
374 B
JSON
15 lines
374 B
JSON
{
  "model_name": "unsloth/Qwen2.5-3B-Instruct",
  "max_steps": 400,
  "num_episodes": 120,
  "max_samples": 1200,
  "num_generations": 4,
  "learning_rate": 5e-05,
  "beta": 0.04,
  "lora_rank": 8,
  "hint_fraction": 0.0,
  "profile_mode": "continuous",
  "output_dir": "/tmp/rhythm_env/outputs/rhythmenv_meta_trained",
  "use_simple_reward": false,
  "report_to": "none"
}