Files
shopmanager-grpo-smoke-l4-v2/training_summary.json
ModelHub XC b8ef3bde9b 初始化项目,由ModelHub XC社区提供模型
Model: hard007ik/shopmanager-grpo-smoke-l4-v2
Source: Original Platform
2026-05-02 12:37:45 +08:00

63 lines
1.1 KiB
JSON

{
"loss": {
"final": 0.0,
"max": -0.0,
"min": -0.0,
"mean": 0.0,
"n": 8
},
"reward_total": {
"final": 0.8438500165939331,
"max": 0.9020000100135803,
"min": 0.800000011920929,
"mean": 0.8473062515258789,
"n": 8
},
"reward_market": {
"final": 0.0,
"max": 0.0,
"min": 0.0,
"mean": 0.0,
"n": 0
},
"reward_warehouse": {
"final": 0.0,
"max": 0.0,
"min": 0.0,
"mean": 0.0,
"n": 0
},
"reward_showroom": {
"final": 0.0,
"max": 0.0,
"min": 0.0,
"mean": 0.0,
"n": 0
},
"n_log_rows": 9,
"output_dir": "/ws/sm/shopmanager-grpo-smoke-l4-v2",
"run_config": {
"model": "Qwen/Qwen3-0.6B",
"env_url": "https://hard007ik-shopmanagereng.hf.space",
"dataset_size": 300,
"num_generations": 2,
"per_device_batch": 2,
"grad_accum": 1,
"max_completion_length": 64,
"max_turns": 15,
"lr": 5e-06,
"warmup_steps": 10,
"max_steps": 8,
"epochs": 1,
"vllm_gpu_mem": 0.3,
"reward_weights": [
1.0,
0.0,
0.0,
0.0
],
"precision": {
"bf16": true
}
}
}