初始化项目,由ModelHub XC社区提供模型
Model: hard007ik/shopmanager-grpo-smoke-l4-v2 Source: Original Platform
This commit is contained in:
63
training_summary.json
Normal file
63
training_summary.json
Normal file
@@ -0,0 +1,63 @@
|
||||
{
|
||||
"loss": {
|
||||
"final": 0.0,
|
||||
"max": -0.0,
|
||||
"min": -0.0,
|
||||
"mean": 0.0,
|
||||
"n": 8
|
||||
},
|
||||
"reward_total": {
|
||||
"final": 0.8438500165939331,
|
||||
"max": 0.9020000100135803,
|
||||
"min": 0.800000011920929,
|
||||
"mean": 0.8473062515258789,
|
||||
"n": 8
|
||||
},
|
||||
"reward_market": {
|
||||
"final": 0.0,
|
||||
"max": 0.0,
|
||||
"min": 0.0,
|
||||
"mean": 0.0,
|
||||
"n": 0
|
||||
},
|
||||
"reward_warehouse": {
|
||||
"final": 0.0,
|
||||
"max": 0.0,
|
||||
"min": 0.0,
|
||||
"mean": 0.0,
|
||||
"n": 0
|
||||
},
|
||||
"reward_showroom": {
|
||||
"final": 0.0,
|
||||
"max": 0.0,
|
||||
"min": 0.0,
|
||||
"mean": 0.0,
|
||||
"n": 0
|
||||
},
|
||||
"n_log_rows": 9,
|
||||
"output_dir": "/ws/sm/shopmanager-grpo-smoke-l4-v2",
|
||||
"run_config": {
|
||||
"model": "Qwen/Qwen3-0.6B",
|
||||
"env_url": "https://hard007ik-shopmanagereng.hf.space",
|
||||
"dataset_size": 300,
|
||||
"num_generations": 2,
|
||||
"per_device_batch": 2,
|
||||
"grad_accum": 1,
|
||||
"max_completion_length": 64,
|
||||
"max_turns": 15,
|
||||
"lr": 5e-06,
|
||||
"warmup_steps": 10,
|
||||
"max_steps": 8,
|
||||
"epochs": 1,
|
||||
"vllm_gpu_mem": 0.3,
|
||||
"reward_weights": [
|
||||
1.0,
|
||||
0.0,
|
||||
0.0,
|
||||
0.0
|
||||
],
|
||||
"precision": {
|
||||
"bf16": true
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user