Files
shopmanager-grpo-qwen3/training_summary.json

63 lines
1.1 KiB
JSON
Raw Normal View History

{
"loss": {
"final": 0.0,
"max": 0.1153,
"min": -0.009,
"mean": 0.01637777777777778,
"n": 18
},
"reward_total": {
"final": 0.7750625014305115,
"max": 0.8168656826019287,
"min": 0.7081500291824341,
"mean": 0.7689822945329878,
"n": 18
},
"reward_market": {
"final": 0.0,
"max": 0.0,
"min": 0.0,
"mean": 0.0,
"n": 0
},
"reward_warehouse": {
"final": 0.0,
"max": 0.0,
"min": 0.0,
"mean": 0.0,
"n": 0
},
"reward_showroom": {
"final": 0.0,
"max": 0.0,
"min": 0.0,
"mean": 0.0,
"n": 0
},
"n_log_rows": 19,
"output_dir": "/workspace/shopmanager-grpo-qwen3",
"run_config": {
"model": "Qwen/Qwen3-1.7B",
"env_url": "https://hard007ik-shopmanagereng.hf.space",
"dataset_size": 300,
"num_generations": 2,
"per_device_batch": 1,
"grad_accum": 32,
"max_completion_length": 64,
"max_turns": 15,
"lr": 5e-06,
"warmup_steps": 10,
"max_steps": -1,
"epochs": 1,
"vllm_gpu_mem": 0.3,
"reward_weights": [
1.0,
0.0,
0.0,
0.0
],
"precision": {
"bf16": true
}
}
}