63 lines
1.1 KiB
JSON
63 lines
1.1 KiB
JSON
{
  "loss": {
    "final": 0.0,
    "max": 0.1153,
    "min": -0.009,
    "mean": 0.01637777777777778,
    "n": 18
  },
  "reward_total": {
    "final": 0.7750625014305115,
    "max": 0.8168656826019287,
    "min": 0.7081500291824341,
    "mean": 0.7689822945329878,
    "n": 18
  },
  "reward_market": {
    "final": 0.0,
    "max": 0.0,
    "min": 0.0,
    "mean": 0.0,
    "n": 0
  },
  "reward_warehouse": {
    "final": 0.0,
    "max": 0.0,
    "min": 0.0,
    "mean": 0.0,
    "n": 0
  },
  "reward_showroom": {
    "final": 0.0,
    "max": 0.0,
    "min": 0.0,
    "mean": 0.0,
    "n": 0
  },
  "n_log_rows": 19,
  "output_dir": "/workspace/shopmanager-grpo-qwen3",
  "run_config": {
    "model": "Qwen/Qwen3-1.7B",
    "env_url": "https://hard007ik-shopmanagereng.hf.space",
    "dataset_size": 300,
    "num_generations": 2,
    "per_device_batch": 1,
    "grad_accum": 32,
    "max_completion_length": 64,
    "max_turns": 15,
    "lr": 5e-06,
    "warmup_steps": 10,
    "max_steps": -1,
    "epochs": 1,
    "vllm_gpu_mem": 0.3,
    "reward_weights": [
      1.0,
      0.0,
      0.0,
      0.0
    ],
    "precision": {
      "bf16": true
    }
  }
}