63 lines
1.1 KiB
JSON
63 lines
1.1 KiB
JSON
{
|
|
"loss": {
|
|
"final": 0.0,
|
|
"max": -0.0,
|
|
"min": -0.0,
|
|
"mean": 0.0,
|
|
"n": 8
|
|
},
|
|
"reward_total": {
|
|
"final": 0.8438500165939331,
|
|
"max": 0.9020000100135803,
|
|
"min": 0.800000011920929,
|
|
"mean": 0.8473062515258789,
|
|
"n": 8
|
|
},
|
|
"reward_market": {
|
|
"final": 0.0,
|
|
"max": 0.0,
|
|
"min": 0.0,
|
|
"mean": 0.0,
|
|
"n": 0
|
|
},
|
|
"reward_warehouse": {
|
|
"final": 0.0,
|
|
"max": 0.0,
|
|
"min": 0.0,
|
|
"mean": 0.0,
|
|
"n": 0
|
|
},
|
|
"reward_showroom": {
|
|
"final": 0.0,
|
|
"max": 0.0,
|
|
"min": 0.0,
|
|
"mean": 0.0,
|
|
"n": 0
|
|
},
|
|
"n_log_rows": 9,
|
|
"output_dir": "/ws/sm/shopmanager-grpo-smoke-l4-v2",
|
|
"run_config": {
|
|
"model": "Qwen/Qwen3-0.6B",
|
|
"env_url": "https://hard007ik-shopmanagereng.hf.space",
|
|
"dataset_size": 300,
|
|
"num_generations": 2,
|
|
"per_device_batch": 2,
|
|
"grad_accum": 1,
|
|
"max_completion_length": 64,
|
|
"max_turns": 15,
|
|
"lr": 5e-06,
|
|
"warmup_steps": 10,
|
|
"max_steps": 8,
|
|
"epochs": 1,
|
|
"vllm_gpu_mem": 0.3,
|
|
"reward_weights": [
|
|
1.0,
|
|
0.0,
|
|
0.0,
|
|
0.0
|
|
],
|
|
"precision": {
|
|
"bf16": true
|
|
}
|
|
}
|
|
} |