初始化项目,由ModelHub XC社区提供模型
Model: abhid1234/qwen-0.5b-tool-agent-grpo Source: Original Platform
This commit is contained in:
16
artifacts/reward_curve.txt
Normal file
16
artifacts/reward_curve.txt
Normal file
@@ -0,0 +1,16 @@
|
||||
Avg reward: -0.208 | Avg tools/rollout: 0.9 | groups with variance: 4/4
|
||||
Avg reward: 1.969 | Avg tools/rollout: 1.0 | groups with variance: 1/4
|
||||
Avg reward: 0.854 | Avg tools/rollout: 1.0 | groups with variance: 4/4
|
||||
Avg reward: 1.193 | Avg tools/rollout: 0.9 | groups with variance: 3/4
|
||||
Avg reward: -2.094 | Avg tools/rollout: 0.8 | groups with variance: 3/4
|
||||
Avg reward: 0.505 | Avg tools/rollout: 0.9 | groups with variance: 4/4
|
||||
Avg reward: -0.141 | Avg tools/rollout: 0.8 | groups with variance: 4/4
|
||||
Avg reward: -0.797 | Avg tools/rollout: 0.9 | groups with variance: 4/4
|
||||
Avg reward: 0.307 | Avg tools/rollout: 0.9 | groups with variance: 3/4
|
||||
Avg reward: -1.125 | Avg tools/rollout: 1.0 | groups with variance: 1/4
|
||||
Avg reward: -1.359 | Avg tools/rollout: 0.9 | groups with variance: 4/4
|
||||
Avg reward: 0.484 | Avg tools/rollout: 1.0 | groups with variance: 3/4
|
||||
Avg reward: -0.073 | Avg tools/rollout: 0.9 | groups with variance: 4/4
|
||||
Avg reward: 1.740 | Avg tools/rollout: 1.0 | groups with variance: 3/4
|
||||
Avg reward: 0.635 | Avg tools/rollout: 1.0 | groups with variance: 3/4
|
||||
Avg reward: 1.615 | Avg tools/rollout: 0.9 | groups with variance: 2/4
|
||||
Reference in New Issue
Block a user