初始化项目,由ModelHub XC社区提供模型
Model: abhid1234/qwen-0.5b-tool-agent-grpo Source: Original Platform
This commit is contained in:
412
artifacts/eval_results.json
Normal file
412
artifacts/eval_results.json
Normal file
@@ -0,0 +1,412 @@
|
||||
{
|
||||
"step": 15,
|
||||
"scenarios_path": "data/scenarios_val.jsonl",
|
||||
"num_generations": 8,
|
||||
"total_scenarios": 50,
|
||||
"total_rollouts": 400,
|
||||
"successes": 18,
|
||||
"accuracy_pct": 4.5,
|
||||
"avg_reward": -1.8850000000000002,
|
||||
"per_scenario": [
|
||||
{
|
||||
"scenario_index": 0,
|
||||
"task": "Convert 98 kg to lbs.",
|
||||
"mean_reward": 3.125,
|
||||
"max_reward": 4.0,
|
||||
"success_count": 7,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 1,
|
||||
"task": "What is the speed of light?",
|
||||
"mean_reward": -2.5,
|
||||
"max_reward": -2.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 2,
|
||||
"task": "What is the distance from Earth to the Sun in km in miles?",
|
||||
"mean_reward": 0.5625,
|
||||
"max_reward": 2.5,
|
||||
"success_count": 1,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 3,
|
||||
"task": "What is 441 plus 23?",
|
||||
"mean_reward": 2.25,
|
||||
"max_reward": 4.0,
|
||||
"success_count": 1,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 4,
|
||||
"task": "Convert 62 kg to lbs.",
|
||||
"mean_reward": -3.0,
|
||||
"max_reward": -3.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 5,
|
||||
"task": "Which is hotter right now, London or Mumbai?",
|
||||
"mean_reward": -2.875,
|
||||
"max_reward": -2.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 6,
|
||||
"task": "What is 185 plus 89?",
|
||||
"mean_reward": -3.0,
|
||||
"max_reward": -3.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 7,
|
||||
"task": "What's the weather like in Dubai?",
|
||||
"mean_reward": -1.375,
|
||||
"max_reward": 4.0,
|
||||
"success_count": 2,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 8,
|
||||
"task": "What is the population of Germany divided by its area in km2?",
|
||||
"mean_reward": -2.041666666666667,
|
||||
"max_reward": 1.666666666666666,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 9,
|
||||
"task": "What is the boiling point of water?",
|
||||
"mean_reward": -3.0,
|
||||
"max_reward": -3.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 10,
|
||||
"task": "Which is hotter right now, London or Mumbai?",
|
||||
"mean_reward": -2.0,
|
||||
"max_reward": -2.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 11,
|
||||
"task": "What is the population of India divided by its area in km2?",
|
||||
"mean_reward": -2.125,
|
||||
"max_reward": -2.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 12,
|
||||
"task": "What is India's population density in people per square mile?",
|
||||
"mean_reward": -1.25,
|
||||
"max_reward": 1.333333333333333,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 13,
|
||||
"task": "What is the tallest mountain?",
|
||||
"mean_reward": -2.875,
|
||||
"max_reward": -2.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 14,
|
||||
"task": "What is the distance from Earth to the Sun in km in miles?",
|
||||
"mean_reward": -1.875,
|
||||
"max_reward": 1.5,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 15,
|
||||
"task": "What is the population of Japan divided by its area in km2?",
|
||||
"mean_reward": -2.875,
|
||||
"max_reward": -2.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 16,
|
||||
"task": "What is Germany's population density in people per square mile?",
|
||||
"mean_reward": -3.0,
|
||||
"max_reward": -3.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 17,
|
||||
"task": "Convert 74 kg to lbs.",
|
||||
"mean_reward": -3.0,
|
||||
"max_reward": -3.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 18,
|
||||
"task": "Which is hotter right now, Paris or Cairo?",
|
||||
"mean_reward": -3.0,
|
||||
"max_reward": -3.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 19,
|
||||
"task": "What is India's population density in people per square mile?",
|
||||
"mean_reward": -1.5,
|
||||
"max_reward": 1.333333333333333,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 20,
|
||||
"task": "Which country has a larger population, France or Brazil?",
|
||||
"mean_reward": -3.0,
|
||||
"max_reward": -3.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 21,
|
||||
"task": "Convert 64 kg to lbs.",
|
||||
"mean_reward": -3.0,
|
||||
"max_reward": -3.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 22,
|
||||
"task": "Which country has a larger population, Japan or India?",
|
||||
"mean_reward": -1.5625,
|
||||
"max_reward": 3.0,
|
||||
"success_count": 2,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 23,
|
||||
"task": "What is the GDP of Japan?",
|
||||
"mean_reward": -2.75,
|
||||
"max_reward": -2.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 24,
|
||||
"task": "What is the population of France divided by its area in km2?",
|
||||
"mean_reward": -2.875,
|
||||
"max_reward": -2.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 25,
|
||||
"task": "What is France's population density in people per square mile?",
|
||||
"mean_reward": -2.5,
|
||||
"max_reward": 1.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 26,
|
||||
"task": "Convert 26 kg to lbs.",
|
||||
"mean_reward": -3.0,
|
||||
"max_reward": -3.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 27,
|
||||
"task": "What is 660 times 87?",
|
||||
"mean_reward": 1.0,
|
||||
"max_reward": 4.0,
|
||||
"success_count": 1,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 28,
|
||||
"task": "What is the boiling point of water?",
|
||||
"mean_reward": -2.5,
|
||||
"max_reward": -2.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 29,
|
||||
"task": "What is the population of Germany divided by its area in km2?",
|
||||
"mean_reward": -1.125,
|
||||
"max_reward": 2.333333333333333,
|
||||
"success_count": 1,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 30,
|
||||
"task": "Convert 40 kg to lbs.",
|
||||
"mean_reward": -3.0,
|
||||
"max_reward": -3.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 31,
|
||||
"task": "What is the speed of light?",
|
||||
"mean_reward": -2.375,
|
||||
"max_reward": -2.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 32,
|
||||
"task": "How old was Guido van Rossum in 2024?",
|
||||
"mean_reward": -2.875,
|
||||
"max_reward": -2.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 33,
|
||||
"task": "Which is hotter right now, Paris or Dubai?",
|
||||
"mean_reward": -3.0,
|
||||
"max_reward": -3.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 34,
|
||||
"task": "Which is hotter right now, Tokyo or Dubai?",
|
||||
"mean_reward": -2.875,
|
||||
"max_reward": -2.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 35,
|
||||
"task": "Which is hotter right now, London or Cairo?",
|
||||
"mean_reward": -2.375,
|
||||
"max_reward": -2.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 36,
|
||||
"task": "What is the value of pi?",
|
||||
"mean_reward": -2.125,
|
||||
"max_reward": -2.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 37,
|
||||
"task": "What is the population of Japan divided by its area in km2?",
|
||||
"mean_reward": -1.6666666666666667,
|
||||
"max_reward": 1.666666666666666,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 38,
|
||||
"task": "What is the temperature in London in Fahrenheit?",
|
||||
"mean_reward": -1.375,
|
||||
"max_reward": 1.5,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 39,
|
||||
"task": "What is 464 plus 30?",
|
||||
"mean_reward": -1.75,
|
||||
"max_reward": 2.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 40,
|
||||
"task": "Which country has a larger population, France or India?",
|
||||
"mean_reward": -2.1875,
|
||||
"max_reward": 2.5,
|
||||
"success_count": 1,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 41,
|
||||
"task": "What is the distance from Earth to the Sun in km in miles?",
|
||||
"mean_reward": -1.625,
|
||||
"max_reward": 2.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 42,
|
||||
"task": "What is the tallest mountain?",
|
||||
"mean_reward": -1.125,
|
||||
"max_reward": 2.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 43,
|
||||
"task": "What is the temperature in London in Fahrenheit?",
|
||||
"mean_reward": 0.6875,
|
||||
"max_reward": 2.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 44,
|
||||
"task": "What is 496 minus 24?",
|
||||
"mean_reward": 1.0,
|
||||
"max_reward": 2.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 45,
|
||||
"task": "What's the weather like in Cairo?",
|
||||
"mean_reward": -1.25,
|
||||
"max_reward": 4.0,
|
||||
"success_count": 2,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 46,
|
||||
"task": "What is the tallest mountain?",
|
||||
"mean_reward": -2.5,
|
||||
"max_reward": -2.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 47,
|
||||
"task": "What is India's population density in people per square mile?",
|
||||
"mean_reward": -1.7916666666666667,
|
||||
"max_reward": 1.333333333333333,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 48,
|
||||
"task": "What is the GDP of France?",
|
||||
"mean_reward": -2.625,
|
||||
"max_reward": -2.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
},
|
||||
{
|
||||
"scenario_index": 49,
|
||||
"task": "How old was Guido van Rossum in 2024?",
|
||||
"mean_reward": -2.75,
|
||||
"max_reward": -2.0,
|
||||
"success_count": 0,
|
||||
"total_attempts": 8
|
||||
}
|
||||
]
|
||||
}
|
||||
16
artifacts/reward_curve.txt
Normal file
16
artifacts/reward_curve.txt
Normal file
@@ -0,0 +1,16 @@
|
||||
Avg reward: -0.208 | Avg tools/rollout: 0.9 | groups with variance: 4/4
|
||||
Avg reward: 1.969 | Avg tools/rollout: 1.0 | groups with variance: 1/4
|
||||
Avg reward: 0.854 | Avg tools/rollout: 1.0 | groups with variance: 4/4
|
||||
Avg reward: 1.193 | Avg tools/rollout: 0.9 | groups with variance: 3/4
|
||||
Avg reward: -2.094 | Avg tools/rollout: 0.8 | groups with variance: 3/4
|
||||
Avg reward: 0.505 | Avg tools/rollout: 0.9 | groups with variance: 4/4
|
||||
Avg reward: -0.141 | Avg tools/rollout: 0.8 | groups with variance: 4/4
|
||||
Avg reward: -0.797 | Avg tools/rollout: 0.9 | groups with variance: 4/4
|
||||
Avg reward: 0.307 | Avg tools/rollout: 0.9 | groups with variance: 3/4
|
||||
Avg reward: -1.125 | Avg tools/rollout: 1.0 | groups with variance: 1/4
|
||||
Avg reward: -1.359 | Avg tools/rollout: 0.9 | groups with variance: 4/4
|
||||
Avg reward: 0.484 | Avg tools/rollout: 1.0 | groups with variance: 3/4
|
||||
Avg reward: -0.073 | Avg tools/rollout: 0.9 | groups with variance: 4/4
|
||||
Avg reward: 1.740 | Avg tools/rollout: 1.0 | groups with variance: 3/4
|
||||
Avg reward: 0.635 | Avg tools/rollout: 1.0 | groups with variance: 3/4
|
||||
Avg reward: 1.615 | Avg tools/rollout: 0.9 | groups with variance: 2/4
|
||||
1054
artifacts/training.log
Normal file
1054
artifacts/training.log
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user