Files
pyine-v1-qwen3-4b-shortcut/reward_state.json

196 lines
3.9 KiB
JSON
Raw Normal View History

{
"adapter": {
"error_count": 0,
"skip_count": 0,
"total_count": 0
},
"manager": {
"difficulty_estimator": {
"bin_reward_stats": [
{
"count": 0,
"max": NaN,
"min": NaN,
"sum": 0.0,
"sumsq": 0.0
},
{
"count": 0,
"max": NaN,
"min": NaN,
"sum": 0.0,
"sumsq": 0.0
},
{
"count": 0,
"max": NaN,
"min": NaN,
"sum": 0.0,
"sumsq": 0.0
},
{
"count": 0,
"max": NaN,
"min": NaN,
"sum": 0.0,
"sumsq": 0.0
},
{
"count": 0,
"max": NaN,
"min": NaN,
"sum": 0.0,
"sumsq": 0.0
},
{
"count": 0,
"max": NaN,
"min": NaN,
"sum": 0.0,
"sumsq": 0.0
},
{
"count": 0,
"max": NaN,
"min": NaN,
"sum": 0.0,
"sumsq": 0.0
},
{
"count": 0,
"max": NaN,
"min": NaN,
"sum": 0.0,
"sumsq": 0.0
},
{
"count": 0,
"max": NaN,
"min": NaN,
"sum": 0.0,
"sumsq": 0.0
},
{
"count": 0,
"max": NaN,
"min": NaN,
"sum": 0.0,
"sumsq": 0.0
},
{
"count": 0,
"max": NaN,
"min": NaN,
"sum": 0.0,
"sumsq": 0.0
}
],
"bin_reward_values": [
[],
[],
[],
[],
[],
[],
[],
[],
[],
[],
[]
],
"bin_term_reward_stats": {},
"corr_stats": {
"count": 0,
"sum_x": 0.0,
"sum_x2": 0.0,
"sum_xy": 0.0,
"sum_y": 0.0,
"sum_y2": 0.0
},
"missing_count": 0,
"override_skip_count": 0,
"score_stats": {
"count": 0,
"max": NaN,
"min": NaN,
"sum": 0.0,
"sumsq": 0.0
},
"score_values": [],
"secondary_corr_stats": {},
"secondary_stats": {},
"total_count": 0
},
"epoch": 0.3998664886515354,
"global_batch_counts": {
"eval/": 1403,
"train/": 600
},
"global_generation_counts": {
"eval/": 280600,
"train/": 576000
},
"local_generation_counts": {
"eval/": 115,
"train/": 0
},
"parsing_stats": {
"answer_length_tokens": {
"count": 0,
"max": NaN,
"min": NaN,
"sum": 0.0,
"sumsq": 0.0
},
"category_answer_length_tokens": {},
"category_malformed_count": {},
"category_missing_answer_count": {},
"category_missing_reasoning_count": {},
"category_output_length_tokens": {},
"category_reasoning_length_tokens": {},
"category_total_count": {},
"malformed_count": 0,
"missing_answer_count": 0,
"missing_reasoning_count": 0,
"output_length_tokens": {
"count": 0,
"max": NaN,
"min": NaN,
"sum": 0.0,
"sumsq": 0.0
},
"reasoning_length_tokens": {
"count": 0,
"max": NaN,
"min": NaN,
"sum": 0.0,
"sumsq": 0.0
},
"total_count": 0
},
"reward_category_stats": {},
"reward_category_term_stats": {},
"reward_term_stats": {
"soft_match": {
"count": 0,
"max": NaN,
"min": NaN,
"sum": 0.0,
"sumsq": 0.0
}
},
"reward_total_stats": {
"count": 0,
"max": NaN,
"min": NaN,
"sum": 0.0,
"sumsq": 0.0
},
"reward_total_values": [],
"reward_total_values_count": 0,
"step": null,
"total_global_batch_count": 2003,
"total_global_generation_count": 856600
},
"version": "0.1.4"
}