初始化项目,由ModelHub XC社区提供模型

Model: heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-06-16 08:16:17 +08:00
commit d5d0e722af
93 changed files with 4106 additions and 0 deletions

151
eval/trained_eval_rows.csv Normal file
View File

@@ -0,0 +1,151 @@
agent,seed,case_id,difficulty,reward,primary_reward,auxiliary_reward,contradictions_total,contradictions_triggered,contradictions_surfaced,questions_used,evidence_presented,evidence_timing_successes,blind_evidence_count,useless_questions_ratio,avg_question_length,model_repo,invalid_tool_calls
random,20260425,timeline_255d67,easy,0.0,0.0,-0.4,1,0,0,6,2,0,2,1.0,5.0,,
random,20260426,knowledge_b28f8c,medium,0.0,0.0,-0.4,2,0,0,6,2,0,2,1.0,5.0,,
random,20260427,workplace_c98377,easy,0.0,0.0,-0.4,1,0,0,3,5,0,5,1.0,5.0,,
random,20260428,motive_66ff59,hard,0.0,0.0,-0.4,3,0,0,7,1,0,1,1.0,5.0,,
random,20260429,timeline_19bb78,easy,0.0,0.0,-0.4,1,0,0,4,4,0,4,1.0,5.0,,
random,20260430,timeline_a97690,medium,0.0,0.0,-0.4,2,0,0,6,2,0,2,1.0,5.0,,
random,20260431,alibi_67ffcd,medium,0.0,0.0,-0.4,2,0,0,6,2,0,2,1.0,5.0,,
random,20260432,alibi_423bca,medium,0.0,0.0,-0.4,2,0,0,5,3,0,3,1.0,5.0,,
random,20260433,knowledge_960d07,medium,0.0,0.0,-0.4,2,0,0,8,0,0,0,1.0,5.0,,
random,20260434,alibi_e829c1,easy,0.0,0.0,-0.4,1,0,0,7,1,0,1,1.0,5.0,,
random,20260435,motive_85e25b,hard,0.0,0.0,-0.4,3,0,0,5,3,0,3,1.0,5.0,,
random,20260436,knowledge_a599e3,medium,0.0,0.0,-0.4,2,0,0,4,4,0,4,1.0,5.0,,
random,20260437,motive_8bca20,easy,0.0,0.0,-0.4,1,0,0,6,2,0,2,1.0,5.0,,
random,20260438,corporate_6b1664,medium,0.0,0.0,-0.4,2,0,0,6,2,0,2,1.0,5.0,,
random,20260439,alibi_a6c582,easy,0.0,0.0,-0.4,1,0,0,8,0,0,0,1.0,5.0,,
random,20260440,workplace_835476,easy,0.0,0.0,-0.4,1,0,0,5,3,0,3,1.0,5.0,,
random,20260441,possession_a079c5,hard,0.0,0.0,-0.4,3,0,0,8,0,0,0,1.0,5.0,,
random,20260442,possession_9cc45d,hard,0.0,0.0,-0.4,3,0,0,5,3,0,3,1.0,5.0,,
random,20260443,possession_259aa5,easy,0.0,0.0,-0.4,1,0,0,4,4,0,4,1.0,5.0,,
random,20260444,corporate_76724c,medium,0.0,0.0,-0.4,2,0,0,5,3,0,3,1.0,5.0,,
random,20260445,timeline_767821,medium,0.0,0.0,-0.4,2,0,0,5,3,0,3,1.0,5.0,,
random,20260446,motive_c0d166,medium,0.0,0.0,-0.4,2,0,0,5,3,0,3,1.0,5.0,,
random,20260447,corporate_307934,hard,0.0,0.0,-0.4,3,0,0,7,1,0,1,1.0,5.0,,
random,20260448,timeline_592816,hard,0.0,0.0,-0.4,3,0,0,6,2,0,2,1.0,5.0,,
random,20260449,knowledge_b26824,medium,0.0,0.0,-0.4,2,0,0,5,3,0,3,1.0,5.0,,
random,20260450,knowledge_697785,hard,0.0,0.0,-0.4,3,0,0,7,1,0,1,1.0,5.0,,
random,20260451,timeline_81dafd,medium,0.0,0.0,-0.4,2,0,0,4,4,0,4,1.0,5.0,,
random,20260452,corporate_8eb7d7,medium,0.0,0.0,-0.4,2,0,0,4,4,0,4,1.0,5.0,,
random,20260453,possession_dbb5fe,medium,0.0,0.0,-0.4,2,0,0,5,3,0,3,1.0,5.0,,
random,20260454,alibi_a4666f,hard,0.0,0.0,-0.4,3,0,0,8,0,0,0,1.0,5.0,,
keyword_spam,20260425,timeline_255d67,easy,0.030000000000000006,0.0,0.15000000000000002,1,1,0,5,0,0,0,0.6,4.2,,
keyword_spam,20260426,knowledge_b28f8c,medium,0.12000000000000002,0.0,0.6000000000000001,2,2,0,5,0,0,0,0.4,4.2,,
keyword_spam,20260427,workplace_c98377,easy,0.0,0.0,-0.2,1,0,0,5,0,0,0,0.8,4.2,,
keyword_spam,20260428,motive_66ff59,hard,0.17000000000000004,0.0,0.8500000000000001,3,3,0,5,0,0,0,0.2,4.2,,
keyword_spam,20260429,timeline_19bb78,easy,0.030000000000000006,0.0,0.15000000000000002,1,1,0,5,0,0,0,0.6,4.2,,
keyword_spam,20260430,timeline_a97690,medium,0.13,0.0,0.65,2,2,0,5,0,0,0,0.2,4.2,,
keyword_spam,20260431,alibi_67ffcd,medium,0.04000000000000001,0.0,0.20000000000000004,2,1,0,5,0,0,0,0.4,4.2,,
keyword_spam,20260432,alibi_423bca,medium,0.04000000000000001,0.0,0.20000000000000004,2,1,0,5,0,0,0,0.4,4.2,,
keyword_spam,20260433,knowledge_960d07,medium,0.12000000000000002,0.0,0.6000000000000001,2,2,0,5,0,0,0,0.4,4.2,,
keyword_spam,20260434,alibi_e829c1,easy,0.04000000000000001,0.0,0.20000000000000004,1,1,0,5,0,0,0,0.4,4.2,,
keyword_spam,20260435,motive_85e25b,hard,0.17000000000000004,0.0,0.8500000000000001,3,3,0,5,0,0,0,0.2,4.2,,
keyword_spam,20260436,knowledge_a599e3,medium,0.12000000000000002,0.0,0.6000000000000001,2,2,0,5,0,0,0,0.4,4.2,,
keyword_spam,20260437,motive_8bca20,easy,0.0,0.0,-0.15000000000000002,1,0,0,5,0,0,0,0.6,4.2,,
keyword_spam,20260438,corporate_6b1664,medium,0.020000000000000007,0.0,0.10000000000000003,2,1,0,5,0,0,0,0.8,4.2,,
keyword_spam,20260439,alibi_a6c582,easy,0.04000000000000001,0.0,0.20000000000000004,1,1,0,5,0,0,0,0.4,4.2,,
keyword_spam,20260440,workplace_835476,easy,0.0,0.0,-0.2,1,0,0,5,0,0,0,0.8,4.2,,
keyword_spam,20260441,possession_a079c5,hard,0.030000000000000006,0.0,0.15000000000000002,3,1,0,5,0,0,0,0.6,4.2,,
keyword_spam,20260442,possession_9cc45d,hard,0.030000000000000006,0.0,0.15000000000000002,3,1,0,5,0,0,0,0.6,4.2,,
keyword_spam,20260443,possession_259aa5,easy,0.0,0.0,-0.2,1,0,0,5,0,0,0,0.8,4.2,,
keyword_spam,20260444,corporate_76724c,medium,0.020000000000000007,0.0,0.10000000000000003,2,1,0,5,0,0,0,0.8,4.2,,
keyword_spam,20260445,timeline_767821,medium,0.13,0.0,0.65,2,2,0,5,0,0,0,0.2,4.2,,
keyword_spam,20260446,motive_c0d166,medium,0.13,0.0,0.65,2,2,0,5,0,0,0,0.2,4.2,,
keyword_spam,20260447,corporate_307934,hard,0.020000000000000007,0.0,0.10000000000000003,3,1,0,5,0,0,0,0.8,4.2,,
keyword_spam,20260448,timeline_592816,hard,0.19,0.0,0.95,3,3,0,5,0,0,0,0.2,4.2,,
keyword_spam,20260449,knowledge_b26824,medium,0.12000000000000002,0.0,0.6000000000000001,2,2,0,5,0,0,0,0.4,4.2,,
keyword_spam,20260450,knowledge_697785,hard,0.12000000000000002,0.0,0.6000000000000001,3,2,0,5,0,0,0,0.4,4.2,,
keyword_spam,20260451,timeline_81dafd,medium,0.13,0.0,0.65,2,2,0,5,0,0,0,0.2,4.2,,
keyword_spam,20260452,corporate_8eb7d7,medium,0.020000000000000007,0.0,0.10000000000000003,2,1,0,5,0,0,0,0.8,4.2,,
keyword_spam,20260453,possession_dbb5fe,medium,0.030000000000000006,0.0,0.15000000000000002,2,1,0,5,0,0,0,0.6,4.2,,
keyword_spam,20260454,alibi_a4666f,hard,0.15000000000000002,0.0,0.75,3,2,0,5,0,0,0,0.2,4.2,,
present_all,20260425,timeline_255d67,easy,0.0,0.0,-0.25,1,0,0,0,5,0,5,0.0,0.0,,
present_all,20260426,knowledge_b28f8c,medium,0.0,0.0,-0.2,2,0,0,0,4,0,4,0.0,0.0,,
present_all,20260427,workplace_c98377,easy,0.0,0.0,-0.25,1,0,0,0,5,0,5,0.0,0.0,,
present_all,20260428,motive_66ff59,hard,0.0,0.0,-0.35000000000000003,3,0,0,0,7,0,7,0.0,0.0,,
present_all,20260429,timeline_19bb78,easy,0.0,0.0,-0.25,1,0,0,0,5,0,5,0.0,0.0,,
present_all,20260430,timeline_a97690,medium,0.0,0.0,-0.25,2,0,0,0,5,0,5,0.0,0.0,,
present_all,20260431,alibi_67ffcd,medium,0.0,0.0,-0.2,2,0,0,0,4,0,4,0.0,0.0,,
present_all,20260432,alibi_423bca,medium,0.0,0.0,-0.2,2,0,0,0,4,0,4,0.0,0.0,,
present_all,20260433,knowledge_960d07,medium,0.0,0.0,-0.2,2,0,0,0,4,0,4,0.0,0.0,,
present_all,20260434,alibi_e829c1,easy,0.0,0.0,-0.2,1,0,0,0,4,0,4,0.0,0.0,,
present_all,20260435,motive_85e25b,hard,0.0,0.0,-0.35000000000000003,3,0,0,0,7,0,7,0.0,0.0,,
present_all,20260436,knowledge_a599e3,medium,0.0,0.0,-0.2,2,0,0,0,4,0,4,0.0,0.0,,
present_all,20260437,motive_8bca20,easy,0.0,0.0,-0.25,1,0,0,0,5,0,5,0.0,0.0,,
present_all,20260438,corporate_6b1664,medium,0.0,0.0,-0.25,2,0,0,0,5,0,5,0.0,0.0,,
present_all,20260439,alibi_a6c582,easy,0.0,0.0,-0.2,1,0,0,0,4,0,4,0.0,0.0,,
present_all,20260440,workplace_835476,easy,0.0,0.0,-0.25,1,0,0,0,5,0,5,0.0,0.0,,
present_all,20260441,possession_a079c5,hard,0.0,0.0,-0.30000000000000004,3,0,0,0,6,0,6,0.0,0.0,,
present_all,20260442,possession_9cc45d,hard,0.0,0.0,-0.30000000000000004,3,0,0,0,6,0,6,0.0,0.0,,
present_all,20260443,possession_259aa5,easy,0.0,0.0,-0.2,1,0,0,0,4,0,4,0.0,0.0,,
present_all,20260444,corporate_76724c,medium,0.0,0.0,-0.25,2,0,0,0,5,0,5,0.0,0.0,,
present_all,20260445,timeline_767821,medium,0.0,0.0,-0.25,2,0,0,0,5,0,5,0.0,0.0,,
present_all,20260446,motive_c0d166,medium,0.0,0.0,-0.25,2,0,0,0,5,0,5,0.0,0.0,,
present_all,20260447,corporate_307934,hard,0.0,0.0,-0.35000000000000003,3,0,0,0,7,0,7,0.0,0.0,,
present_all,20260448,timeline_592816,hard,0.0,0.0,-0.35000000000000003,3,0,0,0,7,0,7,0.0,0.0,,
present_all,20260449,knowledge_b26824,medium,0.0,0.0,-0.2,2,0,0,0,4,0,4,0.0,0.0,,
present_all,20260450,knowledge_697785,hard,0.0,0.0,-0.30000000000000004,3,0,0,0,6,0,6,0.0,0.0,,
present_all,20260451,timeline_81dafd,medium,0.0,0.0,-0.25,2,0,0,0,5,0,5,0.0,0.0,,
present_all,20260452,corporate_8eb7d7,medium,0.0,0.0,-0.25,2,0,0,0,5,0,5,0.0,0.0,,
present_all,20260453,possession_dbb5fe,medium,0.0,0.0,-0.2,2,0,0,0,4,0,4,0.0,0.0,,
present_all,20260454,alibi_a4666f,hard,0.0,0.0,-0.30000000000000004,3,0,0,0,6,0,6,0.0,0.0,,
scripted_oracle,20260425,timeline_255d67,easy,0.8800000000000001,1.0,0.4,1,1,1,1,1,1,0,0.0,1.0,,
scripted_oracle,20260426,knowledge_b28f8c,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.5,,
scripted_oracle,20260427,workplace_c98377,easy,0.8800000000000001,1.0,0.4,1,1,1,1,1,1,0,0.0,1.0,,
scripted_oracle,20260428,motive_66ff59,hard,1.0,1.0,1.2000000000000002,3,3,3,3,3,3,0,0.0,1.0,,
scripted_oracle,20260429,timeline_19bb78,easy,0.8800000000000001,1.0,0.4,1,1,1,1,1,1,0,0.0,1.0,,
scripted_oracle,20260430,timeline_a97690,medium,0.49000000000000005,0.5,0.45,2,1,1,2,2,1,1,0.0,1.0,,
scripted_oracle,20260431,alibi_67ffcd,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,3.0,,
scripted_oracle,20260432,alibi_423bca,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,3.0,,
scripted_oracle,20260433,knowledge_960d07,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.5,,
scripted_oracle,20260434,alibi_e829c1,easy,0.8800000000000001,1.0,0.4,1,1,1,1,1,1,0,0.0,3.0,,
scripted_oracle,20260435,motive_85e25b,hard,1.0,1.0,1.2000000000000002,3,3,3,3,3,3,0,0.0,1.0,,
scripted_oracle,20260436,knowledge_a599e3,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.5,,
scripted_oracle,20260437,motive_8bca20,easy,0.8800000000000001,1.0,0.4,1,1,1,1,1,1,0,0.0,1.0,,
scripted_oracle,20260438,corporate_6b1664,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.0,,
scripted_oracle,20260439,alibi_a6c582,easy,0.8800000000000001,1.0,0.4,1,1,1,1,1,1,0,0.0,3.0,,
scripted_oracle,20260440,workplace_835476,easy,0.8800000000000001,1.0,0.4,1,1,1,1,1,1,0,0.0,1.0,,
scripted_oracle,20260441,possession_a079c5,hard,1.0,1.0,1.2000000000000002,3,3,3,3,3,3,0,0.0,1.0,,
scripted_oracle,20260442,possession_9cc45d,hard,1.0,1.0,1.2000000000000002,3,3,3,3,3,3,0,0.0,1.0,,
scripted_oracle,20260443,possession_259aa5,easy,0.8800000000000001,1.0,0.4,1,1,1,1,1,1,0,0.0,1.0,,
scripted_oracle,20260444,corporate_76724c,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.0,,
scripted_oracle,20260445,timeline_767821,medium,0.49000000000000005,0.5,0.45,2,1,1,2,2,1,1,0.0,1.0,,
scripted_oracle,20260446,motive_c0d166,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.0,,
scripted_oracle,20260447,corporate_307934,hard,1.0,1.0,1.2000000000000002,3,3,3,3,3,3,0,0.0,1.0,,
scripted_oracle,20260448,timeline_592816,hard,1.0,1.0,1.2000000000000002,3,3,3,3,3,3,0,0.0,1.0,,
scripted_oracle,20260449,knowledge_b26824,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.5,,
scripted_oracle,20260450,knowledge_697785,hard,1.0,1.0,1.2000000000000002,3,3,3,3,3,3,0,0.0,1.6666666666666667,,
scripted_oracle,20260451,timeline_81dafd,medium,0.49000000000000005,0.5,0.45,2,1,1,2,2,1,1,0.0,1.0,,
scripted_oracle,20260452,corporate_8eb7d7,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.0,,
scripted_oracle,20260453,possession_dbb5fe,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.0,,
scripted_oracle,20260454,alibi_a4666f,hard,1.0,1.0,1.2000000000000002,3,3,3,3,3,3,0,0.0,2.3333333333333335,,
trained_sft_grpo_run2,20260425,timeline_255d67,easy,0.010000000000000004,0.0,0.05000000000000002,1,1,0,7,0,0,0,0.7142857142857143,39.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260426,knowledge_b28f8c,medium,0.42000000000000004,0.5,0.09999999999999998,2,1,1,7,1,1,0,0.8571428571428571,45.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260427,workplace_c98377,easy,0.8200000000000001,1.0,0.09999999999999998,1,1,1,7,1,1,0,0.8571428571428571,31.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260428,motive_66ff59,hard,0.2866666666666667,0.3333333333333333,0.09999999999999998,3,1,1,7,1,1,0,0.8571428571428571,31.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260429,timeline_19bb78,easy,0.010000000000000004,0.0,0.05000000000000002,1,1,0,7,0,0,0,0.7142857142857143,31.428571428571427,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260430,timeline_a97690,medium,0.010000000000000004,0.0,0.05000000000000002,2,1,0,7,0,0,0,0.7142857142857143,36.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260431,alibi_67ffcd,medium,0.42000000000000004,0.5,0.09999999999999998,2,1,1,7,1,1,0,0.8571428571428571,35.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260432,alibi_423bca,medium,0.42000000000000004,0.5,0.09999999999999998,2,1,1,7,1,1,0,0.8571428571428571,39.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260433,knowledge_960d07,medium,0.42000000000000004,0.5,0.09999999999999998,2,1,1,7,1,1,0,0.8571428571428571,39.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260434,alibi_e829c1,easy,0.8200000000000001,1.0,0.09999999999999998,1,1,1,7,1,1,0,0.8571428571428571,37.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260435,motive_85e25b,hard,0.2866666666666667,0.3333333333333333,0.09999999999999998,3,1,1,7,1,1,0,0.8571428571428571,32.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260436,knowledge_a599e3,medium,0.42000000000000004,0.5,0.09999999999999998,2,1,1,7,1,1,0,0.8571428571428571,39.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260437,motive_8bca20,easy,0.8200000000000001,1.0,0.09999999999999998,1,1,1,7,1,1,0,0.8571428571428571,30.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260438,corporate_6b1664,medium,0.42000000000000004,0.5,0.09999999999999998,2,1,1,7,1,1,0,0.8571428571428571,27.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260439,alibi_a6c582,easy,0.8200000000000001,1.0,0.09999999999999998,1,1,1,7,1,1,0,0.8571428571428571,38.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260440,workplace_835476,easy,0.8200000000000001,1.0,0.09999999999999998,1,1,1,7,1,1,0,0.8571428571428571,30.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260441,possession_a079c5,hard,0.2866666666666667,0.3333333333333333,0.09999999999999998,3,1,1,7,1,1,0,0.8571428571428571,47.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260442,possession_9cc45d,hard,0.2866666666666667,0.3333333333333333,0.09999999999999998,3,1,1,7,1,1,0,0.8571428571428571,43.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260443,possession_259aa5,easy,0.8200000000000001,1.0,0.09999999999999998,1,1,1,7,1,1,0,0.8571428571428571,42.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260444,corporate_76724c,medium,0.42000000000000004,0.5,0.09999999999999998,2,1,1,7,1,1,0,0.8571428571428571,27.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260445,timeline_767821,medium,0.010000000000000004,0.0,0.05000000000000002,2,1,0,7,0,0,0,0.7142857142857143,44.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260446,motive_c0d166,medium,0.42000000000000004,0.5,0.09999999999999998,2,1,1,7,1,1,0,0.8571428571428571,35.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260447,corporate_307934,hard,0.2866666666666667,0.3333333333333333,0.09999999999999998,3,1,1,7,1,1,0,0.8571428571428571,27.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260448,timeline_592816,hard,0.010000000000000004,0.0,0.05000000000000002,3,1,0,7,0,0,0,0.7142857142857143,39.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260449,knowledge_b26824,medium,0.42000000000000004,0.5,0.09999999999999998,2,1,1,7,1,1,0,0.8571428571428571,38.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260450,knowledge_697785,hard,0.2866666666666667,0.3333333333333333,0.09999999999999998,3,1,1,7,1,1,0,0.8571428571428571,43.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260451,timeline_81dafd,medium,0.010000000000000004,0.0,0.05000000000000002,2,1,0,7,0,0,0,0.7142857142857143,38.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260452,corporate_8eb7d7,medium,0.42000000000000004,0.5,0.09999999999999998,2,1,1,7,1,1,0,0.8571428571428571,27.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260453,possession_dbb5fe,medium,0.42000000000000004,0.5,0.09999999999999998,2,1,1,7,1,1,0,0.8571428571428571,45.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260454,alibi_a4666f,hard,0.2866666666666667,0.3333333333333333,0.09999999999999998,3,1,1,7,1,1,0,0.8571428571428571,39.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
1 agent seed case_id difficulty reward primary_reward auxiliary_reward contradictions_total contradictions_triggered contradictions_surfaced questions_used evidence_presented evidence_timing_successes blind_evidence_count useless_questions_ratio avg_question_length model_repo invalid_tool_calls
2 random 20260425 timeline_255d67 easy 0.0 0.0 -0.4 1 0 0 6 2 0 2 1.0 5.0
3 random 20260426 knowledge_b28f8c medium 0.0 0.0 -0.4 2 0 0 6 2 0 2 1.0 5.0
4 random 20260427 workplace_c98377 easy 0.0 0.0 -0.4 1 0 0 3 5 0 5 1.0 5.0
5 random 20260428 motive_66ff59 hard 0.0 0.0 -0.4 3 0 0 7 1 0 1 1.0 5.0
6 random 20260429 timeline_19bb78 easy 0.0 0.0 -0.4 1 0 0 4 4 0 4 1.0 5.0
7 random 20260430 timeline_a97690 medium 0.0 0.0 -0.4 2 0 0 6 2 0 2 1.0 5.0
8 random 20260431 alibi_67ffcd medium 0.0 0.0 -0.4 2 0 0 6 2 0 2 1.0 5.0
9 random 20260432 alibi_423bca medium 0.0 0.0 -0.4 2 0 0 5 3 0 3 1.0 5.0
10 random 20260433 knowledge_960d07 medium 0.0 0.0 -0.4 2 0 0 8 0 0 0 1.0 5.0
11 random 20260434 alibi_e829c1 easy 0.0 0.0 -0.4 1 0 0 7 1 0 1 1.0 5.0
12 random 20260435 motive_85e25b hard 0.0 0.0 -0.4 3 0 0 5 3 0 3 1.0 5.0
13 random 20260436 knowledge_a599e3 medium 0.0 0.0 -0.4 2 0 0 4 4 0 4 1.0 5.0
14 random 20260437 motive_8bca20 easy 0.0 0.0 -0.4 1 0 0 6 2 0 2 1.0 5.0
15 random 20260438 corporate_6b1664 medium 0.0 0.0 -0.4 2 0 0 6 2 0 2 1.0 5.0
16 random 20260439 alibi_a6c582 easy 0.0 0.0 -0.4 1 0 0 8 0 0 0 1.0 5.0
17 random 20260440 workplace_835476 easy 0.0 0.0 -0.4 1 0 0 5 3 0 3 1.0 5.0
18 random 20260441 possession_a079c5 hard 0.0 0.0 -0.4 3 0 0 8 0 0 0 1.0 5.0
19 random 20260442 possession_9cc45d hard 0.0 0.0 -0.4 3 0 0 5 3 0 3 1.0 5.0
20 random 20260443 possession_259aa5 easy 0.0 0.0 -0.4 1 0 0 4 4 0 4 1.0 5.0
21 random 20260444 corporate_76724c medium 0.0 0.0 -0.4 2 0 0 5 3 0 3 1.0 5.0
22 random 20260445 timeline_767821 medium 0.0 0.0 -0.4 2 0 0 5 3 0 3 1.0 5.0
23 random 20260446 motive_c0d166 medium 0.0 0.0 -0.4 2 0 0 5 3 0 3 1.0 5.0
24 random 20260447 corporate_307934 hard 0.0 0.0 -0.4 3 0 0 7 1 0 1 1.0 5.0
25 random 20260448 timeline_592816 hard 0.0 0.0 -0.4 3 0 0 6 2 0 2 1.0 5.0
26 random 20260449 knowledge_b26824 medium 0.0 0.0 -0.4 2 0 0 5 3 0 3 1.0 5.0
27 random 20260450 knowledge_697785 hard 0.0 0.0 -0.4 3 0 0 7 1 0 1 1.0 5.0
28 random 20260451 timeline_81dafd medium 0.0 0.0 -0.4 2 0 0 4 4 0 4 1.0 5.0
29 random 20260452 corporate_8eb7d7 medium 0.0 0.0 -0.4 2 0 0 4 4 0 4 1.0 5.0
30 random 20260453 possession_dbb5fe medium 0.0 0.0 -0.4 2 0 0 5 3 0 3 1.0 5.0
31 random 20260454 alibi_a4666f hard 0.0 0.0 -0.4 3 0 0 8 0 0 0 1.0 5.0
32 keyword_spam 20260425 timeline_255d67 easy 0.030000000000000006 0.0 0.15000000000000002 1 1 0 5 0 0 0 0.6 4.2
33 keyword_spam 20260426 knowledge_b28f8c medium 0.12000000000000002 0.0 0.6000000000000001 2 2 0 5 0 0 0 0.4 4.2
34 keyword_spam 20260427 workplace_c98377 easy 0.0 0.0 -0.2 1 0 0 5 0 0 0 0.8 4.2
35 keyword_spam 20260428 motive_66ff59 hard 0.17000000000000004 0.0 0.8500000000000001 3 3 0 5 0 0 0 0.2 4.2
36 keyword_spam 20260429 timeline_19bb78 easy 0.030000000000000006 0.0 0.15000000000000002 1 1 0 5 0 0 0 0.6 4.2
37 keyword_spam 20260430 timeline_a97690 medium 0.13 0.0 0.65 2 2 0 5 0 0 0 0.2 4.2
38 keyword_spam 20260431 alibi_67ffcd medium 0.04000000000000001 0.0 0.20000000000000004 2 1 0 5 0 0 0 0.4 4.2
39 keyword_spam 20260432 alibi_423bca medium 0.04000000000000001 0.0 0.20000000000000004 2 1 0 5 0 0 0 0.4 4.2
40 keyword_spam 20260433 knowledge_960d07 medium 0.12000000000000002 0.0 0.6000000000000001 2 2 0 5 0 0 0 0.4 4.2
41 keyword_spam 20260434 alibi_e829c1 easy 0.04000000000000001 0.0 0.20000000000000004 1 1 0 5 0 0 0 0.4 4.2
42 keyword_spam 20260435 motive_85e25b hard 0.17000000000000004 0.0 0.8500000000000001 3 3 0 5 0 0 0 0.2 4.2
43 keyword_spam 20260436 knowledge_a599e3 medium 0.12000000000000002 0.0 0.6000000000000001 2 2 0 5 0 0 0 0.4 4.2
44 keyword_spam 20260437 motive_8bca20 easy 0.0 0.0 -0.15000000000000002 1 0 0 5 0 0 0 0.6 4.2
45 keyword_spam 20260438 corporate_6b1664 medium 0.020000000000000007 0.0 0.10000000000000003 2 1 0 5 0 0 0 0.8 4.2
46 keyword_spam 20260439 alibi_a6c582 easy 0.04000000000000001 0.0 0.20000000000000004 1 1 0 5 0 0 0 0.4 4.2
47 keyword_spam 20260440 workplace_835476 easy 0.0 0.0 -0.2 1 0 0 5 0 0 0 0.8 4.2
48 keyword_spam 20260441 possession_a079c5 hard 0.030000000000000006 0.0 0.15000000000000002 3 1 0 5 0 0 0 0.6 4.2
49 keyword_spam 20260442 possession_9cc45d hard 0.030000000000000006 0.0 0.15000000000000002 3 1 0 5 0 0 0 0.6 4.2
50 keyword_spam 20260443 possession_259aa5 easy 0.0 0.0 -0.2 1 0 0 5 0 0 0 0.8 4.2
51 keyword_spam 20260444 corporate_76724c medium 0.020000000000000007 0.0 0.10000000000000003 2 1 0 5 0 0 0 0.8 4.2
52 keyword_spam 20260445 timeline_767821 medium 0.13 0.0 0.65 2 2 0 5 0 0 0 0.2 4.2
53 keyword_spam 20260446 motive_c0d166 medium 0.13 0.0 0.65 2 2 0 5 0 0 0 0.2 4.2
54 keyword_spam 20260447 corporate_307934 hard 0.020000000000000007 0.0 0.10000000000000003 3 1 0 5 0 0 0 0.8 4.2
55 keyword_spam 20260448 timeline_592816 hard 0.19 0.0 0.95 3 3 0 5 0 0 0 0.2 4.2
56 keyword_spam 20260449 knowledge_b26824 medium 0.12000000000000002 0.0 0.6000000000000001 2 2 0 5 0 0 0 0.4 4.2
57 keyword_spam 20260450 knowledge_697785 hard 0.12000000000000002 0.0 0.6000000000000001 3 2 0 5 0 0 0 0.4 4.2
58 keyword_spam 20260451 timeline_81dafd medium 0.13 0.0 0.65 2 2 0 5 0 0 0 0.2 4.2
59 keyword_spam 20260452 corporate_8eb7d7 medium 0.020000000000000007 0.0 0.10000000000000003 2 1 0 5 0 0 0 0.8 4.2
60 keyword_spam 20260453 possession_dbb5fe medium 0.030000000000000006 0.0 0.15000000000000002 2 1 0 5 0 0 0 0.6 4.2
61 keyword_spam 20260454 alibi_a4666f hard 0.15000000000000002 0.0 0.75 3 2 0 5 0 0 0 0.2 4.2
62 present_all 20260425 timeline_255d67 easy 0.0 0.0 -0.25 1 0 0 0 5 0 5 0.0 0.0
63 present_all 20260426 knowledge_b28f8c medium 0.0 0.0 -0.2 2 0 0 0 4 0 4 0.0 0.0
64 present_all 20260427 workplace_c98377 easy 0.0 0.0 -0.25 1 0 0 0 5 0 5 0.0 0.0
65 present_all 20260428 motive_66ff59 hard 0.0 0.0 -0.35000000000000003 3 0 0 0 7 0 7 0.0 0.0
66 present_all 20260429 timeline_19bb78 easy 0.0 0.0 -0.25 1 0 0 0 5 0 5 0.0 0.0
67 present_all 20260430 timeline_a97690 medium 0.0 0.0 -0.25 2 0 0 0 5 0 5 0.0 0.0
68 present_all 20260431 alibi_67ffcd medium 0.0 0.0 -0.2 2 0 0 0 4 0 4 0.0 0.0
69 present_all 20260432 alibi_423bca medium 0.0 0.0 -0.2 2 0 0 0 4 0 4 0.0 0.0
70 present_all 20260433 knowledge_960d07 medium 0.0 0.0 -0.2 2 0 0 0 4 0 4 0.0 0.0
71 present_all 20260434 alibi_e829c1 easy 0.0 0.0 -0.2 1 0 0 0 4 0 4 0.0 0.0
72 present_all 20260435 motive_85e25b hard 0.0 0.0 -0.35000000000000003 3 0 0 0 7 0 7 0.0 0.0
73 present_all 20260436 knowledge_a599e3 medium 0.0 0.0 -0.2 2 0 0 0 4 0 4 0.0 0.0
74 present_all 20260437 motive_8bca20 easy 0.0 0.0 -0.25 1 0 0 0 5 0 5 0.0 0.0
75 present_all 20260438 corporate_6b1664 medium 0.0 0.0 -0.25 2 0 0 0 5 0 5 0.0 0.0
76 present_all 20260439 alibi_a6c582 easy 0.0 0.0 -0.2 1 0 0 0 4 0 4 0.0 0.0
77 present_all 20260440 workplace_835476 easy 0.0 0.0 -0.25 1 0 0 0 5 0 5 0.0 0.0
78 present_all 20260441 possession_a079c5 hard 0.0 0.0 -0.30000000000000004 3 0 0 0 6 0 6 0.0 0.0
79 present_all 20260442 possession_9cc45d hard 0.0 0.0 -0.30000000000000004 3 0 0 0 6 0 6 0.0 0.0
80 present_all 20260443 possession_259aa5 easy 0.0 0.0 -0.2 1 0 0 0 4 0 4 0.0 0.0
81 present_all 20260444 corporate_76724c medium 0.0 0.0 -0.25 2 0 0 0 5 0 5 0.0 0.0
82 present_all 20260445 timeline_767821 medium 0.0 0.0 -0.25 2 0 0 0 5 0 5 0.0 0.0
83 present_all 20260446 motive_c0d166 medium 0.0 0.0 -0.25 2 0 0 0 5 0 5 0.0 0.0
84 present_all 20260447 corporate_307934 hard 0.0 0.0 -0.35000000000000003 3 0 0 0 7 0 7 0.0 0.0
85 present_all 20260448 timeline_592816 hard 0.0 0.0 -0.35000000000000003 3 0 0 0 7 0 7 0.0 0.0
86 present_all 20260449 knowledge_b26824 medium 0.0 0.0 -0.2 2 0 0 0 4 0 4 0.0 0.0
87 present_all 20260450 knowledge_697785 hard 0.0 0.0 -0.30000000000000004 3 0 0 0 6 0 6 0.0 0.0
88 present_all 20260451 timeline_81dafd medium 0.0 0.0 -0.25 2 0 0 0 5 0 5 0.0 0.0
89 present_all 20260452 corporate_8eb7d7 medium 0.0 0.0 -0.25 2 0 0 0 5 0 5 0.0 0.0
90 present_all 20260453 possession_dbb5fe medium 0.0 0.0 -0.2 2 0 0 0 4 0 4 0.0 0.0
91 present_all 20260454 alibi_a4666f hard 0.0 0.0 -0.30000000000000004 3 0 0 0 6 0 6 0.0 0.0
92 scripted_oracle 20260425 timeline_255d67 easy 0.8800000000000001 1.0 0.4 1 1 1 1 1 1 0 0.0 1.0
93 scripted_oracle 20260426 knowledge_b28f8c medium 0.9600000000000001 1.0 0.8 2 2 2 2 2 2 0 0.0 1.5
94 scripted_oracle 20260427 workplace_c98377 easy 0.8800000000000001 1.0 0.4 1 1 1 1 1 1 0 0.0 1.0
95 scripted_oracle 20260428 motive_66ff59 hard 1.0 1.0 1.2000000000000002 3 3 3 3 3 3 0 0.0 1.0
96 scripted_oracle 20260429 timeline_19bb78 easy 0.8800000000000001 1.0 0.4 1 1 1 1 1 1 0 0.0 1.0
97 scripted_oracle 20260430 timeline_a97690 medium 0.49000000000000005 0.5 0.45 2 1 1 2 2 1 1 0.0 1.0
98 scripted_oracle 20260431 alibi_67ffcd medium 0.9600000000000001 1.0 0.8 2 2 2 2 2 2 0 0.0 3.0
99 scripted_oracle 20260432 alibi_423bca medium 0.9600000000000001 1.0 0.8 2 2 2 2 2 2 0 0.0 3.0
100 scripted_oracle 20260433 knowledge_960d07 medium 0.9600000000000001 1.0 0.8 2 2 2 2 2 2 0 0.0 1.5
101 scripted_oracle 20260434 alibi_e829c1 easy 0.8800000000000001 1.0 0.4 1 1 1 1 1 1 0 0.0 3.0
102 scripted_oracle 20260435 motive_85e25b hard 1.0 1.0 1.2000000000000002 3 3 3 3 3 3 0 0.0 1.0
103 scripted_oracle 20260436 knowledge_a599e3 medium 0.9600000000000001 1.0 0.8 2 2 2 2 2 2 0 0.0 1.5
104 scripted_oracle 20260437 motive_8bca20 easy 0.8800000000000001 1.0 0.4 1 1 1 1 1 1 0 0.0 1.0
105 scripted_oracle 20260438 corporate_6b1664 medium 0.9600000000000001 1.0 0.8 2 2 2 2 2 2 0 0.0 1.0
106 scripted_oracle 20260439 alibi_a6c582 easy 0.8800000000000001 1.0 0.4 1 1 1 1 1 1 0 0.0 3.0
107 scripted_oracle 20260440 workplace_835476 easy 0.8800000000000001 1.0 0.4 1 1 1 1 1 1 0 0.0 1.0
108 scripted_oracle 20260441 possession_a079c5 hard 1.0 1.0 1.2000000000000002 3 3 3 3 3 3 0 0.0 1.0
109 scripted_oracle 20260442 possession_9cc45d hard 1.0 1.0 1.2000000000000002 3 3 3 3 3 3 0 0.0 1.0
110 scripted_oracle 20260443 possession_259aa5 easy 0.8800000000000001 1.0 0.4 1 1 1 1 1 1 0 0.0 1.0
111 scripted_oracle 20260444 corporate_76724c medium 0.9600000000000001 1.0 0.8 2 2 2 2 2 2 0 0.0 1.0
112 scripted_oracle 20260445 timeline_767821 medium 0.49000000000000005 0.5 0.45 2 1 1 2 2 1 1 0.0 1.0
113 scripted_oracle 20260446 motive_c0d166 medium 0.9600000000000001 1.0 0.8 2 2 2 2 2 2 0 0.0 1.0
114 scripted_oracle 20260447 corporate_307934 hard 1.0 1.0 1.2000000000000002 3 3 3 3 3 3 0 0.0 1.0
115 scripted_oracle 20260448 timeline_592816 hard 1.0 1.0 1.2000000000000002 3 3 3 3 3 3 0 0.0 1.0
116 scripted_oracle 20260449 knowledge_b26824 medium 0.9600000000000001 1.0 0.8 2 2 2 2 2 2 0 0.0 1.5
117 scripted_oracle 20260450 knowledge_697785 hard 1.0 1.0 1.2000000000000002 3 3 3 3 3 3 0 0.0 1.6666666666666667
118 scripted_oracle 20260451 timeline_81dafd medium 0.49000000000000005 0.5 0.45 2 1 1 2 2 1 1 0.0 1.0
119 scripted_oracle 20260452 corporate_8eb7d7 medium 0.9600000000000001 1.0 0.8 2 2 2 2 2 2 0 0.0 1.0
120 scripted_oracle 20260453 possession_dbb5fe medium 0.9600000000000001 1.0 0.8 2 2 2 2 2 2 0 0.0 1.0
121 scripted_oracle 20260454 alibi_a4666f hard 1.0 1.0 1.2000000000000002 3 3 3 3 3 3 0 0.0 2.3333333333333335
122 trained_sft_grpo_run2 20260425 timeline_255d67 easy 0.010000000000000004 0.0 0.05000000000000002 1 1 0 7 0 0 0 0.7142857142857143 39.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
123 trained_sft_grpo_run2 20260426 knowledge_b28f8c medium 0.42000000000000004 0.5 0.09999999999999998 2 1 1 7 1 1 0 0.8571428571428571 45.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
124 trained_sft_grpo_run2 20260427 workplace_c98377 easy 0.8200000000000001 1.0 0.09999999999999998 1 1 1 7 1 1 0 0.8571428571428571 31.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
125 trained_sft_grpo_run2 20260428 motive_66ff59 hard 0.2866666666666667 0.3333333333333333 0.09999999999999998 3 1 1 7 1 1 0 0.8571428571428571 31.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
126 trained_sft_grpo_run2 20260429 timeline_19bb78 easy 0.010000000000000004 0.0 0.05000000000000002 1 1 0 7 0 0 0 0.7142857142857143 31.428571428571427 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
127 trained_sft_grpo_run2 20260430 timeline_a97690 medium 0.010000000000000004 0.0 0.05000000000000002 2 1 0 7 0 0 0 0.7142857142857143 36.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
128 trained_sft_grpo_run2 20260431 alibi_67ffcd medium 0.42000000000000004 0.5 0.09999999999999998 2 1 1 7 1 1 0 0.8571428571428571 35.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
129 trained_sft_grpo_run2 20260432 alibi_423bca medium 0.42000000000000004 0.5 0.09999999999999998 2 1 1 7 1 1 0 0.8571428571428571 39.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
130 trained_sft_grpo_run2 20260433 knowledge_960d07 medium 0.42000000000000004 0.5 0.09999999999999998 2 1 1 7 1 1 0 0.8571428571428571 39.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
131 trained_sft_grpo_run2 20260434 alibi_e829c1 easy 0.8200000000000001 1.0 0.09999999999999998 1 1 1 7 1 1 0 0.8571428571428571 37.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
132 trained_sft_grpo_run2 20260435 motive_85e25b hard 0.2866666666666667 0.3333333333333333 0.09999999999999998 3 1 1 7 1 1 0 0.8571428571428571 32.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
133 trained_sft_grpo_run2 20260436 knowledge_a599e3 medium 0.42000000000000004 0.5 0.09999999999999998 2 1 1 7 1 1 0 0.8571428571428571 39.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
134 trained_sft_grpo_run2 20260437 motive_8bca20 easy 0.8200000000000001 1.0 0.09999999999999998 1 1 1 7 1 1 0 0.8571428571428571 30.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
135 trained_sft_grpo_run2 20260438 corporate_6b1664 medium 0.42000000000000004 0.5 0.09999999999999998 2 1 1 7 1 1 0 0.8571428571428571 27.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
136 trained_sft_grpo_run2 20260439 alibi_a6c582 easy 0.8200000000000001 1.0 0.09999999999999998 1 1 1 7 1 1 0 0.8571428571428571 38.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
137 trained_sft_grpo_run2 20260440 workplace_835476 easy 0.8200000000000001 1.0 0.09999999999999998 1 1 1 7 1 1 0 0.8571428571428571 30.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
138 trained_sft_grpo_run2 20260441 possession_a079c5 hard 0.2866666666666667 0.3333333333333333 0.09999999999999998 3 1 1 7 1 1 0 0.8571428571428571 47.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
139 trained_sft_grpo_run2 20260442 possession_9cc45d hard 0.2866666666666667 0.3333333333333333 0.09999999999999998 3 1 1 7 1 1 0 0.8571428571428571 43.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
140 trained_sft_grpo_run2 20260443 possession_259aa5 easy 0.8200000000000001 1.0 0.09999999999999998 1 1 1 7 1 1 0 0.8571428571428571 42.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
141 trained_sft_grpo_run2 20260444 corporate_76724c medium 0.42000000000000004 0.5 0.09999999999999998 2 1 1 7 1 1 0 0.8571428571428571 27.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
142 trained_sft_grpo_run2 20260445 timeline_767821 medium 0.010000000000000004 0.0 0.05000000000000002 2 1 0 7 0 0 0 0.7142857142857143 44.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
143 trained_sft_grpo_run2 20260446 motive_c0d166 medium 0.42000000000000004 0.5 0.09999999999999998 2 1 1 7 1 1 0 0.8571428571428571 35.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
144 trained_sft_grpo_run2 20260447 corporate_307934 hard 0.2866666666666667 0.3333333333333333 0.09999999999999998 3 1 1 7 1 1 0 0.8571428571428571 27.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
145 trained_sft_grpo_run2 20260448 timeline_592816 hard 0.010000000000000004 0.0 0.05000000000000002 3 1 0 7 0 0 0 0.7142857142857143 39.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
146 trained_sft_grpo_run2 20260449 knowledge_b26824 medium 0.42000000000000004 0.5 0.09999999999999998 2 1 1 7 1 1 0 0.8571428571428571 38.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
147 trained_sft_grpo_run2 20260450 knowledge_697785 hard 0.2866666666666667 0.3333333333333333 0.09999999999999998 3 1 1 7 1 1 0 0.8571428571428571 43.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
148 trained_sft_grpo_run2 20260451 timeline_81dafd medium 0.010000000000000004 0.0 0.05000000000000002 2 1 0 7 0 0 0 0.7142857142857143 38.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
149 trained_sft_grpo_run2 20260452 corporate_8eb7d7 medium 0.42000000000000004 0.5 0.09999999999999998 2 1 1 7 1 1 0 0.8571428571428571 27.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
150 trained_sft_grpo_run2 20260453 possession_dbb5fe medium 0.42000000000000004 0.5 0.09999999999999998 2 1 1 7 1 1 0 0.8571428571428571 45.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
151 trained_sft_grpo_run2 20260454 alibi_a4666f hard 0.2866666666666667 0.3333333333333333 0.09999999999999998 3 1 1 7 1 1 0 0.8571428571428571 39.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0