初始化项目,由ModelHub XC社区提供模型

Model: heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-06-16 08:16:17 +08:00
commit d5d0e722af
93 changed files with 4106 additions and 0 deletions

39
.gitattributes vendored Normal file
View File

@@ -0,0 +1,39 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
checkpoint-125/tokenizer.json filter=lfs diff=lfs merge=lfs -text
checkpoint-250/tokenizer.json filter=lfs diff=lfs merge=lfs -text
sft_warm_start/tokenizer.json filter=lfs diff=lfs merge=lfs -text
tokenizer.json filter=lfs diff=lfs merge=lfs -text

67
README.md Normal file
View File

@@ -0,0 +1,67 @@
---
library_name: transformers
model_name: counsel-sft-grpo-output
tags:
- generated_from_trainer
- hf_jobs
- trl
- grpo
licence: license
---
# Model Card for counsel-sft-grpo-output
This model is a fine-tuned version of a Qwen3 causal language model checkpoint (`Qwen3ForCausalLM`); the base model identifier was not recorded when this card was generated.
It has been trained using [TRL](https://github.com/huggingface/trl).
## Quick start
```python
from transformers import pipeline
question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
generator = pipeline("text-generation", model="heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", device="cuda")
output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
print(output["generated_text"])
```
## Training procedure
This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
### Framework versions
- TRL: 1.2.0
- Transformers: 5.6.2
- Pytorch: 2.11.0
- Datasets: 4.8.4
- Tokenizers: 0.22.2
## Citations
Cite GRPO as:
```bibtex
@article{shao2024deepseekmath,
title = {{DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models}},
author = {Zhihong Shao and Peiyi Wang and Qihao Zhu and Runxin Xu and Junxiao Song and Mingchuan Zhang and Y. K. Li and Y. Wu and Daya Guo},
year = 2024,
eprint = {arXiv:2402.03300},
}
```
Cite TRL as:
```bibtex
@software{vonwerra2020trl,
title = {{TRL: Transformers Reinforcement Learning}},
author = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin},
license = {Apache-2.0},
url = {https://github.com/huggingface/trl},
year = {2020}
}
```

89
chat_template.jinja Normal file
View File

@@ -0,0 +1,89 @@
{#- Chat template: renders `messages` (and optional `tools`) into <|im_start|>role ... <|im_end|> turns. #}
{#- Every comment opens with a left-trim marker and is followed by a left-trimming tag, so comments add no output. #}
{#- With tools: emit one system turn holding the leading system message (if any) plus the tool signatures and call instructions. #}
{%- if tools %}
{{- '<|im_start|>system\n' }}
{%- if messages[0].role == 'system' %}
{{- messages[0].content + '\n\n' }}
{%- endif %}
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
{#- One JSON-encoded tool definition per line inside the <tools> block. #}
{%- for tool in tools %}
{{- "\n" }}
{{- tool | tojson }}
{%- endfor %}
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
{%- else %}
{#- Without tools: a leading system message is emitted as its own turn. #}
{%- if messages[0].role == 'system' %}
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{#- Walk the messages in reverse to find the last user message with string content that is not a wrapped <tool_response>. #}
{#- last_query_index defaults to the final message index when no such user message exists. #}
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
{%- for message in messages[::-1] %}
{%- set index = (messages|length - 1) - loop.index0 %}
{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
{%- set ns.multi_step_tool = false %}
{%- set ns.last_query_index = index %}
{%- endif %}
{%- endfor %}
{#- Main pass: render each message according to its role. #}
{%- for message in messages %}
{#- Non-string content is rendered as an empty string. #}
{%- if message.content is string %}
{%- set content = message.content %}
{%- else %}
{%- set content = '' %}
{%- endif %}
{#- User turns, and system messages other than the first, are emitted as-is (the first message, if system, was handled above). #}
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
{%- elif message.role == "assistant" %}
{#- Prefer an explicit string reasoning_content; otherwise split an inline <think>...</think> section out of content. #}
{%- set reasoning_content = '' %}
{%- if message.reasoning_content is string %}
{%- set reasoning_content = message.reasoning_content %}
{%- else %}
{%- if '</think>' in content %}
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
{%- endif %}
{%- endif %}
{#- Only assistant turns after the last user query keep a <think> block: the final message always, earlier ones only when reasoning is non-empty. #}
{%- if loop.index0 > ns.last_query_index %}
{%- if loop.last or (not loop.last and reasoning_content) %}
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
{%- else %}
{{- '<|im_start|>' + message.role + '\n' + content }}
{%- endif %}
{%- else %}
{{- '<|im_start|>' + message.role + '\n' + content }}
{%- endif %}
{#- Append each tool call as a <tool_call> JSON object. #}
{%- if message.tool_calls %}
{%- for tool_call in message.tool_calls %}
{#- A newline separates the call from preceding content or from the previous call. #}
{%- if (loop.first and content) or (not loop.first) %}
{{- '\n' }}
{%- endif %}
{#- Accept both the wrapped shape (with a `function` member) and the flat shape. #}
{%- if tool_call.function %}
{%- set tool_call = tool_call.function %}
{%- endif %}
{{- '<tool_call>\n{"name": "' }}
{{- tool_call.name }}
{{- '", "arguments": ' }}
{#- String arguments are emitted verbatim; anything else is JSON-encoded. #}
{%- if tool_call.arguments is string %}
{{- tool_call.arguments }}
{%- else %}
{{- tool_call.arguments | tojson }}
{%- endif %}
{{- '}\n</tool_call>' }}
{%- endfor %}
{%- endif %}
{{- '<|im_end|>\n' }}
{%- elif message.role == "tool" %}
{#- Consecutive tool messages share one user turn: open it on the first of a run, close it after the last. #}
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
{{- '<|im_start|>user' }}
{%- endif %}
{{- '\n<tool_response>\n' }}
{{- content }}
{{- '\n</tool_response>' }}
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
{{- '<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{%- endfor %}
{#- Generation prompt: open an assistant turn; when enable_thinking is defined and false, pre-fill an empty <think> block. #}
{%- if add_generation_prompt %}
{{- '<|im_start|>assistant\n' }}
{%- if enable_thinking is defined and enable_thinking is false %}
{{- '<think>\n\n</think>\n\n' }}
{%- endif %}
{%- endif %}

View File

@@ -0,0 +1,89 @@
{#- Chat template: renders `messages` (and optional `tools`) into <|im_start|>role ... <|im_end|> turns. #}
{#- Every comment opens with a left-trim marker and is followed by a left-trimming tag, so comments add no output. #}
{#- With tools: emit one system turn holding the leading system message (if any) plus the tool signatures and call instructions. #}
{%- if tools %}
{{- '<|im_start|>system\n' }}
{%- if messages[0].role == 'system' %}
{{- messages[0].content + '\n\n' }}
{%- endif %}
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
{#- One JSON-encoded tool definition per line inside the <tools> block. #}
{%- for tool in tools %}
{{- "\n" }}
{{- tool | tojson }}
{%- endfor %}
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
{%- else %}
{#- Without tools: a leading system message is emitted as its own turn. #}
{%- if messages[0].role == 'system' %}
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{#- Walk the messages in reverse to find the last user message with string content that is not a wrapped <tool_response>. #}
{#- last_query_index defaults to the final message index when no such user message exists. #}
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
{%- for message in messages[::-1] %}
{%- set index = (messages|length - 1) - loop.index0 %}
{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
{%- set ns.multi_step_tool = false %}
{%- set ns.last_query_index = index %}
{%- endif %}
{%- endfor %}
{#- Main pass: render each message according to its role. #}
{%- for message in messages %}
{#- Non-string content is rendered as an empty string. #}
{%- if message.content is string %}
{%- set content = message.content %}
{%- else %}
{%- set content = '' %}
{%- endif %}
{#- User turns, and system messages other than the first, are emitted as-is (the first message, if system, was handled above). #}
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
{%- elif message.role == "assistant" %}
{#- Prefer an explicit string reasoning_content; otherwise split an inline <think>...</think> section out of content. #}
{%- set reasoning_content = '' %}
{%- if message.reasoning_content is string %}
{%- set reasoning_content = message.reasoning_content %}
{%- else %}
{%- if '</think>' in content %}
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
{%- endif %}
{%- endif %}
{#- Only assistant turns after the last user query keep a <think> block: the final message always, earlier ones only when reasoning is non-empty. #}
{%- if loop.index0 > ns.last_query_index %}
{%- if loop.last or (not loop.last and reasoning_content) %}
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
{%- else %}
{{- '<|im_start|>' + message.role + '\n' + content }}
{%- endif %}
{%- else %}
{{- '<|im_start|>' + message.role + '\n' + content }}
{%- endif %}
{#- Append each tool call as a <tool_call> JSON object. #}
{%- if message.tool_calls %}
{%- for tool_call in message.tool_calls %}
{#- A newline separates the call from preceding content or from the previous call. #}
{%- if (loop.first and content) or (not loop.first) %}
{{- '\n' }}
{%- endif %}
{#- Accept both the wrapped shape (with a `function` member) and the flat shape. #}
{%- if tool_call.function %}
{%- set tool_call = tool_call.function %}
{%- endif %}
{{- '<tool_call>\n{"name": "' }}
{{- tool_call.name }}
{{- '", "arguments": ' }}
{#- String arguments are emitted verbatim; anything else is JSON-encoded. #}
{%- if tool_call.arguments is string %}
{{- tool_call.arguments }}
{%- else %}
{{- tool_call.arguments | tojson }}
{%- endif %}
{{- '}\n</tool_call>' }}
{%- endfor %}
{%- endif %}
{{- '<|im_end|>\n' }}
{%- elif message.role == "tool" %}
{#- Consecutive tool messages share one user turn: open it on the first of a run, close it after the last. #}
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
{{- '<|im_start|>user' }}
{%- endif %}
{{- '\n<tool_response>\n' }}
{{- content }}
{{- '\n</tool_response>' }}
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
{{- '<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{%- endfor %}
{#- Generation prompt: open an assistant turn; when enable_thinking is defined and false, pre-fill an empty <think> block. #}
{%- if add_generation_prompt %}
{{- '<|im_start|>assistant\n' }}
{%- if enable_thinking is defined and enable_thinking is false %}
{{- '<think>\n\n</think>\n\n' }}
{%- endif %}
{%- endif %}

View File

@@ -0,0 +1,63 @@
{
"architectures": [
"Qwen3ForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": null,
"dtype": "float32",
"eos_token_id": 151645,
"head_dim": 128,
"hidden_act": "silu",
"hidden_size": 1024,
"initializer_range": 0.02,
"intermediate_size": 3072,
"layer_types": [
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention"
],
"max_position_embeddings": 40960,
"max_window_layers": 28,
"model_type": "qwen3",
"num_attention_heads": 16,
"num_hidden_layers": 28,
"num_key_value_heads": 8,
"pad_token_id": 151643,
"rms_norm_eps": 1e-06,
"rope_parameters": {
"rope_theta": 1000000,
"rope_type": "default"
},
"sliding_window": null,
"tie_word_embeddings": true,
"transformers_version": "5.6.2",
"use_cache": false,
"use_sliding_window": false,
"vocab_size": 151936
}

View File

@@ -0,0 +1,12 @@
{
"do_sample": true,
"eos_token_id": [
151645,
151643
],
"pad_token_id": 151643,
"temperature": 0.6,
"top_k": 20,
"top_p": 0.95,
"transformers_version": "5.6.2"
}

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9cfa4edd146d430b8bbff0802fd8c777682b57609ee370304d76982a5919498b
size 2384234968

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2ca431c190b97d71d34b7476e9c0b446b1f7a0854090fdc7233f7e708432c5f4
size 4768669395

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ce9d06b6c01a6abb92a5b52d5d361d96821b8e35f68158be1830efadebe0f334
size 14645

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e084e51a71375a2e59178fefd4967d7cdc87b72fa22758078088a60489f32adb
size 1465

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506
size 11422650

View File

@@ -0,0 +1,78 @@
{
"add_prefix_space": false,
"backend": "tokenizers",
"bos_token": null,
"clean_up_tokenization_spaces": false,
"eos_token": "<|im_end|>",
"errors": "replace",
"extra_special_tokens": [
"<|im_start|>",
"<|im_end|>",
"<|object_ref_start|>",
"<|object_ref_end|>",
"<|box_start|>",
"<|box_end|>",
"<|quad_start|>",
"<|quad_end|>",
"<|vision_start|>",
"<|vision_end|>",
"<|vision_pad|>",
"<|image_pad|>",
"<|video_pad|>"
],
"is_local": true,
"local_files_only": false,
"max_length": 3072,
"model_max_length": 131072,
"pad_token": "<|endoftext|>",
"padding_side": "left",
"response_schema": {
"properties": {
"content": {
"type": "string"
},
"reasoning_content": {
"type": "string"
},
"role": {
"const": "assistant"
},
"tool_calls": {
"items": {
"properties": {
"function": {
"properties": {
"arguments": {
"additionalProperties": {},
"type": "object"
},
"name": {
"type": "string"
}
},
"type": "object"
},
"type": {
"const": "function"
}
},
"type": "object",
"x-parser": "json",
"x-parser-args": {
"transform": "{type: 'function', function: @}"
}
},
"type": "array",
"x-regex-iterator": "<tool_call>\\s*(.+?)\\s*</tool_call>"
}
},
"type": "object",
"x-regex": "^(?:<think>\\n?(?:(?P<reasoning_content>.*?\\S.*?)\\n?|[\\s]*)</think>\\s*)?(?P<content>.*?)(?:\\n(?=<tool_call>))?(?=(?:<tool_call>|<\\|im_end\\|>|$))(?P<tool_calls>(?:<tool_call>.+?</tool_call>\\s*)+)?\\s*(?:<\\|im_end\\|>|$)"
},
"split_special_tokens": false,
"stride": 0,
"tokenizer_class": "Qwen2Tokenizer",
"truncation_side": "left",
"truncation_strategy": "longest_first",
"unk_token": null
}

View File

@@ -0,0 +1,784 @@
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.78125,
"eval_steps": 500,
"global_step": 125,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.6,
"completions/max_length": 199.6,
"completions/max_terminated_length": 134.0,
"completions/mean_length": 171.9,
"completions/mean_terminated_length": 122.36666870117188,
"completions/min_length": 113.0,
"completions/min_terminated_length": 113.0,
"entropy": 0.06970996516756714,
"epoch": 0.03125,
"frac_reward_zero_std": 0.6,
"grad_norm": 3.630038261413574,
"kl": 0.00014932112862879875,
"learning_rate": 4.92e-06,
"loss": 0.029165178537368774,
"num_tokens": 15758.0,
"reward": -0.31389998495578764,
"reward_std": 0.2122000053524971,
"rewards/reward_func/mean": -0.31389998495578764,
"rewards/reward_func/std": 0.21219999492168426,
"step": 5,
"step_time": 14.728857926794444,
"tools/call_frequency": 3.45,
"tools/failure_frequency": 0.21573015451431274
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 168.6,
"completions/max_terminated_length": 168.6,
"completions/mean_length": 148.3,
"completions/mean_terminated_length": 148.3,
"completions/min_length": 129.6,
"completions/min_terminated_length": 129.6,
"entropy": 0.042718362715095284,
"epoch": 0.0625,
"frac_reward_zero_std": 0.2,
"grad_norm": 3.325033187866211,
"kl": 0.037860750965774057,
"learning_rate": 4.8200000000000004e-06,
"loss": -0.011221970617771148,
"num_tokens": 31053.0,
"reward": 0.2989000082015991,
"reward_std": 0.4415143087506294,
"rewards/reward_func/mean": 0.2989000082015991,
"rewards/reward_func/std": 0.4415143221616745,
"step": 10,
"step_time": 9.975367512006779,
"tools/call_frequency": 2.5,
"tools/failure_frequency": 0.0
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 152.8,
"completions/max_terminated_length": 152.8,
"completions/mean_length": 131.3,
"completions/mean_terminated_length": 131.3,
"completions/min_length": 113.6,
"completions/min_terminated_length": 113.6,
"entropy": 0.016039706021547317,
"epoch": 0.09375,
"frac_reward_zero_std": 0.4,
"grad_norm": 1.1289054155349731,
"kl": 0.06640795171260834,
"learning_rate": 4.7200000000000005e-06,
"loss": 0.04752160608768463,
"num_tokens": 45857.0,
"reward": 1.1023000121116637,
"reward_std": 0.4320605039596558,
"rewards/reward_func/mean": 1.1023000121116637,
"rewards/reward_func/std": 0.43206052780151366,
"step": 15,
"step_time": 8.620344271202338,
"tools/call_frequency": 2.35,
"tools/failure_frequency": 0.0
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1,
"completions/max_length": 172.0,
"completions/max_terminated_length": 172.0,
"completions/mean_length": 136.55,
"completions/mean_terminated_length": 137.2500030517578,
"completions/min_length": 111.4,
"completions/min_terminated_length": 111.4,
"entropy": 0.027425602450966834,
"epoch": 0.125,
"frac_reward_zero_std": 0.6,
"grad_norm": 0.8991426229476929,
"kl": 0.09577701878733932,
"learning_rate": 4.620000000000001e-06,
"loss": -0.1201351523399353,
"num_tokens": 60826.0,
"reward": 0.7200000047683716,
"reward_std": 0.3419178485870361,
"rewards/reward_func/mean": 0.7200000047683716,
"rewards/reward_func/std": 0.3419178485870361,
"step": 20,
"step_time": 11.403528443601681,
"tools/call_frequency": 2.05,
"tools/failure_frequency": 0.026666668057441712
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.35,
"completions/max_length": 203.2,
"completions/max_terminated_length": 161.2,
"completions/mean_length": 188.6,
"completions/mean_terminated_length": 150.6666687011719,
"completions/min_length": 173.4,
"completions/min_terminated_length": 139.6,
"entropy": 0.033282498246990144,
"epoch": 0.15625,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.041987657546997,
"kl": 0.051508421916514634,
"learning_rate": 4.520000000000001e-06,
"loss": 0.03198407888412476,
"num_tokens": 76838.0,
"reward": 1.2669333696365357,
"reward_std": 0.3234894543886185,
"rewards/reward_func/mean": 1.2669333696365357,
"rewards/reward_func/std": 0.32348946332931516,
"step": 25,
"step_time": 13.736867211584467,
"tools/call_frequency": 4.4,
"tools/failure_frequency": 0.14583333432674409
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.65,
"completions/max_length": 209.4,
"completions/max_terminated_length": 160.8,
"completions/mean_length": 192.45,
"completions/mean_terminated_length": 148.3,
"completions/min_length": 169.2,
"completions/min_terminated_length": 135.8,
"entropy": 0.04025774166220799,
"epoch": 0.1875,
"frac_reward_zero_std": 0.4,
"grad_norm": 1.6383038759231567,
"kl": 0.09242036554496735,
"learning_rate": 4.42e-06,
"loss": -0.03659022152423859,
"num_tokens": 93054.0,
"reward": 1.0333500146865844,
"reward_std": 0.38981522917747496,
"rewards/reward_func/mean": 1.0333500146865844,
"rewards/reward_func/std": 0.389815217256546,
"step": 30,
"step_time": 14.735964270806289,
"tools/call_frequency": 3.85,
"tools/failure_frequency": 0.023529411852359773
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.7,
"completions/max_length": 207.4,
"completions/max_terminated_length": 168.8,
"completions/mean_length": 196.4,
"completions/mean_terminated_length": 166.7,
"completions/min_length": 191.2,
"completions/min_terminated_length": 164.6,
"entropy": 0.02646293715806678,
"epoch": 0.21875,
"frac_reward_zero_std": 0.2,
"grad_norm": 0.6842532157897949,
"kl": 0.09354882184416055,
"learning_rate": 4.32e-06,
"loss": 0.014650090038776398,
"num_tokens": 109141.0,
"reward": 1.0134333491325378,
"reward_std": 0.28623148798942566,
"rewards/reward_func/mean": 1.0134333491325378,
"rewards/reward_func/std": 0.2862314820289612,
"step": 35,
"step_time": 14.25194917320332,
"tools/call_frequency": 3.95,
"tools/failure_frequency": 0.0
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.7,
"completions/max_length": 228.8,
"completions/max_terminated_length": 163.6,
"completions/mean_length": 207.8,
"completions/mean_terminated_length": 162.3,
"completions/min_length": 188.8,
"completions/min_terminated_length": 161.0,
"entropy": 0.049686831969302146,
"epoch": 0.25,
"frac_reward_zero_std": 0.4,
"grad_norm": 2.386836528778076,
"kl": 0.12552661653608083,
"learning_rate": 4.22e-06,
"loss": 0.023246073722839357,
"num_tokens": 125712.0,
"reward": 0.9764333426952362,
"reward_std": 0.3545127585530281,
"rewards/reward_func/mean": 0.9764333426952362,
"rewards/reward_func/std": 0.35451277494430544,
"step": 40,
"step_time": 16.735324517198023,
"tools/call_frequency": 3.45,
"tools/failure_frequency": 0.02857142984867096
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.8,
"completions/max_length": 231.4,
"completions/max_terminated_length": 127.2,
"completions/mean_length": 209.4,
"completions/mean_terminated_length": 124.2,
"completions/min_length": 187.2,
"completions/min_terminated_length": 121.2,
"entropy": 0.14096241008955984,
"epoch": 0.28125,
"frac_reward_zero_std": 0.2,
"grad_norm": 5.072839260101318,
"kl": 0.10897002797573804,
"learning_rate": 4.12e-06,
"loss": 0.05337468385696411,
"num_tokens": 142131.0,
"reward": 1.0291000008583069,
"reward_std": 0.5297403573989868,
"rewards/reward_func/mean": 1.0291000008583069,
"rewards/reward_func/std": 0.5297403573989868,
"step": 45,
"step_time": 17.371078941601446,
"tools/call_frequency": 3.4,
"tools/failure_frequency": 0.01428571492433548
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.6,
"completions/max_length": 237.2,
"completions/max_terminated_length": 172.2,
"completions/mean_length": 201.7,
"completions/mean_terminated_length": 160.46666870117187,
"completions/min_length": 186.0,
"completions/min_terminated_length": 150.8,
"entropy": 0.1540619947016239,
"epoch": 0.3125,
"frac_reward_zero_std": 0.0,
"grad_norm": 5.2555952072143555,
"kl": 0.17082785218954086,
"learning_rate": 4.0200000000000005e-06,
"loss": 0.06733548641204834,
"num_tokens": 158431.0,
"reward": 0.8427666783332824,
"reward_std": 0.6860074520111084,
"rewards/reward_func/mean": 0.8427666783332824,
"rewards/reward_func/std": 0.6860074281692505,
"step": 50,
"step_time": 17.60776922639343,
"tools/call_frequency": 3.6,
"tools/failure_frequency": 0.027619048953056335
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.55,
"completions/max_length": 209.2,
"completions/max_terminated_length": 209.2,
"completions/mean_length": 198.0,
"completions/mean_terminated_length": 206.43333435058594,
"completions/min_length": 189.4,
"completions/min_terminated_length": 203.8,
"entropy": 0.11417091116309167,
"epoch": 0.34375,
"frac_reward_zero_std": 0.8,
"grad_norm": 0.1612984985113144,
"kl": 0.14481508396565915,
"learning_rate": 3.920000000000001e-06,
"loss": -0.0013940947130322457,
"num_tokens": 174665.0,
"reward": 1.337833333015442,
"reward_std": 0.04058598577976227,
"rewards/reward_func/mean": 1.337833333015442,
"rewards/reward_func/std": 0.04058598577976227,
"step": 55,
"step_time": 13.894916865596315,
"tools/call_frequency": 3.9,
"tools/failure_frequency": 0.0
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.35,
"completions/max_length": 210.6,
"completions/max_terminated_length": 210.0,
"completions/mean_length": 189.7,
"completions/mean_terminated_length": 184.93333435058594,
"completions/min_length": 152.0,
"completions/min_terminated_length": 153.4,
"entropy": 0.18207021439447998,
"epoch": 0.375,
"frac_reward_zero_std": 0.2,
"grad_norm": 7.57163667678833,
"kl": 0.2769763808697462,
"learning_rate": 3.820000000000001e-06,
"loss": -0.08738029599189759,
"num_tokens": 190974.0,
"reward": 0.9539999723434448,
"reward_std": 0.24900673925876618,
"rewards/reward_func/mean": 0.9539999723434448,
"rewards/reward_func/std": 0.2490067459642887,
"step": 60,
"step_time": 13.735741792595945,
"tools/call_frequency": 3.35,
"tools/failure_frequency": 0.0
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.55,
"completions/max_length": 213.6,
"completions/max_terminated_length": 186.6,
"completions/mean_length": 196.15,
"completions/mean_terminated_length": 181.2,
"completions/min_length": 173.2,
"completions/min_terminated_length": 177.0,
"entropy": 0.18931779703125357,
"epoch": 0.40625,
"frac_reward_zero_std": 0.2,
"grad_norm": 0.3368631601333618,
"kl": 0.19928277991712093,
"learning_rate": 3.7200000000000004e-06,
"loss": -0.03082091510295868,
"num_tokens": 207221.0,
"reward": 1.1948333382606506,
"reward_std": 0.3531351625919342,
"rewards/reward_func/mean": 1.1948333382606506,
"rewards/reward_func/std": 0.3531351566314697,
"step": 65,
"step_time": 14.853071747999639,
"tools/call_frequency": 3.45,
"tools/failure_frequency": 0.0
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.45,
"completions/max_length": 232.4,
"completions/max_terminated_length": 231.4,
"completions/mean_length": 214.8,
"completions/mean_terminated_length": 221.10000305175782,
"completions/min_length": 199.4,
"completions/min_terminated_length": 211.6,
"entropy": 0.20331259737722576,
"epoch": 0.4375,
"frac_reward_zero_std": 0.6,
"grad_norm": 3.155299663543701,
"kl": 0.21616111248731612,
"learning_rate": 3.62e-06,
"loss": -0.014388753473758698,
"num_tokens": 223949.0,
"reward": 1.187999999523163,
"reward_std": 0.06400000005960464,
"rewards/reward_func/mean": 1.187999999523163,
"rewards/reward_func/std": 0.06399999856948853,
"step": 70,
"step_time": 15.7972018689994,
"tools/call_frequency": 3.3,
"tools/failure_frequency": 0.0
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 228.0,
"completions/max_terminated_length": 180.2,
"completions/mean_length": 217.2,
"completions/mean_terminated_length": 174.9,
"completions/min_length": 208.2,
"completions/min_terminated_length": 169.6,
"entropy": 0.09874274502508343,
"epoch": 0.46875,
"frac_reward_zero_std": 0.4,
"grad_norm": 0.1496252417564392,
"kl": 0.19251887053251265,
"learning_rate": 3.52e-06,
"loss": 0.0129203662276268,
"num_tokens": 240663.0,
"reward": 1.166100013256073,
"reward_std": 0.27513332962989806,
"rewards/reward_func/mean": 1.166100013256073,
"rewards/reward_func/std": 0.275133341550827,
"step": 75,
"step_time": 15.653593644002104,
"tools/call_frequency": 3.15,
"tools/failure_frequency": 0.0
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.4,
"completions/max_length": 250.6,
"completions/max_terminated_length": 235.4,
"completions/mean_length": 217.2,
"completions/mean_terminated_length": 209.23333740234375,
"completions/min_length": 169.8,
"completions/min_terminated_length": 174.0,
"entropy": 0.18624852728098631,
"epoch": 0.5,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.9008662700653076,
"kl": 0.19779104925692081,
"learning_rate": 3.4200000000000007e-06,
"loss": -0.060715597867965695,
"num_tokens": 257232.0,
"reward": 1.094600009918213,
"reward_std": 0.533681058883667,
"rewards/reward_func/mean": 1.094600009918213,
"rewards/reward_func/std": 0.5336810708045959,
"step": 80,
"step_time": 16.87674882839783,
"tools/call_frequency": 2.7,
"tools/failure_frequency": 0.0
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.8,
"completions/max_length": 263.2,
"completions/max_terminated_length": 149.0,
"completions/mean_length": 238.9,
"completions/mean_terminated_length": 138.2,
"completions/min_length": 220.0,
"completions/min_terminated_length": 127.4,
"entropy": 0.06761846686713398,
"epoch": 0.53125,
"frac_reward_zero_std": 0.6,
"grad_norm": 0.0583312027156353,
"kl": 0.16298045124858618,
"learning_rate": 3.3200000000000004e-06,
"loss": 0.0317715585231781,
"num_tokens": 274377.0,
"reward": 1.168333351612091,
"reward_std": 0.21399999260902405,
"rewards/reward_func/mean": 1.168333351612091,
"rewards/reward_func/std": 0.214000004529953,
"step": 85,
"step_time": 19.253501980405417,
"tools/call_frequency": 2.45,
"tools/failure_frequency": 0.0
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9,
"completions/max_length": 252.0,
"completions/max_terminated_length": 79.6,
"completions/mean_length": 229.35,
"completions/mean_terminated_length": 79.6,
"completions/min_length": 212.6,
"completions/min_terminated_length": 79.6,
"entropy": 0.04304317501373589,
"epoch": 0.5625,
"frac_reward_zero_std": 0.4,
"grad_norm": 0.05765737593173981,
"kl": 0.1589741975069046,
"learning_rate": 3.2200000000000005e-06,
"loss": -0.009884151071310044,
"num_tokens": 291640.0,
"reward": 1.0771000266075135,
"reward_std": 0.2571271777153015,
"rewards/reward_func/mean": 1.0771000266075135,
"rewards/reward_func/std": 0.257127183675766,
"step": 90,
"step_time": 19.810263851404308,
"tools/call_frequency": 2.7,
"tools/failure_frequency": 0.0
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 252.2,
"completions/max_terminated_length": 155.6,
"completions/mean_length": 227.45,
"completions/mean_terminated_length": 154.7,
"completions/min_length": 202.6,
"completions/min_terminated_length": 153.8,
"entropy": 0.03856636304408312,
"epoch": 0.59375,
"frac_reward_zero_std": 0.2,
"grad_norm": 2.2899415493011475,
"kl": 0.18391469195485116,
"learning_rate": 3.12e-06,
"loss": 0.012278559803962707,
"num_tokens": 308671.0,
"reward": 0.9493666887283325,
"reward_std": 0.3057107627391815,
"rewards/reward_func/mean": 0.9493666887283325,
"rewards/reward_func/std": 0.3057107746601105,
"step": 95,
"step_time": 18.270148772597896,
"tools/call_frequency": 2.75,
"tools/failure_frequency": 0.0
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.65,
"completions/max_length": 229.0,
"completions/max_terminated_length": 128.6,
"completions/mean_length": 210.05,
"completions/mean_terminated_length": 121.23333435058593,
"completions/min_length": 191.4,
"completions/min_terminated_length": 114.6,
"entropy": 0.03718785918317735,
"epoch": 0.625,
"frac_reward_zero_std": 0.2,
"grad_norm": 1.4016427993774414,
"kl": 0.19276840873062612,
"learning_rate": 3.0200000000000003e-06,
"loss": -0.02043401300907135,
"num_tokens": 325246.0,
"reward": 0.9758000135421753,
"reward_std": 0.439729905128479,
"rewards/reward_func/mean": 0.9758000135421753,
"rewards/reward_func/std": 0.439729905128479,
"step": 100,
"step_time": 16.536685503809714,
"tools/call_frequency": 3.4,
"tools/failure_frequency": 0.0
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.45,
"completions/max_length": 231.0,
"completions/max_terminated_length": 175.6,
"completions/mean_length": 209.7,
"completions/mean_terminated_length": 164.73333435058595,
"completions/min_length": 197.2,
"completions/min_terminated_length": 156.2,
"entropy": 0.0890876273624599,
"epoch": 0.65625,
"frac_reward_zero_std": 0.6,
"grad_norm": 1.857412338256836,
"kl": 0.20793221928179265,
"learning_rate": 2.92e-06,
"loss": 0.010671529173851012,
"num_tokens": 341743.0,
"reward": 1.244200015068054,
"reward_std": 0.25437753796577456,
"rewards/reward_func/mean": 1.244200015068054,
"rewards/reward_func/std": 0.25437754988670347,
"step": 105,
"step_time": 14.550393618003,
"tools/call_frequency": 3.4,
"tools/failure_frequency": 0.0
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.6,
"completions/max_length": 228.0,
"completions/max_terminated_length": 215.0,
"completions/mean_length": 211.05,
"completions/mean_terminated_length": 204.6,
"completions/min_length": 194.2,
"completions/min_terminated_length": 194.2,
"entropy": 0.09650332322344184,
"epoch": 0.6875,
"frac_reward_zero_std": 0.0,
"grad_norm": 0.5915409922599792,
"kl": 0.1943995427340269,
"learning_rate": 2.82e-06,
"loss": -0.007803649455308914,
"num_tokens": 358474.0,
"reward": 0.9213667035102844,
"reward_std": 0.48010437488555907,
"rewards/reward_func/mean": 0.9213667035102844,
"rewards/reward_func/std": 0.480104398727417,
"step": 110,
"step_time": 15.29034832160105,
"tools/call_frequency": 3.3,
"tools/failure_frequency": 0.0
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 244.8,
"completions/max_terminated_length": 244.8,
"completions/mean_length": 224.4,
"completions/mean_terminated_length": 232.93333435058594,
"completions/min_length": 204.4,
"completions/min_terminated_length": 219.8,
"entropy": 0.06257005939260125,
"epoch": 0.71875,
"frac_reward_zero_std": 0.2,
"grad_norm": 0.09643584489822388,
"kl": 0.18671961799263953,
"learning_rate": 2.7200000000000002e-06,
"loss": 0.0009367348626255989,
"num_tokens": 375512.0,
"reward": 0.9198000192642212,
"reward_std": 0.41239041090011597,
"rewards/reward_func/mean": 0.9198000192642212,
"rewards/reward_func/std": 0.41239042282104493,
"step": 115,
"step_time": 16.68962257000094,
"tools/call_frequency": 3.05,
"tools/failure_frequency": 0.0
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 238.4,
"completions/max_terminated_length": 194.2,
"completions/mean_length": 221.6,
"completions/mean_terminated_length": 185.86666870117188,
"completions/min_length": 202.6,
"completions/min_terminated_length": 174.8,
"entropy": 0.19831047160550952,
"epoch": 0.75,
"frac_reward_zero_std": 0.4,
"grad_norm": 0.06480103731155396,
"kl": 0.2127195455133915,
"learning_rate": 2.6200000000000003e-06,
"loss": -0.002893347479403019,
"num_tokens": 392259.0,
"reward": 1.1177000164985658,
"reward_std": 0.34459384679794314,
"rewards/reward_func/mean": 1.1177000164985658,
"rewards/reward_func/std": 0.34459385871887205,
"step": 120,
"step_time": 15.74592421480629,
"tools/call_frequency": 3.1,
"tools/failure_frequency": 0.0
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.55,
"completions/max_length": 249.6,
"completions/max_terminated_length": 248.4,
"completions/mean_length": 225.9,
"completions/mean_terminated_length": 238.83333435058594,
"completions/min_length": 207.6,
"completions/min_terminated_length": 229.0,
"entropy": 0.12472135615535081,
"epoch": 0.78125,
"frac_reward_zero_std": 0.2,
"grad_norm": 1.3778189420700073,
"kl": 0.22096077986061574,
"learning_rate": 2.52e-06,
"loss": 0.018771570920944215,
"num_tokens": 409108.0,
"reward": 0.6021333426237107,
"reward_std": 0.6133833765983582,
"rewards/reward_func/mean": 0.6021333426237107,
"rewards/reward_func/std": 0.6133833885192871,
"step": 125,
"step_time": 17.135429813191877,
"tools/call_frequency": 3.0,
"tools/failure_frequency": 0.0
}
],
"logging_steps": 5,
"max_steps": 250,
"num_input_tokens_seen": 409108,
"num_train_epochs": 2,
"save_steps": 125,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:623158c464bdeaec4fa82a52817596e39771e95ece6659a737aa9e9d07d03327
size 7185

View File

@@ -0,0 +1,89 @@
{#-
  Chat template: renders a list of `messages` (plus optional `tools`) into a
  single prompt string in which every turn is framed as
  "<|im_start|>ROLE\n ... <|im_end|>\n".

  Inputs read by this template:
    messages               - list of objects with `role` and `content`; assistant
                             messages may also carry `reasoning_content` and
                             `tool_calls`.
    tools                  - optional list; each entry is serialised with `tojson`.
    add_generation_prompt  - when truthy, an open assistant turn is appended.
    enable_thinking        - optional; only the case "defined and false" is handled.

  Every tag uses the left whitespace-control dash, so the line breaks between
  tags in this file never reach the rendered output; all emitted text comes
  from the string literals inside the expression tags.
-#}
{#-
  Section 1: system turn.
  With tools: open a system turn, prepend the first message's content if that
  message is a system message, then list every tool inside <tools></tools> and
  describe the <tool_call></tool_call> reply format.
  Without tools: emit the first message as the system turn only if its role is
  'system'; otherwise emit nothing here.
-#}
{%- if tools %}
{{- '<|im_start|>system\n' }}
{%- if messages[0].role == 'system' %}
{{- messages[0].content + '\n\n' }}
{%- endif %}
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
{%- for tool in tools %}
{{- "\n" }}
{{- tool | tojson }}
{%- endfor %}
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
{%- else %}
{%- if messages[0].role == 'system' %}
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{#-
  Section 2: locate the last "real" user query.
  Messages are scanned from the end. The first user message found whose content
  is a string that is NOT entirely wrapped in <tool_response>...</tool_response>
  sets ns.last_query_index and clears ns.multi_step_tool so later iterations
  leave the index alone. If no such message exists, last_query_index keeps its
  initial value (the index of the final message).
  A namespace object is used because a plain `set` inside a for loop would not
  be visible after the loop ends.
-#}
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
{%- for message in messages[::-1] %}
{%- set index = (messages|length - 1) - loop.index0 %}
{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
{%- set ns.multi_step_tool = false %}
{%- set ns.last_query_index = index %}
{%- endif %}
{%- endfor %}
{#-
  Section 3: render each message in order.
-#}
{%- for message in messages %}
{#- Normalise content: anything that is not a string is rendered as ''. -#}
{%- if message.content is string %}
{%- set content = message.content %}
{%- else %}
{%- set content = '' %}
{%- endif %}
{#-
  User turns, and system turns other than the very first message, are emitted
  verbatim. A system message in first position is skipped here because
  Section 1 already rendered it.
-#}
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
{%- elif message.role == "assistant" %}
{#-
  Reasoning extraction. Prefer an explicit string `reasoning_content` field.
  Otherwise, if the content contains '</think>': the reasoning is the text
  before the first '</think>', cut down to what follows the last '<think>' in
  that prefix; the visible content becomes the text after the last '</think>'.
  Newlines next to the tags are trimmed.
-#}
{%- set reasoning_content = '' %}
{%- if message.reasoning_content is string %}
{%- set reasoning_content = message.reasoning_content %}
{%- else %}
{%- if '</think>' in content %}
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
{%- endif %}
{%- endif %}
{#-
  Only assistant turns positioned after the last real user query may show a
  <think> block, and only when the turn is the final message or has non-empty
  reasoning. Every other assistant turn is rendered with its content alone, so
  any extracted reasoning is dropped from the prompt.
-#}
{%- if loop.index0 > ns.last_query_index %}
{%- if loop.last or (not loop.last and reasoning_content) %}
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
{%- else %}
{{- '<|im_start|>' + message.role + '\n' + content }}
{%- endif %}
{%- else %}
{{- '<|im_start|>' + message.role + '\n' + content }}
{%- endif %}
{#-
  Tool calls. A newline separates each call from what precedes it, except
  before the first call when the content is empty. Entries wrapped as
  {function: {...}} are unwrapped first. `arguments` is emitted unchanged when
  it is already a string and serialised with `tojson` otherwise.
-#}
{%- if message.tool_calls %}
{%- for tool_call in message.tool_calls %}
{%- if (loop.first and content) or (not loop.first) %}
{{- '\n' }}
{%- endif %}
{%- if tool_call.function %}
{%- set tool_call = tool_call.function %}
{%- endif %}
{{- '<tool_call>\n{"name": "' }}
{{- tool_call.name }}
{{- '", "arguments": ' }}
{%- if tool_call.arguments is string %}
{{- tool_call.arguments }}
{%- else %}
{{- tool_call.arguments | tojson }}
{%- endif %}
{{- '}\n</tool_call>' }}
{%- endfor %}
{%- endif %}
{{- '<|im_end|>\n' }}
{%- elif message.role == "tool" %}
{#-
  Tool results are rendered inside a user turn. A run of consecutive tool
  messages shares one turn: the header is written only when the previous
  message is not a tool message, and the terminator only when the next message
  is not a tool message (or this is the last message). Each result is wrapped
  in <tool_response></tool_response>.
-#}
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
{{- '<|im_start|>user' }}
{%- endif %}
{{- '\n<tool_response>\n' }}
{{- content }}
{{- '\n</tool_response>' }}
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
{{- '<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{%- endfor %}
{#-
  Section 4: generation prompt. Opens an assistant turn for the model to
  complete. When enable_thinking is defined and false, an empty <think> block
  is pre-filled so the turn starts after the closing think tag.
-#}
{%- if add_generation_prompt %}
{{- '<|im_start|>assistant\n' }}
{%- if enable_thinking is defined and enable_thinking is false %}
{{- '<think>\n\n</think>\n\n' }}
{%- endif %}
{%- endif %}

View File

@@ -0,0 +1,63 @@
{
"architectures": [
"Qwen3ForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": null,
"dtype": "float32",
"eos_token_id": 151645,
"head_dim": 128,
"hidden_act": "silu",
"hidden_size": 1024,
"initializer_range": 0.02,
"intermediate_size": 3072,
"layer_types": [
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention"
],
"max_position_embeddings": 40960,
"max_window_layers": 28,
"model_type": "qwen3",
"num_attention_heads": 16,
"num_hidden_layers": 28,
"num_key_value_heads": 8,
"pad_token_id": 151643,
"rms_norm_eps": 1e-06,
"rope_parameters": {
"rope_theta": 1000000,
"rope_type": "default"
},
"sliding_window": null,
"tie_word_embeddings": true,
"transformers_version": "5.6.2",
"use_cache": false,
"use_sliding_window": false,
"vocab_size": 151936
}

View File

@@ -0,0 +1,12 @@
{
"do_sample": true,
"eos_token_id": [
151645,
151643
],
"pad_token_id": 151643,
"temperature": 0.6,
"top_k": 20,
"top_p": 0.95,
"transformers_version": "5.6.2"
}

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5eb33e25e043c4103e6cee6aa637be513e7c2c0b7afb8cdd2325c7891721e22f
size 2384234968

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ad30d0785567ceb6d1472c0c58b027a9569701ec0b3f7f9e1b6557412d8231c6
size 4768669395

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:63b0bd96b0d28d762e23adea21cbb0a4f5ae44ffadba986e6529b752e987c40c
size 14645

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:aa68c9756e4b45f9fc5e31507423afdfe40083f2b850885e7dec29d5f3970b1f
size 1465

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506
size 11422650

View File

@@ -0,0 +1,78 @@
{
"add_prefix_space": false,
"backend": "tokenizers",
"bos_token": null,
"clean_up_tokenization_spaces": false,
"eos_token": "<|im_end|>",
"errors": "replace",
"extra_special_tokens": [
"<|im_start|>",
"<|im_end|>",
"<|object_ref_start|>",
"<|object_ref_end|>",
"<|box_start|>",
"<|box_end|>",
"<|quad_start|>",
"<|quad_end|>",
"<|vision_start|>",
"<|vision_end|>",
"<|vision_pad|>",
"<|image_pad|>",
"<|video_pad|>"
],
"is_local": true,
"local_files_only": false,
"max_length": 3072,
"model_max_length": 131072,
"pad_token": "<|endoftext|>",
"padding_side": "left",
"response_schema": {
"properties": {
"content": {
"type": "string"
},
"reasoning_content": {
"type": "string"
},
"role": {
"const": "assistant"
},
"tool_calls": {
"items": {
"properties": {
"function": {
"properties": {
"arguments": {
"additionalProperties": {},
"type": "object"
},
"name": {
"type": "string"
}
},
"type": "object"
},
"type": {
"const": "function"
}
},
"type": "object",
"x-parser": "json",
"x-parser-args": {
"transform": "{type: 'function', function: @}"
}
},
"type": "array",
"x-regex-iterator": "<tool_call>\\s*(.+?)\\s*</tool_call>"
}
},
"type": "object",
"x-regex": "^(?:<think>\\n?(?:(?P<reasoning_content>.*?\\S.*?)\\n?|[\\s]*)</think>\\s*)?(?P<content>.*?)(?:\\n(?=<tool_call>))?(?=(?:<tool_call>|<\\|im_end\\|>|$))(?P<tool_calls>(?:<tool_call>.+?</tool_call>\\s*)+)?\\s*(?:<\\|im_end\\|>|$)"
},
"split_special_tokens": false,
"stride": 0,
"tokenizer_class": "Qwen2Tokenizer",
"truncation_side": "left",
"truncation_strategy": "longest_first",
"unk_token": null
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:623158c464bdeaec4fa82a52817596e39771e95ece6659a737aa9e9d07d03327
size 7185

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b68e115f077b56d92a8687696043f68cf4d262206fbbc1ceed637b932511d25b
size 23966

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c3069f8445a69ba6dbb4859ea555ae1502c389e67a54572235e71ad1d67d588a
size 21823

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b1b7eb25a9ada0e3bcbe12c628fde6b177a446a889be8e5681f37c1feeb87f0d
size 22907

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a3fd829f54108e3882389b9e442c07a35d5d07e0367dc3be83d302a465ac1111
size 23546

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:60b6b7b89e0297abd8244bdc13e7a75d60b34766299cfd9db3de5f9b25373dad
size 24284

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2b8be5f615ecd18a856e38c4fc12d3f16df7e6f9fb2d4f573c6db0d11eb4e5d8
size 24890

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d02e8831c4091b1dbd8b0e07448cd8f22de4b3efe52ba57d6f6b6f311fa9560c
size 24136

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d20e76df2ff675580ce50d9bcb6fd4e6225342d09303f180762cabab1e24d09c
size 23457

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b42b35b140b93dfe80a48387ebaff03cfe2ae1af27020dbf01ed2b2ce92dc464
size 24536

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:10826040482a7a035f66f510420b4737fc33944af16909a4a04148272a4158af
size 23472

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6873b01aeb8204e7742e55ce05030ae49384e5730f49dfed84096525919bea00
size 24304

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:de8626c9427a3c1b2cc33599dfc77755310fb356c4960e0f3e2975351aa7225b
size 22618

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:696b5af00da3c47c2729346a733fb873d978694c2a5959ecdee947d051cfe54a
size 24365

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d0874277f72d414e338612ec8eadd8acd4aea6f03be1582c71194bf838b69db1
size 24000

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:08a237f024f5f74a46c8bcced98366ccda0e7630e8b479c1df3bbdfb6851b4e9
size 24879

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4850fbf4fbb8b1944b6c971e91da5f510681ad9664e35e56828e9b50cea0db0e
size 24416

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:072d189c7cc2ca7e12abaf7cab8175b6d8d5dd4f93a34aeddcfb9f3630584163
size 24320

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:30acbe037313f6f3ffae9cb3d097d58f86c5c4df5a3455a5613e8a120fe28ff3
size 24495

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a1ac198276f2647c859589db083a8bc23b707dd3e780deee44591fdbeae2d821
size 24939

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fbfbcd01e3e49dc20f3c4ec6f5bae29692d9ff1932d27696cd773a9eaa31f733
size 23622

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ca57fb90811d63b27b46fa9f9b87e40101195a39f726df56b9ca70043357f768
size 24260

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9d834799c7968349955dfdeadd84971539773c304f681ac3d1632477d39bd47e
size 24949

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ef13f61defd07f329d24be7ac6761b10e5175729fae5db04e1373ef3bb8389b6
size 24970

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d11d282244295c8c553d003ec90504d91d25855b55e6b8260ab930ac1f081ba2
size 24570

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7b8f35966c5bc1c9fd9fe0d3ce6cd8eb9c8b65395f7aae5182d67c4f08f0ab54
size 25114

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:88b54d548b96ad67e2cf580dd46b0cee2f707f7d117741ed1e93b9b2cb0e457f
size 23209

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:749f829e0806c9f9432492597b18573f0ff93c4f3f30c0745b6354de72518c60
size 24387

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b92f197fe307553295b686c5b5bd8a43322139886c0cedef75665de87f66c440
size 24489

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e7028e21d8f6dd9e32cb076cf66228d646906fb904c70ed84f80426d786ec5ed
size 25681

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:da2ca6a5fe353e3c7e435c775fb487f923367b9a2e5769605628185afc7d3ba0
size 25240

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0690b76b28598fa815fe1f1c312144318210684badc5711a60d8e929154616cd
size 23230

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4fc270c3a3ed5a7bde9eeb02321c46c616e15042f917a763be2705a1fa80bad9
size 24247

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5ae2c4bcabbb25bb7e70be2a3c27fab24a0cd675f11a15fe9361d4e73e75c0f3
size 24118

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4e954edee445dfed87e8c114b9c0b52f02ebdb8fbe9096b3b150a21689f391c4
size 24839

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3a8c8730b512d568a457f95a12eaab65511d8bcc5a6fbb6df79e7f9919aaac8d
size 25469

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e8d4b6a551b0c66d356a5919397b6cfe8da2453e193cd12cc77c925ad2a7c940
size 24879

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:af62c422876a65648f0c8357d7aaea0605ff56d7b7d329e657eeeb49310a8cab
size 25086

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:104bdb0177fb32febf5ad69d99c089aef23e0cdf858fef9f51f6837975877e94
size 24995

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:79dad7982eb1657e24c2d30b56e8065fbe658ee1bbcbb3825273cadcbd278bf1
size 23769

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:03b88568b249769abd964dccb3a029392ff4188a6f2e2e0d61e49e45b8734cb3
size 24790

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3ce7ed5ff6eda7d382bade212a53fcb422b3e192e220ce3758f753cf75259c8d
size 23958

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:19b48165efd963ddf763d266c16377a9f1a10ac8d1ea4ab6fec47be41ca39897
size 26131

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3c0ccb348d1be8c1e0a1b9287c1f7be4d87d75b59f14bcce50147d6c0970d71d
size 23995

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:09658361715b206e755041e17a00d8be401b27d49b5fb02665d2dd406a7dca14
size 25117

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6e73eda9b2dfe5b8e3e99ca7669bf7f028a5afdd65e64b08432831449b4a8d97
size 25627

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:14aed69014783ad4eb182afdbae30c06cbe8da270bd87a6a70382d3aa8870fdf
size 24619

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:49fcedabb97e8a9db277be5a8442ff7a4bdf53c9dbb09f1081e8f1d656cd7f9c
size 25408

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:54a2bc4b721e6d58161fe6f2405d5861c5a7aa33ecad498dc3923ce85a84f6b8
size 25454

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6ce3a8098ceb8361a7bdb1d7a3a2f0cbc26e3876257dca9219ae45d2a8cd2eb1
size 24477

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2e999027fab4f80f916ee346b91deeab0d7bf5d6c537c2ecbcfe706ea807b7de
size 25162

63
config.json Normal file
View File

@@ -0,0 +1,63 @@
{
"architectures": [
"Qwen3ForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": null,
"dtype": "float32",
"eos_token_id": 151645,
"head_dim": 128,
"hidden_act": "silu",
"hidden_size": 1024,
"initializer_range": 0.02,
"intermediate_size": 3072,
"layer_types": [
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention"
],
"max_position_embeddings": 40960,
"max_window_layers": 28,
"model_type": "qwen3",
"num_attention_heads": 16,
"num_hidden_layers": 28,
"num_key_value_heads": 8,
"pad_token_id": 151643,
"rms_norm_eps": 1e-06,
"rope_parameters": {
"rope_theta": 1000000,
"rope_type": "default"
},
"sliding_window": null,
"tie_word_embeddings": true,
"transformers_version": "5.6.2",
"use_cache": false,
"use_sliding_window": false,
"vocab_size": 151936
}

151
eval/trained_eval_rows.csv Normal file
View File

@@ -0,0 +1,151 @@
agent,seed,case_id,difficulty,reward,primary_reward,auxiliary_reward,contradictions_total,contradictions_triggered,contradictions_surfaced,questions_used,evidence_presented,evidence_timing_successes,blind_evidence_count,useless_questions_ratio,avg_question_length,model_repo,invalid_tool_calls
random,20260425,timeline_255d67,easy,0.0,0.0,-0.4,1,0,0,6,2,0,2,1.0,5.0,,
random,20260426,knowledge_b28f8c,medium,0.0,0.0,-0.4,2,0,0,6,2,0,2,1.0,5.0,,
random,20260427,workplace_c98377,easy,0.0,0.0,-0.4,1,0,0,3,5,0,5,1.0,5.0,,
random,20260428,motive_66ff59,hard,0.0,0.0,-0.4,3,0,0,7,1,0,1,1.0,5.0,,
random,20260429,timeline_19bb78,easy,0.0,0.0,-0.4,1,0,0,4,4,0,4,1.0,5.0,,
random,20260430,timeline_a97690,medium,0.0,0.0,-0.4,2,0,0,6,2,0,2,1.0,5.0,,
random,20260431,alibi_67ffcd,medium,0.0,0.0,-0.4,2,0,0,6,2,0,2,1.0,5.0,,
random,20260432,alibi_423bca,medium,0.0,0.0,-0.4,2,0,0,5,3,0,3,1.0,5.0,,
random,20260433,knowledge_960d07,medium,0.0,0.0,-0.4,2,0,0,8,0,0,0,1.0,5.0,,
random,20260434,alibi_e829c1,easy,0.0,0.0,-0.4,1,0,0,7,1,0,1,1.0,5.0,,
random,20260435,motive_85e25b,hard,0.0,0.0,-0.4,3,0,0,5,3,0,3,1.0,5.0,,
random,20260436,knowledge_a599e3,medium,0.0,0.0,-0.4,2,0,0,4,4,0,4,1.0,5.0,,
random,20260437,motive_8bca20,easy,0.0,0.0,-0.4,1,0,0,6,2,0,2,1.0,5.0,,
random,20260438,corporate_6b1664,medium,0.0,0.0,-0.4,2,0,0,6,2,0,2,1.0,5.0,,
random,20260439,alibi_a6c582,easy,0.0,0.0,-0.4,1,0,0,8,0,0,0,1.0,5.0,,
random,20260440,workplace_835476,easy,0.0,0.0,-0.4,1,0,0,5,3,0,3,1.0,5.0,,
random,20260441,possession_a079c5,hard,0.0,0.0,-0.4,3,0,0,8,0,0,0,1.0,5.0,,
random,20260442,possession_9cc45d,hard,0.0,0.0,-0.4,3,0,0,5,3,0,3,1.0,5.0,,
random,20260443,possession_259aa5,easy,0.0,0.0,-0.4,1,0,0,4,4,0,4,1.0,5.0,,
random,20260444,corporate_76724c,medium,0.0,0.0,-0.4,2,0,0,5,3,0,3,1.0,5.0,,
random,20260445,timeline_767821,medium,0.0,0.0,-0.4,2,0,0,5,3,0,3,1.0,5.0,,
random,20260446,motive_c0d166,medium,0.0,0.0,-0.4,2,0,0,5,3,0,3,1.0,5.0,,
random,20260447,corporate_307934,hard,0.0,0.0,-0.4,3,0,0,7,1,0,1,1.0,5.0,,
random,20260448,timeline_592816,hard,0.0,0.0,-0.4,3,0,0,6,2,0,2,1.0,5.0,,
random,20260449,knowledge_b26824,medium,0.0,0.0,-0.4,2,0,0,5,3,0,3,1.0,5.0,,
random,20260450,knowledge_697785,hard,0.0,0.0,-0.4,3,0,0,7,1,0,1,1.0,5.0,,
random,20260451,timeline_81dafd,medium,0.0,0.0,-0.4,2,0,0,4,4,0,4,1.0,5.0,,
random,20260452,corporate_8eb7d7,medium,0.0,0.0,-0.4,2,0,0,4,4,0,4,1.0,5.0,,
random,20260453,possession_dbb5fe,medium,0.0,0.0,-0.4,2,0,0,5,3,0,3,1.0,5.0,,
random,20260454,alibi_a4666f,hard,0.0,0.0,-0.4,3,0,0,8,0,0,0,1.0,5.0,,
keyword_spam,20260425,timeline_255d67,easy,0.030000000000000006,0.0,0.15000000000000002,1,1,0,5,0,0,0,0.6,4.2,,
keyword_spam,20260426,knowledge_b28f8c,medium,0.12000000000000002,0.0,0.6000000000000001,2,2,0,5,0,0,0,0.4,4.2,,
keyword_spam,20260427,workplace_c98377,easy,0.0,0.0,-0.2,1,0,0,5,0,0,0,0.8,4.2,,
keyword_spam,20260428,motive_66ff59,hard,0.17000000000000004,0.0,0.8500000000000001,3,3,0,5,0,0,0,0.2,4.2,,
keyword_spam,20260429,timeline_19bb78,easy,0.030000000000000006,0.0,0.15000000000000002,1,1,0,5,0,0,0,0.6,4.2,,
keyword_spam,20260430,timeline_a97690,medium,0.13,0.0,0.65,2,2,0,5,0,0,0,0.2,4.2,,
keyword_spam,20260431,alibi_67ffcd,medium,0.04000000000000001,0.0,0.20000000000000004,2,1,0,5,0,0,0,0.4,4.2,,
keyword_spam,20260432,alibi_423bca,medium,0.04000000000000001,0.0,0.20000000000000004,2,1,0,5,0,0,0,0.4,4.2,,
keyword_spam,20260433,knowledge_960d07,medium,0.12000000000000002,0.0,0.6000000000000001,2,2,0,5,0,0,0,0.4,4.2,,
keyword_spam,20260434,alibi_e829c1,easy,0.04000000000000001,0.0,0.20000000000000004,1,1,0,5,0,0,0,0.4,4.2,,
keyword_spam,20260435,motive_85e25b,hard,0.17000000000000004,0.0,0.8500000000000001,3,3,0,5,0,0,0,0.2,4.2,,
keyword_spam,20260436,knowledge_a599e3,medium,0.12000000000000002,0.0,0.6000000000000001,2,2,0,5,0,0,0,0.4,4.2,,
keyword_spam,20260437,motive_8bca20,easy,0.0,0.0,-0.15000000000000002,1,0,0,5,0,0,0,0.6,4.2,,
keyword_spam,20260438,corporate_6b1664,medium,0.020000000000000007,0.0,0.10000000000000003,2,1,0,5,0,0,0,0.8,4.2,,
keyword_spam,20260439,alibi_a6c582,easy,0.04000000000000001,0.0,0.20000000000000004,1,1,0,5,0,0,0,0.4,4.2,,
keyword_spam,20260440,workplace_835476,easy,0.0,0.0,-0.2,1,0,0,5,0,0,0,0.8,4.2,,
keyword_spam,20260441,possession_a079c5,hard,0.030000000000000006,0.0,0.15000000000000002,3,1,0,5,0,0,0,0.6,4.2,,
keyword_spam,20260442,possession_9cc45d,hard,0.030000000000000006,0.0,0.15000000000000002,3,1,0,5,0,0,0,0.6,4.2,,
keyword_spam,20260443,possession_259aa5,easy,0.0,0.0,-0.2,1,0,0,5,0,0,0,0.8,4.2,,
keyword_spam,20260444,corporate_76724c,medium,0.020000000000000007,0.0,0.10000000000000003,2,1,0,5,0,0,0,0.8,4.2,,
keyword_spam,20260445,timeline_767821,medium,0.13,0.0,0.65,2,2,0,5,0,0,0,0.2,4.2,,
keyword_spam,20260446,motive_c0d166,medium,0.13,0.0,0.65,2,2,0,5,0,0,0,0.2,4.2,,
keyword_spam,20260447,corporate_307934,hard,0.020000000000000007,0.0,0.10000000000000003,3,1,0,5,0,0,0,0.8,4.2,,
keyword_spam,20260448,timeline_592816,hard,0.19,0.0,0.95,3,3,0,5,0,0,0,0.2,4.2,,
keyword_spam,20260449,knowledge_b26824,medium,0.12000000000000002,0.0,0.6000000000000001,2,2,0,5,0,0,0,0.4,4.2,,
keyword_spam,20260450,knowledge_697785,hard,0.12000000000000002,0.0,0.6000000000000001,3,2,0,5,0,0,0,0.4,4.2,,
keyword_spam,20260451,timeline_81dafd,medium,0.13,0.0,0.65,2,2,0,5,0,0,0,0.2,4.2,,
keyword_spam,20260452,corporate_8eb7d7,medium,0.020000000000000007,0.0,0.10000000000000003,2,1,0,5,0,0,0,0.8,4.2,,
keyword_spam,20260453,possession_dbb5fe,medium,0.030000000000000006,0.0,0.15000000000000002,2,1,0,5,0,0,0,0.6,4.2,,
keyword_spam,20260454,alibi_a4666f,hard,0.15000000000000002,0.0,0.75,3,2,0,5,0,0,0,0.2,4.2,,
present_all,20260425,timeline_255d67,easy,0.0,0.0,-0.25,1,0,0,0,5,0,5,0.0,0.0,,
present_all,20260426,knowledge_b28f8c,medium,0.0,0.0,-0.2,2,0,0,0,4,0,4,0.0,0.0,,
present_all,20260427,workplace_c98377,easy,0.0,0.0,-0.25,1,0,0,0,5,0,5,0.0,0.0,,
present_all,20260428,motive_66ff59,hard,0.0,0.0,-0.35000000000000003,3,0,0,0,7,0,7,0.0,0.0,,
present_all,20260429,timeline_19bb78,easy,0.0,0.0,-0.25,1,0,0,0,5,0,5,0.0,0.0,,
present_all,20260430,timeline_a97690,medium,0.0,0.0,-0.25,2,0,0,0,5,0,5,0.0,0.0,,
present_all,20260431,alibi_67ffcd,medium,0.0,0.0,-0.2,2,0,0,0,4,0,4,0.0,0.0,,
present_all,20260432,alibi_423bca,medium,0.0,0.0,-0.2,2,0,0,0,4,0,4,0.0,0.0,,
present_all,20260433,knowledge_960d07,medium,0.0,0.0,-0.2,2,0,0,0,4,0,4,0.0,0.0,,
present_all,20260434,alibi_e829c1,easy,0.0,0.0,-0.2,1,0,0,0,4,0,4,0.0,0.0,,
present_all,20260435,motive_85e25b,hard,0.0,0.0,-0.35000000000000003,3,0,0,0,7,0,7,0.0,0.0,,
present_all,20260436,knowledge_a599e3,medium,0.0,0.0,-0.2,2,0,0,0,4,0,4,0.0,0.0,,
present_all,20260437,motive_8bca20,easy,0.0,0.0,-0.25,1,0,0,0,5,0,5,0.0,0.0,,
present_all,20260438,corporate_6b1664,medium,0.0,0.0,-0.25,2,0,0,0,5,0,5,0.0,0.0,,
present_all,20260439,alibi_a6c582,easy,0.0,0.0,-0.2,1,0,0,0,4,0,4,0.0,0.0,,
present_all,20260440,workplace_835476,easy,0.0,0.0,-0.25,1,0,0,0,5,0,5,0.0,0.0,,
present_all,20260441,possession_a079c5,hard,0.0,0.0,-0.30000000000000004,3,0,0,0,6,0,6,0.0,0.0,,
present_all,20260442,possession_9cc45d,hard,0.0,0.0,-0.30000000000000004,3,0,0,0,6,0,6,0.0,0.0,,
present_all,20260443,possession_259aa5,easy,0.0,0.0,-0.2,1,0,0,0,4,0,4,0.0,0.0,,
present_all,20260444,corporate_76724c,medium,0.0,0.0,-0.25,2,0,0,0,5,0,5,0.0,0.0,,
present_all,20260445,timeline_767821,medium,0.0,0.0,-0.25,2,0,0,0,5,0,5,0.0,0.0,,
present_all,20260446,motive_c0d166,medium,0.0,0.0,-0.25,2,0,0,0,5,0,5,0.0,0.0,,
present_all,20260447,corporate_307934,hard,0.0,0.0,-0.35000000000000003,3,0,0,0,7,0,7,0.0,0.0,,
present_all,20260448,timeline_592816,hard,0.0,0.0,-0.35000000000000003,3,0,0,0,7,0,7,0.0,0.0,,
present_all,20260449,knowledge_b26824,medium,0.0,0.0,-0.2,2,0,0,0,4,0,4,0.0,0.0,,
present_all,20260450,knowledge_697785,hard,0.0,0.0,-0.30000000000000004,3,0,0,0,6,0,6,0.0,0.0,,
present_all,20260451,timeline_81dafd,medium,0.0,0.0,-0.25,2,0,0,0,5,0,5,0.0,0.0,,
present_all,20260452,corporate_8eb7d7,medium,0.0,0.0,-0.25,2,0,0,0,5,0,5,0.0,0.0,,
present_all,20260453,possession_dbb5fe,medium,0.0,0.0,-0.2,2,0,0,0,4,0,4,0.0,0.0,,
present_all,20260454,alibi_a4666f,hard,0.0,0.0,-0.30000000000000004,3,0,0,0,6,0,6,0.0,0.0,,
scripted_oracle,20260425,timeline_255d67,easy,0.8800000000000001,1.0,0.4,1,1,1,1,1,1,0,0.0,1.0,,
scripted_oracle,20260426,knowledge_b28f8c,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.5,,
scripted_oracle,20260427,workplace_c98377,easy,0.8800000000000001,1.0,0.4,1,1,1,1,1,1,0,0.0,1.0,,
scripted_oracle,20260428,motive_66ff59,hard,1.0,1.0,1.2000000000000002,3,3,3,3,3,3,0,0.0,1.0,,
scripted_oracle,20260429,timeline_19bb78,easy,0.8800000000000001,1.0,0.4,1,1,1,1,1,1,0,0.0,1.0,,
scripted_oracle,20260430,timeline_a97690,medium,0.49000000000000005,0.5,0.45,2,1,1,2,2,1,1,0.0,1.0,,
scripted_oracle,20260431,alibi_67ffcd,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,3.0,,
scripted_oracle,20260432,alibi_423bca,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,3.0,,
scripted_oracle,20260433,knowledge_960d07,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.5,,
scripted_oracle,20260434,alibi_e829c1,easy,0.8800000000000001,1.0,0.4,1,1,1,1,1,1,0,0.0,3.0,,
scripted_oracle,20260435,motive_85e25b,hard,1.0,1.0,1.2000000000000002,3,3,3,3,3,3,0,0.0,1.0,,
scripted_oracle,20260436,knowledge_a599e3,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.5,,
scripted_oracle,20260437,motive_8bca20,easy,0.8800000000000001,1.0,0.4,1,1,1,1,1,1,0,0.0,1.0,,
scripted_oracle,20260438,corporate_6b1664,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.0,,
scripted_oracle,20260439,alibi_a6c582,easy,0.8800000000000001,1.0,0.4,1,1,1,1,1,1,0,0.0,3.0,,
scripted_oracle,20260440,workplace_835476,easy,0.8800000000000001,1.0,0.4,1,1,1,1,1,1,0,0.0,1.0,,
scripted_oracle,20260441,possession_a079c5,hard,1.0,1.0,1.2000000000000002,3,3,3,3,3,3,0,0.0,1.0,,
scripted_oracle,20260442,possession_9cc45d,hard,1.0,1.0,1.2000000000000002,3,3,3,3,3,3,0,0.0,1.0,,
scripted_oracle,20260443,possession_259aa5,easy,0.8800000000000001,1.0,0.4,1,1,1,1,1,1,0,0.0,1.0,,
scripted_oracle,20260444,corporate_76724c,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.0,,
scripted_oracle,20260445,timeline_767821,medium,0.49000000000000005,0.5,0.45,2,1,1,2,2,1,1,0.0,1.0,,
scripted_oracle,20260446,motive_c0d166,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.0,,
scripted_oracle,20260447,corporate_307934,hard,1.0,1.0,1.2000000000000002,3,3,3,3,3,3,0,0.0,1.0,,
scripted_oracle,20260448,timeline_592816,hard,1.0,1.0,1.2000000000000002,3,3,3,3,3,3,0,0.0,1.0,,
scripted_oracle,20260449,knowledge_b26824,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.5,,
scripted_oracle,20260450,knowledge_697785,hard,1.0,1.0,1.2000000000000002,3,3,3,3,3,3,0,0.0,1.6666666666666667,,
scripted_oracle,20260451,timeline_81dafd,medium,0.49000000000000005,0.5,0.45,2,1,1,2,2,1,1,0.0,1.0,,
scripted_oracle,20260452,corporate_8eb7d7,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.0,,
scripted_oracle,20260453,possession_dbb5fe,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.0,,
scripted_oracle,20260454,alibi_a4666f,hard,1.0,1.0,1.2000000000000002,3,3,3,3,3,3,0,0.0,2.3333333333333335,,
trained_sft_grpo_run2,20260425,timeline_255d67,easy,0.010000000000000004,0.0,0.05000000000000002,1,1,0,7,0,0,0,0.7142857142857143,39.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260426,knowledge_b28f8c,medium,0.42000000000000004,0.5,0.09999999999999998,2,1,1,7,1,1,0,0.8571428571428571,45.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260427,workplace_c98377,easy,0.8200000000000001,1.0,0.09999999999999998,1,1,1,7,1,1,0,0.8571428571428571,31.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260428,motive_66ff59,hard,0.2866666666666667,0.3333333333333333,0.09999999999999998,3,1,1,7,1,1,0,0.8571428571428571,31.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260429,timeline_19bb78,easy,0.010000000000000004,0.0,0.05000000000000002,1,1,0,7,0,0,0,0.7142857142857143,31.428571428571427,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260430,timeline_a97690,medium,0.010000000000000004,0.0,0.05000000000000002,2,1,0,7,0,0,0,0.7142857142857143,36.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260431,alibi_67ffcd,medium,0.42000000000000004,0.5,0.09999999999999998,2,1,1,7,1,1,0,0.8571428571428571,35.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260432,alibi_423bca,medium,0.42000000000000004,0.5,0.09999999999999998,2,1,1,7,1,1,0,0.8571428571428571,39.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260433,knowledge_960d07,medium,0.42000000000000004,0.5,0.09999999999999998,2,1,1,7,1,1,0,0.8571428571428571,39.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260434,alibi_e829c1,easy,0.8200000000000001,1.0,0.09999999999999998,1,1,1,7,1,1,0,0.8571428571428571,37.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260435,motive_85e25b,hard,0.2866666666666667,0.3333333333333333,0.09999999999999998,3,1,1,7,1,1,0,0.8571428571428571,32.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260436,knowledge_a599e3,medium,0.42000000000000004,0.5,0.09999999999999998,2,1,1,7,1,1,0,0.8571428571428571,39.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260437,motive_8bca20,easy,0.8200000000000001,1.0,0.09999999999999998,1,1,1,7,1,1,0,0.8571428571428571,30.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260438,corporate_6b1664,medium,0.42000000000000004,0.5,0.09999999999999998,2,1,1,7,1,1,0,0.8571428571428571,27.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260439,alibi_a6c582,easy,0.8200000000000001,1.0,0.09999999999999998,1,1,1,7,1,1,0,0.8571428571428571,38.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260440,workplace_835476,easy,0.8200000000000001,1.0,0.09999999999999998,1,1,1,7,1,1,0,0.8571428571428571,30.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260441,possession_a079c5,hard,0.2866666666666667,0.3333333333333333,0.09999999999999998,3,1,1,7,1,1,0,0.8571428571428571,47.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260442,possession_9cc45d,hard,0.2866666666666667,0.3333333333333333,0.09999999999999998,3,1,1,7,1,1,0,0.8571428571428571,43.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260443,possession_259aa5,easy,0.8200000000000001,1.0,0.09999999999999998,1,1,1,7,1,1,0,0.8571428571428571,42.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260444,corporate_76724c,medium,0.42000000000000004,0.5,0.09999999999999998,2,1,1,7,1,1,0,0.8571428571428571,27.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260445,timeline_767821,medium,0.010000000000000004,0.0,0.05000000000000002,2,1,0,7,0,0,0,0.7142857142857143,44.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260446,motive_c0d166,medium,0.42000000000000004,0.5,0.09999999999999998,2,1,1,7,1,1,0,0.8571428571428571,35.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260447,corporate_307934,hard,0.2866666666666667,0.3333333333333333,0.09999999999999998,3,1,1,7,1,1,0,0.8571428571428571,27.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260448,timeline_592816,hard,0.010000000000000004,0.0,0.05000000000000002,3,1,0,7,0,0,0,0.7142857142857143,39.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260449,knowledge_b26824,medium,0.42000000000000004,0.5,0.09999999999999998,2,1,1,7,1,1,0,0.8571428571428571,38.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260450,knowledge_697785,hard,0.2866666666666667,0.3333333333333333,0.09999999999999998,3,1,1,7,1,1,0,0.8571428571428571,43.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260451,timeline_81dafd,medium,0.010000000000000004,0.0,0.05000000000000002,2,1,0,7,0,0,0,0.7142857142857143,38.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260452,corporate_8eb7d7,medium,0.42000000000000004,0.5,0.09999999999999998,2,1,1,7,1,1,0,0.8571428571428571,27.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260453,possession_dbb5fe,medium,0.42000000000000004,0.5,0.09999999999999998,2,1,1,7,1,1,0,0.8571428571428571,45.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
trained_sft_grpo_run2,20260454,alibi_a4666f,hard,0.2866666666666667,0.3333333333333333,0.09999999999999998,3,1,1,7,1,1,0,0.8571428571428571,39.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
1 agent seed case_id difficulty reward primary_reward auxiliary_reward contradictions_total contradictions_triggered contradictions_surfaced questions_used evidence_presented evidence_timing_successes blind_evidence_count useless_questions_ratio avg_question_length model_repo invalid_tool_calls
2 random 20260425 timeline_255d67 easy 0.0 0.0 -0.4 1 0 0 6 2 0 2 1.0 5.0
3 random 20260426 knowledge_b28f8c medium 0.0 0.0 -0.4 2 0 0 6 2 0 2 1.0 5.0
4 random 20260427 workplace_c98377 easy 0.0 0.0 -0.4 1 0 0 3 5 0 5 1.0 5.0
5 random 20260428 motive_66ff59 hard 0.0 0.0 -0.4 3 0 0 7 1 0 1 1.0 5.0
6 random 20260429 timeline_19bb78 easy 0.0 0.0 -0.4 1 0 0 4 4 0 4 1.0 5.0
7 random 20260430 timeline_a97690 medium 0.0 0.0 -0.4 2 0 0 6 2 0 2 1.0 5.0
8 random 20260431 alibi_67ffcd medium 0.0 0.0 -0.4 2 0 0 6 2 0 2 1.0 5.0
9 random 20260432 alibi_423bca medium 0.0 0.0 -0.4 2 0 0 5 3 0 3 1.0 5.0
10 random 20260433 knowledge_960d07 medium 0.0 0.0 -0.4 2 0 0 8 0 0 0 1.0 5.0
11 random 20260434 alibi_e829c1 easy 0.0 0.0 -0.4 1 0 0 7 1 0 1 1.0 5.0
12 random 20260435 motive_85e25b hard 0.0 0.0 -0.4 3 0 0 5 3 0 3 1.0 5.0
13 random 20260436 knowledge_a599e3 medium 0.0 0.0 -0.4 2 0 0 4 4 0 4 1.0 5.0
14 random 20260437 motive_8bca20 easy 0.0 0.0 -0.4 1 0 0 6 2 0 2 1.0 5.0
15 random 20260438 corporate_6b1664 medium 0.0 0.0 -0.4 2 0 0 6 2 0 2 1.0 5.0
16 random 20260439 alibi_a6c582 easy 0.0 0.0 -0.4 1 0 0 8 0 0 0 1.0 5.0
17 random 20260440 workplace_835476 easy 0.0 0.0 -0.4 1 0 0 5 3 0 3 1.0 5.0
18 random 20260441 possession_a079c5 hard 0.0 0.0 -0.4 3 0 0 8 0 0 0 1.0 5.0
19 random 20260442 possession_9cc45d hard 0.0 0.0 -0.4 3 0 0 5 3 0 3 1.0 5.0
20 random 20260443 possession_259aa5 easy 0.0 0.0 -0.4 1 0 0 4 4 0 4 1.0 5.0
21 random 20260444 corporate_76724c medium 0.0 0.0 -0.4 2 0 0 5 3 0 3 1.0 5.0
22 random 20260445 timeline_767821 medium 0.0 0.0 -0.4 2 0 0 5 3 0 3 1.0 5.0
23 random 20260446 motive_c0d166 medium 0.0 0.0 -0.4 2 0 0 5 3 0 3 1.0 5.0
24 random 20260447 corporate_307934 hard 0.0 0.0 -0.4 3 0 0 7 1 0 1 1.0 5.0
25 random 20260448 timeline_592816 hard 0.0 0.0 -0.4 3 0 0 6 2 0 2 1.0 5.0
26 random 20260449 knowledge_b26824 medium 0.0 0.0 -0.4 2 0 0 5 3 0 3 1.0 5.0
27 random 20260450 knowledge_697785 hard 0.0 0.0 -0.4 3 0 0 7 1 0 1 1.0 5.0
28 random 20260451 timeline_81dafd medium 0.0 0.0 -0.4 2 0 0 4 4 0 4 1.0 5.0
29 random 20260452 corporate_8eb7d7 medium 0.0 0.0 -0.4 2 0 0 4 4 0 4 1.0 5.0
30 random 20260453 possession_dbb5fe medium 0.0 0.0 -0.4 2 0 0 5 3 0 3 1.0 5.0
31 random 20260454 alibi_a4666f hard 0.0 0.0 -0.4 3 0 0 8 0 0 0 1.0 5.0
32 keyword_spam 20260425 timeline_255d67 easy 0.030000000000000006 0.0 0.15000000000000002 1 1 0 5 0 0 0 0.6 4.2
33 keyword_spam 20260426 knowledge_b28f8c medium 0.12000000000000002 0.0 0.6000000000000001 2 2 0 5 0 0 0 0.4 4.2
34 keyword_spam 20260427 workplace_c98377 easy 0.0 0.0 -0.2 1 0 0 5 0 0 0 0.8 4.2
35 keyword_spam 20260428 motive_66ff59 hard 0.17000000000000004 0.0 0.8500000000000001 3 3 0 5 0 0 0 0.2 4.2
36 keyword_spam 20260429 timeline_19bb78 easy 0.030000000000000006 0.0 0.15000000000000002 1 1 0 5 0 0 0 0.6 4.2
37 keyword_spam 20260430 timeline_a97690 medium 0.13 0.0 0.65 2 2 0 5 0 0 0 0.2 4.2
38 keyword_spam 20260431 alibi_67ffcd medium 0.04000000000000001 0.0 0.20000000000000004 2 1 0 5 0 0 0 0.4 4.2
39 keyword_spam 20260432 alibi_423bca medium 0.04000000000000001 0.0 0.20000000000000004 2 1 0 5 0 0 0 0.4 4.2
40 keyword_spam 20260433 knowledge_960d07 medium 0.12000000000000002 0.0 0.6000000000000001 2 2 0 5 0 0 0 0.4 4.2
41 keyword_spam 20260434 alibi_e829c1 easy 0.04000000000000001 0.0 0.20000000000000004 1 1 0 5 0 0 0 0.4 4.2
42 keyword_spam 20260435 motive_85e25b hard 0.17000000000000004 0.0 0.8500000000000001 3 3 0 5 0 0 0 0.2 4.2
43 keyword_spam 20260436 knowledge_a599e3 medium 0.12000000000000002 0.0 0.6000000000000001 2 2 0 5 0 0 0 0.4 4.2
44 keyword_spam 20260437 motive_8bca20 easy 0.0 0.0 -0.15000000000000002 1 0 0 5 0 0 0 0.6 4.2
45 keyword_spam 20260438 corporate_6b1664 medium 0.020000000000000007 0.0 0.10000000000000003 2 1 0 5 0 0 0 0.8 4.2
46 keyword_spam 20260439 alibi_a6c582 easy 0.04000000000000001 0.0 0.20000000000000004 1 1 0 5 0 0 0 0.4 4.2
47 keyword_spam 20260440 workplace_835476 easy 0.0 0.0 -0.2 1 0 0 5 0 0 0 0.8 4.2
48 keyword_spam 20260441 possession_a079c5 hard 0.030000000000000006 0.0 0.15000000000000002 3 1 0 5 0 0 0 0.6 4.2
49 keyword_spam 20260442 possession_9cc45d hard 0.030000000000000006 0.0 0.15000000000000002 3 1 0 5 0 0 0 0.6 4.2
50 keyword_spam 20260443 possession_259aa5 easy 0.0 0.0 -0.2 1 0 0 5 0 0 0 0.8 4.2
51 keyword_spam 20260444 corporate_76724c medium 0.020000000000000007 0.0 0.10000000000000003 2 1 0 5 0 0 0 0.8 4.2
52 keyword_spam 20260445 timeline_767821 medium 0.13 0.0 0.65 2 2 0 5 0 0 0 0.2 4.2
53 keyword_spam 20260446 motive_c0d166 medium 0.13 0.0 0.65 2 2 0 5 0 0 0 0.2 4.2
54 keyword_spam 20260447 corporate_307934 hard 0.020000000000000007 0.0 0.10000000000000003 3 1 0 5 0 0 0 0.8 4.2
55 keyword_spam 20260448 timeline_592816 hard 0.19 0.0 0.95 3 3 0 5 0 0 0 0.2 4.2
56 keyword_spam 20260449 knowledge_b26824 medium 0.12000000000000002 0.0 0.6000000000000001 2 2 0 5 0 0 0 0.4 4.2
57 keyword_spam 20260450 knowledge_697785 hard 0.12000000000000002 0.0 0.6000000000000001 3 2 0 5 0 0 0 0.4 4.2
58 keyword_spam 20260451 timeline_81dafd medium 0.13 0.0 0.65 2 2 0 5 0 0 0 0.2 4.2
59 keyword_spam 20260452 corporate_8eb7d7 medium 0.020000000000000007 0.0 0.10000000000000003 2 1 0 5 0 0 0 0.8 4.2
60 keyword_spam 20260453 possession_dbb5fe medium 0.030000000000000006 0.0 0.15000000000000002 2 1 0 5 0 0 0 0.6 4.2
61 keyword_spam 20260454 alibi_a4666f hard 0.15000000000000002 0.0 0.75 3 2 0 5 0 0 0 0.2 4.2
62 present_all 20260425 timeline_255d67 easy 0.0 0.0 -0.25 1 0 0 0 5 0 5 0.0 0.0
63 present_all 20260426 knowledge_b28f8c medium 0.0 0.0 -0.2 2 0 0 0 4 0 4 0.0 0.0
64 present_all 20260427 workplace_c98377 easy 0.0 0.0 -0.25 1 0 0 0 5 0 5 0.0 0.0
65 present_all 20260428 motive_66ff59 hard 0.0 0.0 -0.35000000000000003 3 0 0 0 7 0 7 0.0 0.0
66 present_all 20260429 timeline_19bb78 easy 0.0 0.0 -0.25 1 0 0 0 5 0 5 0.0 0.0
67 present_all 20260430 timeline_a97690 medium 0.0 0.0 -0.25 2 0 0 0 5 0 5 0.0 0.0
68 present_all 20260431 alibi_67ffcd medium 0.0 0.0 -0.2 2 0 0 0 4 0 4 0.0 0.0
69 present_all 20260432 alibi_423bca medium 0.0 0.0 -0.2 2 0 0 0 4 0 4 0.0 0.0
70 present_all 20260433 knowledge_960d07 medium 0.0 0.0 -0.2 2 0 0 0 4 0 4 0.0 0.0
71 present_all 20260434 alibi_e829c1 easy 0.0 0.0 -0.2 1 0 0 0 4 0 4 0.0 0.0
72 present_all 20260435 motive_85e25b hard 0.0 0.0 -0.35000000000000003 3 0 0 0 7 0 7 0.0 0.0
73 present_all 20260436 knowledge_a599e3 medium 0.0 0.0 -0.2 2 0 0 0 4 0 4 0.0 0.0
74 present_all 20260437 motive_8bca20 easy 0.0 0.0 -0.25 1 0 0 0 5 0 5 0.0 0.0
75 present_all 20260438 corporate_6b1664 medium 0.0 0.0 -0.25 2 0 0 0 5 0 5 0.0 0.0
76 present_all 20260439 alibi_a6c582 easy 0.0 0.0 -0.2 1 0 0 0 4 0 4 0.0 0.0
77 present_all 20260440 workplace_835476 easy 0.0 0.0 -0.25 1 0 0 0 5 0 5 0.0 0.0
78 present_all 20260441 possession_a079c5 hard 0.0 0.0 -0.30000000000000004 3 0 0 0 6 0 6 0.0 0.0
79 present_all 20260442 possession_9cc45d hard 0.0 0.0 -0.30000000000000004 3 0 0 0 6 0 6 0.0 0.0
80 present_all 20260443 possession_259aa5 easy 0.0 0.0 -0.2 1 0 0 0 4 0 4 0.0 0.0
81 present_all 20260444 corporate_76724c medium 0.0 0.0 -0.25 2 0 0 0 5 0 5 0.0 0.0
82 present_all 20260445 timeline_767821 medium 0.0 0.0 -0.25 2 0 0 0 5 0 5 0.0 0.0
83 present_all 20260446 motive_c0d166 medium 0.0 0.0 -0.25 2 0 0 0 5 0 5 0.0 0.0
84 present_all 20260447 corporate_307934 hard 0.0 0.0 -0.35000000000000003 3 0 0 0 7 0 7 0.0 0.0
85 present_all 20260448 timeline_592816 hard 0.0 0.0 -0.35000000000000003 3 0 0 0 7 0 7 0.0 0.0
86 present_all 20260449 knowledge_b26824 medium 0.0 0.0 -0.2 2 0 0 0 4 0 4 0.0 0.0
87 present_all 20260450 knowledge_697785 hard 0.0 0.0 -0.30000000000000004 3 0 0 0 6 0 6 0.0 0.0
88 present_all 20260451 timeline_81dafd medium 0.0 0.0 -0.25 2 0 0 0 5 0 5 0.0 0.0
89 present_all 20260452 corporate_8eb7d7 medium 0.0 0.0 -0.25 2 0 0 0 5 0 5 0.0 0.0
90 present_all 20260453 possession_dbb5fe medium 0.0 0.0 -0.2 2 0 0 0 4 0 4 0.0 0.0
91 present_all 20260454 alibi_a4666f hard 0.0 0.0 -0.30000000000000004 3 0 0 0 6 0 6 0.0 0.0
92 scripted_oracle 20260425 timeline_255d67 easy 0.8800000000000001 1.0 0.4 1 1 1 1 1 1 0 0.0 1.0
93 scripted_oracle 20260426 knowledge_b28f8c medium 0.9600000000000001 1.0 0.8 2 2 2 2 2 2 0 0.0 1.5
94 scripted_oracle 20260427 workplace_c98377 easy 0.8800000000000001 1.0 0.4 1 1 1 1 1 1 0 0.0 1.0
95 scripted_oracle 20260428 motive_66ff59 hard 1.0 1.0 1.2000000000000002 3 3 3 3 3 3 0 0.0 1.0
96 scripted_oracle 20260429 timeline_19bb78 easy 0.8800000000000001 1.0 0.4 1 1 1 1 1 1 0 0.0 1.0
97 scripted_oracle 20260430 timeline_a97690 medium 0.49000000000000005 0.5 0.45 2 1 1 2 2 1 1 0.0 1.0
98 scripted_oracle 20260431 alibi_67ffcd medium 0.9600000000000001 1.0 0.8 2 2 2 2 2 2 0 0.0 3.0
99 scripted_oracle 20260432 alibi_423bca medium 0.9600000000000001 1.0 0.8 2 2 2 2 2 2 0 0.0 3.0
100 scripted_oracle 20260433 knowledge_960d07 medium 0.9600000000000001 1.0 0.8 2 2 2 2 2 2 0 0.0 1.5
101 scripted_oracle 20260434 alibi_e829c1 easy 0.8800000000000001 1.0 0.4 1 1 1 1 1 1 0 0.0 3.0
102 scripted_oracle 20260435 motive_85e25b hard 1.0 1.0 1.2000000000000002 3 3 3 3 3 3 0 0.0 1.0
103 scripted_oracle 20260436 knowledge_a599e3 medium 0.9600000000000001 1.0 0.8 2 2 2 2 2 2 0 0.0 1.5
104 scripted_oracle 20260437 motive_8bca20 easy 0.8800000000000001 1.0 0.4 1 1 1 1 1 1 0 0.0 1.0
105 scripted_oracle 20260438 corporate_6b1664 medium 0.9600000000000001 1.0 0.8 2 2 2 2 2 2 0 0.0 1.0
106 scripted_oracle 20260439 alibi_a6c582 easy 0.8800000000000001 1.0 0.4 1 1 1 1 1 1 0 0.0 3.0
107 scripted_oracle 20260440 workplace_835476 easy 0.8800000000000001 1.0 0.4 1 1 1 1 1 1 0 0.0 1.0
108 scripted_oracle 20260441 possession_a079c5 hard 1.0 1.0 1.2000000000000002 3 3 3 3 3 3 0 0.0 1.0
109 scripted_oracle 20260442 possession_9cc45d hard 1.0 1.0 1.2000000000000002 3 3 3 3 3 3 0 0.0 1.0
110 scripted_oracle 20260443 possession_259aa5 easy 0.8800000000000001 1.0 0.4 1 1 1 1 1 1 0 0.0 1.0
111 scripted_oracle 20260444 corporate_76724c medium 0.9600000000000001 1.0 0.8 2 2 2 2 2 2 0 0.0 1.0
112 scripted_oracle 20260445 timeline_767821 medium 0.49000000000000005 0.5 0.45 2 1 1 2 2 1 1 0.0 1.0
113 scripted_oracle 20260446 motive_c0d166 medium 0.9600000000000001 1.0 0.8 2 2 2 2 2 2 0 0.0 1.0
114 scripted_oracle 20260447 corporate_307934 hard 1.0 1.0 1.2000000000000002 3 3 3 3 3 3 0 0.0 1.0
115 scripted_oracle 20260448 timeline_592816 hard 1.0 1.0 1.2000000000000002 3 3 3 3 3 3 0 0.0 1.0
116 scripted_oracle 20260449 knowledge_b26824 medium 0.9600000000000001 1.0 0.8 2 2 2 2 2 2 0 0.0 1.5
117 scripted_oracle 20260450 knowledge_697785 hard 1.0 1.0 1.2000000000000002 3 3 3 3 3 3 0 0.0 1.6666666666666667
118 scripted_oracle 20260451 timeline_81dafd medium 0.49000000000000005 0.5 0.45 2 1 1 2 2 1 1 0.0 1.0
119 scripted_oracle 20260452 corporate_8eb7d7 medium 0.9600000000000001 1.0 0.8 2 2 2 2 2 2 0 0.0 1.0
120 scripted_oracle 20260453 possession_dbb5fe medium 0.9600000000000001 1.0 0.8 2 2 2 2 2 2 0 0.0 1.0
121 scripted_oracle 20260454 alibi_a4666f hard 1.0 1.0 1.2000000000000002 3 3 3 3 3 3 0 0.0 2.3333333333333335
122 trained_sft_grpo_run2 20260425 timeline_255d67 easy 0.010000000000000004 0.0 0.05000000000000002 1 1 0 7 0 0 0 0.7142857142857143 39.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
123 trained_sft_grpo_run2 20260426 knowledge_b28f8c medium 0.42000000000000004 0.5 0.09999999999999998 2 1 1 7 1 1 0 0.8571428571428571 45.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
124 trained_sft_grpo_run2 20260427 workplace_c98377 easy 0.8200000000000001 1.0 0.09999999999999998 1 1 1 7 1 1 0 0.8571428571428571 31.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
125 trained_sft_grpo_run2 20260428 motive_66ff59 hard 0.2866666666666667 0.3333333333333333 0.09999999999999998 3 1 1 7 1 1 0 0.8571428571428571 31.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
126 trained_sft_grpo_run2 20260429 timeline_19bb78 easy 0.010000000000000004 0.0 0.05000000000000002 1 1 0 7 0 0 0 0.7142857142857143 31.428571428571427 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
127 trained_sft_grpo_run2 20260430 timeline_a97690 medium 0.010000000000000004 0.0 0.05000000000000002 2 1 0 7 0 0 0 0.7142857142857143 36.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
128 trained_sft_grpo_run2 20260431 alibi_67ffcd medium 0.42000000000000004 0.5 0.09999999999999998 2 1 1 7 1 1 0 0.8571428571428571 35.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
129 trained_sft_grpo_run2 20260432 alibi_423bca medium 0.42000000000000004 0.5 0.09999999999999998 2 1 1 7 1 1 0 0.8571428571428571 39.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
130 trained_sft_grpo_run2 20260433 knowledge_960d07 medium 0.42000000000000004 0.5 0.09999999999999998 2 1 1 7 1 1 0 0.8571428571428571 39.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
131 trained_sft_grpo_run2 20260434 alibi_e829c1 easy 0.8200000000000001 1.0 0.09999999999999998 1 1 1 7 1 1 0 0.8571428571428571 37.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
132 trained_sft_grpo_run2 20260435 motive_85e25b hard 0.2866666666666667 0.3333333333333333 0.09999999999999998 3 1 1 7 1 1 0 0.8571428571428571 32.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
133 trained_sft_grpo_run2 20260436 knowledge_a599e3 medium 0.42000000000000004 0.5 0.09999999999999998 2 1 1 7 1 1 0 0.8571428571428571 39.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
134 trained_sft_grpo_run2 20260437 motive_8bca20 easy 0.8200000000000001 1.0 0.09999999999999998 1 1 1 7 1 1 0 0.8571428571428571 30.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
135 trained_sft_grpo_run2 20260438 corporate_6b1664 medium 0.42000000000000004 0.5 0.09999999999999998 2 1 1 7 1 1 0 0.8571428571428571 27.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
136 trained_sft_grpo_run2 20260439 alibi_a6c582 easy 0.8200000000000001 1.0 0.09999999999999998 1 1 1 7 1 1 0 0.8571428571428571 38.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
137 trained_sft_grpo_run2 20260440 workplace_835476 easy 0.8200000000000001 1.0 0.09999999999999998 1 1 1 7 1 1 0 0.8571428571428571 30.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
138 trained_sft_grpo_run2 20260441 possession_a079c5 hard 0.2866666666666667 0.3333333333333333 0.09999999999999998 3 1 1 7 1 1 0 0.8571428571428571 47.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
139 trained_sft_grpo_run2 20260442 possession_9cc45d hard 0.2866666666666667 0.3333333333333333 0.09999999999999998 3 1 1 7 1 1 0 0.8571428571428571 43.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
140 trained_sft_grpo_run2 20260443 possession_259aa5 easy 0.8200000000000001 1.0 0.09999999999999998 1 1 1 7 1 1 0 0.8571428571428571 42.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
141 trained_sft_grpo_run2 20260444 corporate_76724c medium 0.42000000000000004 0.5 0.09999999999999998 2 1 1 7 1 1 0 0.8571428571428571 27.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
142 trained_sft_grpo_run2 20260445 timeline_767821 medium 0.010000000000000004 0.0 0.05000000000000002 2 1 0 7 0 0 0 0.7142857142857143 44.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
143 trained_sft_grpo_run2 20260446 motive_c0d166 medium 0.42000000000000004 0.5 0.09999999999999998 2 1 1 7 1 1 0 0.8571428571428571 35.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
144 trained_sft_grpo_run2 20260447 corporate_307934 hard 0.2866666666666667 0.3333333333333333 0.09999999999999998 3 1 1 7 1 1 0 0.8571428571428571 27.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
145 trained_sft_grpo_run2 20260448 timeline_592816 hard 0.010000000000000004 0.0 0.05000000000000002 3 1 0 7 0 0 0 0.7142857142857143 39.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
146 trained_sft_grpo_run2 20260449 knowledge_b26824 medium 0.42000000000000004 0.5 0.09999999999999998 2 1 1 7 1 1 0 0.8571428571428571 38.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
147 trained_sft_grpo_run2 20260450 knowledge_697785 hard 0.2866666666666667 0.3333333333333333 0.09999999999999998 3 1 1 7 1 1 0 0.8571428571428571 43.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
148 trained_sft_grpo_run2 20260451 timeline_81dafd medium 0.010000000000000004 0.0 0.05000000000000002 2 1 0 7 0 0 0 0.7142857142857143 38.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
149 trained_sft_grpo_run2 20260452 corporate_8eb7d7 medium 0.42000000000000004 0.5 0.09999999999999998 2 1 1 7 1 1 0 0.8571428571428571 27.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
150 trained_sft_grpo_run2 20260453 possession_dbb5fe medium 0.42000000000000004 0.5 0.09999999999999998 2 1 1 7 1 1 0 0.8571428571428571 45.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0
151 trained_sft_grpo_run2 20260454 alibi_a4666f hard 0.2866666666666667 0.3333333333333333 0.09999999999999998 3 1 1 7 1 1 0 0.8571428571428571 39.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 0.0

View File

@@ -0,0 +1,150 @@
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 2, "case_id": "timeline_255d67", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 2, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 6, "reward": 0.0, "seed": 20260425, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 2, "case_id": "knowledge_b28f8c", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 6, "reward": 0.0, "seed": 20260426, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 5, "case_id": "workplace_c98377", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 3, "reward": 0.0, "seed": 20260427, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 1, "case_id": "motive_66ff59", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 1, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 7, "reward": 0.0, "seed": 20260428, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 4, "case_id": "timeline_19bb78", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 4, "reward": 0.0, "seed": 20260429, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 2, "case_id": "timeline_a97690", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 6, "reward": 0.0, "seed": 20260430, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 2, "case_id": "alibi_67ffcd", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 6, "reward": 0.0, "seed": 20260431, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 3, "case_id": "alibi_423bca", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 3, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260432, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 0, "case_id": "knowledge_960d07", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 8, "reward": 0.0, "seed": 20260433, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 1, "case_id": "alibi_e829c1", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 7, "reward": 0.0, "seed": 20260434, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 3, "case_id": "motive_85e25b", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 3, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260435, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 4, "case_id": "knowledge_a599e3", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 4, "reward": 0.0, "seed": 20260436, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 2, "case_id": "motive_8bca20", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 2, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 6, "reward": 0.0, "seed": 20260437, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 2, "case_id": "corporate_6b1664", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 6, "reward": 0.0, "seed": 20260438, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 0, "case_id": "alibi_a6c582", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 8, "reward": 0.0, "seed": 20260439, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 3, "case_id": "workplace_835476", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 3, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260440, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 0, "case_id": "possession_a079c5", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 8, "reward": 0.0, "seed": 20260441, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 3, "case_id": "possession_9cc45d", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 3, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260442, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 4, "case_id": "possession_259aa5", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 4, "reward": 0.0, "seed": 20260443, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 3, "case_id": "corporate_76724c", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 3, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260444, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 3, "case_id": "timeline_767821", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 3, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260445, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 3, "case_id": "motive_c0d166", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 3, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260446, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 1, "case_id": "corporate_307934", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 1, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 7, "reward": 0.0, "seed": 20260447, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 2, "case_id": "timeline_592816", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 2, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 6, "reward": 0.0, "seed": 20260448, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 3, "case_id": "knowledge_b26824", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 3, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260449, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 1, "case_id": "knowledge_697785", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 1, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 7, "reward": 0.0, "seed": 20260450, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 4, "case_id": "timeline_81dafd", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 4, "reward": 0.0, "seed": 20260451, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 4, "case_id": "corporate_8eb7d7", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 4, "reward": 0.0, "seed": 20260452, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 3, "case_id": "possession_dbb5fe", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 3, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260453, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 0, "case_id": "alibi_a4666f", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 8, "reward": 0.0, "seed": 20260454, "useless_questions_ratio": 1.0}
{"agent": "keyword_spam", "auxiliary_reward": 0.15000000000000002, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "timeline_255d67", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.030000000000000006, "seed": 20260425, "useless_questions_ratio": 0.6}
{"agent": "keyword_spam", "auxiliary_reward": 0.6000000000000001, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "knowledge_b28f8c", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.12000000000000002, "seed": 20260426, "useless_questions_ratio": 0.4}
{"agent": "keyword_spam", "auxiliary_reward": -0.2, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "workplace_c98377", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260427, "useless_questions_ratio": 0.8}
{"agent": "keyword_spam", "auxiliary_reward": 0.8500000000000001, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "motive_66ff59", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 3, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.17000000000000004, "seed": 20260428, "useless_questions_ratio": 0.2}
{"agent": "keyword_spam", "auxiliary_reward": 0.15000000000000002, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "timeline_19bb78", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.030000000000000006, "seed": 20260429, "useless_questions_ratio": 0.6}
{"agent": "keyword_spam", "auxiliary_reward": 0.65, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "timeline_a97690", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.13, "seed": 20260430, "useless_questions_ratio": 0.2}
{"agent": "keyword_spam", "auxiliary_reward": 0.20000000000000004, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "alibi_67ffcd", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.04000000000000001, "seed": 20260431, "useless_questions_ratio": 0.4}
{"agent": "keyword_spam", "auxiliary_reward": 0.20000000000000004, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "alibi_423bca", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.04000000000000001, "seed": 20260432, "useless_questions_ratio": 0.4}
{"agent": "keyword_spam", "auxiliary_reward": 0.6000000000000001, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "knowledge_960d07", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.12000000000000002, "seed": 20260433, "useless_questions_ratio": 0.4}
{"agent": "keyword_spam", "auxiliary_reward": 0.20000000000000004, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "alibi_e829c1", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.04000000000000001, "seed": 20260434, "useless_questions_ratio": 0.4}
{"agent": "keyword_spam", "auxiliary_reward": 0.8500000000000001, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "motive_85e25b", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 3, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.17000000000000004, "seed": 20260435, "useless_questions_ratio": 0.2}
{"agent": "keyword_spam", "auxiliary_reward": 0.6000000000000001, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "knowledge_a599e3", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.12000000000000002, "seed": 20260436, "useless_questions_ratio": 0.4}
{"agent": "keyword_spam", "auxiliary_reward": -0.15000000000000002, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "motive_8bca20", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260437, "useless_questions_ratio": 0.6}
{"agent": "keyword_spam", "auxiliary_reward": 0.10000000000000003, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "corporate_6b1664", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.020000000000000007, "seed": 20260438, "useless_questions_ratio": 0.8}
{"agent": "keyword_spam", "auxiliary_reward": 0.20000000000000004, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "alibi_a6c582", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.04000000000000001, "seed": 20260439, "useless_questions_ratio": 0.4}
{"agent": "keyword_spam", "auxiliary_reward": -0.2, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "workplace_835476", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260440, "useless_questions_ratio": 0.8}
{"agent": "keyword_spam", "auxiliary_reward": 0.15000000000000002, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "possession_a079c5", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 1, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.030000000000000006, "seed": 20260441, "useless_questions_ratio": 0.6}
{"agent": "keyword_spam", "auxiliary_reward": 0.15000000000000002, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "possession_9cc45d", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 1, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.030000000000000006, "seed": 20260442, "useless_questions_ratio": 0.6}
{"agent": "keyword_spam", "auxiliary_reward": -0.2, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "possession_259aa5", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260443, "useless_questions_ratio": 0.8}
{"agent": "keyword_spam", "auxiliary_reward": 0.10000000000000003, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "corporate_76724c", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.020000000000000007, "seed": 20260444, "useless_questions_ratio": 0.8}
{"agent": "keyword_spam", "auxiliary_reward": 0.65, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "timeline_767821", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.13, "seed": 20260445, "useless_questions_ratio": 0.2}
{"agent": "keyword_spam", "auxiliary_reward": 0.65, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "motive_c0d166", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.13, "seed": 20260446, "useless_questions_ratio": 0.2}
{"agent": "keyword_spam", "auxiliary_reward": 0.10000000000000003, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "corporate_307934", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 1, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.020000000000000007, "seed": 20260447, "useless_questions_ratio": 0.8}
{"agent": "keyword_spam", "auxiliary_reward": 0.95, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "timeline_592816", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 3, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.19, "seed": 20260448, "useless_questions_ratio": 0.2}
{"agent": "keyword_spam", "auxiliary_reward": 0.6000000000000001, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "knowledge_b26824", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.12000000000000002, "seed": 20260449, "useless_questions_ratio": 0.4}
{"agent": "keyword_spam", "auxiliary_reward": 0.6000000000000001, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "knowledge_697785", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 2, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.12000000000000002, "seed": 20260450, "useless_questions_ratio": 0.4}
{"agent": "keyword_spam", "auxiliary_reward": 0.65, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "timeline_81dafd", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.13, "seed": 20260451, "useless_questions_ratio": 0.2}
{"agent": "keyword_spam", "auxiliary_reward": 0.10000000000000003, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "corporate_8eb7d7", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.020000000000000007, "seed": 20260452, "useless_questions_ratio": 0.8}
{"agent": "keyword_spam", "auxiliary_reward": 0.15000000000000002, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "possession_dbb5fe", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.030000000000000006, "seed": 20260453, "useless_questions_ratio": 0.6}
{"agent": "keyword_spam", "auxiliary_reward": 0.75, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "alibi_a4666f", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 2, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.15000000000000002, "seed": 20260454, "useless_questions_ratio": 0.2}
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "timeline_255d67", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260425, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.2, "avg_question_length": 0.0, "blind_evidence_count": 4, "case_id": "knowledge_b28f8c", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260426, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "workplace_c98377", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260427, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.35000000000000003, "avg_question_length": 0.0, "blind_evidence_count": 7, "case_id": "motive_66ff59", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 7, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260428, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "timeline_19bb78", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260429, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "timeline_a97690", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260430, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.2, "avg_question_length": 0.0, "blind_evidence_count": 4, "case_id": "alibi_67ffcd", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260431, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.2, "avg_question_length": 0.0, "blind_evidence_count": 4, "case_id": "alibi_423bca", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260432, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.2, "avg_question_length": 0.0, "blind_evidence_count": 4, "case_id": "knowledge_960d07", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260433, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.2, "avg_question_length": 0.0, "blind_evidence_count": 4, "case_id": "alibi_e829c1", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260434, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.35000000000000003, "avg_question_length": 0.0, "blind_evidence_count": 7, "case_id": "motive_85e25b", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 7, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260435, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.2, "avg_question_length": 0.0, "blind_evidence_count": 4, "case_id": "knowledge_a599e3", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260436, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "motive_8bca20", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260437, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "corporate_6b1664", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260438, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.2, "avg_question_length": 0.0, "blind_evidence_count": 4, "case_id": "alibi_a6c582", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260439, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "workplace_835476", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260440, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.30000000000000004, "avg_question_length": 0.0, "blind_evidence_count": 6, "case_id": "possession_a079c5", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 6, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260441, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.30000000000000004, "avg_question_length": 0.0, "blind_evidence_count": 6, "case_id": "possession_9cc45d", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 6, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260442, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.2, "avg_question_length": 0.0, "blind_evidence_count": 4, "case_id": "possession_259aa5", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260443, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "corporate_76724c", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260444, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "timeline_767821", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260445, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "motive_c0d166", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260446, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.35000000000000003, "avg_question_length": 0.0, "blind_evidence_count": 7, "case_id": "corporate_307934", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 7, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260447, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.35000000000000003, "avg_question_length": 0.0, "blind_evidence_count": 7, "case_id": "timeline_592816", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 7, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260448, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.2, "avg_question_length": 0.0, "blind_evidence_count": 4, "case_id": "knowledge_b26824", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260449, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.30000000000000004, "avg_question_length": 0.0, "blind_evidence_count": 6, "case_id": "knowledge_697785", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 6, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260450, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "timeline_81dafd", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260451, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "corporate_8eb7d7", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260452, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.2, "avg_question_length": 0.0, "blind_evidence_count": 4, "case_id": "possession_dbb5fe", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260453, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.30000000000000004, "avg_question_length": 0.0, "blind_evidence_count": 6, "case_id": "alibi_a4666f", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 6, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260454, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.4, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "timeline_255d67", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "primary_reward": 1.0, "questions_used": 1, "reward": 0.8800000000000001, "seed": 20260425, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.8, "avg_question_length": 1.5, "blind_evidence_count": 0, "case_id": "knowledge_b28f8c", "contradictions_surfaced": 2, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 2, "primary_reward": 1.0, "questions_used": 2, "reward": 0.9600000000000001, "seed": 20260426, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.4, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "workplace_c98377", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "primary_reward": 1.0, "questions_used": 1, "reward": 0.8800000000000001, "seed": 20260427, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 1.2000000000000002, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "motive_66ff59", "contradictions_surfaced": 3, "contradictions_total": 3, "contradictions_triggered": 3, "difficulty": "hard", "evidence_presented": 3, "evidence_timing_successes": 3, "primary_reward": 1.0, "questions_used": 3, "reward": 1.0, "seed": 20260428, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.4, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "timeline_19bb78", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "primary_reward": 1.0, "questions_used": 1, "reward": 0.8800000000000001, "seed": 20260429, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.45, "avg_question_length": 1.0, "blind_evidence_count": 1, "case_id": "timeline_a97690", "contradictions_surfaced": 1, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 1, "primary_reward": 0.5, "questions_used": 2, "reward": 0.49000000000000005, "seed": 20260430, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.8, "avg_question_length": 3.0, "blind_evidence_count": 0, "case_id": "alibi_67ffcd", "contradictions_surfaced": 2, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 2, "primary_reward": 1.0, "questions_used": 2, "reward": 0.9600000000000001, "seed": 20260431, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.8, "avg_question_length": 3.0, "blind_evidence_count": 0, "case_id": "alibi_423bca", "contradictions_surfaced": 2, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 2, "primary_reward": 1.0, "questions_used": 2, "reward": 0.9600000000000001, "seed": 20260432, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.8, "avg_question_length": 1.5, "blind_evidence_count": 0, "case_id": "knowledge_960d07", "contradictions_surfaced": 2, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 2, "primary_reward": 1.0, "questions_used": 2, "reward": 0.9600000000000001, "seed": 20260433, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.4, "avg_question_length": 3.0, "blind_evidence_count": 0, "case_id": "alibi_e829c1", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "primary_reward": 1.0, "questions_used": 1, "reward": 0.8800000000000001, "seed": 20260434, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 1.2000000000000002, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "motive_85e25b", "contradictions_surfaced": 3, "contradictions_total": 3, "contradictions_triggered": 3, "difficulty": "hard", "evidence_presented": 3, "evidence_timing_successes": 3, "primary_reward": 1.0, "questions_used": 3, "reward": 1.0, "seed": 20260435, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.8, "avg_question_length": 1.5, "blind_evidence_count": 0, "case_id": "knowledge_a599e3", "contradictions_surfaced": 2, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 2, "primary_reward": 1.0, "questions_used": 2, "reward": 0.9600000000000001, "seed": 20260436, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.4, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "motive_8bca20", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "primary_reward": 1.0, "questions_used": 1, "reward": 0.8800000000000001, "seed": 20260437, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.8, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "corporate_6b1664", "contradictions_surfaced": 2, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 2, "primary_reward": 1.0, "questions_used": 2, "reward": 0.9600000000000001, "seed": 20260438, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.4, "avg_question_length": 3.0, "blind_evidence_count": 0, "case_id": "alibi_a6c582", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "primary_reward": 1.0, "questions_used": 1, "reward": 0.8800000000000001, "seed": 20260439, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.4, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "workplace_835476", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "primary_reward": 1.0, "questions_used": 1, "reward": 0.8800000000000001, "seed": 20260440, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 1.2000000000000002, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "possession_a079c5", "contradictions_surfaced": 3, "contradictions_total": 3, "contradictions_triggered": 3, "difficulty": "hard", "evidence_presented": 3, "evidence_timing_successes": 3, "primary_reward": 1.0, "questions_used": 3, "reward": 1.0, "seed": 20260441, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 1.2000000000000002, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "possession_9cc45d", "contradictions_surfaced": 3, "contradictions_total": 3, "contradictions_triggered": 3, "difficulty": "hard", "evidence_presented": 3, "evidence_timing_successes": 3, "primary_reward": 1.0, "questions_used": 3, "reward": 1.0, "seed": 20260442, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.4, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "possession_259aa5", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "primary_reward": 1.0, "questions_used": 1, "reward": 0.8800000000000001, "seed": 20260443, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.8, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "corporate_76724c", "contradictions_surfaced": 2, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 2, "primary_reward": 1.0, "questions_used": 2, "reward": 0.9600000000000001, "seed": 20260444, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.45, "avg_question_length": 1.0, "blind_evidence_count": 1, "case_id": "timeline_767821", "contradictions_surfaced": 1, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 1, "primary_reward": 0.5, "questions_used": 2, "reward": 0.49000000000000005, "seed": 20260445, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.8, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "motive_c0d166", "contradictions_surfaced": 2, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 2, "primary_reward": 1.0, "questions_used": 2, "reward": 0.9600000000000001, "seed": 20260446, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 1.2000000000000002, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "corporate_307934", "contradictions_surfaced": 3, "contradictions_total": 3, "contradictions_triggered": 3, "difficulty": "hard", "evidence_presented": 3, "evidence_timing_successes": 3, "primary_reward": 1.0, "questions_used": 3, "reward": 1.0, "seed": 20260447, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 1.2000000000000002, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "timeline_592816", "contradictions_surfaced": 3, "contradictions_total": 3, "contradictions_triggered": 3, "difficulty": "hard", "evidence_presented": 3, "evidence_timing_successes": 3, "primary_reward": 1.0, "questions_used": 3, "reward": 1.0, "seed": 20260448, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.8, "avg_question_length": 1.5, "blind_evidence_count": 0, "case_id": "knowledge_b26824", "contradictions_surfaced": 2, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 2, "primary_reward": 1.0, "questions_used": 2, "reward": 0.9600000000000001, "seed": 20260449, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 1.2000000000000002, "avg_question_length": 1.6666666666666667, "blind_evidence_count": 0, "case_id": "knowledge_697785", "contradictions_surfaced": 3, "contradictions_total": 3, "contradictions_triggered": 3, "difficulty": "hard", "evidence_presented": 3, "evidence_timing_successes": 3, "primary_reward": 1.0, "questions_used": 3, "reward": 1.0, "seed": 20260450, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.45, "avg_question_length": 1.0, "blind_evidence_count": 1, "case_id": "timeline_81dafd", "contradictions_surfaced": 1, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 1, "primary_reward": 0.5, "questions_used": 2, "reward": 0.49000000000000005, "seed": 20260451, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.8, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "corporate_8eb7d7", "contradictions_surfaced": 2, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 2, "primary_reward": 1.0, "questions_used": 2, "reward": 0.9600000000000001, "seed": 20260452, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.8, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "possession_dbb5fe", "contradictions_surfaced": 2, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 2, "primary_reward": 1.0, "questions_used": 2, "reward": 0.9600000000000001, "seed": 20260453, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 1.2000000000000002, "avg_question_length": 2.3333333333333335, "blind_evidence_count": 0, "case_id": "alibi_a4666f", "contradictions_surfaced": 3, "contradictions_total": 3, "contradictions_triggered": 3, "difficulty": "hard", "evidence_presented": 3, "evidence_timing_successes": 3, "primary_reward": 1.0, "questions_used": 3, "reward": 1.0, "seed": 20260454, "useless_questions_ratio": 0.0}
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.05000000000000002, "avg_question_length": 39.0, "blind_evidence_count": 0, "case_id": "timeline_255d67", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.0, "questions_used": 7, "reward": 0.010000000000000004, "seed": 20260425, "useless_questions_ratio": 0.7142857142857143}
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 45.0, "blind_evidence_count": 0, "case_id": "knowledge_b28f8c", "contradictions_surfaced": 1, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.5, "questions_used": 7, "reward": 0.42000000000000004, "seed": 20260426, "useless_questions_ratio": 0.8571428571428571}
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 31.0, "blind_evidence_count": 0, "case_id": "workplace_c98377", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 1.0, "questions_used": 7, "reward": 0.8200000000000001, "seed": 20260427, "useless_questions_ratio": 0.8571428571428571}
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 31.0, "blind_evidence_count": 0, "case_id": "motive_66ff59", "contradictions_surfaced": 1, "contradictions_total": 3, "contradictions_triggered": 1, "difficulty": "hard", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.3333333333333333, "questions_used": 7, "reward": 0.2866666666666667, "seed": 20260428, "useless_questions_ratio": 0.8571428571428571}
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.05000000000000002, "avg_question_length": 31.428571428571427, "blind_evidence_count": 0, "case_id": "timeline_19bb78", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.0, "questions_used": 7, "reward": 0.010000000000000004, "seed": 20260429, "useless_questions_ratio": 0.7142857142857143}
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.05000000000000002, "avg_question_length": 36.0, "blind_evidence_count": 0, "case_id": "timeline_a97690", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.0, "questions_used": 7, "reward": 0.010000000000000004, "seed": 20260430, "useless_questions_ratio": 0.7142857142857143}
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 35.0, "blind_evidence_count": 0, "case_id": "alibi_67ffcd", "contradictions_surfaced": 1, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.5, "questions_used": 7, "reward": 0.42000000000000004, "seed": 20260431, "useless_questions_ratio": 0.8571428571428571}
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 39.0, "blind_evidence_count": 0, "case_id": "alibi_423bca", "contradictions_surfaced": 1, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.5, "questions_used": 7, "reward": 0.42000000000000004, "seed": 20260432, "useless_questions_ratio": 0.8571428571428571}
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 39.0, "blind_evidence_count": 0, "case_id": "knowledge_960d07", "contradictions_surfaced": 1, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.5, "questions_used": 7, "reward": 0.42000000000000004, "seed": 20260433, "useless_questions_ratio": 0.8571428571428571}
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 37.0, "blind_evidence_count": 0, "case_id": "alibi_e829c1", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 1.0, "questions_used": 7, "reward": 0.8200000000000001, "seed": 20260434, "useless_questions_ratio": 0.8571428571428571}
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 32.0, "blind_evidence_count": 0, "case_id": "motive_85e25b", "contradictions_surfaced": 1, "contradictions_total": 3, "contradictions_triggered": 1, "difficulty": "hard", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.3333333333333333, "questions_used": 7, "reward": 0.2866666666666667, "seed": 20260435, "useless_questions_ratio": 0.8571428571428571}
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 39.0, "blind_evidence_count": 0, "case_id": "knowledge_a599e3", "contradictions_surfaced": 1, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.5, "questions_used": 7, "reward": 0.42000000000000004, "seed": 20260436, "useless_questions_ratio": 0.8571428571428571}
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 30.0, "blind_evidence_count": 0, "case_id": "motive_8bca20", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 1.0, "questions_used": 7, "reward": 0.8200000000000001, "seed": 20260437, "useless_questions_ratio": 0.8571428571428571}
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 27.0, "blind_evidence_count": 0, "case_id": "corporate_6b1664", "contradictions_surfaced": 1, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.5, "questions_used": 7, "reward": 0.42000000000000004, "seed": 20260438, "useless_questions_ratio": 0.8571428571428571}
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 38.0, "blind_evidence_count": 0, "case_id": "alibi_a6c582", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 1.0, "questions_used": 7, "reward": 0.8200000000000001, "seed": 20260439, "useless_questions_ratio": 0.8571428571428571}
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 30.0, "blind_evidence_count": 0, "case_id": "workplace_835476", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 1.0, "questions_used": 7, "reward": 0.8200000000000001, "seed": 20260440, "useless_questions_ratio": 0.8571428571428571}
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 47.0, "blind_evidence_count": 0, "case_id": "possession_a079c5", "contradictions_surfaced": 1, "contradictions_total": 3, "contradictions_triggered": 1, "difficulty": "hard", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.3333333333333333, "questions_used": 7, "reward": 0.2866666666666667, "seed": 20260441, "useless_questions_ratio": 0.8571428571428571}
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 43.0, "blind_evidence_count": 0, "case_id": "possession_9cc45d", "contradictions_surfaced": 1, "contradictions_total": 3, "contradictions_triggered": 1, "difficulty": "hard", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.3333333333333333, "questions_used": 7, "reward": 0.2866666666666667, "seed": 20260442, "useless_questions_ratio": 0.8571428571428571}
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 42.0, "blind_evidence_count": 0, "case_id": "possession_259aa5", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 1.0, "questions_used": 7, "reward": 0.8200000000000001, "seed": 20260443, "useless_questions_ratio": 0.8571428571428571}
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 27.0, "blind_evidence_count": 0, "case_id": "corporate_76724c", "contradictions_surfaced": 1, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.5, "questions_used": 7, "reward": 0.42000000000000004, "seed": 20260444, "useless_questions_ratio": 0.8571428571428571}
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.05000000000000002, "avg_question_length": 44.0, "blind_evidence_count": 0, "case_id": "timeline_767821", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.0, "questions_used": 7, "reward": 0.010000000000000004, "seed": 20260445, "useless_questions_ratio": 0.7142857142857143}
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 35.0, "blind_evidence_count": 0, "case_id": "motive_c0d166", "contradictions_surfaced": 1, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.5, "questions_used": 7, "reward": 0.42000000000000004, "seed": 20260446, "useless_questions_ratio": 0.8571428571428571}
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 27.0, "blind_evidence_count": 0, "case_id": "corporate_307934", "contradictions_surfaced": 1, "contradictions_total": 3, "contradictions_triggered": 1, "difficulty": "hard", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.3333333333333333, "questions_used": 7, "reward": 0.2866666666666667, "seed": 20260447, "useless_questions_ratio": 0.8571428571428571}
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.05000000000000002, "avg_question_length": 39.0, "blind_evidence_count": 0, "case_id": "timeline_592816", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 1, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.0, "questions_used": 7, "reward": 0.010000000000000004, "seed": 20260448, "useless_questions_ratio": 0.7142857142857143}
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 38.0, "blind_evidence_count": 0, "case_id": "knowledge_b26824", "contradictions_surfaced": 1, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.5, "questions_used": 7, "reward": 0.42000000000000004, "seed": 20260449, "useless_questions_ratio": 0.8571428571428571}
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 43.0, "blind_evidence_count": 0, "case_id": "knowledge_697785", "contradictions_surfaced": 1, "contradictions_total": 3, "contradictions_triggered": 1, "difficulty": "hard", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.3333333333333333, "questions_used": 7, "reward": 0.2866666666666667, "seed": 20260450, "useless_questions_ratio": 0.8571428571428571}
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.05000000000000002, "avg_question_length": 38.0, "blind_evidence_count": 0, "case_id": "timeline_81dafd", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.0, "questions_used": 7, "reward": 0.010000000000000004, "seed": 20260451, "useless_questions_ratio": 0.7142857142857143}
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 27.0, "blind_evidence_count": 0, "case_id": "corporate_8eb7d7", "contradictions_surfaced": 1, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.5, "questions_used": 7, "reward": 0.42000000000000004, "seed": 20260452, "useless_questions_ratio": 0.8571428571428571}
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 45.0, "blind_evidence_count": 0, "case_id": "possession_dbb5fe", "contradictions_surfaced": 1, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.5, "questions_used": 7, "reward": 0.42000000000000004, "seed": 20260453, "useless_questions_ratio": 0.8571428571428571}
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 39.0, "blind_evidence_count": 0, "case_id": "alibi_a4666f", "contradictions_surfaced": 1, "contradictions_total": 3, "contradictions_triggered": 1, "difficulty": "hard", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.3333333333333333, "questions_used": 7, "reward": 0.2866666666666667, "seed": 20260454, "useless_questions_ratio": 0.8571428571428571}

View File

@@ -0,0 +1,52 @@
[
{
"agent": "keyword_spam",
"avg_evidence_timing": 0,
"avg_primary_reward": 0.0,
"avg_reward": 0.07300000000000001,
"avg_surface_rate": 0.0,
"avg_trigger_rate": 0.6777777777777778,
"avg_useless_ratio": 0.48000000000000004,
"episodes": 30
},
{
"agent": "present_all",
"avg_evidence_timing": 0,
"avg_primary_reward": 0.0,
"avg_reward": 0.0,
"avg_surface_rate": 0.0,
"avg_trigger_rate": 0.0,
"avg_useless_ratio": 0.0,
"episodes": 30
},
{
"agent": "random",
"avg_evidence_timing": 0,
"avg_primary_reward": 0.0,
"avg_reward": 0.0,
"avg_surface_rate": 0.0,
"avg_trigger_rate": 0.0,
"avg_useless_ratio": 1.0,
"episodes": 30
},
{
"agent": "scripted_oracle",
"avg_evidence_timing": 1.9,
"avg_primary_reward": 0.95,
"avg_reward": 0.9023333333333334,
"avg_surface_rate": 0.95,
"avg_trigger_rate": 0.95,
"avg_useless_ratio": 0.0,
"episodes": 30
},
{
"agent": "trained_sft_grpo_run2",
"avg_evidence_timing": 0.8,
"avg_primary_reward": 0.4611111111111111,
"avg_reward": 0.3868888888888889,
"avg_surface_rate": 0.4611111111111111,
"avg_trigger_rate": 0.5888888888888889,
"avg_useless_ratio": 0.8285714285714285,
"episodes": 30
}
]

View File

@@ -0,0 +1,137 @@
# Agent: trained_sft_grpo_run2
## timeline_255d67 (easy)
Esme Dubois is accused of assault at the public library on Walnut Street on May 28th. The victim claims the assault occurred at 11:45 PM. The defendant claims they were elsewhere at that time.
### Transcript
- Step 1 `ask_question`: Esme Dubois is accused of assault at the public library on Walnut Street on May 28th. The victim claims the assault occurred at 11:45 PM on the victim's location. The defendant claims the victim was elsewhere at that time.
- Witness: The assault happened at 11:00 PM, not 11:45 PM.
- Triggered: c1_time_of_assault; Surfaced: none
- Step 2 `present_evidence`: victor_statement
- Witness: Invalid exhibit ID.
- Triggered: c1_time_of_assault; Surfaced: none
- Step 3 `ask_question`: Esme Dubois is accused of theft at the public library on Walnut Street on May 28th. The victim claims the theft occurred at 11:45 PM on the victim's location. The defendant claims the victim was elsewhere at that time.
- Witness: The assault happened at 11:00 PM, not 11:45 PM.
- Triggered: c1_time_of_assault; Surfaced: none
- Step 4 `ask_question`: Esme Dubois is accused of theft at the public library on Walnut Street on May 28th. The victim claims the theft occurred at 11:45 PM on the victim's location. The defendant claims the victim was elsewhere at that time.
- Witness: You've already asked that question.
- Triggered: c1_time_of_assault; Surfaced: none
- Step 5 `ask_question`: Esme Dubois is accused of theft at the public library on Walnut Street on May 28th. The victim claims the theft occurred at 11:45 PM on the victim's location. The defendant claims the victim was elsewhere at that time.
- Witness: You've already asked that question.
- Triggered: c1_time_of_assault; Surfaced: none
- Step 6 `ask_question`: Esme Dubois is accused of theft at the public library on Walnut Street on May 28th. The victim claims the theft occurred at 11:45 PM on the victim's location. The defendant claims the victim was elsewhere at that time.
- Witness: You've already asked that question.
- Triggered: c1_time_of_assault; Surfaced: none
- Step 7 `ask_question`: Esme Dubois is accused of theft at the public library on Walnut Street on May 28th. The victim claims the theft occurred at 11:45 PM on the victim's location. The defendant claims the victim was elsewhere at that time.
- Witness: You've already asked that question.
- Triggered: c1_time_of_assault; Surfaced: none
- Step 8 `ask_question`: Esme Dubois is accused of theft at the public library on Walnut Street on May 28th. The victim claims the theft occurred at 11:45 PM on the victim's location. The defendant claims the victim was elsewhere at that time.
- Witness: You've already asked that question.
- Triggered: c1_time_of_assault; Surfaced: none
- Step 9 `rest_case`: rest_case
- Witness: [Counsel rests.]
- Triggered: c1_time_of_assault; Surfaced: none
### Reward
```json
{"primary_reward": 0.0, "auxiliary_reward_raw": 0.05000000000000002, "total_reward": 0.010000000000000004, "contradictions_total": 1.0, "contradictions_triggered": 1.0, "contradictions_surfaced": 0.0, "keyword_questions": 2.0, "evidence_timing_successes": 0.0, "blind_evidence_count": 0.0, "duplicate_questions": 5.0, "irrelevant_questions": 0.0, "inadmissible_actions": 1.0, "useless_questions_ratio": 0.7142857142857143, "avg_question_length": 39.0}
```
---
# Agent: trained_sft_grpo_run2
## knowledge_b28f8c (medium)
Priya Sharma is charged with embezzlement from the diner at the corner of 9th and Broad. The state alleges Priya Sharma systematically diverted funds in coordination with the manager, Kavita Iyer, between December 5th and the present. The defendant denies knowing Kavita Iyer.
### Transcript
- Step 1 `ask_question`: CASE BRIEF:
Priya Sharma is charged with embezzlement from the diner at the corner of 9th and Broad. The state alleges Priya Sharma systematically diverted funds in coordination with the manager, Kavita Iyer, between December 5th and the present. The defendant denies knowing Kavita Iyer.
- Witness: I have never met Kavita Iyer in my life.
- Triggered: c1_know_victim; Surfaced: none
- Step 2 `present_evidence`: email_chain
- Witness: [Witness stammers] I... I'm not sure what to say.
- Triggered: c1_know_victim; Surfaced: c1_know_victim
- Step 3 `ask_question`: CASE BRIEF:
Priya Sharma is charged with embezzlement from the diner at the corner of 9th and Broad. The state alleges Priya Sharma systematically diverted funds in coordination with the manager, Kavita Iyer, between December 5th and the present. The defendant denies knowing Kavita Iyer.
- Witness: You've already asked that question.
- Triggered: c1_know_victim; Surfaced: c1_know_victim
- Step 4 `ask_question`: CASE BRIEF:
Priya Sharma is charged with embezzlement from the diner at the corner of 9th and Broad. The state alleges Priya Sharma systematically diverted funds in coordination with the manager, Kavita Iyer, between December 5th and the present. The defendant denies knowing Kavita Iyer.
- Witness: You've already asked that question.
- Triggered: c1_know_victim; Surfaced: c1_know_victim
- Step 5 `ask_question`: CASE BRIEF:
Priya Sharma is charged with embezzlement from the diner at the corner of 9th and Broad. The state alleges Priya Sharma systematically diverted funds in coordination with the manager, Kavita Iyer, between December 5th and the present. The defendant denies knowing Kavita Iyer.
- Witness: You've already asked that question.
- Triggered: c1_know_victim; Surfaced: c1_know_victim
- Step 6 `ask_question`: CASE BRIEF:
Priya Sharma is charged with embezzlement from the diner at the corner of 9th and Broad. The state alleges Priya Sharma systematically diverted funds in coordination with the manager, Kavita Iyer, between December 5th and the present. The defendant denies knowing Kavita Iyer.
- Witness: You've already asked that question.
- Triggered: c1_know_victim; Surfaced: c1_know_victim
- Step 7 `ask_question`: CASE BRIEF:
Priya Sharma is charged with embezzlement from the diner at the corner of 9th and Broad. The state alleges Priya Sharma systematically diverted funds in coordination with the manager, Kavita Iyer, between December 5th and the present. The defendant denies knowing Kavita Iyer.
- Witness: You've already asked that question.
- Triggered: c1_know_victim; Surfaced: c1_know_victim
- Step 8 `ask_question`: CASE BRIEF:
Priya Sharma is charged with embezzlement from the diner at the corner of 9th and Broad. The state alleges Priya Sharma systematically diverted funds in coordination with the manager, Kavita Iyer, between December 5th and the present. The defendant denies knowing Kavita Iyer.
- Witness: You've already asked that question.
- Triggered: c1_know_victim; Surfaced: c1_know_victim
- Step 9 `rest_case`: rest_case
- Witness: [Counsel rests.]
- Triggered: c1_know_victim; Surfaced: c1_know_victim
### Reward
```json
{"primary_reward": 0.5, "auxiliary_reward_raw": 0.09999999999999998, "total_reward": 0.42000000000000004, "contradictions_total": 2.0, "contradictions_triggered": 1.0, "contradictions_surfaced": 1.0, "keyword_questions": 1.0, "evidence_timing_successes": 1.0, "blind_evidence_count": 0.0, "duplicate_questions": 6.0, "irrelevant_questions": 0.0, "inadmissible_actions": 0.0, "useless_questions_ratio": 0.8571428571428571, "avg_question_length": 45.0}
```
---
# Agent: trained_sft_grpo_run2
## workplace_c98377 (easy)
Priya Sharma is questioned in an HR investigation after Ravi Joshi reported retaliation at the HR office on March 14th. The manager denies sending threatening messages or being nearby.
### Transcript
- Step 1 `ask_question`: CASE BRIEF:
Priya Sharma is questioned in an HR investigation after Ravi Joshi reported retaliation at the HR office on March 14th. The manager denies sending threatening messages or being nearby.
- Witness: I never sent Ravi Joshi any threatening message.
- Triggered: c1_message_denial; Surfaced: none
- Step 2 `present_evidence`: message_export
- Witness: [Witness stammers] I... I'm not sure what to say.
- Triggered: c1_message_denial; Surfaced: c1_message_denial
- Step 3 `ask_question`: CASE BRIEF:
Priya Sharma is questioned in an HR investigation after Ravi Joshi reported retaliation at the HR office on March 14th. The manager denies sending threatening messages or being nearby.
- Witness: You've already asked that question.
- Triggered: c1_message_denial; Surfaced: c1_message_denial
- Step 4 `ask_question`: CASE BRIEF:
Priya Sharma is questioned in an HR investigation after Ravi Joshi reported retaliation at the HR office on March 14th. The manager denies sending threatening messages or being nearby.
- Witness: You've already asked that question.
- Triggered: c1_message_denial; Surfaced: c1_message_denial
- Step 5 `ask_question`: CASE BRIEF:
Priya Sharma is questioned in an HR investigation after Ravi Joshi reported retaliation at the HR office on March 14th. The manager denies sending threatening messages or being nearby.
- Witness: You've already asked that question.
- Triggered: c1_message_denial; Surfaced: c1_message_denial
- Step 6 `ask_question`: CASE BRIEF:
Priya Sharma is questioned in an HR investigation after Ravi Joshi reported retaliation at the HR office on March 14th. The manager denies sending threatening messages or being nearby.
- Witness: You've already asked that question.
- Triggered: c1_message_denial; Surfaced: c1_message_denial
- Step 7 `ask_question`: CASE BRIEF:
Priya Sharma is questioned in an HR investigation after Ravi Joshi reported retaliation at the HR office on March 14th. The manager denies sending threatening messages or being nearby.
- Witness: You've already asked that question.
- Triggered: c1_message_denial; Surfaced: c1_message_denial
- Step 8 `ask_question`: CASE BRIEF:
Priya Sharma is questioned in an HR investigation after Ravi Joshi reported retaliation at the HR office on March 14th. The manager denies sending threatening messages or being nearby.
- Witness: You've already asked that question.
- Triggered: c1_message_denial; Surfaced: c1_message_denial
- Step 9 `rest_case`: rest_case
- Witness: [Counsel rests.]
- Triggered: c1_message_denial; Surfaced: c1_message_denial
### Reward
```json
{"primary_reward": 1.0, "auxiliary_reward_raw": 0.09999999999999998, "total_reward": 0.8200000000000001, "contradictions_total": 1.0, "contradictions_triggered": 1.0, "contradictions_surfaced": 1.0, "keyword_questions": 1.0, "evidence_timing_successes": 1.0, "blind_evidence_count": 0.0, "duplicate_questions": 6.0, "irrelevant_questions": 0.0, "inadmissible_actions": 0.0, "useless_questions_ratio": 0.8571428571428571, "avg_question_length": 31.0}
```

12
generation_config.json Normal file
View File

@@ -0,0 +1,12 @@
{
"do_sample": true,
"eos_token_id": [
151645,
151643
],
"pad_token_id": 151643,
"temperature": 0.6,
"top_k": 20,
"top_p": 0.95,
"transformers_version": "5.6.2"
}

3
model.safetensors Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5eb33e25e043c4103e6cee6aa637be513e7c2c0b7afb8cdd2325c7891721e22f
size 2384234968

View File

@@ -0,0 +1,89 @@
{#- Chat template: renders a list of messages as '<|im_start|>role ... <|im_end|>' turns,
    with optional tool definitions, assistant reasoning blocks, tool calls and tool responses.
    Inputs read here: messages, tools, add_generation_prompt, enable_thinking. #}
{#- System turn when tools are supplied: the leading system message (if any) is followed
    by the tool signatures and the expected <tool_call> output format. #}
{%- if tools %}
{{- '<|im_start|>system\n' }}
{%- if messages[0].role == 'system' %}
{{- messages[0].content + '\n\n' }}
{%- endif %}
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
{#- One JSON-serialized tool definition per line inside <tools>. #}
{%- for tool in tools %}
{{- "\n" }}
{{- tool | tojson }}
{%- endfor %}
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
{%- else %}
{#- No tools: a leading system message is emitted as its own turn. #}
{%- if messages[0].role == 'system' %}
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{#- Locate the last "real" user query: walking the messages in reverse, record the index of
    the first user message whose content is a string and is not wholly wrapped in
    <tool_response>...</tool_response>. multi_step_tool acts as a latch so only that first
    match is kept; if none matches, last_query_index stays at the last message index. #}
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
{%- for message in messages[::-1] %}
{%- set index = (messages|length - 1) - loop.index0 %}
{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
{%- set ns.multi_step_tool = false %}
{%- set ns.last_query_index = index %}
{%- endif %}
{%- endfor %}
{#- Main pass: render every message in order. #}
{%- for message in messages %}
{#- Non-string content is rendered as an empty string. #}
{%- if message.content is string %}
{%- set content = message.content %}
{%- else %}
{%- set content = '' %}
{%- endif %}
{#- User turns, and system messages that are not the first message, are emitted verbatim. #}
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
{%- elif message.role == "assistant" %}
{#- Reasoning comes from message.reasoning_content when that is a string; otherwise it is
    extracted from content: the text before the first '</think>' (after the last '<think>'
    in that prefix) becomes the reasoning, and the text after the last '</think>' becomes
    the visible content. #}
{%- set reasoning_content = '' %}
{%- if message.reasoning_content is string %}
{%- set reasoning_content = message.reasoning_content %}
{%- else %}
{%- if '</think>' in content %}
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
{%- endif %}
{%- endif %}
{#- A <think> block is written only for assistant messages after the last user query, and
    only when the message is the final one or has non-empty reasoning. For all other
    assistant messages the reasoning is omitted and only the content is written. #}
{%- if loop.index0 > ns.last_query_index %}
{%- if loop.last or (not loop.last and reasoning_content) %}
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
{%- else %}
{{- '<|im_start|>' + message.role + '\n' + content }}
{%- endif %}
{%- else %}
{{- '<|im_start|>' + message.role + '\n' + content }}
{%- endif %}
{#- Tool calls are appended after the content, one <tool_call> element per call. #}
{%- if message.tool_calls %}
{%- for tool_call in message.tool_calls %}
{#- Newline separator before each call, skipped only for the first call when content is empty. #}
{%- if (loop.first and content) or (not loop.first) %}
{{- '\n' }}
{%- endif %}
{#- Accept both the wrapped shape (tool_call.function.name/arguments) and the flat shape. #}
{%- if tool_call.function %}
{%- set tool_call = tool_call.function %}
{%- endif %}
{{- '<tool_call>\n{"name": "' }}
{{- tool_call.name }}
{{- '", "arguments": ' }}
{#- String arguments are written as-is; any other value is JSON-serialized. #}
{%- if tool_call.arguments is string %}
{{- tool_call.arguments }}
{%- else %}
{{- tool_call.arguments | tojson }}
{%- endif %}
{{- '}\n</tool_call>' }}
{%- endfor %}
{%- endif %}
{{- '<|im_end|>\n' }}
{%- elif message.role == "tool" %}
{#- A run of consecutive tool messages is grouped into a single user turn: the turn is
    opened at the first tool message of the run and closed after the last one. #}
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
{{- '<|im_start|>user' }}
{%- endif %}
{{- '\n<tool_response>\n' }}
{{- content }}
{{- '\n</tool_response>' }}
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
{{- '<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{%- endfor %}
{#- Generation prompt: open an assistant turn; when enable_thinking is defined and false,
    pre-fill an empty <think> block. #}
{%- if add_generation_prompt %}
{{- '<|im_start|>assistant\n' }}
{%- if enable_thinking is defined and enable_thinking is false %}
{{- '<think>\n\n</think>\n\n' }}
{%- endif %}
{%- endif %}

View File

@@ -0,0 +1,63 @@
{
"architectures": [
"Qwen3ForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": null,
"dtype": "bfloat16",
"eos_token_id": 151645,
"head_dim": 128,
"hidden_act": "silu",
"hidden_size": 1024,
"initializer_range": 0.02,
"intermediate_size": 3072,
"layer_types": [
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention"
],
"max_position_embeddings": 40960,
"max_window_layers": 28,
"model_type": "qwen3",
"num_attention_heads": 16,
"num_hidden_layers": 28,
"num_key_value_heads": 8,
"pad_token_id": 151643,
"rms_norm_eps": 1e-06,
"rope_parameters": {
"rope_theta": 1000000,
"rope_type": "default"
},
"sliding_window": null,
"tie_word_embeddings": true,
"transformers_version": "5.6.2",
"use_cache": false,
"use_sliding_window": false,
"vocab_size": 151936
}

View File

@@ -0,0 +1,12 @@
{
"do_sample": true,
"eos_token_id": [
151645,
151643
],
"pad_token_id": 151643,
"temperature": 0.6,
"top_k": 20,
"top_p": 0.95,
"transformers_version": "5.6.2"
}

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3aa735583653d63e0b164d0872f4bd2aa4147f31ba523b70fff69fb169b24869
size 1192135096

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f3cdba37323e45d3a891de61878a08412fb1d85f4ade068eeffc03740ec9e275
size 11422748

View File

@@ -0,0 +1,75 @@
{
"add_prefix_space": false,
"backend": "tokenizers",
"bos_token": null,
"clean_up_tokenization_spaces": false,
"eos_token": "<|im_end|>",
"errors": "replace",
"extra_special_tokens": [
"<|im_start|>",
"<|im_end|>",
"<|object_ref_start|>",
"<|object_ref_end|>",
"<|box_start|>",
"<|box_end|>",
"<|quad_start|>",
"<|quad_end|>",
"<|vision_start|>",
"<|vision_end|>",
"<|vision_pad|>",
"<|image_pad|>",
"<|video_pad|>"
],
"is_local": false,
"local_files_only": false,
"model_max_length": 131072,
"pad_token": "<|endoftext|>",
"padding_side": "left",
"response_schema": {
"properties": {
"content": {
"type": "string"
},
"reasoning_content": {
"type": "string"
},
"role": {
"const": "assistant"
},
"tool_calls": {
"items": {
"properties": {
"function": {
"properties": {
"arguments": {
"additionalProperties": {},
"type": "object"
},
"name": {
"type": "string"
}
},
"type": "object"
},
"type": {
"const": "function"
}
},
"type": "object",
"x-parser": "json",
"x-parser-args": {
"transform": "{type: 'function', function: @}"
}
},
"type": "array",
"x-regex-iterator": "<tool_call>\\s*(.+?)\\s*</tool_call>"
}
},
"type": "object",
"x-regex": "^(?:<think>\\n?(?:(?P<reasoning_content>.*?\\S.*?)\\n?|[\\s]*)</think>\\s*)?(?P<content>.*?)(?:\\n(?=<tool_call>))?(?=(?:<tool_call>|<\\|im_end\\|>|$))(?P<tool_calls>(?:<tool_call>.+?</tool_call>\\s*)+)?\\s*(?:<\\|im_end\\|>|$)"
},
"split_special_tokens": false,
"tokenizer_class": "Qwen2Tokenizer",
"truncation_side": "left",
"unk_token": null
}

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:44e78eec69cef56830cc59b94a060abe906ff4571c26fb4566a6bdcb27e4499d
size 5265

3
tokenizer.json Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506
size 11422650

78
tokenizer_config.json Normal file
View File

@@ -0,0 +1,78 @@
{
"add_prefix_space": false,
"backend": "tokenizers",
"bos_token": null,
"clean_up_tokenization_spaces": false,
"eos_token": "<|im_end|>",
"errors": "replace",
"extra_special_tokens": [
"<|im_start|>",
"<|im_end|>",
"<|object_ref_start|>",
"<|object_ref_end|>",
"<|box_start|>",
"<|box_end|>",
"<|quad_start|>",
"<|quad_end|>",
"<|vision_start|>",
"<|vision_end|>",
"<|vision_pad|>",
"<|image_pad|>",
"<|video_pad|>"
],
"is_local": true,
"local_files_only": false,
"max_length": 3072,
"model_max_length": 131072,
"pad_token": "<|endoftext|>",
"padding_side": "left",
"response_schema": {
"properties": {
"content": {
"type": "string"
},
"reasoning_content": {
"type": "string"
},
"role": {
"const": "assistant"
},
"tool_calls": {
"items": {
"properties": {
"function": {
"properties": {
"arguments": {
"additionalProperties": {},
"type": "object"
},
"name": {
"type": "string"
}
},
"type": "object"
},
"type": {
"const": "function"
}
},
"type": "object",
"x-parser": "json",
"x-parser-args": {
"transform": "{type: 'function', function: @}"
}
},
"type": "array",
"x-regex-iterator": "<tool_call>\\s*(.+?)\\s*</tool_call>"
}
},
"type": "object",
"x-regex": "^(?:<think>\\n?(?:(?P<reasoning_content>.*?\\S.*?)\\n?|[\\s]*)</think>\\s*)?(?P<content>.*?)(?:\\n(?=<tool_call>))?(?=(?:<tool_call>|<\\|im_end\\|>|$))(?P<tool_calls>(?:<tool_call>.+?</tool_call>\\s*)+)?\\s*(?:<\\|im_end\\|>|$)"
},
"split_special_tokens": false,
"stride": 0,
"tokenizer_class": "Qwen2Tokenizer",
"truncation_side": "left",
"truncation_strategy": "longest_first",
"unk_token": null
}

3
training_args.bin Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:623158c464bdeaec4fa82a52817596e39771e95ece6659a737aa9e9d07d03327
size 7185

23
training_summary.json Normal file
View File

@@ -0,0 +1,23 @@
{
"artifact_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2",
"env_url": "https://heavycoderhh-counsel-env.hf.space",
"evidence_pressure": 1.4,
"grpo_dataset_size": 160,
"grpo_learning_rate": 5e-06,
"grpo_max_steps": 250,
"max_completion_length": 320,
"metrics": {
"total_flos": 0.0,
"train_loss": -0.00972448107972741,
"train_runtime": 4103.8977,
"train_samples_per_second": 0.244,
"train_steps_per_second": 0.061
},
"model": "heavycoderhh/counsel-env-qwen3-0.6b-grpo",
"num_generations": 4,
"sft_dataset_size": 320,
"sft_dir": "/tmp/counsel-sft-grpo-output/sft_warm_start",
"sft_epochs": 1.0,
"sft_learning_rate": 1e-05,
"space_repo": "heavycoderhh/counsel-env"
}