初始化项目,由ModelHub XC社区提供模型

Model: heavycoderhh/counsel-env-qwen3-0.6b-grpo
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-06-16 07:21:17 +08:00
commit 2ea3d793ee
76 changed files with 3284 additions and 0 deletions

38
.gitattributes vendored Normal file
View File

@@ -0,0 +1,38 @@
# Git LFS tracking rules.
# Every pattern below is routed through the LFS filter/diff/merge drivers and
# marked `-text`, so Git applies no line-ending normalization to these files.
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
# Everything beneath the saved_model/ directory, at any depth.
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
# Any file whose name contains "tfevents".
*tfevents* filter=lfs diff=lfs merge=lfs -text
# Tokenizer JSON files are tracked by explicit path (per checkpoint and at the root).
checkpoint-100/tokenizer.json filter=lfs diff=lfs merge=lfs -text
checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
tokenizer.json filter=lfs diff=lfs merge=lfs -text

68
README.md Normal file
View File

@@ -0,0 +1,68 @@
---
base_model: Qwen/Qwen3-0.6B
library_name: transformers
model_name: counsel-grpo-output
tags:
- generated_from_trainer
- trl
- grpo
- hf_jobs
licence: license
---
# Model Card for counsel-grpo-output
This model is a fine-tuned version of [Qwen/Qwen3-0.6B](https://huggingface.co/Qwen/Qwen3-0.6B).
It has been trained using [TRL](https://github.com/huggingface/trl).
## Quick start
```python
from transformers import pipeline

# Prompt sent to the model as a single user turn in chat format.
question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
# Load this repository's checkpoint by its model id (the string "None" is not a valid id).
generator = pipeline("text-generation", model="heavycoderhh/counsel-env-qwen3-0.6b-grpo", device="cuda")
# return_full_text=False keeps only the newly generated reply, not the prompt.
output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
print(output["generated_text"])
```
## Training procedure
This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
### Framework versions
- TRL: 1.2.0
- Transformers: 5.6.2
- PyTorch: 2.11.0
- Datasets: 4.8.4
- Tokenizers: 0.22.2
## Citations
Cite GRPO as:
```bibtex
@article{shao2024deepseekmath,
title = {{DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models}},
author = {Zhihong Shao and Peiyi Wang and Qihao Zhu and Runxin Xu and Junxiao Song and Mingchuan Zhang and Y. K. Li and Y. Wu and Daya Guo},
year = 2024,
eprint = {arXiv:2402.03300},
}
```
Cite TRL as:
```bibtex
@software{vonwerra2020trl,
title = {{TRL: Transformers Reinforcement Learning}},
author = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin},
license = {Apache-2.0},
url = {https://github.com/huggingface/trl},
year = {2020}
}
```

89
chat_template.jinja Normal file
View File

@@ -0,0 +1,89 @@
{#- Chat template: renders `messages` (and optional `tools`) into turns delimited by <|im_start|> and <|im_end|>. -#}
{#- With tools: open a system turn holding the optional leading system message, the tool-calling instructions, and one JSON-serialized signature per tool. -#}
{%- if tools %}
{{- '<|im_start|>system\n' }}
{%- if messages[0].role == 'system' %}
{{- messages[0].content + '\n\n' }}
{%- endif %}
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
{%- for tool in tools %}
{{- "\n" }}
{{- tool | tojson }}
{%- endfor %}
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
{#- Without tools: emit the leading system message, if there is one, as its own turn. -#}
{%- else %}
{%- if messages[0].role == 'system' %}
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{#- Walk the messages backwards to record the index of the last user message that is not a wrapped <tool_response>; if none is found, last_query_index keeps its default (the final index). -#}
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
{%- for message in messages[::-1] %}
{%- set index = (messages|length - 1) - loop.index0 %}
{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
{%- set ns.multi_step_tool = false %}
{%- set ns.last_query_index = index %}
{%- endif %}
{%- endfor %}
{#- Main pass: render every message according to its role. -#}
{%- for message in messages %}
{#- Content that is not a string is rendered as an empty string. -#}
{%- if message.content is string %}
{%- set content = message.content %}
{%- else %}
{%- set content = '' %}
{%- endif %}
{#- User turns, and system messages other than the first message, are emitted as-is. -#}
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
{%- elif message.role == "assistant" %}
{#- Reasoning is taken from message.reasoning_content when it is a string; otherwise it is split out of content around the </think> tag. -#}
{%- set reasoning_content = '' %}
{%- if message.reasoning_content is string %}
{%- set reasoning_content = message.reasoning_content %}
{%- else %}
{%- if '</think>' in content %}
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
{%- endif %}
{%- endif %}
{#- Only assistant messages after the last user query receive a <think> block: always for the final message, otherwise only when reasoning is non-empty. Earlier assistant turns are emitted without reasoning. -#}
{%- if loop.index0 > ns.last_query_index %}
{%- if loop.last or (not loop.last and reasoning_content) %}
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
{%- else %}
{{- '<|im_start|>' + message.role + '\n' + content }}
{%- endif %}
{%- else %}
{{- '<|im_start|>' + message.role + '\n' + content }}
{%- endif %}
{#- Append each tool call as a <tool_call> JSON object; a newline separates it from preceding content or from the previous call. -#}
{%- if message.tool_calls %}
{%- for tool_call in message.tool_calls %}
{%- if (loop.first and content) or (not loop.first) %}
{{- '\n' }}
{%- endif %}
{#- Accept both the wrapped form (tool_call.function) and a bare object carrying name/arguments. -#}
{%- if tool_call.function %}
{%- set tool_call = tool_call.function %}
{%- endif %}
{{- '<tool_call>\n{"name": "' }}
{{- tool_call.name }}
{{- '", "arguments": ' }}
{#- String arguments are emitted unchanged; any other value is serialized with tojson. -#}
{%- if tool_call.arguments is string %}
{{- tool_call.arguments }}
{%- else %}
{{- tool_call.arguments | tojson }}
{%- endif %}
{{- '}\n</tool_call>' }}
{%- endfor %}
{%- endif %}
{{- '<|im_end|>\n' }}
{#- Consecutive tool messages are grouped into one user turn, each wrapped in <tool_response> tags. -#}
{%- elif message.role == "tool" %}
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
{{- '<|im_start|>user' }}
{%- endif %}
{{- '\n<tool_response>\n' }}
{{- content }}
{{- '\n</tool_response>' }}
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
{{- '<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{%- endfor %}
{#- Generation prompt: open an assistant turn; when enable_thinking is explicitly false, pre-fill an empty <think> block. -#}
{%- if add_generation_prompt %}
{{- '<|im_start|>assistant\n' }}
{%- if enable_thinking is defined and enable_thinking is false %}
{{- '<think>\n\n</think>\n\n' }}
{%- endif %}
{%- endif %}

View File

@@ -0,0 +1,89 @@
{#- Chat template: renders `messages` (and optional `tools`) into turns delimited by <|im_start|> and <|im_end|>. -#}
{#- With tools: open a system turn holding the optional leading system message, the tool-calling instructions, and one JSON-serialized signature per tool. -#}
{%- if tools %}
{{- '<|im_start|>system\n' }}
{%- if messages[0].role == 'system' %}
{{- messages[0].content + '\n\n' }}
{%- endif %}
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
{%- for tool in tools %}
{{- "\n" }}
{{- tool | tojson }}
{%- endfor %}
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
{#- Without tools: emit the leading system message, if there is one, as its own turn. -#}
{%- else %}
{%- if messages[0].role == 'system' %}
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{#- Walk the messages backwards to record the index of the last user message that is not a wrapped <tool_response>; if none is found, last_query_index keeps its default (the final index). -#}
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
{%- for message in messages[::-1] %}
{%- set index = (messages|length - 1) - loop.index0 %}
{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
{%- set ns.multi_step_tool = false %}
{%- set ns.last_query_index = index %}
{%- endif %}
{%- endfor %}
{#- Main pass: render every message according to its role. -#}
{%- for message in messages %}
{#- Content that is not a string is rendered as an empty string. -#}
{%- if message.content is string %}
{%- set content = message.content %}
{%- else %}
{%- set content = '' %}
{%- endif %}
{#- User turns, and system messages other than the first message, are emitted as-is. -#}
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
{%- elif message.role == "assistant" %}
{#- Reasoning is taken from message.reasoning_content when it is a string; otherwise it is split out of content around the </think> tag. -#}
{%- set reasoning_content = '' %}
{%- if message.reasoning_content is string %}
{%- set reasoning_content = message.reasoning_content %}
{%- else %}
{%- if '</think>' in content %}
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
{%- endif %}
{%- endif %}
{#- Only assistant messages after the last user query receive a <think> block: always for the final message, otherwise only when reasoning is non-empty. Earlier assistant turns are emitted without reasoning. -#}
{%- if loop.index0 > ns.last_query_index %}
{%- if loop.last or (not loop.last and reasoning_content) %}
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
{%- else %}
{{- '<|im_start|>' + message.role + '\n' + content }}
{%- endif %}
{%- else %}
{{- '<|im_start|>' + message.role + '\n' + content }}
{%- endif %}
{#- Append each tool call as a <tool_call> JSON object; a newline separates it from preceding content or from the previous call. -#}
{%- if message.tool_calls %}
{%- for tool_call in message.tool_calls %}
{%- if (loop.first and content) or (not loop.first) %}
{{- '\n' }}
{%- endif %}
{#- Accept both the wrapped form (tool_call.function) and a bare object carrying name/arguments. -#}
{%- if tool_call.function %}
{%- set tool_call = tool_call.function %}
{%- endif %}
{{- '<tool_call>\n{"name": "' }}
{{- tool_call.name }}
{{- '", "arguments": ' }}
{#- String arguments are emitted unchanged; any other value is serialized with tojson. -#}
{%- if tool_call.arguments is string %}
{{- tool_call.arguments }}
{%- else %}
{{- tool_call.arguments | tojson }}
{%- endif %}
{{- '}\n</tool_call>' }}
{%- endfor %}
{%- endif %}
{{- '<|im_end|>\n' }}
{#- Consecutive tool messages are grouped into one user turn, each wrapped in <tool_response> tags. -#}
{%- elif message.role == "tool" %}
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
{{- '<|im_start|>user' }}
{%- endif %}
{{- '\n<tool_response>\n' }}
{{- content }}
{{- '\n</tool_response>' }}
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
{{- '<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{%- endfor %}
{#- Generation prompt: open an assistant turn; when enable_thinking is explicitly false, pre-fill an empty <think> block. -#}
{%- if add_generation_prompt %}
{{- '<|im_start|>assistant\n' }}
{%- if enable_thinking is defined and enable_thinking is false %}
{{- '<think>\n\n</think>\n\n' }}
{%- endif %}
{%- endif %}

View File

@@ -0,0 +1,63 @@
{
"architectures": [
"Qwen3ForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": null,
"dtype": "float32",
"eos_token_id": 151645,
"head_dim": 128,
"hidden_act": "silu",
"hidden_size": 1024,
"initializer_range": 0.02,
"intermediate_size": 3072,
"layer_types": [
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention"
],
"max_position_embeddings": 40960,
"max_window_layers": 28,
"model_type": "qwen3",
"num_attention_heads": 16,
"num_hidden_layers": 28,
"num_key_value_heads": 8,
"pad_token_id": 151643,
"rms_norm_eps": 1e-06,
"rope_parameters": {
"rope_theta": 1000000,
"rope_type": "default"
},
"sliding_window": null,
"tie_word_embeddings": true,
"transformers_version": "5.6.2",
"use_cache": false,
"use_sliding_window": false,
"vocab_size": 151936
}

View File

@@ -0,0 +1,12 @@
{
"do_sample": true,
"eos_token_id": [
151645,
151643
],
"pad_token_id": 151643,
"temperature": 0.6,
"top_k": 20,
"top_p": 0.95,
"transformers_version": "5.6.2"
}

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8d6d4fdb7c0c098116c54803446e70c9228a6cdc56ed3830ea72a8c070fd4bb3
size 2384234968

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ec322709818f7f042ee53f6a8d9cddad701506a6aba2fabbcc3dde446f17f939
size 4768669395

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:124ff4ef2c0581fffe34c8548d45e1106badea301ebd85e0d9fbe9036540ae75
size 14645

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8af82574c5bd9fbcf30df6ea22f7df3c2ee640fc984580fab0f8dbb06ae38110
size 1465

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506
size 11422650

View File

@@ -0,0 +1,75 @@
{
"add_prefix_space": false,
"backend": "tokenizers",
"bos_token": null,
"clean_up_tokenization_spaces": false,
"eos_token": "<|im_end|>",
"errors": "replace",
"extra_special_tokens": [
"<|im_start|>",
"<|im_end|>",
"<|object_ref_start|>",
"<|object_ref_end|>",
"<|box_start|>",
"<|box_end|>",
"<|quad_start|>",
"<|quad_end|>",
"<|vision_start|>",
"<|vision_end|>",
"<|vision_pad|>",
"<|image_pad|>",
"<|video_pad|>"
],
"is_local": false,
"local_files_only": false,
"model_max_length": 131072,
"pad_token": "<|endoftext|>",
"padding_side": "left",
"response_schema": {
"properties": {
"content": {
"type": "string"
},
"reasoning_content": {
"type": "string"
},
"role": {
"const": "assistant"
},
"tool_calls": {
"items": {
"properties": {
"function": {
"properties": {
"arguments": {
"additionalProperties": {},
"type": "object"
},
"name": {
"type": "string"
}
},
"type": "object"
},
"type": {
"const": "function"
}
},
"type": "object",
"x-parser": "json",
"x-parser-args": {
"transform": "{type: 'function', function: @}"
}
},
"type": "array",
"x-regex-iterator": "<tool_call>\\s*(.+?)\\s*</tool_call>"
}
},
"type": "object",
"x-regex": "^(?:<think>\\n?(?:(?P<reasoning_content>.*?\\S.*?)\\n?|[\\s]*)</think>\\s*)?(?P<content>.*?)(?:\\n(?=<tool_call>))?(?=(?:<tool_call>|<\\|im_end\\|>|$))(?P<tool_calls>(?:<tool_call>.+?</tool_call>\\s*)+)?\\s*(?:<\\|im_end\\|>|$)"
},
"split_special_tokens": false,
"tokenizer_class": "Qwen2Tokenizer",
"truncation_side": "left",
"unk_token": null
}

View File

@@ -0,0 +1,634 @@
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.390625,
"eval_steps": 500,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 152.4,
"completions/max_terminated_length": 152.4,
"completions/mean_length": 117.8,
"completions/mean_terminated_length": 117.8,
"completions/min_length": 93.8,
"completions/min_terminated_length": 93.8,
"entropy": 0.20053473562002183,
"epoch": 0.01953125,
"frac_reward_zero_std": 0.0,
"grad_norm": 20.259794235229492,
"kl": 0.1606330933049321,
"learning_rate": 9.800000000000001e-06,
"loss": 0.020250317454338074,
"num_tokens": 11544.0,
"reward": 0.4544999921694398,
"reward_std": 0.2574530947953463,
"rewards/reward_func/mean": 0.4544999921694398,
"rewards/reward_func/std": 0.2574530977755785,
"step": 5,
"step_time": 8.736884885399922,
"tools/call_frequency": 3.1,
"tools/failure_frequency": 0.0
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 205.8,
"completions/max_terminated_length": 205.8,
"completions/mean_length": 174.4,
"completions/mean_terminated_length": 174.4,
"completions/min_length": 140.6,
"completions/min_terminated_length": 140.6,
"entropy": 0.45406929701566695,
"epoch": 0.0390625,
"frac_reward_zero_std": 0.0,
"grad_norm": 20.727792739868164,
"kl": 0.5858453318476677,
"learning_rate": 9.55e-06,
"loss": 0.00982179045677185,
"num_tokens": 24254.0,
"reward": 0.08549999967217445,
"reward_std": 0.14715017192065716,
"rewards/reward_func/mean": 0.08549999967217445,
"rewards/reward_func/std": 0.14715017192065716,
"step": 10,
"step_time": 11.54933958799811,
"tools/call_frequency": 3.45,
"tools/failure_frequency": 0.0
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.35,
"completions/max_length": 320.8,
"completions/max_terminated_length": 233.6,
"completions/mean_length": 232.35,
"completions/mean_terminated_length": 189.8,
"completions/min_length": 141.0,
"completions/min_terminated_length": 141.0,
"entropy": 0.5534275218844413,
"epoch": 0.05859375,
"frac_reward_zero_std": 0.2,
"grad_norm": 6.85633659362793,
"kl": 0.6838902086019516,
"learning_rate": 9.3e-06,
"loss": 0.026860833168029785,
"num_tokens": 38189.0,
"reward": 0.10483333505690098,
"reward_std": 0.16720814146101476,
"rewards/reward_func/mean": 0.10483333505690098,
"rewards/reward_func/std": 0.1672081384807825,
"step": 15,
"step_time": 18.40421949800075,
"tools/call_frequency": 4.35,
"tools/failure_frequency": 0.0
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.4,
"completions/max_length": 413.0,
"completions/max_terminated_length": 351.2,
"completions/mean_length": 340.5,
"completions/mean_terminated_length": 306.28333435058596,
"completions/min_length": 263.0,
"completions/min_terminated_length": 263.0,
"entropy": 0.5062903374433517,
"epoch": 0.078125,
"frac_reward_zero_std": 0.4,
"grad_norm": 6.486283302307129,
"kl": 0.5253867790102958,
"learning_rate": 9.050000000000001e-06,
"loss": -0.045569175481796266,
"num_tokens": 54196.0,
"reward": 0.09649999756366015,
"reward_std": 0.10897674113512039,
"rewards/reward_func/mean": 0.09649999756366015,
"rewards/reward_func/std": 0.10897674113512039,
"step": 20,
"step_time": 29.913843032000294,
"tools/call_frequency": 3.95,
"tools/failure_frequency": 0.22069264352321624
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.65,
"completions/max_length": 480.4,
"completions/max_terminated_length": 241.0,
"completions/mean_length": 361.25,
"completions/mean_terminated_length": 225.56666870117186,
"completions/min_length": 247.4,
"completions/min_terminated_length": 202.8,
"entropy": 0.4256091395393014,
"epoch": 0.09765625,
"frac_reward_zero_std": 0.2,
"grad_norm": 4.082395553588867,
"kl": 0.43983293175697324,
"learning_rate": 8.8e-06,
"loss": 0.02868407070636749,
"num_tokens": 70641.0,
"reward": 0.0399999987334013,
"reward_std": 0.060056403279304504,
"rewards/reward_func/mean": 0.0399999987334013,
"rewards/reward_func/std": 0.060056403279304504,
"step": 25,
"step_time": 35.14591444559992,
"tools/call_frequency": 3.7,
"tools/failure_frequency": 0.18648459613323212
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.4,
"completions/max_length": 342.0,
"completions/max_terminated_length": 286.6,
"completions/mean_length": 267.45,
"completions/mean_terminated_length": 208.86666870117188,
"completions/min_length": 93.2,
"completions/min_terminated_length": 93.2,
"entropy": 0.27065443359315394,
"epoch": 0.1171875,
"frac_reward_zero_std": 0.0,
"grad_norm": 5.178186893463135,
"kl": 0.6691923886537552,
"learning_rate": 8.550000000000001e-06,
"loss": -0.30545821189880373,
"num_tokens": 85232.0,
"reward": 0.029999999329447746,
"reward_std": 0.019999999552965164,
"rewards/reward_func/mean": 0.029999999329447746,
"rewards/reward_func/std": 0.019999999552965164,
"step": 30,
"step_time": 21.171669380999084,
"tools/call_frequency": 5.3,
"tools/failure_frequency": 0.30696970224380493
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.4,
"completions/max_length": 314.6,
"completions/max_terminated_length": 300.6,
"completions/mean_length": 275.95,
"completions/mean_terminated_length": 258.2,
"completions/min_length": 191.6,
"completions/min_terminated_length": 193.2,
"entropy": 0.12053632475435734,
"epoch": 0.13671875,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.0749428272247314,
"kl": 0.6381830915808677,
"learning_rate": 8.3e-06,
"loss": -0.07461150288581848,
"num_tokens": 100010.0,
"reward": 0.023999999463558196,
"reward_std": 0.021856406703591347,
"rewards/reward_func/mean": 0.023999999463558196,
"rewards/reward_func/std": 0.021856406703591347,
"step": 35,
"step_time": 18.075483031600744,
"tools/call_frequency": 7.2,
"tools/failure_frequency": 0.3397361934185028
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.55,
"completions/max_length": 406.2,
"completions/max_terminated_length": 194.6,
"completions/mean_length": 318.55,
"completions/mean_terminated_length": 139.6666717529297,
"completions/min_length": 207.2,
"completions/min_terminated_length": 81.6,
"entropy": 0.25037811435759066,
"epoch": 0.15625,
"frac_reward_zero_std": 0.2,
"grad_norm": 5.45590877532959,
"kl": 0.632529079169035,
"learning_rate": 8.050000000000001e-06,
"loss": -0.08673273324966431,
"num_tokens": 115597.0,
"reward": 0.013999999687075614,
"reward_std": 0.016618802025914193,
"rewards/reward_func/mean": 0.013999999687075614,
"rewards/reward_func/std": 0.016618802025914193,
"step": 40,
"step_time": 25.49324105279957,
"tools/call_frequency": 4.8,
"tools/failure_frequency": 0.2961344659328461
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.7,
"completions/max_length": 488.8,
"completions/max_terminated_length": 167.4,
"completions/mean_length": 351.65,
"completions/mean_terminated_length": 126.56666870117188,
"completions/min_length": 218.6,
"completions/min_terminated_length": 102.6,
"entropy": 0.30095756258815526,
"epoch": 0.17578125,
"frac_reward_zero_std": 0.2,
"grad_norm": 2.2991526126861572,
"kl": 0.549846900999546,
"learning_rate": 7.800000000000002e-06,
"loss": 0.01944226920604706,
"num_tokens": 131856.0,
"reward": 0.017999999597668646,
"reward_std": 0.016618802025914193,
"rewards/reward_func/mean": 0.017999999597668646,
"rewards/reward_func/std": 0.016618802025914193,
"step": 45,
"step_time": 35.21552088999961,
"tools/call_frequency": 3.3,
"tools/failure_frequency": 0.2740601599216461
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.6,
"completions/max_length": 381.0,
"completions/max_terminated_length": 284.0,
"completions/mean_length": 294.35,
"completions/mean_terminated_length": 227.53333435058593,
"completions/min_length": 160.8,
"completions/min_terminated_length": 160.8,
"entropy": 0.23832304682582617,
"epoch": 0.1953125,
"frac_reward_zero_std": 0.2,
"grad_norm": 0.2268025130033493,
"kl": 0.6058375500142574,
"learning_rate": 7.5500000000000006e-06,
"loss": -0.08301904201507568,
"num_tokens": 146914.0,
"reward": 0.023999999463558196,
"reward_std": 0.018475209176540375,
"rewards/reward_func/mean": 0.023999999463558196,
"rewards/reward_func/std": 0.018475209176540375,
"step": 50,
"step_time": 25.430086624599788,
"tools/call_frequency": 5.5,
"tools/failure_frequency": 0.30512219071388247
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 339.2,
"completions/max_terminated_length": 290.2,
"completions/mean_length": 287.3,
"completions/mean_terminated_length": 284.8333374023438,
"completions/min_length": 262.0,
"completions/min_terminated_length": 279.2,
"entropy": 0.05971384542062878,
"epoch": 0.21484375,
"frac_reward_zero_std": 0.4,
"grad_norm": 0.22013917565345764,
"kl": 0.5083232939243316,
"learning_rate": 7.3e-06,
"loss": 0.0854921281337738,
"num_tokens": 161945.0,
"reward": 0.031999999284744264,
"reward_std": 0.012618802115321159,
"rewards/reward_func/mean": 0.031999999284744264,
"rewards/reward_func/std": 0.012618802115321159,
"step": 55,
"step_time": 21.9785563156016,
"tools/call_frequency": 8.3,
"tools/failure_frequency": 0.3500013709068298
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.6,
"completions/max_length": 264.6,
"completions/max_terminated_length": 219.6,
"completions/mean_length": 249.7,
"completions/mean_terminated_length": 217.26666870117188,
"completions/min_length": 213.4,
"completions/min_terminated_length": 215.6,
"entropy": 0.030067690974101426,
"epoch": 0.234375,
"frac_reward_zero_std": 0.2,
"grad_norm": 2.1762969493865967,
"kl": 0.5388822212815285,
"learning_rate": 7.05e-06,
"loss": -0.06041957139968872,
"num_tokens": 176143.0,
"reward": 0.029999999329447746,
"reward_std": 0.016618802025914193,
"rewards/reward_func/mean": 0.029999999329447746,
"rewards/reward_func/std": 0.016618802025914193,
"step": 60,
"step_time": 16.232493841598625,
"tools/call_frequency": 8.85,
"tools/failure_frequency": 0.3545756280422211
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.4,
"completions/max_length": 288.2,
"completions/max_terminated_length": 148.2,
"completions/mean_length": 213.95,
"completions/mean_terminated_length": 137.4000030517578,
"completions/min_length": 181.4,
"completions/min_terminated_length": 129.6,
"entropy": 0.07268587870057672,
"epoch": 0.25390625,
"frac_reward_zero_std": 0.4,
"grad_norm": 0.1308048814535141,
"kl": 0.8497638419270516,
"learning_rate": 6.800000000000001e-06,
"loss": -0.2692496538162231,
"num_tokens": 189629.0,
"reward": 0.03799999877810478,
"reward_std": 0.010309400968253613,
"rewards/reward_func/mean": 0.03799999877810478,
"rewards/reward_func/std": 0.010309400968253613,
"step": 65,
"step_time": 17.90531996159916,
"tools/call_frequency": 5.85,
"tools/failure_frequency": 0.3754902005195618
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.3,
"completions/max_length": 274.6,
"completions/max_terminated_length": 256.6,
"completions/mean_length": 248.25,
"completions/mean_terminated_length": 241.73333740234375,
"completions/min_length": 226.0,
"completions/min_terminated_length": 226.0,
"entropy": 0.05142406928353012,
"epoch": 0.2734375,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.09222911298274994,
"kl": 0.57062017172575,
"learning_rate": 6.550000000000001e-06,
"loss": 0.02273084670305252,
"num_tokens": 203795.0,
"reward": 0.05999999865889549,
"reward_std": 0.0,
"rewards/reward_func/mean": 0.05999999865889549,
"rewards/reward_func/std": 0.0,
"step": 70,
"step_time": 16.734460433397906,
"tools/call_frequency": 9.3,
"tools/failure_frequency": 0.1542351394891739
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.35,
"completions/max_length": 303.4,
"completions/max_terminated_length": 242.2,
"completions/mean_length": 244.05,
"completions/mean_terminated_length": 231.43333435058594,
"completions/min_length": 215.6,
"completions/min_terminated_length": 222.2,
"entropy": 0.03808064609766006,
"epoch": 0.29296875,
"frac_reward_zero_std": 0.8,
"grad_norm": 0.2428537756204605,
"kl": 0.49708728566765786,
"learning_rate": 6.300000000000001e-06,
"loss": 0.08911536335945129,
"num_tokens": 217958.0,
"reward": 0.0569999985396862,
"reward_std": 0.006000000238418579,
"rewards/reward_func/mean": 0.0569999985396862,
"rewards/reward_func/std": 0.005999999865889549,
"step": 75,
"step_time": 18.7449420207995,
"tools/call_frequency": 9.25,
"tools/failure_frequency": 0.09236167445778846
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.4,
"completions/max_length": 298.6,
"completions/max_terminated_length": 238.2,
"completions/mean_length": 243.95,
"completions/mean_terminated_length": 229.53333435058593,
"completions/min_length": 217.8,
"completions/min_terminated_length": 221.8,
"entropy": 0.03462462190072983,
"epoch": 0.3125,
"frac_reward_zero_std": 0.6,
"grad_norm": 3.802891969680786,
"kl": 0.48679155968129634,
"learning_rate": 6.0500000000000005e-06,
"loss": 0.0853162169456482,
"num_tokens": 232107.0,
"reward": 0.05399999842047691,
"reward_std": 0.012000000476837159,
"rewards/reward_func/mean": 0.05399999842047691,
"rewards/reward_func/std": 0.011999999731779098,
"step": 80,
"step_time": 18.624738503398838,
"tools/call_frequency": 9.4,
"tools/failure_frequency": 0.1012024775147438
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.55,
"completions/max_length": 315.8,
"completions/max_terminated_length": 245.4,
"completions/mean_length": 287.0,
"completions/mean_terminated_length": 223.70000305175782,
"completions/min_length": 235.4,
"completions/min_terminated_length": 188.8,
"entropy": 0.018990062386728825,
"epoch": 0.33203125,
"frac_reward_zero_std": 0.8,
"grad_norm": 0.24913819134235382,
"kl": 0.36854752600193025,
"learning_rate": 5.8e-06,
"loss": -0.06027485728263855,
"num_tokens": 247068.0,
"reward": 0.0569999985396862,
"reward_std": 0.006000000238418579,
"rewards/reward_func/mean": 0.0569999985396862,
"rewards/reward_func/std": 0.005999999865889549,
"step": 85,
"step_time": 19.23757492739969,
"tools/call_frequency": 7.25,
"tools/failure_frequency": 0.38577277064323423
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.6,
"completions/max_length": 336.4,
"completions/max_terminated_length": 313.2,
"completions/mean_length": 315.8,
"completions/mean_terminated_length": 307.6,
"completions/min_length": 291.8,
"completions/min_terminated_length": 302.0,
"entropy": 0.005942635895917192,
"epoch": 0.3515625,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.03795464709401131,
"kl": 0.27816674262285235,
"learning_rate": 5.550000000000001e-06,
"loss": 0.011118948459625244,
"num_tokens": 262655.0,
"reward": 0.05999999865889549,
"reward_std": 0.0,
"rewards/reward_func/mean": 0.05999999865889549,
"rewards/reward_func/std": 0.0,
"step": 90,
"step_time": 20.623258255800465,
"tools/call_frequency": 7.0,
"tools/failure_frequency": 0.378443706035614
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.3,
"completions/max_length": 326.2,
"completions/max_terminated_length": 326.2,
"completions/mean_length": 305.55,
"completions/mean_terminated_length": 311.4333435058594,
"completions/min_length": 286.4,
"completions/min_terminated_length": 300.0,
"entropy": 0.00408092990401201,
"epoch": 0.37109375,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.03232214227318764,
"kl": 0.2662045076489449,
"learning_rate": 5.300000000000001e-06,
"loss": 0.010630997270345688,
"num_tokens": 277935.0,
"reward": 0.05999999865889549,
"reward_std": 0.0,
"rewards/reward_func/mean": 0.05999999865889549,
"rewards/reward_func/std": 0.0,
"step": 95,
"step_time": 19.27180065259963,
"tools/call_frequency": 7.5,
"tools/failure_frequency": 0.40017797946929934
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.4,
"completions/max_length": 370.4,
"completions/max_terminated_length": 320.0,
"completions/mean_length": 305.75,
"completions/mean_terminated_length": 274.26666870117185,
"completions/min_length": 213.2,
"completions/min_terminated_length": 213.8,
"entropy": 0.020784856198588386,
"epoch": 0.390625,
"frac_reward_zero_std": 0.6,
"grad_norm": 2.253676414489746,
"kl": 0.30799318477511406,
"learning_rate": 5.050000000000001e-06,
"loss": -0.07253098487854004,
"num_tokens": 293245.0,
"reward": 0.050999998673796655,
"reward_std": 0.012928203493356705,
"rewards/reward_func/mean": 0.050999998673796655,
"rewards/reward_func/std": 0.012928203120827675,
"step": 100,
"step_time": 22.40414413039907,
"tools/call_frequency": 5.7,
"tools/failure_frequency": 0.3851664662361145
}
],
"logging_steps": 5,
"max_steps": 200,
"num_input_tokens_seen": 293245,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ec2ebe18213235a9211aa4bf0c3778da36ad2990f51c4dddec3ffae0a6be7033
size 7185

View File

@@ -0,0 +1,89 @@
{#- Chat template: renders a list of messages (and optional tool definitions) into turns delimited by <|im_start|> and <|im_end|>. #}
{#- System header. When tools are supplied, a system turn is always emitted: the first message's content (only if that message has role 'system'), then the tool-calling instructions with each tool serialized as JSON on its own line. #}
{%- if tools %}
{{- '<|im_start|>system\n' }}
{%- if messages[0].role == 'system' %}
{{- messages[0].content + '\n\n' }}
{%- endif %}
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
{%- for tool in tools %}
{{- "\n" }}
{{- tool | tojson }}
{%- endfor %}
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
{%- else %}
{#- Without tools, a system turn is emitted only when the first message is a system message. #}
{%- if messages[0].role == 'system' %}
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{#- Walk the messages from last to first to find the most recent user message whose content is a string that is not wrapped in <tool_response> tags. Its index is stored in ns.last_query_index; if no such message exists the default (index of the last message) is kept. The multi_step_tool flag ensures only the first match in reverse order is recorded. #}
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
{%- for message in messages[::-1] %}
{%- set index = (messages|length - 1) - loop.index0 %}
{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
{%- set ns.multi_step_tool = false %}
{%- set ns.last_query_index = index %}
{%- endif %}
{%- endfor %}
{#- Main rendering loop, one pass over the messages in order. #}
{%- for message in messages %}
{#- Non-string content is rendered as an empty string. #}
{%- if message.content is string %}
{%- set content = message.content %}
{%- else %}
{%- set content = '' %}
{%- endif %}
{#- User turns and non-leading system turns are emitted verbatim. A system message in first position is skipped here because the header section above already handled it. #}
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
{%- elif message.role == "assistant" %}
{#- Reasoning text comes from message.reasoning_content when it is a string; otherwise, if the content contains a closing think tag, the reasoning is split out of the content and the remainder becomes the visible content. #}
{%- set reasoning_content = '' %}
{%- if message.reasoning_content is string %}
{%- set reasoning_content = message.reasoning_content %}
{%- else %}
{%- if '</think>' in content %}
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
{%- endif %}
{%- endif %}
{#- A think block is rendered only for assistant turns located after ns.last_query_index: always for the final message, and for earlier ones only when reasoning text is non-empty. All other assistant turns are rendered with their content alone. #}
{%- if loop.index0 > ns.last_query_index %}
{%- if loop.last or (not loop.last and reasoning_content) %}
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
{%- else %}
{{- '<|im_start|>' + message.role + '\n' + content }}
{%- endif %}
{%- else %}
{{- '<|im_start|>' + message.role + '\n' + content }}
{%- endif %}
{#- Tool calls are appended to the assistant turn, each wrapped in <tool_call> tags. A newline separator precedes every call except a first call that follows empty content. #}
{%- if message.tool_calls %}
{%- for tool_call in message.tool_calls %}
{%- if (loop.first and content) or (not loop.first) %}
{{- '\n' }}
{%- endif %}
{#- Accept both shapes: a wrapper with a 'function' member, or the name/arguments fields directly on the tool call. #}
{%- if tool_call.function %}
{%- set tool_call = tool_call.function %}
{%- endif %}
{{- '<tool_call>\n{"name": "' }}
{{- tool_call.name }}
{{- '", "arguments": ' }}
{#- String arguments are emitted as-is (no validation or re-encoding is performed here); any other value is serialized with tojson. #}
{%- if tool_call.arguments is string %}
{{- tool_call.arguments }}
{%- else %}
{{- tool_call.arguments | tojson }}
{%- endif %}
{{- '}\n</tool_call>' }}
{%- endfor %}
{%- endif %}
{{- '<|im_end|>\n' }}
{%- elif message.role == "tool" %}
{#- Tool results are rendered inside a user turn. Consecutive tool messages share one turn: the turn is opened only when the previous message is not a tool message, and closed only when the next one is not. Each result is wrapped in <tool_response> tags. #}
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
{{- '<|im_start|>user' }}
{%- endif %}
{{- '\n<tool_response>\n' }}
{{- content }}
{{- '\n</tool_response>' }}
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
{{- '<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{%- endfor %}
{#- Generation prompt: open an assistant turn. When enable_thinking is defined and false, an empty think block is pre-filled after the turn header. #}
{%- if add_generation_prompt %}
{{- '<|im_start|>assistant\n' }}
{%- if enable_thinking is defined and enable_thinking is false %}
{{- '<think>\n\n</think>\n\n' }}
{%- endif %}
{%- endif %}

View File

@@ -0,0 +1,63 @@
{
"architectures": [
"Qwen3ForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": null,
"dtype": "float32",
"eos_token_id": 151645,
"head_dim": 128,
"hidden_act": "silu",
"hidden_size": 1024,
"initializer_range": 0.02,
"intermediate_size": 3072,
"layer_types": [
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention"
],
"max_position_embeddings": 40960,
"max_window_layers": 28,
"model_type": "qwen3",
"num_attention_heads": 16,
"num_hidden_layers": 28,
"num_key_value_heads": 8,
"pad_token_id": 151643,
"rms_norm_eps": 1e-06,
"rope_parameters": {
"rope_theta": 1000000,
"rope_type": "default"
},
"sliding_window": null,
"tie_word_embeddings": true,
"transformers_version": "5.6.2",
"use_cache": false,
"use_sliding_window": false,
"vocab_size": 151936
}

View File

@@ -0,0 +1,12 @@
{
"do_sample": true,
"eos_token_id": [
151645,
151643
],
"pad_token_id": 151643,
"temperature": 0.6,
"top_k": 20,
"top_p": 0.95,
"transformers_version": "5.6.2"
}

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a74651f230a1644b02fec7706be28f5209ebe746a4876a1df7c07a294a345f11
size 2384234968

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:930f9c60a28c026305674220377bf9d08a9e5cc2f2bfbeac67bf8402953d1cb2
size 4768669395

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d8e9bbdccf9538bc08ddd052112e929691fdd087026390ef5e8ad8aa61f690da
size 14645

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:39a7cdd550d6ad45e0803294bb002a4956fd107348f20422f2f106918ad1bbd8
size 1465

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506
size 11422650

View File

@@ -0,0 +1,75 @@
{
"add_prefix_space": false,
"backend": "tokenizers",
"bos_token": null,
"clean_up_tokenization_spaces": false,
"eos_token": "<|im_end|>",
"errors": "replace",
"extra_special_tokens": [
"<|im_start|>",
"<|im_end|>",
"<|object_ref_start|>",
"<|object_ref_end|>",
"<|box_start|>",
"<|box_end|>",
"<|quad_start|>",
"<|quad_end|>",
"<|vision_start|>",
"<|vision_end|>",
"<|vision_pad|>",
"<|image_pad|>",
"<|video_pad|>"
],
"is_local": false,
"local_files_only": false,
"model_max_length": 131072,
"pad_token": "<|endoftext|>",
"padding_side": "left",
"response_schema": {
"properties": {
"content": {
"type": "string"
},
"reasoning_content": {
"type": "string"
},
"role": {
"const": "assistant"
},
"tool_calls": {
"items": {
"properties": {
"function": {
"properties": {
"arguments": {
"additionalProperties": {},
"type": "object"
},
"name": {
"type": "string"
}
},
"type": "object"
},
"type": {
"const": "function"
}
},
"type": "object",
"x-parser": "json",
"x-parser-args": {
"transform": "{type: 'function', function: @}"
}
},
"type": "array",
"x-regex-iterator": "<tool_call>\\s*(.+?)\\s*</tool_call>"
}
},
"type": "object",
"x-regex": "^(?:<think>\\n?(?:(?P<reasoning_content>.*?\\S.*?)\\n?|[\\s]*)</think>\\s*)?(?P<content>.*?)(?:\\n(?=<tool_call>))?(?=(?:<tool_call>|<\\|im_end\\|>|$))(?P<tool_calls>(?:<tool_call>.+?</tool_call>\\s*)+)?\\s*(?:<\\|im_end\\|>|$)"
},
"split_special_tokens": false,
"tokenizer_class": "Qwen2Tokenizer",
"truncation_side": "left",
"unk_token": null
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ec2ebe18213235a9211aa4bf0c3778da36ad2990f51c4dddec3ffae0a6be7033
size 7185

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:40ab7c2da42d233760f178da49197fd96ab585bb5c5d1db708f471dc41028203
size 17624

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7db68983abc099fefc535827202de2cb4aeaa566e1c983efdbcf8972f1e5cb30
size 20558

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3a1800427305524b201307a5970dc21a24d19d3fdafdee262e2387f7b34cf983
size 24290

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d482666c7fb92be81e526c3059fc05bb4c92dc8b5e7e9dd5fec5123c75e1ec1d
size 21178

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0ae4d08fd1f018e00b737a96f040274e93cc7d67f08cba09703b555a1a9b951e
size 22322

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:858b7d342929fdf3548afd2b111fb23d0761293bced38847e819daa6ab55e0d5
size 22211

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8512e7fda86859f68b1bd03422d382cdc0f92a31f1d1c6333b3c0e3d33d3b53a
size 22642

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d98816af41b2c123868346fa968a70a76279aa5cdf21562b2489d7369cfb0f78
size 19277

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:75141dd7b55994f1234eeb624f867eefe4a31586def8b732cbea4d9b8fd632b8
size 22253

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e58a8048eab063416839085dd682602db54e6a20c8fd6b7c8cb326d54c27ac78
size 22194

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b515992f42d97e23f62e1744146560c554897c9b15e66a474debff3b350646fd
size 22358

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6a0ece6069a589efc109aecb5be6d13798087043e77aff2ca58233e69e0a3f96
size 22223

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:44dada6eeeafb0b2b767fe650e604491abbbac436d654703407384a88047bf67
size 22889

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:03e1253fc444f5efdad07ef30ca8f20a8b83f1f9ce0af8d39009d7a973ac1f70
size 21900

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:780d61d130ca7e9e05a7f2b99f06a15fb2c17eeba1c257c1ceaff9f46e82dc69
size 21867

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:da5bef0e99a5a7aad1e056c03e775098453ecbe0fb28d7bffa60f87fcefe1fb9
size 21989

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4cd61a7bd01ca7ab93b6c3291c1d9dc85606bed2b8db79bfce272fe9cf305f2c
size 22572

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2e7cb80425b1b572a140591eb5cfe57e98953833003a2ade65b11a5dd422cc93
size 23665

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7823bad70898155273e09b219c9c0d95d9c656fa2f11fcb3fa9e6de578c6b7e3
size 23250

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5a513a1560745ea59d087078dff5f0c0de3a80d1fa2a33ccf5bff76d14ac5f8f
size 21353

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b601fe61b78bb3d894f54f5081fd0f57dc1c783fd3504b48f0279a896e769816
size 23294

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e9e0dc22f2e430371e6db825b9a5735844c317c7fe36914a2ed7755dddf904e7
size 22884

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ebeaec160d6064acd5e730fe4137817967eca9506d018cd4bc14c87e201186e9
size 23448

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:25f477ade091dc47cfe6c8b966e04cf6c3923083226d0f944d57a793d059b529
size 23378

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8538a35efa7f965f554810d3ea9eab724cae895c5e0587f13f8817b0028243ab
size 23185

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9812ce3ffac78c475eeda65574ae001fb1684e882291a2df47173e6815a1859e
size 23326

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c1d7862ab580a97abb370c2f50afba94756b178c876390ba0d5a13d4b65c976d
size 23342

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7f4caaf47a7d8a855606891bf41e1d88b5324d75cad3037f14a1101e8894e636
size 23026

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8ff215067e86e4a01f8d179f2904c5d8148ff2b8938211cf1ea49e6aef9a02ec
size 23378

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0617ad84dcdfc1a69afadb7c863b453c20c8f782ef25c7254c2da3fd11e2255c
size 23055

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5ca7fb852427d6566f33a2325ea65b917f5b7292204ace354c2c7fa4e4a80f60
size 22866

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9907c048edeee6fadc783145ce64c01b376c539a103c0e644ce2e677d0f287fe
size 23442

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:49ddf82301b0ec8abad785429f6fe5440d5bed881f733c8170a956bdbc8e4390
size 23430

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c26b9bab096aaefd00e656d5ae16ff0f0fc82d2389e725c87cf0efee12e1f342
size 22986

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7600cab610b3bb10df6ffe52d4e13559f3e3be83731c67540b75ecd843553818
size 23545

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5fa5443d40c2bdacae85a44f26e3398bc1cd449d91477c55c8873d82b2404bc8
size 22972

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:529623709fb1db4175d4f69d548412559e4f6cdc8fe3a35b121cfbe888c69ec0
size 22926

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:62ad17103876fa42974b867351fe7f390fed3d4fded4f561267fb3a55ff20090
size 23019

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:34cad03b71c628ce49b96ba1ffae17b54fcd1ad3ee470b698cb50cf9c0290234
size 23208

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4bc43295443585b75cfb120dd85ca83ca491fbdc46388e43497cbad0958748ed
size 23493

63
config.json Normal file
View File

@@ -0,0 +1,63 @@
{
"architectures": [
"Qwen3ForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": null,
"dtype": "float32",
"eos_token_id": 151645,
"head_dim": 128,
"hidden_act": "silu",
"hidden_size": 1024,
"initializer_range": 0.02,
"intermediate_size": 3072,
"layer_types": [
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention"
],
"max_position_embeddings": 40960,
"max_window_layers": 28,
"model_type": "qwen3",
"num_attention_heads": 16,
"num_hidden_layers": 28,
"num_key_value_heads": 8,
"pad_token_id": 151643,
"rms_norm_eps": 1e-06,
"rope_parameters": {
"rope_theta": 1000000,
"rope_type": "default"
},
"sliding_window": null,
"tie_word_embeddings": true,
"transformers_version": "5.6.2",
"use_cache": false,
"use_sliding_window": false,
"vocab_size": 151936
}

151
eval/trained_eval_rows.csv Normal file
View File

@@ -0,0 +1,151 @@
agent,seed,case_id,difficulty,reward,primary_reward,auxiliary_reward,contradictions_total,contradictions_triggered,contradictions_surfaced,questions_used,evidence_presented,evidence_timing_successes,blind_evidence_count,useless_questions_ratio,avg_question_length,model_repo,invalid_tool_calls
random,20260425,timeline_255d67,easy,0.0,0.0,-0.4,1,0,0,6,2,0,2,1.0,5.0,,
random,20260426,knowledge_b28f8c,medium,0.0,0.0,-0.4,2,0,0,6,2,0,2,1.0,5.0,,
random,20260427,workplace_c98377,easy,0.0,0.0,-0.4,1,0,0,3,5,0,5,1.0,5.0,,
random,20260428,motive_66ff59,hard,0.0,0.0,-0.4,3,0,0,7,1,0,1,1.0,5.0,,
random,20260429,timeline_19bb78,easy,0.0,0.0,-0.4,1,0,0,4,4,0,4,1.0,5.0,,
random,20260430,timeline_a97690,medium,0.0,0.0,-0.4,2,0,0,6,2,0,2,1.0,5.0,,
random,20260431,alibi_67ffcd,medium,0.0,0.0,-0.4,2,0,0,6,2,0,2,1.0,5.0,,
random,20260432,alibi_423bca,medium,0.0,0.0,-0.4,2,0,0,5,3,0,3,1.0,5.0,,
random,20260433,knowledge_960d07,medium,0.0,0.0,-0.4,2,0,0,8,0,0,0,1.0,5.0,,
random,20260434,alibi_e829c1,easy,0.0,0.0,-0.4,1,0,0,7,1,0,1,1.0,5.0,,
random,20260435,motive_85e25b,hard,0.0,0.0,-0.4,3,0,0,5,3,0,3,1.0,5.0,,
random,20260436,knowledge_a599e3,medium,0.0,0.0,-0.4,2,0,0,4,4,0,4,1.0,5.0,,
random,20260437,motive_8bca20,easy,0.0,0.0,-0.4,1,0,0,6,2,0,2,1.0,5.0,,
random,20260438,corporate_6b1664,medium,0.0,0.0,-0.4,2,0,0,6,2,0,2,1.0,5.0,,
random,20260439,alibi_a6c582,easy,0.0,0.0,-0.4,1,0,0,8,0,0,0,1.0,5.0,,
random,20260440,workplace_835476,easy,0.0,0.0,-0.4,1,0,0,5,3,0,3,1.0,5.0,,
random,20260441,possession_a079c5,hard,0.0,0.0,-0.4,3,0,0,8,0,0,0,1.0,5.0,,
random,20260442,possession_9cc45d,hard,0.0,0.0,-0.4,3,0,0,5,3,0,3,1.0,5.0,,
random,20260443,possession_259aa5,easy,0.0,0.0,-0.4,1,0,0,4,4,0,4,1.0,5.0,,
random,20260444,corporate_76724c,medium,0.0,0.0,-0.4,2,0,0,5,3,0,3,1.0,5.0,,
random,20260445,timeline_767821,medium,0.0,0.0,-0.4,2,0,0,5,3,0,3,1.0,5.0,,
random,20260446,motive_c0d166,medium,0.0,0.0,-0.4,2,0,0,5,3,0,3,1.0,5.0,,
random,20260447,corporate_307934,hard,0.0,0.0,-0.4,3,0,0,7,1,0,1,1.0,5.0,,
random,20260448,timeline_592816,hard,0.0,0.0,-0.4,3,0,0,6,2,0,2,1.0,5.0,,
random,20260449,knowledge_b26824,medium,0.0,0.0,-0.4,2,0,0,5,3,0,3,1.0,5.0,,
random,20260450,knowledge_697785,hard,0.0,0.0,-0.4,3,0,0,7,1,0,1,1.0,5.0,,
random,20260451,timeline_81dafd,medium,0.0,0.0,-0.4,2,0,0,4,4,0,4,1.0,5.0,,
random,20260452,corporate_8eb7d7,medium,0.0,0.0,-0.4,2,0,0,4,4,0,4,1.0,5.0,,
random,20260453,possession_dbb5fe,medium,0.0,0.0,-0.4,2,0,0,5,3,0,3,1.0,5.0,,
random,20260454,alibi_a4666f,hard,0.0,0.0,-0.4,3,0,0,8,0,0,0,1.0,5.0,,
keyword_spam,20260425,timeline_255d67,easy,0.030000000000000006,0.0,0.15000000000000002,1,1,0,5,0,0,0,0.6,4.2,,
keyword_spam,20260426,knowledge_b28f8c,medium,0.12000000000000002,0.0,0.6000000000000001,2,2,0,5,0,0,0,0.4,4.2,,
keyword_spam,20260427,workplace_c98377,easy,0.0,0.0,-0.2,1,0,0,5,0,0,0,0.8,4.2,,
keyword_spam,20260428,motive_66ff59,hard,0.17000000000000004,0.0,0.8500000000000001,3,3,0,5,0,0,0,0.2,4.2,,
keyword_spam,20260429,timeline_19bb78,easy,0.030000000000000006,0.0,0.15000000000000002,1,1,0,5,0,0,0,0.6,4.2,,
keyword_spam,20260430,timeline_a97690,medium,0.13,0.0,0.65,2,2,0,5,0,0,0,0.2,4.2,,
keyword_spam,20260431,alibi_67ffcd,medium,0.04000000000000001,0.0,0.20000000000000004,2,1,0,5,0,0,0,0.4,4.2,,
keyword_spam,20260432,alibi_423bca,medium,0.04000000000000001,0.0,0.20000000000000004,2,1,0,5,0,0,0,0.4,4.2,,
keyword_spam,20260433,knowledge_960d07,medium,0.12000000000000002,0.0,0.6000000000000001,2,2,0,5,0,0,0,0.4,4.2,,
keyword_spam,20260434,alibi_e829c1,easy,0.04000000000000001,0.0,0.20000000000000004,1,1,0,5,0,0,0,0.4,4.2,,
keyword_spam,20260435,motive_85e25b,hard,0.17000000000000004,0.0,0.8500000000000001,3,3,0,5,0,0,0,0.2,4.2,,
keyword_spam,20260436,knowledge_a599e3,medium,0.12000000000000002,0.0,0.6000000000000001,2,2,0,5,0,0,0,0.4,4.2,,
keyword_spam,20260437,motive_8bca20,easy,0.0,0.0,-0.15000000000000002,1,0,0,5,0,0,0,0.6,4.2,,
keyword_spam,20260438,corporate_6b1664,medium,0.020000000000000007,0.0,0.10000000000000003,2,1,0,5,0,0,0,0.8,4.2,,
keyword_spam,20260439,alibi_a6c582,easy,0.04000000000000001,0.0,0.20000000000000004,1,1,0,5,0,0,0,0.4,4.2,,
keyword_spam,20260440,workplace_835476,easy,0.0,0.0,-0.2,1,0,0,5,0,0,0,0.8,4.2,,
keyword_spam,20260441,possession_a079c5,hard,0.030000000000000006,0.0,0.15000000000000002,3,1,0,5,0,0,0,0.6,4.2,,
keyword_spam,20260442,possession_9cc45d,hard,0.030000000000000006,0.0,0.15000000000000002,3,1,0,5,0,0,0,0.6,4.2,,
keyword_spam,20260443,possession_259aa5,easy,0.0,0.0,-0.2,1,0,0,5,0,0,0,0.8,4.2,,
keyword_spam,20260444,corporate_76724c,medium,0.020000000000000007,0.0,0.10000000000000003,2,1,0,5,0,0,0,0.8,4.2,,
keyword_spam,20260445,timeline_767821,medium,0.13,0.0,0.65,2,2,0,5,0,0,0,0.2,4.2,,
keyword_spam,20260446,motive_c0d166,medium,0.13,0.0,0.65,2,2,0,5,0,0,0,0.2,4.2,,
keyword_spam,20260447,corporate_307934,hard,0.020000000000000007,0.0,0.10000000000000003,3,1,0,5,0,0,0,0.8,4.2,,
keyword_spam,20260448,timeline_592816,hard,0.19,0.0,0.95,3,3,0,5,0,0,0,0.2,4.2,,
keyword_spam,20260449,knowledge_b26824,medium,0.12000000000000002,0.0,0.6000000000000001,2,2,0,5,0,0,0,0.4,4.2,,
keyword_spam,20260450,knowledge_697785,hard,0.12000000000000002,0.0,0.6000000000000001,3,2,0,5,0,0,0,0.4,4.2,,
keyword_spam,20260451,timeline_81dafd,medium,0.13,0.0,0.65,2,2,0,5,0,0,0,0.2,4.2,,
keyword_spam,20260452,corporate_8eb7d7,medium,0.020000000000000007,0.0,0.10000000000000003,2,1,0,5,0,0,0,0.8,4.2,,
keyword_spam,20260453,possession_dbb5fe,medium,0.030000000000000006,0.0,0.15000000000000002,2,1,0,5,0,0,0,0.6,4.2,,
keyword_spam,20260454,alibi_a4666f,hard,0.15000000000000002,0.0,0.75,3,2,0,5,0,0,0,0.2,4.2,,
present_all,20260425,timeline_255d67,easy,0.0,0.0,-0.25,1,0,0,0,5,0,5,0.0,0.0,,
present_all,20260426,knowledge_b28f8c,medium,0.0,0.0,-0.2,2,0,0,0,4,0,4,0.0,0.0,,
present_all,20260427,workplace_c98377,easy,0.0,0.0,-0.25,1,0,0,0,5,0,5,0.0,0.0,,
present_all,20260428,motive_66ff59,hard,0.0,0.0,-0.35000000000000003,3,0,0,0,7,0,7,0.0,0.0,,
present_all,20260429,timeline_19bb78,easy,0.0,0.0,-0.25,1,0,0,0,5,0,5,0.0,0.0,,
present_all,20260430,timeline_a97690,medium,0.0,0.0,-0.25,2,0,0,0,5,0,5,0.0,0.0,,
present_all,20260431,alibi_67ffcd,medium,0.0,0.0,-0.2,2,0,0,0,4,0,4,0.0,0.0,,
present_all,20260432,alibi_423bca,medium,0.0,0.0,-0.2,2,0,0,0,4,0,4,0.0,0.0,,
present_all,20260433,knowledge_960d07,medium,0.0,0.0,-0.2,2,0,0,0,4,0,4,0.0,0.0,,
present_all,20260434,alibi_e829c1,easy,0.0,0.0,-0.2,1,0,0,0,4,0,4,0.0,0.0,,
present_all,20260435,motive_85e25b,hard,0.0,0.0,-0.35000000000000003,3,0,0,0,7,0,7,0.0,0.0,,
present_all,20260436,knowledge_a599e3,medium,0.0,0.0,-0.2,2,0,0,0,4,0,4,0.0,0.0,,
present_all,20260437,motive_8bca20,easy,0.0,0.0,-0.25,1,0,0,0,5,0,5,0.0,0.0,,
present_all,20260438,corporate_6b1664,medium,0.0,0.0,-0.25,2,0,0,0,5,0,5,0.0,0.0,,
present_all,20260439,alibi_a6c582,easy,0.0,0.0,-0.2,1,0,0,0,4,0,4,0.0,0.0,,
present_all,20260440,workplace_835476,easy,0.0,0.0,-0.25,1,0,0,0,5,0,5,0.0,0.0,,
present_all,20260441,possession_a079c5,hard,0.0,0.0,-0.30000000000000004,3,0,0,0,6,0,6,0.0,0.0,,
present_all,20260442,possession_9cc45d,hard,0.0,0.0,-0.30000000000000004,3,0,0,0,6,0,6,0.0,0.0,,
present_all,20260443,possession_259aa5,easy,0.0,0.0,-0.2,1,0,0,0,4,0,4,0.0,0.0,,
present_all,20260444,corporate_76724c,medium,0.0,0.0,-0.25,2,0,0,0,5,0,5,0.0,0.0,,
present_all,20260445,timeline_767821,medium,0.0,0.0,-0.25,2,0,0,0,5,0,5,0.0,0.0,,
present_all,20260446,motive_c0d166,medium,0.0,0.0,-0.25,2,0,0,0,5,0,5,0.0,0.0,,
present_all,20260447,corporate_307934,hard,0.0,0.0,-0.35000000000000003,3,0,0,0,7,0,7,0.0,0.0,,
present_all,20260448,timeline_592816,hard,0.0,0.0,-0.35000000000000003,3,0,0,0,7,0,7,0.0,0.0,,
present_all,20260449,knowledge_b26824,medium,0.0,0.0,-0.2,2,0,0,0,4,0,4,0.0,0.0,,
present_all,20260450,knowledge_697785,hard,0.0,0.0,-0.30000000000000004,3,0,0,0,6,0,6,0.0,0.0,,
present_all,20260451,timeline_81dafd,medium,0.0,0.0,-0.25,2,0,0,0,5,0,5,0.0,0.0,,
present_all,20260452,corporate_8eb7d7,medium,0.0,0.0,-0.25,2,0,0,0,5,0,5,0.0,0.0,,
present_all,20260453,possession_dbb5fe,medium,0.0,0.0,-0.2,2,0,0,0,4,0,4,0.0,0.0,,
present_all,20260454,alibi_a4666f,hard,0.0,0.0,-0.30000000000000004,3,0,0,0,6,0,6,0.0,0.0,,
scripted_oracle,20260425,timeline_255d67,easy,0.8800000000000001,1.0,0.4,1,1,1,1,1,1,0,0.0,1.0,,
scripted_oracle,20260426,knowledge_b28f8c,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.5,,
scripted_oracle,20260427,workplace_c98377,easy,0.8800000000000001,1.0,0.4,1,1,1,1,1,1,0,0.0,1.0,,
scripted_oracle,20260428,motive_66ff59,hard,1.0,1.0,1.2000000000000002,3,3,3,3,3,3,0,0.0,1.0,,
scripted_oracle,20260429,timeline_19bb78,easy,0.8800000000000001,1.0,0.4,1,1,1,1,1,1,0,0.0,1.0,,
scripted_oracle,20260430,timeline_a97690,medium,0.49000000000000005,0.5,0.45,2,1,1,2,2,1,1,0.0,1.0,,
scripted_oracle,20260431,alibi_67ffcd,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,3.0,,
scripted_oracle,20260432,alibi_423bca,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,3.0,,
scripted_oracle,20260433,knowledge_960d07,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.5,,
scripted_oracle,20260434,alibi_e829c1,easy,0.8800000000000001,1.0,0.4,1,1,1,1,1,1,0,0.0,3.0,,
scripted_oracle,20260435,motive_85e25b,hard,1.0,1.0,1.2000000000000002,3,3,3,3,3,3,0,0.0,1.0,,
scripted_oracle,20260436,knowledge_a599e3,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.5,,
scripted_oracle,20260437,motive_8bca20,easy,0.8800000000000001,1.0,0.4,1,1,1,1,1,1,0,0.0,1.0,,
scripted_oracle,20260438,corporate_6b1664,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.0,,
scripted_oracle,20260439,alibi_a6c582,easy,0.8800000000000001,1.0,0.4,1,1,1,1,1,1,0,0.0,3.0,,
scripted_oracle,20260440,workplace_835476,easy,0.8800000000000001,1.0,0.4,1,1,1,1,1,1,0,0.0,1.0,,
scripted_oracle,20260441,possession_a079c5,hard,1.0,1.0,1.2000000000000002,3,3,3,3,3,3,0,0.0,1.0,,
scripted_oracle,20260442,possession_9cc45d,hard,1.0,1.0,1.2000000000000002,3,3,3,3,3,3,0,0.0,1.0,,
scripted_oracle,20260443,possession_259aa5,easy,0.8800000000000001,1.0,0.4,1,1,1,1,1,1,0,0.0,1.0,,
scripted_oracle,20260444,corporate_76724c,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.0,,
scripted_oracle,20260445,timeline_767821,medium,0.49000000000000005,0.5,0.45,2,1,1,2,2,1,1,0.0,1.0,,
scripted_oracle,20260446,motive_c0d166,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.0,,
scripted_oracle,20260447,corporate_307934,hard,1.0,1.0,1.2000000000000002,3,3,3,3,3,3,0,0.0,1.0,,
scripted_oracle,20260448,timeline_592816,hard,1.0,1.0,1.2000000000000002,3,3,3,3,3,3,0,0.0,1.0,,
scripted_oracle,20260449,knowledge_b26824,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.5,,
scripted_oracle,20260450,knowledge_697785,hard,1.0,1.0,1.2000000000000002,3,3,3,3,3,3,0,0.0,1.6666666666666667,,
scripted_oracle,20260451,timeline_81dafd,medium,0.49000000000000005,0.5,0.45,2,1,1,2,2,1,1,0.0,1.0,,
scripted_oracle,20260452,corporate_8eb7d7,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.0,,
scripted_oracle,20260453,possession_dbb5fe,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.0,,
scripted_oracle,20260454,alibi_a4666f,hard,1.0,1.0,1.2000000000000002,3,3,3,3,3,3,0,0.0,2.3333333333333335,,
trained_grpo_200,20260425,timeline_255d67,easy,0.0,0.0,0.0,1,0,0,0,0,0,0,0.0,0.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo,1.0
trained_grpo_200,20260426,knowledge_b28f8c,medium,0.06000000000000001,0.0,0.30000000000000004,2,1,0,1,0,0,0,0.0,43.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo,0.0
trained_grpo_200,20260427,workplace_c98377,easy,0.06000000000000001,0.0,0.30000000000000004,1,1,0,1,0,0,0,0.0,29.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo,0.0
trained_grpo_200,20260428,motive_66ff59,hard,0.06000000000000001,0.0,0.30000000000000004,3,1,0,1,0,0,0,0.0,33.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo,0.0
trained_grpo_200,20260429,timeline_19bb78,easy,0.06000000000000001,0.0,0.30000000000000004,1,1,0,1,0,0,0,0.0,35.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo,0.0
trained_grpo_200,20260430,timeline_a97690,medium,0.06000000000000001,0.0,0.30000000000000004,2,1,0,1,0,0,0,0.0,36.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo,0.0
trained_grpo_200,20260431,alibi_67ffcd,medium,0.06000000000000001,0.0,0.30000000000000004,2,1,0,1,0,0,0,0.0,38.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo,0.0
trained_grpo_200,20260432,alibi_423bca,medium,0.06000000000000001,0.0,0.30000000000000004,2,1,0,1,0,0,0,0.0,39.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo,0.0
trained_grpo_200,20260433,knowledge_960d07,medium,0.06000000000000001,0.0,0.30000000000000004,2,1,0,1,0,0,0,0.0,41.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo,0.0
trained_grpo_200,20260434,alibi_e829c1,easy,0.06000000000000001,0.0,0.30000000000000004,1,1,0,1,0,0,0,0.0,39.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo,0.0
trained_grpo_200,20260435,motive_85e25b,hard,0.06000000000000001,0.0,0.30000000000000004,3,1,0,1,0,0,0,0.0,34.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo,0.0
trained_grpo_200,20260436,knowledge_a599e3,medium,0.06000000000000001,0.0,0.30000000000000004,2,1,0,1,0,0,0,0.0,41.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo,0.0
trained_grpo_200,20260437,motive_8bca20,easy,0.0,0.0,0.0,1,0,0,0,0,0,0,0.0,0.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo,1.0
trained_grpo_200,20260438,corporate_6b1664,medium,0.06000000000000001,0.0,0.30000000000000004,2,1,0,1,0,0,0,0.0,27.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo,0.0
trained_grpo_200,20260439,alibi_a6c582,easy,0.06000000000000001,0.0,0.30000000000000004,1,1,0,1,0,0,0,0.0,38.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo,0.0
trained_grpo_200,20260440,workplace_835476,easy,0.0,0.0,0.0,1,0,0,0,0,0,0,0.0,0.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo,1.0
trained_grpo_200,20260441,possession_a079c5,hard,0.06000000000000001,0.0,0.30000000000000004,3,1,0,1,0,0,0,0.0,49.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo,0.0
trained_grpo_200,20260442,possession_9cc45d,hard,0.0,0.0,0.0,3,0,0,0,0,0,0,0.0,0.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo,1.0
trained_grpo_200,20260443,possession_259aa5,easy,0.06000000000000001,0.0,0.30000000000000004,1,1,0,1,0,0,0,0.0,44.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo,0.0
trained_grpo_200,20260444,corporate_76724c,medium,0.06000000000000001,0.0,0.30000000000000004,2,1,0,1,0,0,0,0.0,29.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo,0.0
trained_grpo_200,20260445,timeline_767821,medium,0.06000000000000001,0.0,0.30000000000000004,2,1,0,1,0,0,0,0.0,37.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo,0.0
trained_grpo_200,20260446,motive_c0d166,medium,0.06000000000000001,0.0,0.30000000000000004,2,1,0,1,0,0,0,0.0,34.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo,0.0
trained_grpo_200,20260447,corporate_307934,hard,0.06000000000000001,0.0,0.30000000000000004,3,1,0,1,0,0,0,0.0,27.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo,0.0
trained_grpo_200,20260448,timeline_592816,hard,0.06000000000000001,0.0,0.30000000000000004,3,1,0,1,0,0,0,0.0,35.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo,0.0
trained_grpo_200,20260449,knowledge_b26824,medium,0.06000000000000001,0.0,0.30000000000000004,2,1,0,1,0,0,0,0.0,40.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo,0.0
trained_grpo_200,20260450,knowledge_697785,hard,0.06000000000000001,0.0,0.30000000000000004,3,1,0,1,0,0,0,0.0,41.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo,0.0
trained_grpo_200,20260451,timeline_81dafd,medium,0.06000000000000001,0.0,0.30000000000000004,2,1,0,1,0,0,0,0.0,34.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo,0.0
trained_grpo_200,20260452,corporate_8eb7d7,medium,0.0,0.0,0.0,2,0,0,0,0,0,0,0.0,0.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo,1.0
trained_grpo_200,20260453,possession_dbb5fe,medium,0.06000000000000001,0.0,0.30000000000000004,2,1,0,1,0,0,0,0.0,47.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo,0.0
trained_grpo_200,20260454,alibi_a4666f,hard,0.06000000000000001,0.0,0.30000000000000004,3,1,0,1,0,0,0,0.0,37.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo,0.0
1 agent seed case_id difficulty reward primary_reward auxiliary_reward contradictions_total contradictions_triggered contradictions_surfaced questions_used evidence_presented evidence_timing_successes blind_evidence_count useless_questions_ratio avg_question_length model_repo invalid_tool_calls
2 random 20260425 timeline_255d67 easy 0.0 0.0 -0.4 1 0 0 6 2 0 2 1.0 5.0
3 random 20260426 knowledge_b28f8c medium 0.0 0.0 -0.4 2 0 0 6 2 0 2 1.0 5.0
4 random 20260427 workplace_c98377 easy 0.0 0.0 -0.4 1 0 0 3 5 0 5 1.0 5.0
5 random 20260428 motive_66ff59 hard 0.0 0.0 -0.4 3 0 0 7 1 0 1 1.0 5.0
6 random 20260429 timeline_19bb78 easy 0.0 0.0 -0.4 1 0 0 4 4 0 4 1.0 5.0
7 random 20260430 timeline_a97690 medium 0.0 0.0 -0.4 2 0 0 6 2 0 2 1.0 5.0
8 random 20260431 alibi_67ffcd medium 0.0 0.0 -0.4 2 0 0 6 2 0 2 1.0 5.0
9 random 20260432 alibi_423bca medium 0.0 0.0 -0.4 2 0 0 5 3 0 3 1.0 5.0
10 random 20260433 knowledge_960d07 medium 0.0 0.0 -0.4 2 0 0 8 0 0 0 1.0 5.0
11 random 20260434 alibi_e829c1 easy 0.0 0.0 -0.4 1 0 0 7 1 0 1 1.0 5.0
12 random 20260435 motive_85e25b hard 0.0 0.0 -0.4 3 0 0 5 3 0 3 1.0 5.0
13 random 20260436 knowledge_a599e3 medium 0.0 0.0 -0.4 2 0 0 4 4 0 4 1.0 5.0
14 random 20260437 motive_8bca20 easy 0.0 0.0 -0.4 1 0 0 6 2 0 2 1.0 5.0
15 random 20260438 corporate_6b1664 medium 0.0 0.0 -0.4 2 0 0 6 2 0 2 1.0 5.0
16 random 20260439 alibi_a6c582 easy 0.0 0.0 -0.4 1 0 0 8 0 0 0 1.0 5.0
17 random 20260440 workplace_835476 easy 0.0 0.0 -0.4 1 0 0 5 3 0 3 1.0 5.0
18 random 20260441 possession_a079c5 hard 0.0 0.0 -0.4 3 0 0 8 0 0 0 1.0 5.0
19 random 20260442 possession_9cc45d hard 0.0 0.0 -0.4 3 0 0 5 3 0 3 1.0 5.0
20 random 20260443 possession_259aa5 easy 0.0 0.0 -0.4 1 0 0 4 4 0 4 1.0 5.0
21 random 20260444 corporate_76724c medium 0.0 0.0 -0.4 2 0 0 5 3 0 3 1.0 5.0
22 random 20260445 timeline_767821 medium 0.0 0.0 -0.4 2 0 0 5 3 0 3 1.0 5.0
23 random 20260446 motive_c0d166 medium 0.0 0.0 -0.4 2 0 0 5 3 0 3 1.0 5.0
24 random 20260447 corporate_307934 hard 0.0 0.0 -0.4 3 0 0 7 1 0 1 1.0 5.0
25 random 20260448 timeline_592816 hard 0.0 0.0 -0.4 3 0 0 6 2 0 2 1.0 5.0
26 random 20260449 knowledge_b26824 medium 0.0 0.0 -0.4 2 0 0 5 3 0 3 1.0 5.0
27 random 20260450 knowledge_697785 hard 0.0 0.0 -0.4 3 0 0 7 1 0 1 1.0 5.0
28 random 20260451 timeline_81dafd medium 0.0 0.0 -0.4 2 0 0 4 4 0 4 1.0 5.0
29 random 20260452 corporate_8eb7d7 medium 0.0 0.0 -0.4 2 0 0 4 4 0 4 1.0 5.0
30 random 20260453 possession_dbb5fe medium 0.0 0.0 -0.4 2 0 0 5 3 0 3 1.0 5.0
31 random 20260454 alibi_a4666f hard 0.0 0.0 -0.4 3 0 0 8 0 0 0 1.0 5.0
32 keyword_spam 20260425 timeline_255d67 easy 0.030000000000000006 0.0 0.15000000000000002 1 1 0 5 0 0 0 0.6 4.2
33 keyword_spam 20260426 knowledge_b28f8c medium 0.12000000000000002 0.0 0.6000000000000001 2 2 0 5 0 0 0 0.4 4.2
34 keyword_spam 20260427 workplace_c98377 easy 0.0 0.0 -0.2 1 0 0 5 0 0 0 0.8 4.2
35 keyword_spam 20260428 motive_66ff59 hard 0.17000000000000004 0.0 0.8500000000000001 3 3 0 5 0 0 0 0.2 4.2
36 keyword_spam 20260429 timeline_19bb78 easy 0.030000000000000006 0.0 0.15000000000000002 1 1 0 5 0 0 0 0.6 4.2
37 keyword_spam 20260430 timeline_a97690 medium 0.13 0.0 0.65 2 2 0 5 0 0 0 0.2 4.2
38 keyword_spam 20260431 alibi_67ffcd medium 0.04000000000000001 0.0 0.20000000000000004 2 1 0 5 0 0 0 0.4 4.2
39 keyword_spam 20260432 alibi_423bca medium 0.04000000000000001 0.0 0.20000000000000004 2 1 0 5 0 0 0 0.4 4.2
40 keyword_spam 20260433 knowledge_960d07 medium 0.12000000000000002 0.0 0.6000000000000001 2 2 0 5 0 0 0 0.4 4.2
41 keyword_spam 20260434 alibi_e829c1 easy 0.04000000000000001 0.0 0.20000000000000004 1 1 0 5 0 0 0 0.4 4.2
42 keyword_spam 20260435 motive_85e25b hard 0.17000000000000004 0.0 0.8500000000000001 3 3 0 5 0 0 0 0.2 4.2
43 keyword_spam 20260436 knowledge_a599e3 medium 0.12000000000000002 0.0 0.6000000000000001 2 2 0 5 0 0 0 0.4 4.2
44 keyword_spam 20260437 motive_8bca20 easy 0.0 0.0 -0.15000000000000002 1 0 0 5 0 0 0 0.6 4.2
45 keyword_spam 20260438 corporate_6b1664 medium 0.020000000000000007 0.0 0.10000000000000003 2 1 0 5 0 0 0 0.8 4.2
46 keyword_spam 20260439 alibi_a6c582 easy 0.04000000000000001 0.0 0.20000000000000004 1 1 0 5 0 0 0 0.4 4.2
47 keyword_spam 20260440 workplace_835476 easy 0.0 0.0 -0.2 1 0 0 5 0 0 0 0.8 4.2
48 keyword_spam 20260441 possession_a079c5 hard 0.030000000000000006 0.0 0.15000000000000002 3 1 0 5 0 0 0 0.6 4.2
49 keyword_spam 20260442 possession_9cc45d hard 0.030000000000000006 0.0 0.15000000000000002 3 1 0 5 0 0 0 0.6 4.2
50 keyword_spam 20260443 possession_259aa5 easy 0.0 0.0 -0.2 1 0 0 5 0 0 0 0.8 4.2
51 keyword_spam 20260444 corporate_76724c medium 0.020000000000000007 0.0 0.10000000000000003 2 1 0 5 0 0 0 0.8 4.2
52 keyword_spam 20260445 timeline_767821 medium 0.13 0.0 0.65 2 2 0 5 0 0 0 0.2 4.2
53 keyword_spam 20260446 motive_c0d166 medium 0.13 0.0 0.65 2 2 0 5 0 0 0 0.2 4.2
54 keyword_spam 20260447 corporate_307934 hard 0.020000000000000007 0.0 0.10000000000000003 3 1 0 5 0 0 0 0.8 4.2
55 keyword_spam 20260448 timeline_592816 hard 0.19 0.0 0.95 3 3 0 5 0 0 0 0.2 4.2
56 keyword_spam 20260449 knowledge_b26824 medium 0.12000000000000002 0.0 0.6000000000000001 2 2 0 5 0 0 0 0.4 4.2
57 keyword_spam 20260450 knowledge_697785 hard 0.12000000000000002 0.0 0.6000000000000001 3 2 0 5 0 0 0 0.4 4.2
58 keyword_spam 20260451 timeline_81dafd medium 0.13 0.0 0.65 2 2 0 5 0 0 0 0.2 4.2
59 keyword_spam 20260452 corporate_8eb7d7 medium 0.020000000000000007 0.0 0.10000000000000003 2 1 0 5 0 0 0 0.8 4.2
60 keyword_spam 20260453 possession_dbb5fe medium 0.030000000000000006 0.0 0.15000000000000002 2 1 0 5 0 0 0 0.6 4.2
61 keyword_spam 20260454 alibi_a4666f hard 0.15000000000000002 0.0 0.75 3 2 0 5 0 0 0 0.2 4.2
62 present_all 20260425 timeline_255d67 easy 0.0 0.0 -0.25 1 0 0 0 5 0 5 0.0 0.0
63 present_all 20260426 knowledge_b28f8c medium 0.0 0.0 -0.2 2 0 0 0 4 0 4 0.0 0.0
64 present_all 20260427 workplace_c98377 easy 0.0 0.0 -0.25 1 0 0 0 5 0 5 0.0 0.0
65 present_all 20260428 motive_66ff59 hard 0.0 0.0 -0.35000000000000003 3 0 0 0 7 0 7 0.0 0.0
66 present_all 20260429 timeline_19bb78 easy 0.0 0.0 -0.25 1 0 0 0 5 0 5 0.0 0.0
67 present_all 20260430 timeline_a97690 medium 0.0 0.0 -0.25 2 0 0 0 5 0 5 0.0 0.0
68 present_all 20260431 alibi_67ffcd medium 0.0 0.0 -0.2 2 0 0 0 4 0 4 0.0 0.0
69 present_all 20260432 alibi_423bca medium 0.0 0.0 -0.2 2 0 0 0 4 0 4 0.0 0.0
70 present_all 20260433 knowledge_960d07 medium 0.0 0.0 -0.2 2 0 0 0 4 0 4 0.0 0.0
71 present_all 20260434 alibi_e829c1 easy 0.0 0.0 -0.2 1 0 0 0 4 0 4 0.0 0.0
72 present_all 20260435 motive_85e25b hard 0.0 0.0 -0.35000000000000003 3 0 0 0 7 0 7 0.0 0.0
73 present_all 20260436 knowledge_a599e3 medium 0.0 0.0 -0.2 2 0 0 0 4 0 4 0.0 0.0
74 present_all 20260437 motive_8bca20 easy 0.0 0.0 -0.25 1 0 0 0 5 0 5 0.0 0.0
75 present_all 20260438 corporate_6b1664 medium 0.0 0.0 -0.25 2 0 0 0 5 0 5 0.0 0.0
76 present_all 20260439 alibi_a6c582 easy 0.0 0.0 -0.2 1 0 0 0 4 0 4 0.0 0.0
77 present_all 20260440 workplace_835476 easy 0.0 0.0 -0.25 1 0 0 0 5 0 5 0.0 0.0
78 present_all 20260441 possession_a079c5 hard 0.0 0.0 -0.30000000000000004 3 0 0 0 6 0 6 0.0 0.0
79 present_all 20260442 possession_9cc45d hard 0.0 0.0 -0.30000000000000004 3 0 0 0 6 0 6 0.0 0.0
80 present_all 20260443 possession_259aa5 easy 0.0 0.0 -0.2 1 0 0 0 4 0 4 0.0 0.0
81 present_all 20260444 corporate_76724c medium 0.0 0.0 -0.25 2 0 0 0 5 0 5 0.0 0.0
82 present_all 20260445 timeline_767821 medium 0.0 0.0 -0.25 2 0 0 0 5 0 5 0.0 0.0
83 present_all 20260446 motive_c0d166 medium 0.0 0.0 -0.25 2 0 0 0 5 0 5 0.0 0.0
84 present_all 20260447 corporate_307934 hard 0.0 0.0 -0.35000000000000003 3 0 0 0 7 0 7 0.0 0.0
85 present_all 20260448 timeline_592816 hard 0.0 0.0 -0.35000000000000003 3 0 0 0 7 0 7 0.0 0.0
86 present_all 20260449 knowledge_b26824 medium 0.0 0.0 -0.2 2 0 0 0 4 0 4 0.0 0.0
87 present_all 20260450 knowledge_697785 hard 0.0 0.0 -0.30000000000000004 3 0 0 0 6 0 6 0.0 0.0
88 present_all 20260451 timeline_81dafd medium 0.0 0.0 -0.25 2 0 0 0 5 0 5 0.0 0.0
89 present_all 20260452 corporate_8eb7d7 medium 0.0 0.0 -0.25 2 0 0 0 5 0 5 0.0 0.0
90 present_all 20260453 possession_dbb5fe medium 0.0 0.0 -0.2 2 0 0 0 4 0 4 0.0 0.0
91 present_all 20260454 alibi_a4666f hard 0.0 0.0 -0.30000000000000004 3 0 0 0 6 0 6 0.0 0.0
92 scripted_oracle 20260425 timeline_255d67 easy 0.8800000000000001 1.0 0.4 1 1 1 1 1 1 0 0.0 1.0
93 scripted_oracle 20260426 knowledge_b28f8c medium 0.9600000000000001 1.0 0.8 2 2 2 2 2 2 0 0.0 1.5
94 scripted_oracle 20260427 workplace_c98377 easy 0.8800000000000001 1.0 0.4 1 1 1 1 1 1 0 0.0 1.0
95 scripted_oracle 20260428 motive_66ff59 hard 1.0 1.0 1.2000000000000002 3 3 3 3 3 3 0 0.0 1.0
96 scripted_oracle 20260429 timeline_19bb78 easy 0.8800000000000001 1.0 0.4 1 1 1 1 1 1 0 0.0 1.0
97 scripted_oracle 20260430 timeline_a97690 medium 0.49000000000000005 0.5 0.45 2 1 1 2 2 1 1 0.0 1.0
98 scripted_oracle 20260431 alibi_67ffcd medium 0.9600000000000001 1.0 0.8 2 2 2 2 2 2 0 0.0 3.0
99 scripted_oracle 20260432 alibi_423bca medium 0.9600000000000001 1.0 0.8 2 2 2 2 2 2 0 0.0 3.0
100 scripted_oracle 20260433 knowledge_960d07 medium 0.9600000000000001 1.0 0.8 2 2 2 2 2 2 0 0.0 1.5
101 scripted_oracle 20260434 alibi_e829c1 easy 0.8800000000000001 1.0 0.4 1 1 1 1 1 1 0 0.0 3.0
102 scripted_oracle 20260435 motive_85e25b hard 1.0 1.0 1.2000000000000002 3 3 3 3 3 3 0 0.0 1.0
103 scripted_oracle 20260436 knowledge_a599e3 medium 0.9600000000000001 1.0 0.8 2 2 2 2 2 2 0 0.0 1.5
104 scripted_oracle 20260437 motive_8bca20 easy 0.8800000000000001 1.0 0.4 1 1 1 1 1 1 0 0.0 1.0
105 scripted_oracle 20260438 corporate_6b1664 medium 0.9600000000000001 1.0 0.8 2 2 2 2 2 2 0 0.0 1.0
106 scripted_oracle 20260439 alibi_a6c582 easy 0.8800000000000001 1.0 0.4 1 1 1 1 1 1 0 0.0 3.0
107 scripted_oracle 20260440 workplace_835476 easy 0.8800000000000001 1.0 0.4 1 1 1 1 1 1 0 0.0 1.0
108 scripted_oracle 20260441 possession_a079c5 hard 1.0 1.0 1.2000000000000002 3 3 3 3 3 3 0 0.0 1.0
109 scripted_oracle 20260442 possession_9cc45d hard 1.0 1.0 1.2000000000000002 3 3 3 3 3 3 0 0.0 1.0
110 scripted_oracle 20260443 possession_259aa5 easy 0.8800000000000001 1.0 0.4 1 1 1 1 1 1 0 0.0 1.0
111 scripted_oracle 20260444 corporate_76724c medium 0.9600000000000001 1.0 0.8 2 2 2 2 2 2 0 0.0 1.0
112 scripted_oracle 20260445 timeline_767821 medium 0.49000000000000005 0.5 0.45 2 1 1 2 2 1 1 0.0 1.0
113 scripted_oracle 20260446 motive_c0d166 medium 0.9600000000000001 1.0 0.8 2 2 2 2 2 2 0 0.0 1.0
114 scripted_oracle 20260447 corporate_307934 hard 1.0 1.0 1.2000000000000002 3 3 3 3 3 3 0 0.0 1.0
115 scripted_oracle 20260448 timeline_592816 hard 1.0 1.0 1.2000000000000002 3 3 3 3 3 3 0 0.0 1.0
116 scripted_oracle 20260449 knowledge_b26824 medium 0.9600000000000001 1.0 0.8 2 2 2 2 2 2 0 0.0 1.5
117 scripted_oracle 20260450 knowledge_697785 hard 1.0 1.0 1.2000000000000002 3 3 3 3 3 3 0 0.0 1.6666666666666667
118 scripted_oracle 20260451 timeline_81dafd medium 0.49000000000000005 0.5 0.45 2 1 1 2 2 1 1 0.0 1.0
119 scripted_oracle 20260452 corporate_8eb7d7 medium 0.9600000000000001 1.0 0.8 2 2 2 2 2 2 0 0.0 1.0
120 scripted_oracle 20260453 possession_dbb5fe medium 0.9600000000000001 1.0 0.8 2 2 2 2 2 2 0 0.0 1.0
121 scripted_oracle 20260454 alibi_a4666f hard 1.0 1.0 1.2000000000000002 3 3 3 3 3 3 0 0.0 2.3333333333333335
122 trained_grpo_200 20260425 timeline_255d67 easy 0.0 0.0 0.0 1 0 0 0 0 0 0 0.0 0.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo 1.0
123 trained_grpo_200 20260426 knowledge_b28f8c medium 0.06000000000000001 0.0 0.30000000000000004 2 1 0 1 0 0 0 0.0 43.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo 0.0
124 trained_grpo_200 20260427 workplace_c98377 easy 0.06000000000000001 0.0 0.30000000000000004 1 1 0 1 0 0 0 0.0 29.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo 0.0
125 trained_grpo_200 20260428 motive_66ff59 hard 0.06000000000000001 0.0 0.30000000000000004 3 1 0 1 0 0 0 0.0 33.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo 0.0
126 trained_grpo_200 20260429 timeline_19bb78 easy 0.06000000000000001 0.0 0.30000000000000004 1 1 0 1 0 0 0 0.0 35.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo 0.0
127 trained_grpo_200 20260430 timeline_a97690 medium 0.06000000000000001 0.0 0.30000000000000004 2 1 0 1 0 0 0 0.0 36.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo 0.0
128 trained_grpo_200 20260431 alibi_67ffcd medium 0.06000000000000001 0.0 0.30000000000000004 2 1 0 1 0 0 0 0.0 38.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo 0.0
129 trained_grpo_200 20260432 alibi_423bca medium 0.06000000000000001 0.0 0.30000000000000004 2 1 0 1 0 0 0 0.0 39.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo 0.0
130 trained_grpo_200 20260433 knowledge_960d07 medium 0.06000000000000001 0.0 0.30000000000000004 2 1 0 1 0 0 0 0.0 41.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo 0.0
131 trained_grpo_200 20260434 alibi_e829c1 easy 0.06000000000000001 0.0 0.30000000000000004 1 1 0 1 0 0 0 0.0 39.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo 0.0
132 trained_grpo_200 20260435 motive_85e25b hard 0.06000000000000001 0.0 0.30000000000000004 3 1 0 1 0 0 0 0.0 34.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo 0.0
133 trained_grpo_200 20260436 knowledge_a599e3 medium 0.06000000000000001 0.0 0.30000000000000004 2 1 0 1 0 0 0 0.0 41.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo 0.0
134 trained_grpo_200 20260437 motive_8bca20 easy 0.0 0.0 0.0 1 0 0 0 0 0 0 0.0 0.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo 1.0
135 trained_grpo_200 20260438 corporate_6b1664 medium 0.06000000000000001 0.0 0.30000000000000004 2 1 0 1 0 0 0 0.0 27.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo 0.0
136 trained_grpo_200 20260439 alibi_a6c582 easy 0.06000000000000001 0.0 0.30000000000000004 1 1 0 1 0 0 0 0.0 38.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo 0.0
137 trained_grpo_200 20260440 workplace_835476 easy 0.0 0.0 0.0 1 0 0 0 0 0 0 0.0 0.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo 1.0
138 trained_grpo_200 20260441 possession_a079c5 hard 0.06000000000000001 0.0 0.30000000000000004 3 1 0 1 0 0 0 0.0 49.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo 0.0
139 trained_grpo_200 20260442 possession_9cc45d hard 0.0 0.0 0.0 3 0 0 0 0 0 0 0.0 0.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo 1.0
140 trained_grpo_200 20260443 possession_259aa5 easy 0.06000000000000001 0.0 0.30000000000000004 1 1 0 1 0 0 0 0.0 44.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo 0.0
141 trained_grpo_200 20260444 corporate_76724c medium 0.06000000000000001 0.0 0.30000000000000004 2 1 0 1 0 0 0 0.0 29.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo 0.0
142 trained_grpo_200 20260445 timeline_767821 medium 0.06000000000000001 0.0 0.30000000000000004 2 1 0 1 0 0 0 0.0 37.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo 0.0
143 trained_grpo_200 20260446 motive_c0d166 medium 0.06000000000000001 0.0 0.30000000000000004 2 1 0 1 0 0 0 0.0 34.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo 0.0
144 trained_grpo_200 20260447 corporate_307934 hard 0.06000000000000001 0.0 0.30000000000000004 3 1 0 1 0 0 0 0.0 27.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo 0.0
145 trained_grpo_200 20260448 timeline_592816 hard 0.06000000000000001 0.0 0.30000000000000004 3 1 0 1 0 0 0 0.0 35.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo 0.0
146 trained_grpo_200 20260449 knowledge_b26824 medium 0.06000000000000001 0.0 0.30000000000000004 2 1 0 1 0 0 0 0.0 40.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo 0.0
147 trained_grpo_200 20260450 knowledge_697785 hard 0.06000000000000001 0.0 0.30000000000000004 3 1 0 1 0 0 0 0.0 41.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo 0.0
148 trained_grpo_200 20260451 timeline_81dafd medium 0.06000000000000001 0.0 0.30000000000000004 2 1 0 1 0 0 0 0.0 34.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo 0.0
149 trained_grpo_200 20260452 corporate_8eb7d7 medium 0.0 0.0 0.0 2 0 0 0 0 0 0 0.0 0.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo 1.0
150 trained_grpo_200 20260453 possession_dbb5fe medium 0.06000000000000001 0.0 0.30000000000000004 2 1 0 1 0 0 0 0.0 47.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo 0.0
151 trained_grpo_200 20260454 alibi_a4666f hard 0.06000000000000001 0.0 0.30000000000000004 3 1 0 1 0 0 0 0.0 37.0 heavycoderhh/counsel-env-qwen3-0.6b-grpo 0.0

View File

@@ -0,0 +1,150 @@
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 2, "case_id": "timeline_255d67", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 2, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 6, "reward": 0.0, "seed": 20260425, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 2, "case_id": "knowledge_b28f8c", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 6, "reward": 0.0, "seed": 20260426, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 5, "case_id": "workplace_c98377", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 3, "reward": 0.0, "seed": 20260427, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 1, "case_id": "motive_66ff59", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 1, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 7, "reward": 0.0, "seed": 20260428, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 4, "case_id": "timeline_19bb78", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 4, "reward": 0.0, "seed": 20260429, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 2, "case_id": "timeline_a97690", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 6, "reward": 0.0, "seed": 20260430, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 2, "case_id": "alibi_67ffcd", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 6, "reward": 0.0, "seed": 20260431, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 3, "case_id": "alibi_423bca", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 3, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260432, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 0, "case_id": "knowledge_960d07", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 8, "reward": 0.0, "seed": 20260433, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 1, "case_id": "alibi_e829c1", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 7, "reward": 0.0, "seed": 20260434, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 3, "case_id": "motive_85e25b", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 3, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260435, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 4, "case_id": "knowledge_a599e3", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 4, "reward": 0.0, "seed": 20260436, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 2, "case_id": "motive_8bca20", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 2, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 6, "reward": 0.0, "seed": 20260437, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 2, "case_id": "corporate_6b1664", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 6, "reward": 0.0, "seed": 20260438, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 0, "case_id": "alibi_a6c582", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 8, "reward": 0.0, "seed": 20260439, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 3, "case_id": "workplace_835476", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 3, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260440, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 0, "case_id": "possession_a079c5", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 8, "reward": 0.0, "seed": 20260441, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 3, "case_id": "possession_9cc45d", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 3, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260442, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 4, "case_id": "possession_259aa5", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 4, "reward": 0.0, "seed": 20260443, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 3, "case_id": "corporate_76724c", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 3, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260444, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 3, "case_id": "timeline_767821", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 3, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260445, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 3, "case_id": "motive_c0d166", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 3, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260446, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 1, "case_id": "corporate_307934", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 1, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 7, "reward": 0.0, "seed": 20260447, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 2, "case_id": "timeline_592816", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 2, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 6, "reward": 0.0, "seed": 20260448, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 3, "case_id": "knowledge_b26824", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 3, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260449, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 1, "case_id": "knowledge_697785", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 1, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 7, "reward": 0.0, "seed": 20260450, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 4, "case_id": "timeline_81dafd", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 4, "reward": 0.0, "seed": 20260451, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 4, "case_id": "corporate_8eb7d7", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 4, "reward": 0.0, "seed": 20260452, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 3, "case_id": "possession_dbb5fe", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 3, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260453, "useless_questions_ratio": 1.0}
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 0, "case_id": "alibi_a4666f", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 8, "reward": 0.0, "seed": 20260454, "useless_questions_ratio": 1.0}
{"agent": "keyword_spam", "auxiliary_reward": 0.15000000000000002, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "timeline_255d67", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.030000000000000006, "seed": 20260425, "useless_questions_ratio": 0.6}
{"agent": "keyword_spam", "auxiliary_reward": 0.6000000000000001, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "knowledge_b28f8c", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.12000000000000002, "seed": 20260426, "useless_questions_ratio": 0.4}
{"agent": "keyword_spam", "auxiliary_reward": -0.2, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "workplace_c98377", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260427, "useless_questions_ratio": 0.8}
{"agent": "keyword_spam", "auxiliary_reward": 0.8500000000000001, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "motive_66ff59", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 3, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.17000000000000004, "seed": 20260428, "useless_questions_ratio": 0.2}
{"agent": "keyword_spam", "auxiliary_reward": 0.15000000000000002, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "timeline_19bb78", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.030000000000000006, "seed": 20260429, "useless_questions_ratio": 0.6}
{"agent": "keyword_spam", "auxiliary_reward": 0.65, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "timeline_a97690", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.13, "seed": 20260430, "useless_questions_ratio": 0.2}
{"agent": "keyword_spam", "auxiliary_reward": 0.20000000000000004, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "alibi_67ffcd", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.04000000000000001, "seed": 20260431, "useless_questions_ratio": 0.4}
{"agent": "keyword_spam", "auxiliary_reward": 0.20000000000000004, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "alibi_423bca", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.04000000000000001, "seed": 20260432, "useless_questions_ratio": 0.4}
{"agent": "keyword_spam", "auxiliary_reward": 0.6000000000000001, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "knowledge_960d07", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.12000000000000002, "seed": 20260433, "useless_questions_ratio": 0.4}
{"agent": "keyword_spam", "auxiliary_reward": 0.20000000000000004, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "alibi_e829c1", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.04000000000000001, "seed": 20260434, "useless_questions_ratio": 0.4}
{"agent": "keyword_spam", "auxiliary_reward": 0.8500000000000001, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "motive_85e25b", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 3, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.17000000000000004, "seed": 20260435, "useless_questions_ratio": 0.2}
{"agent": "keyword_spam", "auxiliary_reward": 0.6000000000000001, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "knowledge_a599e3", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.12000000000000002, "seed": 20260436, "useless_questions_ratio": 0.4}
{"agent": "keyword_spam", "auxiliary_reward": -0.15000000000000002, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "motive_8bca20", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260437, "useless_questions_ratio": 0.6}
{"agent": "keyword_spam", "auxiliary_reward": 0.10000000000000003, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "corporate_6b1664", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.020000000000000007, "seed": 20260438, "useless_questions_ratio": 0.8}
{"agent": "keyword_spam", "auxiliary_reward": 0.20000000000000004, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "alibi_a6c582", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.04000000000000001, "seed": 20260439, "useless_questions_ratio": 0.4}
{"agent": "keyword_spam", "auxiliary_reward": -0.2, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "workplace_835476", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260440, "useless_questions_ratio": 0.8}
{"agent": "keyword_spam", "auxiliary_reward": 0.15000000000000002, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "possession_a079c5", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 1, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.030000000000000006, "seed": 20260441, "useless_questions_ratio": 0.6}
{"agent": "keyword_spam", "auxiliary_reward": 0.15000000000000002, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "possession_9cc45d", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 1, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.030000000000000006, "seed": 20260442, "useless_questions_ratio": 0.6}
{"agent": "keyword_spam", "auxiliary_reward": -0.2, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "possession_259aa5", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260443, "useless_questions_ratio": 0.8}
{"agent": "keyword_spam", "auxiliary_reward": 0.10000000000000003, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "corporate_76724c", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.020000000000000007, "seed": 20260444, "useless_questions_ratio": 0.8}
{"agent": "keyword_spam", "auxiliary_reward": 0.65, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "timeline_767821", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.13, "seed": 20260445, "useless_questions_ratio": 0.2}
{"agent": "keyword_spam", "auxiliary_reward": 0.65, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "motive_c0d166", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.13, "seed": 20260446, "useless_questions_ratio": 0.2}
{"agent": "keyword_spam", "auxiliary_reward": 0.10000000000000003, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "corporate_307934", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 1, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.020000000000000007, "seed": 20260447, "useless_questions_ratio": 0.8}
{"agent": "keyword_spam", "auxiliary_reward": 0.95, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "timeline_592816", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 3, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.19, "seed": 20260448, "useless_questions_ratio": 0.2}
{"agent": "keyword_spam", "auxiliary_reward": 0.6000000000000001, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "knowledge_b26824", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.12000000000000002, "seed": 20260449, "useless_questions_ratio": 0.4}
{"agent": "keyword_spam", "auxiliary_reward": 0.6000000000000001, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "knowledge_697785", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 2, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.12000000000000002, "seed": 20260450, "useless_questions_ratio": 0.4}
{"agent": "keyword_spam", "auxiliary_reward": 0.65, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "timeline_81dafd", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.13, "seed": 20260451, "useless_questions_ratio": 0.2}
{"agent": "keyword_spam", "auxiliary_reward": 0.10000000000000003, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "corporate_8eb7d7", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.020000000000000007, "seed": 20260452, "useless_questions_ratio": 0.8}
{"agent": "keyword_spam", "auxiliary_reward": 0.15000000000000002, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "possession_dbb5fe", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.030000000000000006, "seed": 20260453, "useless_questions_ratio": 0.6}
{"agent": "keyword_spam", "auxiliary_reward": 0.75, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "alibi_a4666f", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 2, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.15000000000000002, "seed": 20260454, "useless_questions_ratio": 0.2}
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "timeline_255d67", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260425, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.2, "avg_question_length": 0.0, "blind_evidence_count": 4, "case_id": "knowledge_b28f8c", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260426, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "workplace_c98377", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260427, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.35000000000000003, "avg_question_length": 0.0, "blind_evidence_count": 7, "case_id": "motive_66ff59", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 7, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260428, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "timeline_19bb78", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260429, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "timeline_a97690", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260430, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.2, "avg_question_length": 0.0, "blind_evidence_count": 4, "case_id": "alibi_67ffcd", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260431, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.2, "avg_question_length": 0.0, "blind_evidence_count": 4, "case_id": "alibi_423bca", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260432, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.2, "avg_question_length": 0.0, "blind_evidence_count": 4, "case_id": "knowledge_960d07", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260433, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.2, "avg_question_length": 0.0, "blind_evidence_count": 4, "case_id": "alibi_e829c1", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260434, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.35000000000000003, "avg_question_length": 0.0, "blind_evidence_count": 7, "case_id": "motive_85e25b", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 7, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260435, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.2, "avg_question_length": 0.0, "blind_evidence_count": 4, "case_id": "knowledge_a599e3", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260436, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "motive_8bca20", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260437, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "corporate_6b1664", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260438, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.2, "avg_question_length": 0.0, "blind_evidence_count": 4, "case_id": "alibi_a6c582", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260439, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "workplace_835476", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260440, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.30000000000000004, "avg_question_length": 0.0, "blind_evidence_count": 6, "case_id": "possession_a079c5", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 6, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260441, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.30000000000000004, "avg_question_length": 0.0, "blind_evidence_count": 6, "case_id": "possession_9cc45d", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 6, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260442, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.2, "avg_question_length": 0.0, "blind_evidence_count": 4, "case_id": "possession_259aa5", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260443, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "corporate_76724c", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260444, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "timeline_767821", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260445, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "motive_c0d166", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260446, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.35000000000000003, "avg_question_length": 0.0, "blind_evidence_count": 7, "case_id": "corporate_307934", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 7, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260447, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.35000000000000003, "avg_question_length": 0.0, "blind_evidence_count": 7, "case_id": "timeline_592816", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 7, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260448, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.2, "avg_question_length": 0.0, "blind_evidence_count": 4, "case_id": "knowledge_b26824", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260449, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.30000000000000004, "avg_question_length": 0.0, "blind_evidence_count": 6, "case_id": "knowledge_697785", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 6, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260450, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "timeline_81dafd", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260451, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "corporate_8eb7d7", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260452, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.2, "avg_question_length": 0.0, "blind_evidence_count": 4, "case_id": "possession_dbb5fe", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260453, "useless_questions_ratio": 0.0}
{"agent": "present_all", "auxiliary_reward": -0.30000000000000004, "avg_question_length": 0.0, "blind_evidence_count": 6, "case_id": "alibi_a4666f", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 6, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260454, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.4, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "timeline_255d67", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "primary_reward": 1.0, "questions_used": 1, "reward": 0.8800000000000001, "seed": 20260425, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.8, "avg_question_length": 1.5, "blind_evidence_count": 0, "case_id": "knowledge_b28f8c", "contradictions_surfaced": 2, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 2, "primary_reward": 1.0, "questions_used": 2, "reward": 0.9600000000000001, "seed": 20260426, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.4, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "workplace_c98377", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "primary_reward": 1.0, "questions_used": 1, "reward": 0.8800000000000001, "seed": 20260427, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 1.2000000000000002, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "motive_66ff59", "contradictions_surfaced": 3, "contradictions_total": 3, "contradictions_triggered": 3, "difficulty": "hard", "evidence_presented": 3, "evidence_timing_successes": 3, "primary_reward": 1.0, "questions_used": 3, "reward": 1.0, "seed": 20260428, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.4, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "timeline_19bb78", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "primary_reward": 1.0, "questions_used": 1, "reward": 0.8800000000000001, "seed": 20260429, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.45, "avg_question_length": 1.0, "blind_evidence_count": 1, "case_id": "timeline_a97690", "contradictions_surfaced": 1, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 1, "primary_reward": 0.5, "questions_used": 2, "reward": 0.49000000000000005, "seed": 20260430, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.8, "avg_question_length": 3.0, "blind_evidence_count": 0, "case_id": "alibi_67ffcd", "contradictions_surfaced": 2, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 2, "primary_reward": 1.0, "questions_used": 2, "reward": 0.9600000000000001, "seed": 20260431, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.8, "avg_question_length": 3.0, "blind_evidence_count": 0, "case_id": "alibi_423bca", "contradictions_surfaced": 2, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 2, "primary_reward": 1.0, "questions_used": 2, "reward": 0.9600000000000001, "seed": 20260432, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.8, "avg_question_length": 1.5, "blind_evidence_count": 0, "case_id": "knowledge_960d07", "contradictions_surfaced": 2, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 2, "primary_reward": 1.0, "questions_used": 2, "reward": 0.9600000000000001, "seed": 20260433, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.4, "avg_question_length": 3.0, "blind_evidence_count": 0, "case_id": "alibi_e829c1", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "primary_reward": 1.0, "questions_used": 1, "reward": 0.8800000000000001, "seed": 20260434, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 1.2000000000000002, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "motive_85e25b", "contradictions_surfaced": 3, "contradictions_total": 3, "contradictions_triggered": 3, "difficulty": "hard", "evidence_presented": 3, "evidence_timing_successes": 3, "primary_reward": 1.0, "questions_used": 3, "reward": 1.0, "seed": 20260435, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.8, "avg_question_length": 1.5, "blind_evidence_count": 0, "case_id": "knowledge_a599e3", "contradictions_surfaced": 2, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 2, "primary_reward": 1.0, "questions_used": 2, "reward": 0.9600000000000001, "seed": 20260436, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.4, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "motive_8bca20", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "primary_reward": 1.0, "questions_used": 1, "reward": 0.8800000000000001, "seed": 20260437, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.8, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "corporate_6b1664", "contradictions_surfaced": 2, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 2, "primary_reward": 1.0, "questions_used": 2, "reward": 0.9600000000000001, "seed": 20260438, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.4, "avg_question_length": 3.0, "blind_evidence_count": 0, "case_id": "alibi_a6c582", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "primary_reward": 1.0, "questions_used": 1, "reward": 0.8800000000000001, "seed": 20260439, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.4, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "workplace_835476", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "primary_reward": 1.0, "questions_used": 1, "reward": 0.8800000000000001, "seed": 20260440, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 1.2000000000000002, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "possession_a079c5", "contradictions_surfaced": 3, "contradictions_total": 3, "contradictions_triggered": 3, "difficulty": "hard", "evidence_presented": 3, "evidence_timing_successes": 3, "primary_reward": 1.0, "questions_used": 3, "reward": 1.0, "seed": 20260441, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 1.2000000000000002, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "possession_9cc45d", "contradictions_surfaced": 3, "contradictions_total": 3, "contradictions_triggered": 3, "difficulty": "hard", "evidence_presented": 3, "evidence_timing_successes": 3, "primary_reward": 1.0, "questions_used": 3, "reward": 1.0, "seed": 20260442, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.4, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "possession_259aa5", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "primary_reward": 1.0, "questions_used": 1, "reward": 0.8800000000000001, "seed": 20260443, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.8, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "corporate_76724c", "contradictions_surfaced": 2, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 2, "primary_reward": 1.0, "questions_used": 2, "reward": 0.9600000000000001, "seed": 20260444, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.45, "avg_question_length": 1.0, "blind_evidence_count": 1, "case_id": "timeline_767821", "contradictions_surfaced": 1, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 1, "primary_reward": 0.5, "questions_used": 2, "reward": 0.49000000000000005, "seed": 20260445, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.8, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "motive_c0d166", "contradictions_surfaced": 2, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 2, "primary_reward": 1.0, "questions_used": 2, "reward": 0.9600000000000001, "seed": 20260446, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 1.2000000000000002, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "corporate_307934", "contradictions_surfaced": 3, "contradictions_total": 3, "contradictions_triggered": 3, "difficulty": "hard", "evidence_presented": 3, "evidence_timing_successes": 3, "primary_reward": 1.0, "questions_used": 3, "reward": 1.0, "seed": 20260447, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 1.2000000000000002, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "timeline_592816", "contradictions_surfaced": 3, "contradictions_total": 3, "contradictions_triggered": 3, "difficulty": "hard", "evidence_presented": 3, "evidence_timing_successes": 3, "primary_reward": 1.0, "questions_used": 3, "reward": 1.0, "seed": 20260448, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.8, "avg_question_length": 1.5, "blind_evidence_count": 0, "case_id": "knowledge_b26824", "contradictions_surfaced": 2, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 2, "primary_reward": 1.0, "questions_used": 2, "reward": 0.9600000000000001, "seed": 20260449, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 1.2000000000000002, "avg_question_length": 1.6666666666666667, "blind_evidence_count": 0, "case_id": "knowledge_697785", "contradictions_surfaced": 3, "contradictions_total": 3, "contradictions_triggered": 3, "difficulty": "hard", "evidence_presented": 3, "evidence_timing_successes": 3, "primary_reward": 1.0, "questions_used": 3, "reward": 1.0, "seed": 20260450, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.45, "avg_question_length": 1.0, "blind_evidence_count": 1, "case_id": "timeline_81dafd", "contradictions_surfaced": 1, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 1, "primary_reward": 0.5, "questions_used": 2, "reward": 0.49000000000000005, "seed": 20260451, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.8, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "corporate_8eb7d7", "contradictions_surfaced": 2, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 2, "primary_reward": 1.0, "questions_used": 2, "reward": 0.9600000000000001, "seed": 20260452, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 0.8, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "possession_dbb5fe", "contradictions_surfaced": 2, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 2, "primary_reward": 1.0, "questions_used": 2, "reward": 0.9600000000000001, "seed": 20260453, "useless_questions_ratio": 0.0}
{"agent": "scripted_oracle", "auxiliary_reward": 1.2000000000000002, "avg_question_length": 2.3333333333333335, "blind_evidence_count": 0, "case_id": "alibi_a4666f", "contradictions_surfaced": 3, "contradictions_total": 3, "contradictions_triggered": 3, "difficulty": "hard", "evidence_presented": 3, "evidence_timing_successes": 3, "primary_reward": 1.0, "questions_used": 3, "reward": 1.0, "seed": 20260454, "useless_questions_ratio": 0.0}
{"agent": "trained_grpo_200", "auxiliary_reward": 0.0, "avg_question_length": 0.0, "blind_evidence_count": 0, "case_id": "timeline_255d67", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 1, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo", "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260425, "useless_questions_ratio": 0.0}
{"agent": "trained_grpo_200", "auxiliary_reward": 0.30000000000000004, "avg_question_length": 43.0, "blind_evidence_count": 0, "case_id": "knowledge_b28f8c", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo", "primary_reward": 0.0, "questions_used": 1, "reward": 0.06000000000000001, "seed": 20260426, "useless_questions_ratio": 0.0}
{"agent": "trained_grpo_200", "auxiliary_reward": 0.30000000000000004, "avg_question_length": 29.0, "blind_evidence_count": 0, "case_id": "workplace_c98377", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo", "primary_reward": 0.0, "questions_used": 1, "reward": 0.06000000000000001, "seed": 20260427, "useless_questions_ratio": 0.0}
{"agent": "trained_grpo_200", "auxiliary_reward": 0.30000000000000004, "avg_question_length": 33.0, "blind_evidence_count": 0, "case_id": "motive_66ff59", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 1, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo", "primary_reward": 0.0, "questions_used": 1, "reward": 0.06000000000000001, "seed": 20260428, "useless_questions_ratio": 0.0}
{"agent": "trained_grpo_200", "auxiliary_reward": 0.30000000000000004, "avg_question_length": 35.0, "blind_evidence_count": 0, "case_id": "timeline_19bb78", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo", "primary_reward": 0.0, "questions_used": 1, "reward": 0.06000000000000001, "seed": 20260429, "useless_questions_ratio": 0.0}
{"agent": "trained_grpo_200", "auxiliary_reward": 0.30000000000000004, "avg_question_length": 36.0, "blind_evidence_count": 0, "case_id": "timeline_a97690", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo", "primary_reward": 0.0, "questions_used": 1, "reward": 0.06000000000000001, "seed": 20260430, "useless_questions_ratio": 0.0}
{"agent": "trained_grpo_200", "auxiliary_reward": 0.30000000000000004, "avg_question_length": 38.0, "blind_evidence_count": 0, "case_id": "alibi_67ffcd", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo", "primary_reward": 0.0, "questions_used": 1, "reward": 0.06000000000000001, "seed": 20260431, "useless_questions_ratio": 0.0}
{"agent": "trained_grpo_200", "auxiliary_reward": 0.30000000000000004, "avg_question_length": 39.0, "blind_evidence_count": 0, "case_id": "alibi_423bca", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo", "primary_reward": 0.0, "questions_used": 1, "reward": 0.06000000000000001, "seed": 20260432, "useless_questions_ratio": 0.0}
{"agent": "trained_grpo_200", "auxiliary_reward": 0.30000000000000004, "avg_question_length": 41.0, "blind_evidence_count": 0, "case_id": "knowledge_960d07", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo", "primary_reward": 0.0, "questions_used": 1, "reward": 0.06000000000000001, "seed": 20260433, "useless_questions_ratio": 0.0}
{"agent": "trained_grpo_200", "auxiliary_reward": 0.30000000000000004, "avg_question_length": 39.0, "blind_evidence_count": 0, "case_id": "alibi_e829c1", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo", "primary_reward": 0.0, "questions_used": 1, "reward": 0.06000000000000001, "seed": 20260434, "useless_questions_ratio": 0.0}
{"agent": "trained_grpo_200", "auxiliary_reward": 0.30000000000000004, "avg_question_length": 34.0, "blind_evidence_count": 0, "case_id": "motive_85e25b", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 1, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo", "primary_reward": 0.0, "questions_used": 1, "reward": 0.06000000000000001, "seed": 20260435, "useless_questions_ratio": 0.0}
{"agent": "trained_grpo_200", "auxiliary_reward": 0.30000000000000004, "avg_question_length": 41.0, "blind_evidence_count": 0, "case_id": "knowledge_a599e3", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo", "primary_reward": 0.0, "questions_used": 1, "reward": 0.06000000000000001, "seed": 20260436, "useless_questions_ratio": 0.0}
{"agent": "trained_grpo_200", "auxiliary_reward": 0.0, "avg_question_length": 0.0, "blind_evidence_count": 0, "case_id": "motive_8bca20", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 1, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo", "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260437, "useless_questions_ratio": 0.0}
{"agent": "trained_grpo_200", "auxiliary_reward": 0.30000000000000004, "avg_question_length": 27.0, "blind_evidence_count": 0, "case_id": "corporate_6b1664", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo", "primary_reward": 0.0, "questions_used": 1, "reward": 0.06000000000000001, "seed": 20260438, "useless_questions_ratio": 0.0}
{"agent": "trained_grpo_200", "auxiliary_reward": 0.30000000000000004, "avg_question_length": 38.0, "blind_evidence_count": 0, "case_id": "alibi_a6c582", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo", "primary_reward": 0.0, "questions_used": 1, "reward": 0.06000000000000001, "seed": 20260439, "useless_questions_ratio": 0.0}
{"agent": "trained_grpo_200", "auxiliary_reward": 0.0, "avg_question_length": 0.0, "blind_evidence_count": 0, "case_id": "workplace_835476", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 1, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo", "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260440, "useless_questions_ratio": 0.0}
{"agent": "trained_grpo_200", "auxiliary_reward": 0.30000000000000004, "avg_question_length": 49.0, "blind_evidence_count": 0, "case_id": "possession_a079c5", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 1, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo", "primary_reward": 0.0, "questions_used": 1, "reward": 0.06000000000000001, "seed": 20260441, "useless_questions_ratio": 0.0}
{"agent": "trained_grpo_200", "auxiliary_reward": 0.0, "avg_question_length": 0.0, "blind_evidence_count": 0, "case_id": "possession_9cc45d", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 1, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo", "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260442, "useless_questions_ratio": 0.0}
{"agent": "trained_grpo_200", "auxiliary_reward": 0.30000000000000004, "avg_question_length": 44.0, "blind_evidence_count": 0, "case_id": "possession_259aa5", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo", "primary_reward": 0.0, "questions_used": 1, "reward": 0.06000000000000001, "seed": 20260443, "useless_questions_ratio": 0.0}
{"agent": "trained_grpo_200", "auxiliary_reward": 0.30000000000000004, "avg_question_length": 29.0, "blind_evidence_count": 0, "case_id": "corporate_76724c", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo", "primary_reward": 0.0, "questions_used": 1, "reward": 0.06000000000000001, "seed": 20260444, "useless_questions_ratio": 0.0}
{"agent": "trained_grpo_200", "auxiliary_reward": 0.30000000000000004, "avg_question_length": 37.0, "blind_evidence_count": 0, "case_id": "timeline_767821", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo", "primary_reward": 0.0, "questions_used": 1, "reward": 0.06000000000000001, "seed": 20260445, "useless_questions_ratio": 0.0}
{"agent": "trained_grpo_200", "auxiliary_reward": 0.30000000000000004, "avg_question_length": 34.0, "blind_evidence_count": 0, "case_id": "motive_c0d166", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo", "primary_reward": 0.0, "questions_used": 1, "reward": 0.06000000000000001, "seed": 20260446, "useless_questions_ratio": 0.0}
{"agent": "trained_grpo_200", "auxiliary_reward": 0.30000000000000004, "avg_question_length": 27.0, "blind_evidence_count": 0, "case_id": "corporate_307934", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 1, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo", "primary_reward": 0.0, "questions_used": 1, "reward": 0.06000000000000001, "seed": 20260447, "useless_questions_ratio": 0.0}
{"agent": "trained_grpo_200", "auxiliary_reward": 0.30000000000000004, "avg_question_length": 35.0, "blind_evidence_count": 0, "case_id": "timeline_592816", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 1, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo", "primary_reward": 0.0, "questions_used": 1, "reward": 0.06000000000000001, "seed": 20260448, "useless_questions_ratio": 0.0}
{"agent": "trained_grpo_200", "auxiliary_reward": 0.30000000000000004, "avg_question_length": 40.0, "blind_evidence_count": 0, "case_id": "knowledge_b26824", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo", "primary_reward": 0.0, "questions_used": 1, "reward": 0.06000000000000001, "seed": 20260449, "useless_questions_ratio": 0.0}
{"agent": "trained_grpo_200", "auxiliary_reward": 0.30000000000000004, "avg_question_length": 41.0, "blind_evidence_count": 0, "case_id": "knowledge_697785", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 1, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo", "primary_reward": 0.0, "questions_used": 1, "reward": 0.06000000000000001, "seed": 20260450, "useless_questions_ratio": 0.0}
{"agent": "trained_grpo_200", "auxiliary_reward": 0.30000000000000004, "avg_question_length": 34.0, "blind_evidence_count": 0, "case_id": "timeline_81dafd", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo", "primary_reward": 0.0, "questions_used": 1, "reward": 0.06000000000000001, "seed": 20260451, "useless_questions_ratio": 0.0}
{"agent": "trained_grpo_200", "auxiliary_reward": 0.0, "avg_question_length": 0.0, "blind_evidence_count": 0, "case_id": "corporate_8eb7d7", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 1, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo", "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260452, "useless_questions_ratio": 0.0}
{"agent": "trained_grpo_200", "auxiliary_reward": 0.30000000000000004, "avg_question_length": 47.0, "blind_evidence_count": 0, "case_id": "possession_dbb5fe", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo", "primary_reward": 0.0, "questions_used": 1, "reward": 0.06000000000000001, "seed": 20260453, "useless_questions_ratio": 0.0}
{"agent": "trained_grpo_200", "auxiliary_reward": 0.30000000000000004, "avg_question_length": 37.0, "blind_evidence_count": 0, "case_id": "alibi_a4666f", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 1, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo", "primary_reward": 0.0, "questions_used": 1, "reward": 0.06000000000000001, "seed": 20260454, "useless_questions_ratio": 0.0}

View File

@@ -0,0 +1,52 @@
[
{
"agent": "keyword_spam",
"avg_evidence_timing": 0,
"avg_primary_reward": 0.0,
"avg_reward": 0.07300000000000001,
"avg_surface_rate": 0.0,
"avg_trigger_rate": 0.6777777777777778,
"avg_useless_ratio": 0.48000000000000004,
"episodes": 30
},
{
"agent": "present_all",
"avg_evidence_timing": 0,
"avg_primary_reward": 0.0,
"avg_reward": 0.0,
"avg_surface_rate": 0.0,
"avg_trigger_rate": 0.0,
"avg_useless_ratio": 0.0,
"episodes": 30
},
{
"agent": "random",
"avg_evidence_timing": 0,
"avg_primary_reward": 0.0,
"avg_reward": 0.0,
"avg_surface_rate": 0.0,
"avg_trigger_rate": 0.0,
"avg_useless_ratio": 1.0,
"episodes": 30
},
{
"agent": "scripted_oracle",
"avg_evidence_timing": 1.9,
"avg_primary_reward": 0.95,
"avg_reward": 0.9023333333333334,
"avg_surface_rate": 0.95,
"avg_trigger_rate": 0.95,
"avg_useless_ratio": 0.0,
"episodes": 30
},
{
"agent": "trained_grpo_200",
"avg_evidence_timing": 0,
"avg_primary_reward": 0.0,
"avg_reward": 0.05000000000000001,
"avg_surface_rate": 0.0,
"avg_trigger_rate": 0.4611111111111111,
"avg_useless_ratio": 0.0,
"episodes": 30
}
]

View File

@@ -0,0 +1,57 @@
# Agent: trained_grpo_200
## timeline_255d67 (easy)
Esme Dubois is accused of assault at the public library on Walnut Street on May 28th. The victim claims the assault occurred at 11:45 PM. The defendant claims they were elsewhere at that time.
### Transcript
- Step 1 `rest_case`: rest_case
- Witness: [Counsel rests.]
- Triggered: none; Surfaced: none
### Reward
```json
{"primary_reward": 0.0, "auxiliary_reward_raw": 0.0, "total_reward": 0.0, "contradictions_total": 1.0, "contradictions_triggered": 0.0, "contradictions_surfaced": 0.0, "keyword_questions": 0.0, "evidence_timing_successes": 0.0, "blind_evidence_count": 0.0, "duplicate_questions": 0.0, "irrelevant_questions": 0.0, "inadmissible_actions": 0.0, "useless_questions_ratio": 0.0, "avg_question_length": 0.0}
```
---
# Agent: trained_grpo_200
## knowledge_b28f8c (medium)
Priya Sharma is charged with embezzlement from the diner at the corner of 9th and Broad. The state alleges Priya Sharma systematically diverted funds in coordination with the manager, Kavita Iyer, between December 5th and the present. The defendant denies knowing Kavita Iyer.
### Transcript
- Step 1 `ask_question`: Priya Sharma is charged with embezzlement from the diner at the corner of 9th and Broad. The state alleges Priya Sharma systematically diverted funds in coordination with the manager, Kavita Iyer, between December 5th and the present. The defendant denies knowing Kavita Iyer.
- Witness: I have never met Kavita Iyer in my life.
- Triggered: c1_know_victim; Surfaced: none
- Step 2 `rest_case`: rest_case
- Witness: [Counsel rests.]
- Triggered: c1_know_victim; Surfaced: none
### Reward
```json
{"primary_reward": 0.0, "auxiliary_reward_raw": 0.30000000000000004, "total_reward": 0.06000000000000001, "contradictions_total": 2.0, "contradictions_triggered": 1.0, "contradictions_surfaced": 0.0, "keyword_questions": 1.0, "evidence_timing_successes": 0.0, "blind_evidence_count": 0.0, "duplicate_questions": 0.0, "irrelevant_questions": 0.0, "inadmissible_actions": 0.0, "useless_questions_ratio": 0.0, "avg_question_length": 43.0}
```
---
# Agent: trained_grpo_200
## workplace_c98377 (easy)
Priya Sharma is questioned in an HR investigation after Ravi Joshi reported retaliation at the HR office on March 14th. The manager denies sending threatening messages or being nearby.
### Transcript
- Step 1 `ask_question`: Priya Sharma is questioned in an HR investigation after Ravi Joshi reported retaliation at the HR office on March 14th. The manager denies sending threatening messages or being nearby.
- Witness: I never sent Ravi Joshi any threatening message.
- Triggered: c1_message_denial; Surfaced: none
- Step 2 `rest_case`: rest_case
- Witness: [Counsel rests.]
- Triggered: c1_message_denial; Surfaced: none
### Reward
```json
{"primary_reward": 0.0, "auxiliary_reward_raw": 0.30000000000000004, "total_reward": 0.06000000000000001, "contradictions_total": 1.0, "contradictions_triggered": 1.0, "contradictions_surfaced": 0.0, "keyword_questions": 1.0, "evidence_timing_successes": 0.0, "blind_evidence_count": 0.0, "duplicate_questions": 0.0, "irrelevant_questions": 0.0, "inadmissible_actions": 0.0, "useless_questions_ratio": 0.0, "avg_question_length": 29.0}
```

12
generation_config.json Normal file
View File

@@ -0,0 +1,12 @@
{
"do_sample": true,
"eos_token_id": [
151645,
151643
],
"pad_token_id": 151643,
"temperature": 0.6,
"top_k": 20,
"top_p": 0.95,
"transformers_version": "5.6.2"
}

3
model.safetensors Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a74651f230a1644b02fec7706be28f5209ebe746a4876a1df7c07a294a345f11
size 2384234968

3
tokenizer.json Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506
size 11422650

75
tokenizer_config.json Normal file
View File

@@ -0,0 +1,75 @@
{
"add_prefix_space": false,
"backend": "tokenizers",
"bos_token": null,
"clean_up_tokenization_spaces": false,
"eos_token": "<|im_end|>",
"errors": "replace",
"extra_special_tokens": [
"<|im_start|>",
"<|im_end|>",
"<|object_ref_start|>",
"<|object_ref_end|>",
"<|box_start|>",
"<|box_end|>",
"<|quad_start|>",
"<|quad_end|>",
"<|vision_start|>",
"<|vision_end|>",
"<|vision_pad|>",
"<|image_pad|>",
"<|video_pad|>"
],
"is_local": false,
"local_files_only": false,
"model_max_length": 131072,
"pad_token": "<|endoftext|>",
"padding_side": "left",
"response_schema": {
"properties": {
"content": {
"type": "string"
},
"reasoning_content": {
"type": "string"
},
"role": {
"const": "assistant"
},
"tool_calls": {
"items": {
"properties": {
"function": {
"properties": {
"arguments": {
"additionalProperties": {},
"type": "object"
},
"name": {
"type": "string"
}
},
"type": "object"
},
"type": {
"const": "function"
}
},
"type": "object",
"x-parser": "json",
"x-parser-args": {
"transform": "{type: 'function', function: @}"
}
},
"type": "array",
"x-regex-iterator": "<tool_call>\\s*(.+?)\\s*</tool_call>"
}
},
"type": "object",
"x-regex": "^(?:<think>\\n?(?:(?P<reasoning_content>.*?\\S.*?)\\n?|[\\s]*)</think>\\s*)?(?P<content>.*?)(?:\\n(?=<tool_call>))?(?=(?:<tool_call>|<\\|im_end\\|>|$))(?P<tool_calls>(?:<tool_call>.+?</tool_call>\\s*)+)?\\s*(?:<\\|im_end\\|>|$)"
},
"split_special_tokens": false,
"tokenizer_class": "Qwen2Tokenizer",
"truncation_side": "left",
"unk_token": null
}

3
training_args.bin Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ec2ebe18213235a9211aa4bf0c3778da36ad2990f51c4dddec3ffae0a6be7033
size 7185

18
training_summary.json Normal file
View File

@@ -0,0 +1,18 @@
{
"artifact_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo",
"dataset_size": 256,
"env_url": "https://heavycoderhh-counsel-env.hf.space",
"max_completion_length": 512,
"max_steps": 200,
"metrics": {
"total_flos": 0.0,
"train_loss": -0.0162161529250443,
"train_runtime": 4111.2914,
"train_samples_per_second": 0.195,
"train_steps_per_second": 0.049
},
"model": "Qwen/Qwen3-0.6B",
"num_generations": 4,
"space_repo": "heavycoderhh/counsel-env",
"use_vllm": false
}