初始化项目,由ModelHub XC社区提供模型
Model: heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2 Source: Original Platform
This commit is contained in:
39
.gitattributes
vendored
Normal file
39
.gitattributes
vendored
Normal file
@@ -0,0 +1,39 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
checkpoint-125/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
checkpoint-250/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
sft_warm_start/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
67
README.md
Normal file
67
README.md
Normal file
@@ -0,0 +1,67 @@
|
||||
---
|
||||
library_name: transformers
|
||||
model_name: counsel-sft-grpo-output
|
||||
tags:
|
||||
- generated_from_trainer
|
||||
- hf_jobs
|
||||
- trl
|
||||
- grpo
|
||||
licence: license
|
||||
---
|
||||
|
||||
# Model Card for counsel-sft-grpo-output
|
||||
|
||||
This model is a fine-tuned version of Qwen3-0.6B (the trainer did not record the base checkpoint, so no upstream link is given here).
|
||||
It has been trained using [TRL](https://github.com/huggingface/trl).
|
||||
|
||||
## Quick start
|
||||
|
||||
```python
|
||||
from transformers import pipeline
|
||||
|
||||
question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
|
||||
generator = pipeline("text-generation", model="heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", device="cuda")
|
||||
output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
|
||||
print(output["generated_text"])
|
||||
```
|
||||
|
||||
## Training procedure
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
|
||||
|
||||
### Framework versions
|
||||
|
||||
- TRL: 1.2.0
|
||||
- Transformers: 5.6.2
|
||||
- PyTorch: 2.11.0
|
||||
- Datasets: 4.8.4
|
||||
- Tokenizers: 0.22.2
|
||||
|
||||
## Citations
|
||||
|
||||
Cite GRPO as:
|
||||
|
||||
```bibtex
|
||||
@article{shao2024deepseekmath,
|
||||
title = {{DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models}},
|
||||
author = {Zhihong Shao and Peiyi Wang and Qihao Zhu and Runxin Xu and Junxiao Song and Mingchuan Zhang and Y. K. Li and Y. Wu and Daya Guo},
|
||||
year = 2024,
|
||||
eprint = {arXiv:2402.03300},
|
||||
}
|
||||
```
|
||||
|
||||
Cite TRL as:
|
||||
|
||||
```bibtex
|
||||
@software{vonwerra2020trl,
|
||||
title = {{TRL: Transformers Reinforcement Learning}},
|
||||
author = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin},
|
||||
license = {Apache-2.0},
|
||||
url = {https://github.com/huggingface/trl},
|
||||
year = {2020}
|
||||
}
|
||||
```
|
||||
89
chat_template.jinja
Normal file
89
chat_template.jinja
Normal file
@@ -0,0 +1,89 @@
|
||||
{%- if tools %}
|
||||
{{- '<|im_start|>system\n' }}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- messages[0].content + '\n\n' }}
|
||||
{%- endif %}
|
||||
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
||||
{%- for tool in tools %}
|
||||
{{- "\n" }}
|
||||
{{- tool | tojson }}
|
||||
{%- endfor %}
|
||||
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
||||
{%- else %}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
||||
{%- for message in messages[::-1] %}
|
||||
{%- set index = (messages|length - 1) - loop.index0 %}
|
||||
{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
|
||||
{%- set ns.multi_step_tool = false %}
|
||||
{%- set ns.last_query_index = index %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- for message in messages %}
|
||||
{%- if message.content is string %}
|
||||
{%- set content = message.content %}
|
||||
{%- else %}
|
||||
{%- set content = '' %}
|
||||
{%- endif %}
|
||||
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
|
||||
{%- elif message.role == "assistant" %}
|
||||
{%- set reasoning_content = '' %}
|
||||
{%- if message.reasoning_content is string %}
|
||||
{%- set reasoning_content = message.reasoning_content %}
|
||||
{%- else %}
|
||||
{%- if '</think>' in content %}
|
||||
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
||||
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- if loop.index0 > ns.last_query_index %}
|
||||
{%- if loop.last or (not loop.last and reasoning_content) %}
|
||||
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- if message.tool_calls %}
|
||||
{%- for tool_call in message.tool_calls %}
|
||||
{%- if (loop.first and content) or (not loop.first) %}
|
||||
{{- '\n' }}
|
||||
{%- endif %}
|
||||
{%- if tool_call.function %}
|
||||
{%- set tool_call = tool_call.function %}
|
||||
{%- endif %}
|
||||
{{- '<tool_call>\n{"name": "' }}
|
||||
{{- tool_call.name }}
|
||||
{{- '", "arguments": ' }}
|
||||
{%- if tool_call.arguments is string %}
|
||||
{{- tool_call.arguments }}
|
||||
{%- else %}
|
||||
{{- tool_call.arguments | tojson }}
|
||||
{%- endif %}
|
||||
{{- '}\n</tool_call>' }}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- elif message.role == "tool" %}
|
||||
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
||||
{{- '<|im_start|>user' }}
|
||||
{%- endif %}
|
||||
{{- '\n<tool_response>\n' }}
|
||||
{{- content }}
|
||||
{{- '\n</tool_response>' }}
|
||||
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- if add_generation_prompt %}
|
||||
{{- '<|im_start|>assistant\n' }}
|
||||
{%- if enable_thinking is defined and enable_thinking is false %}
|
||||
{{- '<think>\n\n</think>\n\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
89
checkpoint-125/chat_template.jinja
Normal file
89
checkpoint-125/chat_template.jinja
Normal file
@@ -0,0 +1,89 @@
|
||||
{%- if tools %}
|
||||
{{- '<|im_start|>system\n' }}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- messages[0].content + '\n\n' }}
|
||||
{%- endif %}
|
||||
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
||||
{%- for tool in tools %}
|
||||
{{- "\n" }}
|
||||
{{- tool | tojson }}
|
||||
{%- endfor %}
|
||||
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
||||
{%- else %}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
||||
{%- for message in messages[::-1] %}
|
||||
{%- set index = (messages|length - 1) - loop.index0 %}
|
||||
{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
|
||||
{%- set ns.multi_step_tool = false %}
|
||||
{%- set ns.last_query_index = index %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- for message in messages %}
|
||||
{%- if message.content is string %}
|
||||
{%- set content = message.content %}
|
||||
{%- else %}
|
||||
{%- set content = '' %}
|
||||
{%- endif %}
|
||||
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
|
||||
{%- elif message.role == "assistant" %}
|
||||
{%- set reasoning_content = '' %}
|
||||
{%- if message.reasoning_content is string %}
|
||||
{%- set reasoning_content = message.reasoning_content %}
|
||||
{%- else %}
|
||||
{%- if '</think>' in content %}
|
||||
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
||||
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- if loop.index0 > ns.last_query_index %}
|
||||
{%- if loop.last or (not loop.last and reasoning_content) %}
|
||||
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- if message.tool_calls %}
|
||||
{%- for tool_call in message.tool_calls %}
|
||||
{%- if (loop.first and content) or (not loop.first) %}
|
||||
{{- '\n' }}
|
||||
{%- endif %}
|
||||
{%- if tool_call.function %}
|
||||
{%- set tool_call = tool_call.function %}
|
||||
{%- endif %}
|
||||
{{- '<tool_call>\n{"name": "' }}
|
||||
{{- tool_call.name }}
|
||||
{{- '", "arguments": ' }}
|
||||
{%- if tool_call.arguments is string %}
|
||||
{{- tool_call.arguments }}
|
||||
{%- else %}
|
||||
{{- tool_call.arguments | tojson }}
|
||||
{%- endif %}
|
||||
{{- '}\n</tool_call>' }}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- elif message.role == "tool" %}
|
||||
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
||||
{{- '<|im_start|>user' }}
|
||||
{%- endif %}
|
||||
{{- '\n<tool_response>\n' }}
|
||||
{{- content }}
|
||||
{{- '\n</tool_response>' }}
|
||||
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- if add_generation_prompt %}
|
||||
{{- '<|im_start|>assistant\n' }}
|
||||
{%- if enable_thinking is defined and enable_thinking is false %}
|
||||
{{- '<think>\n\n</think>\n\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
63
checkpoint-125/config.json
Normal file
63
checkpoint-125/config.json
Normal file
@@ -0,0 +1,63 @@
|
||||
{
|
||||
"architectures": [
|
||||
"Qwen3ForCausalLM"
|
||||
],
|
||||
"attention_bias": false,
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": null,
|
||||
"dtype": "float32",
|
||||
"eos_token_id": 151645,
|
||||
"head_dim": 128,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 1024,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 3072,
|
||||
"layer_types": [
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention"
|
||||
],
|
||||
"max_position_embeddings": 40960,
|
||||
"max_window_layers": 28,
|
||||
"model_type": "qwen3",
|
||||
"num_attention_heads": 16,
|
||||
"num_hidden_layers": 28,
|
||||
"num_key_value_heads": 8,
|
||||
"pad_token_id": 151643,
|
||||
"rms_norm_eps": 1e-06,
|
||||
"rope_parameters": {
|
||||
"rope_theta": 1000000,
|
||||
"rope_type": "default"
|
||||
},
|
||||
"sliding_window": null,
|
||||
"tie_word_embeddings": true,
|
||||
"transformers_version": "5.6.2",
|
||||
"use_cache": false,
|
||||
"use_sliding_window": false,
|
||||
"vocab_size": 151936
|
||||
}
|
||||
12
checkpoint-125/generation_config.json
Normal file
12
checkpoint-125/generation_config.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"do_sample": true,
|
||||
"eos_token_id": [
|
||||
151645,
|
||||
151643
|
||||
],
|
||||
"pad_token_id": 151643,
|
||||
"temperature": 0.6,
|
||||
"top_k": 20,
|
||||
"top_p": 0.95,
|
||||
"transformers_version": "5.6.2"
|
||||
}
|
||||
3
checkpoint-125/model.safetensors
Normal file
3
checkpoint-125/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9cfa4edd146d430b8bbff0802fd8c777682b57609ee370304d76982a5919498b
|
||||
size 2384234968
|
||||
3
checkpoint-125/optimizer.pt
Normal file
3
checkpoint-125/optimizer.pt
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2ca431c190b97d71d34b7476e9c0b446b1f7a0854090fdc7233f7e708432c5f4
|
||||
size 4768669395
|
||||
3
checkpoint-125/rng_state.pth
Normal file
3
checkpoint-125/rng_state.pth
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ce9d06b6c01a6abb92a5b52d5d361d96821b8e35f68158be1830efadebe0f334
|
||||
size 14645
|
||||
3
checkpoint-125/scheduler.pt
Normal file
3
checkpoint-125/scheduler.pt
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e084e51a71375a2e59178fefd4967d7cdc87b72fa22758078088a60489f32adb
|
||||
size 1465
|
||||
3
checkpoint-125/tokenizer.json
Normal file
3
checkpoint-125/tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506
|
||||
size 11422650
|
||||
78
checkpoint-125/tokenizer_config.json
Normal file
78
checkpoint-125/tokenizer_config.json
Normal file
@@ -0,0 +1,78 @@
|
||||
{
|
||||
"add_prefix_space": false,
|
||||
"backend": "tokenizers",
|
||||
"bos_token": null,
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "<|im_end|>",
|
||||
"errors": "replace",
|
||||
"extra_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"is_local": true,
|
||||
"local_files_only": false,
|
||||
"max_length": 3072,
|
||||
"model_max_length": 131072,
|
||||
"pad_token": "<|endoftext|>",
|
||||
"padding_side": "left",
|
||||
"response_schema": {
|
||||
"properties": {
|
||||
"content": {
|
||||
"type": "string"
|
||||
},
|
||||
"reasoning_content": {
|
||||
"type": "string"
|
||||
},
|
||||
"role": {
|
||||
"const": "assistant"
|
||||
},
|
||||
"tool_calls": {
|
||||
"items": {
|
||||
"properties": {
|
||||
"function": {
|
||||
"properties": {
|
||||
"arguments": {
|
||||
"additionalProperties": {},
|
||||
"type": "object"
|
||||
},
|
||||
"name": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"type": "object"
|
||||
},
|
||||
"type": {
|
||||
"const": "function"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"x-parser": "json",
|
||||
"x-parser-args": {
|
||||
"transform": "{type: 'function', function: @}"
|
||||
}
|
||||
},
|
||||
"type": "array",
|
||||
"x-regex-iterator": "<tool_call>\\s*(.+?)\\s*</tool_call>"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"x-regex": "^(?:<think>\\n?(?:(?P<reasoning_content>.*?\\S.*?)\\n?|[\\s]*)</think>\\s*)?(?P<content>.*?)(?:\\n(?=<tool_call>))?(?=(?:<tool_call>|<\\|im_end\\|>|$))(?P<tool_calls>(?:<tool_call>.+?</tool_call>\\s*)+)?\\s*(?:<\\|im_end\\|>|$)"
|
||||
},
|
||||
"split_special_tokens": false,
|
||||
"stride": 0,
|
||||
"tokenizer_class": "Qwen2Tokenizer",
|
||||
"truncation_side": "left",
|
||||
"truncation_strategy": "longest_first",
|
||||
"unk_token": null
|
||||
}
|
||||
784
checkpoint-125/trainer_state.json
Normal file
784
checkpoint-125/trainer_state.json
Normal file
@@ -0,0 +1,784 @@
|
||||
{
|
||||
"best_global_step": null,
|
||||
"best_metric": null,
|
||||
"best_model_checkpoint": null,
|
||||
"epoch": 0.78125,
|
||||
"eval_steps": 500,
|
||||
"global_step": 125,
|
||||
"is_hyper_param_search": false,
|
||||
"is_local_process_zero": true,
|
||||
"is_world_process_zero": true,
|
||||
"log_history": [
|
||||
{
|
||||
"clip_ratio/high_max": 0.0,
|
||||
"clip_ratio/high_mean": 0.0,
|
||||
"clip_ratio/low_mean": 0.0,
|
||||
"clip_ratio/low_min": 0.0,
|
||||
"clip_ratio/region_mean": 0.0,
|
||||
"completions/clipped_ratio": 0.6,
|
||||
"completions/max_length": 199.6,
|
||||
"completions/max_terminated_length": 134.0,
|
||||
"completions/mean_length": 171.9,
|
||||
"completions/mean_terminated_length": 122.36666870117188,
|
||||
"completions/min_length": 113.0,
|
||||
"completions/min_terminated_length": 113.0,
|
||||
"entropy": 0.06970996516756714,
|
||||
"epoch": 0.03125,
|
||||
"frac_reward_zero_std": 0.6,
|
||||
"grad_norm": 3.630038261413574,
|
||||
"kl": 0.00014932112862879875,
|
||||
"learning_rate": 4.92e-06,
|
||||
"loss": 0.029165178537368774,
|
||||
"num_tokens": 15758.0,
|
||||
"reward": -0.31389998495578764,
|
||||
"reward_std": 0.2122000053524971,
|
||||
"rewards/reward_func/mean": -0.31389998495578764,
|
||||
"rewards/reward_func/std": 0.21219999492168426,
|
||||
"step": 5,
|
||||
"step_time": 14.728857926794444,
|
||||
"tools/call_frequency": 3.45,
|
||||
"tools/failure_frequency": 0.21573015451431274
|
||||
},
|
||||
{
|
||||
"clip_ratio/high_max": 0.0,
|
||||
"clip_ratio/high_mean": 0.0,
|
||||
"clip_ratio/low_mean": 0.0,
|
||||
"clip_ratio/low_min": 0.0,
|
||||
"clip_ratio/region_mean": 0.0,
|
||||
"completions/clipped_ratio": 0.0,
|
||||
"completions/max_length": 168.6,
|
||||
"completions/max_terminated_length": 168.6,
|
||||
"completions/mean_length": 148.3,
|
||||
"completions/mean_terminated_length": 148.3,
|
||||
"completions/min_length": 129.6,
|
||||
"completions/min_terminated_length": 129.6,
|
||||
"entropy": 0.042718362715095284,
|
||||
"epoch": 0.0625,
|
||||
"frac_reward_zero_std": 0.2,
|
||||
"grad_norm": 3.325033187866211,
|
||||
"kl": 0.037860750965774057,
|
||||
"learning_rate": 4.8200000000000004e-06,
|
||||
"loss": -0.011221970617771148,
|
||||
"num_tokens": 31053.0,
|
||||
"reward": 0.2989000082015991,
|
||||
"reward_std": 0.4415143087506294,
|
||||
"rewards/reward_func/mean": 0.2989000082015991,
|
||||
"rewards/reward_func/std": 0.4415143221616745,
|
||||
"step": 10,
|
||||
"step_time": 9.975367512006779,
|
||||
"tools/call_frequency": 2.5,
|
||||
"tools/failure_frequency": 0.0
|
||||
},
|
||||
{
|
||||
"clip_ratio/high_max": 0.0,
|
||||
"clip_ratio/high_mean": 0.0,
|
||||
"clip_ratio/low_mean": 0.0,
|
||||
"clip_ratio/low_min": 0.0,
|
||||
"clip_ratio/region_mean": 0.0,
|
||||
"completions/clipped_ratio": 0.0,
|
||||
"completions/max_length": 152.8,
|
||||
"completions/max_terminated_length": 152.8,
|
||||
"completions/mean_length": 131.3,
|
||||
"completions/mean_terminated_length": 131.3,
|
||||
"completions/min_length": 113.6,
|
||||
"completions/min_terminated_length": 113.6,
|
||||
"entropy": 0.016039706021547317,
|
||||
"epoch": 0.09375,
|
||||
"frac_reward_zero_std": 0.4,
|
||||
"grad_norm": 1.1289054155349731,
|
||||
"kl": 0.06640795171260834,
|
||||
"learning_rate": 4.7200000000000005e-06,
|
||||
"loss": 0.04752160608768463,
|
||||
"num_tokens": 45857.0,
|
||||
"reward": 1.1023000121116637,
|
||||
"reward_std": 0.4320605039596558,
|
||||
"rewards/reward_func/mean": 1.1023000121116637,
|
||||
"rewards/reward_func/std": 0.43206052780151366,
|
||||
"step": 15,
|
||||
"step_time": 8.620344271202338,
|
||||
"tools/call_frequency": 2.35,
|
||||
"tools/failure_frequency": 0.0
|
||||
},
|
||||
{
|
||||
"clip_ratio/high_max": 0.0,
|
||||
"clip_ratio/high_mean": 0.0,
|
||||
"clip_ratio/low_mean": 0.0,
|
||||
"clip_ratio/low_min": 0.0,
|
||||
"clip_ratio/region_mean": 0.0,
|
||||
"completions/clipped_ratio": 0.1,
|
||||
"completions/max_length": 172.0,
|
||||
"completions/max_terminated_length": 172.0,
|
||||
"completions/mean_length": 136.55,
|
||||
"completions/mean_terminated_length": 137.2500030517578,
|
||||
"completions/min_length": 111.4,
|
||||
"completions/min_terminated_length": 111.4,
|
||||
"entropy": 0.027425602450966834,
|
||||
"epoch": 0.125,
|
||||
"frac_reward_zero_std": 0.6,
|
||||
"grad_norm": 0.8991426229476929,
|
||||
"kl": 0.09577701878733932,
|
||||
"learning_rate": 4.620000000000001e-06,
|
||||
"loss": -0.1201351523399353,
|
||||
"num_tokens": 60826.0,
|
||||
"reward": 0.7200000047683716,
|
||||
"reward_std": 0.3419178485870361,
|
||||
"rewards/reward_func/mean": 0.7200000047683716,
|
||||
"rewards/reward_func/std": 0.3419178485870361,
|
||||
"step": 20,
|
||||
"step_time": 11.403528443601681,
|
||||
"tools/call_frequency": 2.05,
|
||||
"tools/failure_frequency": 0.026666668057441712
|
||||
},
|
||||
{
|
||||
"clip_ratio/high_max": 0.0,
|
||||
"clip_ratio/high_mean": 0.0,
|
||||
"clip_ratio/low_mean": 0.0,
|
||||
"clip_ratio/low_min": 0.0,
|
||||
"clip_ratio/region_mean": 0.0,
|
||||
"completions/clipped_ratio": 0.35,
|
||||
"completions/max_length": 203.2,
|
||||
"completions/max_terminated_length": 161.2,
|
||||
"completions/mean_length": 188.6,
|
||||
"completions/mean_terminated_length": 150.6666687011719,
|
||||
"completions/min_length": 173.4,
|
||||
"completions/min_terminated_length": 139.6,
|
||||
"entropy": 0.033282498246990144,
|
||||
"epoch": 0.15625,
|
||||
"frac_reward_zero_std": 0.0,
|
||||
"grad_norm": 2.041987657546997,
|
||||
"kl": 0.051508421916514634,
|
||||
"learning_rate": 4.520000000000001e-06,
|
||||
"loss": 0.03198407888412476,
|
||||
"num_tokens": 76838.0,
|
||||
"reward": 1.2669333696365357,
|
||||
"reward_std": 0.3234894543886185,
|
||||
"rewards/reward_func/mean": 1.2669333696365357,
|
||||
"rewards/reward_func/std": 0.32348946332931516,
|
||||
"step": 25,
|
||||
"step_time": 13.736867211584467,
|
||||
"tools/call_frequency": 4.4,
|
||||
"tools/failure_frequency": 0.14583333432674409
|
||||
},
|
||||
{
|
||||
"clip_ratio/high_max": 0.0,
|
||||
"clip_ratio/high_mean": 0.0,
|
||||
"clip_ratio/low_mean": 0.0,
|
||||
"clip_ratio/low_min": 0.0,
|
||||
"clip_ratio/region_mean": 0.0,
|
||||
"completions/clipped_ratio": 0.65,
|
||||
"completions/max_length": 209.4,
|
||||
"completions/max_terminated_length": 160.8,
|
||||
"completions/mean_length": 192.45,
|
||||
"completions/mean_terminated_length": 148.3,
|
||||
"completions/min_length": 169.2,
|
||||
"completions/min_terminated_length": 135.8,
|
||||
"entropy": 0.04025774166220799,
|
||||
"epoch": 0.1875,
|
||||
"frac_reward_zero_std": 0.4,
|
||||
"grad_norm": 1.6383038759231567,
|
||||
"kl": 0.09242036554496735,
|
||||
"learning_rate": 4.42e-06,
|
||||
"loss": -0.03659022152423859,
|
||||
"num_tokens": 93054.0,
|
||||
"reward": 1.0333500146865844,
|
||||
"reward_std": 0.38981522917747496,
|
||||
"rewards/reward_func/mean": 1.0333500146865844,
|
||||
"rewards/reward_func/std": 0.389815217256546,
|
||||
"step": 30,
|
||||
"step_time": 14.735964270806289,
|
||||
"tools/call_frequency": 3.85,
|
||||
"tools/failure_frequency": 0.023529411852359773
|
||||
},
|
||||
{
|
||||
"clip_ratio/high_max": 0.0,
|
||||
"clip_ratio/high_mean": 0.0,
|
||||
"clip_ratio/low_mean": 0.0,
|
||||
"clip_ratio/low_min": 0.0,
|
||||
"clip_ratio/region_mean": 0.0,
|
||||
"completions/clipped_ratio": 0.7,
|
||||
"completions/max_length": 207.4,
|
||||
"completions/max_terminated_length": 168.8,
|
||||
"completions/mean_length": 196.4,
|
||||
"completions/mean_terminated_length": 166.7,
|
||||
"completions/min_length": 191.2,
|
||||
"completions/min_terminated_length": 164.6,
|
||||
"entropy": 0.02646293715806678,
|
||||
"epoch": 0.21875,
|
||||
"frac_reward_zero_std": 0.2,
|
||||
"grad_norm": 0.6842532157897949,
|
||||
"kl": 0.09354882184416055,
|
||||
"learning_rate": 4.32e-06,
|
||||
"loss": 0.014650090038776398,
|
||||
"num_tokens": 109141.0,
|
||||
"reward": 1.0134333491325378,
|
||||
"reward_std": 0.28623148798942566,
|
||||
"rewards/reward_func/mean": 1.0134333491325378,
|
||||
"rewards/reward_func/std": 0.2862314820289612,
|
||||
"step": 35,
|
||||
"step_time": 14.25194917320332,
|
||||
"tools/call_frequency": 3.95,
|
||||
"tools/failure_frequency": 0.0
|
||||
},
|
||||
{
|
||||
"clip_ratio/high_max": 0.0,
|
||||
"clip_ratio/high_mean": 0.0,
|
||||
"clip_ratio/low_mean": 0.0,
|
||||
"clip_ratio/low_min": 0.0,
|
||||
"clip_ratio/region_mean": 0.0,
|
||||
"completions/clipped_ratio": 0.7,
|
||||
"completions/max_length": 228.8,
|
||||
"completions/max_terminated_length": 163.6,
|
||||
"completions/mean_length": 207.8,
|
||||
"completions/mean_terminated_length": 162.3,
|
||||
"completions/min_length": 188.8,
|
||||
"completions/min_terminated_length": 161.0,
|
||||
"entropy": 0.049686831969302146,
|
||||
"epoch": 0.25,
|
||||
"frac_reward_zero_std": 0.4,
|
||||
"grad_norm": 2.386836528778076,
|
||||
"kl": 0.12552661653608083,
|
||||
"learning_rate": 4.22e-06,
|
||||
"loss": 0.023246073722839357,
|
||||
"num_tokens": 125712.0,
|
||||
"reward": 0.9764333426952362,
|
||||
"reward_std": 0.3545127585530281,
|
||||
"rewards/reward_func/mean": 0.9764333426952362,
|
||||
"rewards/reward_func/std": 0.35451277494430544,
|
||||
"step": 40,
|
||||
"step_time": 16.735324517198023,
|
||||
"tools/call_frequency": 3.45,
|
||||
"tools/failure_frequency": 0.02857142984867096
|
||||
},
|
||||
{
|
||||
"clip_ratio/high_max": 0.0,
|
||||
"clip_ratio/high_mean": 0.0,
|
||||
"clip_ratio/low_mean": 0.0,
|
||||
"clip_ratio/low_min": 0.0,
|
||||
"clip_ratio/region_mean": 0.0,
|
||||
"completions/clipped_ratio": 0.8,
|
||||
"completions/max_length": 231.4,
|
||||
"completions/max_terminated_length": 127.2,
|
||||
"completions/mean_length": 209.4,
|
||||
"completions/mean_terminated_length": 124.2,
|
||||
"completions/min_length": 187.2,
|
||||
"completions/min_terminated_length": 121.2,
|
||||
"entropy": 0.14096241008955984,
|
||||
"epoch": 0.28125,
|
||||
"frac_reward_zero_std": 0.2,
|
||||
"grad_norm": 5.072839260101318,
|
||||
"kl": 0.10897002797573804,
|
||||
"learning_rate": 4.12e-06,
|
||||
"loss": 0.05337468385696411,
|
||||
"num_tokens": 142131.0,
|
||||
"reward": 1.0291000008583069,
|
||||
"reward_std": 0.5297403573989868,
|
||||
"rewards/reward_func/mean": 1.0291000008583069,
|
||||
"rewards/reward_func/std": 0.5297403573989868,
|
||||
"step": 45,
|
||||
"step_time": 17.371078941601446,
|
||||
"tools/call_frequency": 3.4,
|
||||
"tools/failure_frequency": 0.01428571492433548
|
||||
},
|
||||
{
|
||||
"clip_ratio/high_max": 0.0,
|
||||
"clip_ratio/high_mean": 0.0,
|
||||
"clip_ratio/low_mean": 0.0,
|
||||
"clip_ratio/low_min": 0.0,
|
||||
"clip_ratio/region_mean": 0.0,
|
||||
"completions/clipped_ratio": 0.6,
|
||||
"completions/max_length": 237.2,
|
||||
"completions/max_terminated_length": 172.2,
|
||||
"completions/mean_length": 201.7,
|
||||
"completions/mean_terminated_length": 160.46666870117187,
|
||||
"completions/min_length": 186.0,
|
||||
"completions/min_terminated_length": 150.8,
|
||||
"entropy": 0.1540619947016239,
|
||||
"epoch": 0.3125,
|
||||
"frac_reward_zero_std": 0.0,
|
||||
"grad_norm": 5.2555952072143555,
|
||||
"kl": 0.17082785218954086,
|
||||
"learning_rate": 4.0200000000000005e-06,
|
||||
"loss": 0.06733548641204834,
|
||||
"num_tokens": 158431.0,
|
||||
"reward": 0.8427666783332824,
|
||||
"reward_std": 0.6860074520111084,
|
||||
"rewards/reward_func/mean": 0.8427666783332824,
|
||||
"rewards/reward_func/std": 0.6860074281692505,
|
||||
"step": 50,
|
||||
"step_time": 17.60776922639343,
|
||||
"tools/call_frequency": 3.6,
|
||||
"tools/failure_frequency": 0.027619048953056335
|
||||
},
|
||||
{
|
||||
"clip_ratio/high_max": 0.0,
|
||||
"clip_ratio/high_mean": 0.0,
|
||||
"clip_ratio/low_mean": 0.0,
|
||||
"clip_ratio/low_min": 0.0,
|
||||
"clip_ratio/region_mean": 0.0,
|
||||
"completions/clipped_ratio": 0.55,
|
||||
"completions/max_length": 209.2,
|
||||
"completions/max_terminated_length": 209.2,
|
||||
"completions/mean_length": 198.0,
|
||||
"completions/mean_terminated_length": 206.43333435058594,
|
||||
"completions/min_length": 189.4,
|
||||
"completions/min_terminated_length": 203.8,
|
||||
"entropy": 0.11417091116309167,
|
||||
"epoch": 0.34375,
|
||||
"frac_reward_zero_std": 0.8,
|
||||
"grad_norm": 0.1612984985113144,
|
||||
"kl": 0.14481508396565915,
|
||||
"learning_rate": 3.920000000000001e-06,
|
||||
"loss": -0.0013940947130322457,
|
||||
"num_tokens": 174665.0,
|
||||
"reward": 1.337833333015442,
|
||||
"reward_std": 0.04058598577976227,
|
||||
"rewards/reward_func/mean": 1.337833333015442,
|
||||
"rewards/reward_func/std": 0.04058598577976227,
|
||||
"step": 55,
|
||||
"step_time": 13.894916865596315,
|
||||
"tools/call_frequency": 3.9,
|
||||
"tools/failure_frequency": 0.0
|
||||
},
|
||||
{
|
||||
"clip_ratio/high_max": 0.0,
|
||||
"clip_ratio/high_mean": 0.0,
|
||||
"clip_ratio/low_mean": 0.0,
|
||||
"clip_ratio/low_min": 0.0,
|
||||
"clip_ratio/region_mean": 0.0,
|
||||
"completions/clipped_ratio": 0.35,
|
||||
"completions/max_length": 210.6,
|
||||
"completions/max_terminated_length": 210.0,
|
||||
"completions/mean_length": 189.7,
|
||||
"completions/mean_terminated_length": 184.93333435058594,
|
||||
"completions/min_length": 152.0,
|
||||
"completions/min_terminated_length": 153.4,
|
||||
"entropy": 0.18207021439447998,
|
||||
"epoch": 0.375,
|
||||
"frac_reward_zero_std": 0.2,
|
||||
"grad_norm": 7.57163667678833,
|
||||
"kl": 0.2769763808697462,
|
||||
"learning_rate": 3.820000000000001e-06,
|
||||
"loss": -0.08738029599189759,
|
||||
"num_tokens": 190974.0,
|
||||
"reward": 0.9539999723434448,
|
||||
"reward_std": 0.24900673925876618,
|
||||
"rewards/reward_func/mean": 0.9539999723434448,
|
||||
"rewards/reward_func/std": 0.2490067459642887,
|
||||
"step": 60,
|
||||
"step_time": 13.735741792595945,
|
||||
"tools/call_frequency": 3.35,
|
||||
"tools/failure_frequency": 0.0
|
||||
},
|
||||
{
|
||||
"clip_ratio/high_max": 0.0,
|
||||
"clip_ratio/high_mean": 0.0,
|
||||
"clip_ratio/low_mean": 0.0,
|
||||
"clip_ratio/low_min": 0.0,
|
||||
"clip_ratio/region_mean": 0.0,
|
||||
"completions/clipped_ratio": 0.55,
|
||||
"completions/max_length": 213.6,
|
||||
"completions/max_terminated_length": 186.6,
|
||||
"completions/mean_length": 196.15,
|
||||
"completions/mean_terminated_length": 181.2,
|
||||
"completions/min_length": 173.2,
|
||||
"completions/min_terminated_length": 177.0,
|
||||
"entropy": 0.18931779703125357,
|
||||
"epoch": 0.40625,
|
||||
"frac_reward_zero_std": 0.2,
|
||||
"grad_norm": 0.3368631601333618,
|
||||
"kl": 0.19928277991712093,
|
||||
"learning_rate": 3.7200000000000004e-06,
|
||||
"loss": -0.03082091510295868,
|
||||
"num_tokens": 207221.0,
|
||||
"reward": 1.1948333382606506,
|
||||
"reward_std": 0.3531351625919342,
|
||||
"rewards/reward_func/mean": 1.1948333382606506,
|
||||
"rewards/reward_func/std": 0.3531351566314697,
|
||||
"step": 65,
|
||||
"step_time": 14.853071747999639,
|
||||
"tools/call_frequency": 3.45,
|
||||
"tools/failure_frequency": 0.0
|
||||
},
|
||||
{
|
||||
"clip_ratio/high_max": 0.0,
|
||||
"clip_ratio/high_mean": 0.0,
|
||||
"clip_ratio/low_mean": 0.0,
|
||||
"clip_ratio/low_min": 0.0,
|
||||
"clip_ratio/region_mean": 0.0,
|
||||
"completions/clipped_ratio": 0.45,
|
||||
"completions/max_length": 232.4,
|
||||
"completions/max_terminated_length": 231.4,
|
||||
"completions/mean_length": 214.8,
|
||||
"completions/mean_terminated_length": 221.10000305175782,
|
||||
"completions/min_length": 199.4,
|
||||
"completions/min_terminated_length": 211.6,
|
||||
"entropy": 0.20331259737722576,
|
||||
"epoch": 0.4375,
|
||||
"frac_reward_zero_std": 0.6,
|
||||
"grad_norm": 3.155299663543701,
|
||||
"kl": 0.21616111248731612,
|
||||
"learning_rate": 3.62e-06,
|
||||
"loss": -0.014388753473758698,
|
||||
"num_tokens": 223949.0,
|
||||
"reward": 1.187999999523163,
|
||||
"reward_std": 0.06400000005960464,
|
||||
"rewards/reward_func/mean": 1.187999999523163,
|
||||
"rewards/reward_func/std": 0.06399999856948853,
|
||||
"step": 70,
|
||||
"step_time": 15.7972018689994,
|
||||
"tools/call_frequency": 3.3,
|
||||
"tools/failure_frequency": 0.0
|
||||
},
|
||||
{
|
||||
"clip_ratio/high_max": 0.0,
|
||||
"clip_ratio/high_mean": 0.0,
|
||||
"clip_ratio/low_mean": 0.0,
|
||||
"clip_ratio/low_min": 0.0,
|
||||
"clip_ratio/region_mean": 0.0,
|
||||
"completions/clipped_ratio": 0.75,
|
||||
"completions/max_length": 228.0,
|
||||
"completions/max_terminated_length": 180.2,
|
||||
"completions/mean_length": 217.2,
|
||||
"completions/mean_terminated_length": 174.9,
|
||||
"completions/min_length": 208.2,
|
||||
"completions/min_terminated_length": 169.6,
|
||||
"entropy": 0.09874274502508343,
|
||||
"epoch": 0.46875,
|
||||
"frac_reward_zero_std": 0.4,
|
||||
"grad_norm": 0.1496252417564392,
|
||||
"kl": 0.19251887053251265,
|
||||
"learning_rate": 3.52e-06,
|
||||
"loss": 0.0129203662276268,
|
||||
"num_tokens": 240663.0,
|
||||
"reward": 1.166100013256073,
|
||||
"reward_std": 0.27513332962989806,
|
||||
"rewards/reward_func/mean": 1.166100013256073,
|
||||
"rewards/reward_func/std": 0.275133341550827,
|
||||
"step": 75,
|
||||
"step_time": 15.653593644002104,
|
||||
"tools/call_frequency": 3.15,
|
||||
"tools/failure_frequency": 0.0
|
||||
},
|
||||
{
|
||||
"clip_ratio/high_max": 0.0,
|
||||
"clip_ratio/high_mean": 0.0,
|
||||
"clip_ratio/low_mean": 0.0,
|
||||
"clip_ratio/low_min": 0.0,
|
||||
"clip_ratio/region_mean": 0.0,
|
||||
"completions/clipped_ratio": 0.4,
|
||||
"completions/max_length": 250.6,
|
||||
"completions/max_terminated_length": 235.4,
|
||||
"completions/mean_length": 217.2,
|
||||
"completions/mean_terminated_length": 209.23333740234375,
|
||||
"completions/min_length": 169.8,
|
||||
"completions/min_terminated_length": 174.0,
|
||||
"entropy": 0.18624852728098631,
|
||||
"epoch": 0.5,
|
||||
"frac_reward_zero_std": 0.0,
|
||||
"grad_norm": 3.9008662700653076,
|
||||
"kl": 0.19779104925692081,
|
||||
"learning_rate": 3.4200000000000007e-06,
|
||||
"loss": -0.060715597867965695,
|
||||
"num_tokens": 257232.0,
|
||||
"reward": 1.094600009918213,
|
||||
"reward_std": 0.533681058883667,
|
||||
"rewards/reward_func/mean": 1.094600009918213,
|
||||
"rewards/reward_func/std": 0.5336810708045959,
|
||||
"step": 80,
|
||||
"step_time": 16.87674882839783,
|
||||
"tools/call_frequency": 2.7,
|
||||
"tools/failure_frequency": 0.0
|
||||
},
|
||||
{
|
||||
"clip_ratio/high_max": 0.0,
|
||||
"clip_ratio/high_mean": 0.0,
|
||||
"clip_ratio/low_mean": 0.0,
|
||||
"clip_ratio/low_min": 0.0,
|
||||
"clip_ratio/region_mean": 0.0,
|
||||
"completions/clipped_ratio": 0.8,
|
||||
"completions/max_length": 263.2,
|
||||
"completions/max_terminated_length": 149.0,
|
||||
"completions/mean_length": 238.9,
|
||||
"completions/mean_terminated_length": 138.2,
|
||||
"completions/min_length": 220.0,
|
||||
"completions/min_terminated_length": 127.4,
|
||||
"entropy": 0.06761846686713398,
|
||||
"epoch": 0.53125,
|
||||
"frac_reward_zero_std": 0.6,
|
||||
"grad_norm": 0.0583312027156353,
|
||||
"kl": 0.16298045124858618,
|
||||
"learning_rate": 3.3200000000000004e-06,
|
||||
"loss": 0.0317715585231781,
|
||||
"num_tokens": 274377.0,
|
||||
"reward": 1.168333351612091,
|
||||
"reward_std": 0.21399999260902405,
|
||||
"rewards/reward_func/mean": 1.168333351612091,
|
||||
"rewards/reward_func/std": 0.214000004529953,
|
||||
"step": 85,
|
||||
"step_time": 19.253501980405417,
|
||||
"tools/call_frequency": 2.45,
|
||||
"tools/failure_frequency": 0.0
|
||||
},
|
||||
{
|
||||
"clip_ratio/high_max": 0.0,
|
||||
"clip_ratio/high_mean": 0.0,
|
||||
"clip_ratio/low_mean": 0.0,
|
||||
"clip_ratio/low_min": 0.0,
|
||||
"clip_ratio/region_mean": 0.0,
|
||||
"completions/clipped_ratio": 0.9,
|
||||
"completions/max_length": 252.0,
|
||||
"completions/max_terminated_length": 79.6,
|
||||
"completions/mean_length": 229.35,
|
||||
"completions/mean_terminated_length": 79.6,
|
||||
"completions/min_length": 212.6,
|
||||
"completions/min_terminated_length": 79.6,
|
||||
"entropy": 0.04304317501373589,
|
||||
"epoch": 0.5625,
|
||||
"frac_reward_zero_std": 0.4,
|
||||
"grad_norm": 0.05765737593173981,
|
||||
"kl": 0.1589741975069046,
|
||||
"learning_rate": 3.2200000000000005e-06,
|
||||
"loss": -0.009884151071310044,
|
||||
"num_tokens": 291640.0,
|
||||
"reward": 1.0771000266075135,
|
||||
"reward_std": 0.2571271777153015,
|
||||
"rewards/reward_func/mean": 1.0771000266075135,
|
||||
"rewards/reward_func/std": 0.257127183675766,
|
||||
"step": 90,
|
||||
"step_time": 19.810263851404308,
|
||||
"tools/call_frequency": 2.7,
|
||||
"tools/failure_frequency": 0.0
|
||||
},
|
||||
{
|
||||
"clip_ratio/high_max": 0.0,
|
||||
"clip_ratio/high_mean": 0.0,
|
||||
"clip_ratio/low_mean": 0.0,
|
||||
"clip_ratio/low_min": 0.0,
|
||||
"clip_ratio/region_mean": 0.0,
|
||||
"completions/clipped_ratio": 0.75,
|
||||
"completions/max_length": 252.2,
|
||||
"completions/max_terminated_length": 155.6,
|
||||
"completions/mean_length": 227.45,
|
||||
"completions/mean_terminated_length": 154.7,
|
||||
"completions/min_length": 202.6,
|
||||
"completions/min_terminated_length": 153.8,
|
||||
"entropy": 0.03856636304408312,
|
||||
"epoch": 0.59375,
|
||||
"frac_reward_zero_std": 0.2,
|
||||
"grad_norm": 2.2899415493011475,
|
||||
"kl": 0.18391469195485116,
|
||||
"learning_rate": 3.12e-06,
|
||||
"loss": 0.012278559803962707,
|
||||
"num_tokens": 308671.0,
|
||||
"reward": 0.9493666887283325,
|
||||
"reward_std": 0.3057107627391815,
|
||||
"rewards/reward_func/mean": 0.9493666887283325,
|
||||
"rewards/reward_func/std": 0.3057107746601105,
|
||||
"step": 95,
|
||||
"step_time": 18.270148772597896,
|
||||
"tools/call_frequency": 2.75,
|
||||
"tools/failure_frequency": 0.0
|
||||
},
|
||||
{
|
||||
"clip_ratio/high_max": 0.0,
|
||||
"clip_ratio/high_mean": 0.0,
|
||||
"clip_ratio/low_mean": 0.0,
|
||||
"clip_ratio/low_min": 0.0,
|
||||
"clip_ratio/region_mean": 0.0,
|
||||
"completions/clipped_ratio": 0.65,
|
||||
"completions/max_length": 229.0,
|
||||
"completions/max_terminated_length": 128.6,
|
||||
"completions/mean_length": 210.05,
|
||||
"completions/mean_terminated_length": 121.23333435058593,
|
||||
"completions/min_length": 191.4,
|
||||
"completions/min_terminated_length": 114.6,
|
||||
"entropy": 0.03718785918317735,
|
||||
"epoch": 0.625,
|
||||
"frac_reward_zero_std": 0.2,
|
||||
"grad_norm": 1.4016427993774414,
|
||||
"kl": 0.19276840873062612,
|
||||
"learning_rate": 3.0200000000000003e-06,
|
||||
"loss": -0.02043401300907135,
|
||||
"num_tokens": 325246.0,
|
||||
"reward": 0.9758000135421753,
|
||||
"reward_std": 0.439729905128479,
|
||||
"rewards/reward_func/mean": 0.9758000135421753,
|
||||
"rewards/reward_func/std": 0.439729905128479,
|
||||
"step": 100,
|
||||
"step_time": 16.536685503809714,
|
||||
"tools/call_frequency": 3.4,
|
||||
"tools/failure_frequency": 0.0
|
||||
},
|
||||
{
|
||||
"clip_ratio/high_max": 0.0,
|
||||
"clip_ratio/high_mean": 0.0,
|
||||
"clip_ratio/low_mean": 0.0,
|
||||
"clip_ratio/low_min": 0.0,
|
||||
"clip_ratio/region_mean": 0.0,
|
||||
"completions/clipped_ratio": 0.45,
|
||||
"completions/max_length": 231.0,
|
||||
"completions/max_terminated_length": 175.6,
|
||||
"completions/mean_length": 209.7,
|
||||
"completions/mean_terminated_length": 164.73333435058595,
|
||||
"completions/min_length": 197.2,
|
||||
"completions/min_terminated_length": 156.2,
|
||||
"entropy": 0.0890876273624599,
|
||||
"epoch": 0.65625,
|
||||
"frac_reward_zero_std": 0.6,
|
||||
"grad_norm": 1.857412338256836,
|
||||
"kl": 0.20793221928179265,
|
||||
"learning_rate": 2.92e-06,
|
||||
"loss": 0.010671529173851012,
|
||||
"num_tokens": 341743.0,
|
||||
"reward": 1.244200015068054,
|
||||
"reward_std": 0.25437753796577456,
|
||||
"rewards/reward_func/mean": 1.244200015068054,
|
||||
"rewards/reward_func/std": 0.25437754988670347,
|
||||
"step": 105,
|
||||
"step_time": 14.550393618003,
|
||||
"tools/call_frequency": 3.4,
|
||||
"tools/failure_frequency": 0.0
|
||||
},
|
||||
{
|
||||
"clip_ratio/high_max": 0.0,
|
||||
"clip_ratio/high_mean": 0.0,
|
||||
"clip_ratio/low_mean": 0.0,
|
||||
"clip_ratio/low_min": 0.0,
|
||||
"clip_ratio/region_mean": 0.0,
|
||||
"completions/clipped_ratio": 0.6,
|
||||
"completions/max_length": 228.0,
|
||||
"completions/max_terminated_length": 215.0,
|
||||
"completions/mean_length": 211.05,
|
||||
"completions/mean_terminated_length": 204.6,
|
||||
"completions/min_length": 194.2,
|
||||
"completions/min_terminated_length": 194.2,
|
||||
"entropy": 0.09650332322344184,
|
||||
"epoch": 0.6875,
|
||||
"frac_reward_zero_std": 0.0,
|
||||
"grad_norm": 0.5915409922599792,
|
||||
"kl": 0.1943995427340269,
|
||||
"learning_rate": 2.82e-06,
|
||||
"loss": -0.007803649455308914,
|
||||
"num_tokens": 358474.0,
|
||||
"reward": 0.9213667035102844,
|
||||
"reward_std": 0.48010437488555907,
|
||||
"rewards/reward_func/mean": 0.9213667035102844,
|
||||
"rewards/reward_func/std": 0.480104398727417,
|
||||
"step": 110,
|
||||
"step_time": 15.29034832160105,
|
||||
"tools/call_frequency": 3.3,
|
||||
"tools/failure_frequency": 0.0
|
||||
},
|
||||
{
|
||||
"clip_ratio/high_max": 0.0,
|
||||
"clip_ratio/high_mean": 0.0,
|
||||
"clip_ratio/low_mean": 0.0,
|
||||
"clip_ratio/low_min": 0.0,
|
||||
"clip_ratio/region_mean": 0.0,
|
||||
"completions/clipped_ratio": 0.5,
|
||||
"completions/max_length": 244.8,
|
||||
"completions/max_terminated_length": 244.8,
|
||||
"completions/mean_length": 224.4,
|
||||
"completions/mean_terminated_length": 232.93333435058594,
|
||||
"completions/min_length": 204.4,
|
||||
"completions/min_terminated_length": 219.8,
|
||||
"entropy": 0.06257005939260125,
|
||||
"epoch": 0.71875,
|
||||
"frac_reward_zero_std": 0.2,
|
||||
"grad_norm": 0.09643584489822388,
|
||||
"kl": 0.18671961799263953,
|
||||
"learning_rate": 2.7200000000000002e-06,
|
||||
"loss": 0.0009367348626255989,
|
||||
"num_tokens": 375512.0,
|
||||
"reward": 0.9198000192642212,
|
||||
"reward_std": 0.41239041090011597,
|
||||
"rewards/reward_func/mean": 0.9198000192642212,
|
||||
"rewards/reward_func/std": 0.41239042282104493,
|
||||
"step": 115,
|
||||
"step_time": 16.68962257000094,
|
||||
"tools/call_frequency": 3.05,
|
||||
"tools/failure_frequency": 0.0
|
||||
},
|
||||
{
|
||||
"clip_ratio/high_max": 0.0,
|
||||
"clip_ratio/high_mean": 0.0,
|
||||
"clip_ratio/low_mean": 0.0,
|
||||
"clip_ratio/low_min": 0.0,
|
||||
"clip_ratio/region_mean": 0.0,
|
||||
"completions/clipped_ratio": 0.5,
|
||||
"completions/max_length": 238.4,
|
||||
"completions/max_terminated_length": 194.2,
|
||||
"completions/mean_length": 221.6,
|
||||
"completions/mean_terminated_length": 185.86666870117188,
|
||||
"completions/min_length": 202.6,
|
||||
"completions/min_terminated_length": 174.8,
|
||||
"entropy": 0.19831047160550952,
|
||||
"epoch": 0.75,
|
||||
"frac_reward_zero_std": 0.4,
|
||||
"grad_norm": 0.06480103731155396,
|
||||
"kl": 0.2127195455133915,
|
||||
"learning_rate": 2.6200000000000003e-06,
|
||||
"loss": -0.002893347479403019,
|
||||
"num_tokens": 392259.0,
|
||||
"reward": 1.1177000164985658,
|
||||
"reward_std": 0.34459384679794314,
|
||||
"rewards/reward_func/mean": 1.1177000164985658,
|
||||
"rewards/reward_func/std": 0.34459385871887205,
|
||||
"step": 120,
|
||||
"step_time": 15.74592421480629,
|
||||
"tools/call_frequency": 3.1,
|
||||
"tools/failure_frequency": 0.0
|
||||
},
|
||||
{
|
||||
"clip_ratio/high_max": 0.0,
|
||||
"clip_ratio/high_mean": 0.0,
|
||||
"clip_ratio/low_mean": 0.0,
|
||||
"clip_ratio/low_min": 0.0,
|
||||
"clip_ratio/region_mean": 0.0,
|
||||
"completions/clipped_ratio": 0.55,
|
||||
"completions/max_length": 249.6,
|
||||
"completions/max_terminated_length": 248.4,
|
||||
"completions/mean_length": 225.9,
|
||||
"completions/mean_terminated_length": 238.83333435058594,
|
||||
"completions/min_length": 207.6,
|
||||
"completions/min_terminated_length": 229.0,
|
||||
"entropy": 0.12472135615535081,
|
||||
"epoch": 0.78125,
|
||||
"frac_reward_zero_std": 0.2,
|
||||
"grad_norm": 1.3778189420700073,
|
||||
"kl": 0.22096077986061574,
|
||||
"learning_rate": 2.52e-06,
|
||||
"loss": 0.018771570920944215,
|
||||
"num_tokens": 409108.0,
|
||||
"reward": 0.6021333426237107,
|
||||
"reward_std": 0.6133833765983582,
|
||||
"rewards/reward_func/mean": 0.6021333426237107,
|
||||
"rewards/reward_func/std": 0.6133833885192871,
|
||||
"step": 125,
|
||||
"step_time": 17.135429813191877,
|
||||
"tools/call_frequency": 3.0,
|
||||
"tools/failure_frequency": 0.0
|
||||
}
|
||||
],
|
||||
"logging_steps": 5,
|
||||
"max_steps": 250,
|
||||
"num_input_tokens_seen": 409108,
|
||||
"num_train_epochs": 2,
|
||||
"save_steps": 125,
|
||||
"stateful_callbacks": {
|
||||
"TrainerControl": {
|
||||
"args": {
|
||||
"should_epoch_stop": false,
|
||||
"should_evaluate": false,
|
||||
"should_log": false,
|
||||
"should_save": true,
|
||||
"should_training_stop": false
|
||||
},
|
||||
"attributes": {}
|
||||
}
|
||||
},
|
||||
"total_flos": 0.0,
|
||||
"train_batch_size": 1,
|
||||
"trial_name": null,
|
||||
"trial_params": null
|
||||
}
|
||||
3
checkpoint-125/training_args.bin
Normal file
3
checkpoint-125/training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:623158c464bdeaec4fa82a52817596e39771e95ece6659a737aa9e9d07d03327
|
||||
size 7185
|
||||
89
checkpoint-250/chat_template.jinja
Normal file
89
checkpoint-250/chat_template.jinja
Normal file
@@ -0,0 +1,89 @@
|
||||
{%- if tools %}
|
||||
{{- '<|im_start|>system\n' }}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- messages[0].content + '\n\n' }}
|
||||
{%- endif %}
|
||||
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
||||
{%- for tool in tools %}
|
||||
{{- "\n" }}
|
||||
{{- tool | tojson }}
|
||||
{%- endfor %}
|
||||
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
||||
{%- else %}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
||||
{%- for message in messages[::-1] %}
|
||||
{%- set index = (messages|length - 1) - loop.index0 %}
|
||||
{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
|
||||
{%- set ns.multi_step_tool = false %}
|
||||
{%- set ns.last_query_index = index %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- for message in messages %}
|
||||
{%- if message.content is string %}
|
||||
{%- set content = message.content %}
|
||||
{%- else %}
|
||||
{%- set content = '' %}
|
||||
{%- endif %}
|
||||
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
|
||||
{%- elif message.role == "assistant" %}
|
||||
{%- set reasoning_content = '' %}
|
||||
{%- if message.reasoning_content is string %}
|
||||
{%- set reasoning_content = message.reasoning_content %}
|
||||
{%- else %}
|
||||
{%- if '</think>' in content %}
|
||||
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
||||
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- if loop.index0 > ns.last_query_index %}
|
||||
{%- if loop.last or (not loop.last and reasoning_content) %}
|
||||
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- if message.tool_calls %}
|
||||
{%- for tool_call in message.tool_calls %}
|
||||
{%- if (loop.first and content) or (not loop.first) %}
|
||||
{{- '\n' }}
|
||||
{%- endif %}
|
||||
{%- if tool_call.function %}
|
||||
{%- set tool_call = tool_call.function %}
|
||||
{%- endif %}
|
||||
{{- '<tool_call>\n{"name": "' }}
|
||||
{{- tool_call.name }}
|
||||
{{- '", "arguments": ' }}
|
||||
{%- if tool_call.arguments is string %}
|
||||
{{- tool_call.arguments }}
|
||||
{%- else %}
|
||||
{{- tool_call.arguments | tojson }}
|
||||
{%- endif %}
|
||||
{{- '}\n</tool_call>' }}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- elif message.role == "tool" %}
|
||||
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
||||
{{- '<|im_start|>user' }}
|
||||
{%- endif %}
|
||||
{{- '\n<tool_response>\n' }}
|
||||
{{- content }}
|
||||
{{- '\n</tool_response>' }}
|
||||
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- if add_generation_prompt %}
|
||||
{{- '<|im_start|>assistant\n' }}
|
||||
{%- if enable_thinking is defined and enable_thinking is false %}
|
||||
{{- '<think>\n\n</think>\n\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
63
checkpoint-250/config.json
Normal file
63
checkpoint-250/config.json
Normal file
@@ -0,0 +1,63 @@
|
||||
{
|
||||
"architectures": [
|
||||
"Qwen3ForCausalLM"
|
||||
],
|
||||
"attention_bias": false,
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": null,
|
||||
"dtype": "float32",
|
||||
"eos_token_id": 151645,
|
||||
"head_dim": 128,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 1024,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 3072,
|
||||
"layer_types": [
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention"
|
||||
],
|
||||
"max_position_embeddings": 40960,
|
||||
"max_window_layers": 28,
|
||||
"model_type": "qwen3",
|
||||
"num_attention_heads": 16,
|
||||
"num_hidden_layers": 28,
|
||||
"num_key_value_heads": 8,
|
||||
"pad_token_id": 151643,
|
||||
"rms_norm_eps": 1e-06,
|
||||
"rope_parameters": {
|
||||
"rope_theta": 1000000,
|
||||
"rope_type": "default"
|
||||
},
|
||||
"sliding_window": null,
|
||||
"tie_word_embeddings": true,
|
||||
"transformers_version": "5.6.2",
|
||||
"use_cache": false,
|
||||
"use_sliding_window": false,
|
||||
"vocab_size": 151936
|
||||
}
|
||||
12
checkpoint-250/generation_config.json
Normal file
12
checkpoint-250/generation_config.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"do_sample": true,
|
||||
"eos_token_id": [
|
||||
151645,
|
||||
151643
|
||||
],
|
||||
"pad_token_id": 151643,
|
||||
"temperature": 0.6,
|
||||
"top_k": 20,
|
||||
"top_p": 0.95,
|
||||
"transformers_version": "5.6.2"
|
||||
}
|
||||
3
checkpoint-250/model.safetensors
Normal file
3
checkpoint-250/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5eb33e25e043c4103e6cee6aa637be513e7c2c0b7afb8cdd2325c7891721e22f
|
||||
size 2384234968
|
||||
3
checkpoint-250/optimizer.pt
Normal file
3
checkpoint-250/optimizer.pt
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ad30d0785567ceb6d1472c0c58b027a9569701ec0b3f7f9e1b6557412d8231c6
|
||||
size 4768669395
|
||||
3
checkpoint-250/rng_state.pth
Normal file
3
checkpoint-250/rng_state.pth
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:63b0bd96b0d28d762e23adea21cbb0a4f5ae44ffadba986e6529b752e987c40c
|
||||
size 14645
|
||||
3
checkpoint-250/scheduler.pt
Normal file
3
checkpoint-250/scheduler.pt
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:aa68c9756e4b45f9fc5e31507423afdfe40083f2b850885e7dec29d5f3970b1f
|
||||
size 1465
|
||||
3
checkpoint-250/tokenizer.json
Normal file
3
checkpoint-250/tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506
|
||||
size 11422650
|
||||
78
checkpoint-250/tokenizer_config.json
Normal file
78
checkpoint-250/tokenizer_config.json
Normal file
@@ -0,0 +1,78 @@
|
||||
{
|
||||
"add_prefix_space": false,
|
||||
"backend": "tokenizers",
|
||||
"bos_token": null,
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "<|im_end|>",
|
||||
"errors": "replace",
|
||||
"extra_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"is_local": true,
|
||||
"local_files_only": false,
|
||||
"max_length": 3072,
|
||||
"model_max_length": 131072,
|
||||
"pad_token": "<|endoftext|>",
|
||||
"padding_side": "left",
|
||||
"response_schema": {
|
||||
"properties": {
|
||||
"content": {
|
||||
"type": "string"
|
||||
},
|
||||
"reasoning_content": {
|
||||
"type": "string"
|
||||
},
|
||||
"role": {
|
||||
"const": "assistant"
|
||||
},
|
||||
"tool_calls": {
|
||||
"items": {
|
||||
"properties": {
|
||||
"function": {
|
||||
"properties": {
|
||||
"arguments": {
|
||||
"additionalProperties": {},
|
||||
"type": "object"
|
||||
},
|
||||
"name": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"type": "object"
|
||||
},
|
||||
"type": {
|
||||
"const": "function"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"x-parser": "json",
|
||||
"x-parser-args": {
|
||||
"transform": "{type: 'function', function: @}"
|
||||
}
|
||||
},
|
||||
"type": "array",
|
||||
"x-regex-iterator": "<tool_call>\\s*(.+?)\\s*</tool_call>"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"x-regex": "^(?:<think>\\n?(?:(?P<reasoning_content>.*?\\S.*?)\\n?|[\\s]*)</think>\\s*)?(?P<content>.*?)(?:\\n(?=<tool_call>))?(?=(?:<tool_call>|<\\|im_end\\|>|$))(?P<tool_calls>(?:<tool_call>.+?</tool_call>\\s*)+)?\\s*(?:<\\|im_end\\|>|$)"
|
||||
},
|
||||
"split_special_tokens": false,
|
||||
"stride": 0,
|
||||
"tokenizer_class": "Qwen2Tokenizer",
|
||||
"truncation_side": "left",
|
||||
"truncation_strategy": "longest_first",
|
||||
"unk_token": null
|
||||
}
|
||||
1534
checkpoint-250/trainer_state.json
Normal file
1534
checkpoint-250/trainer_state.json
Normal file
File diff suppressed because it is too large
Load Diff
3
checkpoint-250/training_args.bin
Normal file
3
checkpoint-250/training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:623158c464bdeaec4fa82a52817596e39771e95ece6659a737aa9e9d07d03327
|
||||
size 7185
|
||||
3
completions/completions_00005.parquet
Normal file
3
completions/completions_00005.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b68e115f077b56d92a8687696043f68cf4d262206fbbc1ceed637b932511d25b
|
||||
size 23966
|
||||
3
completions/completions_00010.parquet
Normal file
3
completions/completions_00010.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c3069f8445a69ba6dbb4859ea555ae1502c389e67a54572235e71ad1d67d588a
|
||||
size 21823
|
||||
3
completions/completions_00015.parquet
Normal file
3
completions/completions_00015.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b1b7eb25a9ada0e3bcbe12c628fde6b177a446a889be8e5681f37c1feeb87f0d
|
||||
size 22907
|
||||
3
completions/completions_00020.parquet
Normal file
3
completions/completions_00020.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a3fd829f54108e3882389b9e442c07a35d5d07e0367dc3be83d302a465ac1111
|
||||
size 23546
|
||||
3
completions/completions_00025.parquet
Normal file
3
completions/completions_00025.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:60b6b7b89e0297abd8244bdc13e7a75d60b34766299cfd9db3de5f9b25373dad
|
||||
size 24284
|
||||
3
completions/completions_00030.parquet
Normal file
3
completions/completions_00030.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2b8be5f615ecd18a856e38c4fc12d3f16df7e6f9fb2d4f573c6db0d11eb4e5d8
|
||||
size 24890
|
||||
3
completions/completions_00035.parquet
Normal file
3
completions/completions_00035.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d02e8831c4091b1dbd8b0e07448cd8f22de4b3efe52ba57d6f6b6f311fa9560c
|
||||
size 24136
|
||||
3
completions/completions_00040.parquet
Normal file
3
completions/completions_00040.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d20e76df2ff675580ce50d9bcb6fd4e6225342d09303f180762cabab1e24d09c
|
||||
size 23457
|
||||
3
completions/completions_00045.parquet
Normal file
3
completions/completions_00045.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b42b35b140b93dfe80a48387ebaff03cfe2ae1af27020dbf01ed2b2ce92dc464
|
||||
size 24536
|
||||
3
completions/completions_00050.parquet
Normal file
3
completions/completions_00050.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:10826040482a7a035f66f510420b4737fc33944af16909a4a04148272a4158af
|
||||
size 23472
|
||||
3
completions/completions_00055.parquet
Normal file
3
completions/completions_00055.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6873b01aeb8204e7742e55ce05030ae49384e5730f49dfed84096525919bea00
|
||||
size 24304
|
||||
3
completions/completions_00060.parquet
Normal file
3
completions/completions_00060.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:de8626c9427a3c1b2cc33599dfc77755310fb356c4960e0f3e2975351aa7225b
|
||||
size 22618
|
||||
3
completions/completions_00065.parquet
Normal file
3
completions/completions_00065.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:696b5af00da3c47c2729346a733fb873d978694c2a5959ecdee947d051cfe54a
|
||||
size 24365
|
||||
3
completions/completions_00070.parquet
Normal file
3
completions/completions_00070.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d0874277f72d414e338612ec8eadd8acd4aea6f03be1582c71194bf838b69db1
|
||||
size 24000
|
||||
3
completions/completions_00075.parquet
Normal file
3
completions/completions_00075.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:08a237f024f5f74a46c8bcced98366ccda0e7630e8b479c1df3bbdfb6851b4e9
|
||||
size 24879
|
||||
3
completions/completions_00080.parquet
Normal file
3
completions/completions_00080.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4850fbf4fbb8b1944b6c971e91da5f510681ad9664e35e56828e9b50cea0db0e
|
||||
size 24416
|
||||
3
completions/completions_00085.parquet
Normal file
3
completions/completions_00085.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:072d189c7cc2ca7e12abaf7cab8175b6d8d5dd4f93a34aeddcfb9f3630584163
|
||||
size 24320
|
||||
3
completions/completions_00090.parquet
Normal file
3
completions/completions_00090.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:30acbe037313f6f3ffae9cb3d097d58f86c5c4df5a3455a5613e8a120fe28ff3
|
||||
size 24495
|
||||
3
completions/completions_00095.parquet
Normal file
3
completions/completions_00095.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a1ac198276f2647c859589db083a8bc23b707dd3e780deee44591fdbeae2d821
|
||||
size 24939
|
||||
3
completions/completions_00100.parquet
Normal file
3
completions/completions_00100.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:fbfbcd01e3e49dc20f3c4ec6f5bae29692d9ff1932d27696cd773a9eaa31f733
|
||||
size 23622
|
||||
3
completions/completions_00105.parquet
Normal file
3
completions/completions_00105.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ca57fb90811d63b27b46fa9f9b87e40101195a39f726df56b9ca70043357f768
|
||||
size 24260
|
||||
3
completions/completions_00110.parquet
Normal file
3
completions/completions_00110.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9d834799c7968349955dfdeadd84971539773c304f681ac3d1632477d39bd47e
|
||||
size 24949
|
||||
3
completions/completions_00115.parquet
Normal file
3
completions/completions_00115.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ef13f61defd07f329d24be7ac6761b10e5175729fae5db04e1373ef3bb8389b6
|
||||
size 24970
|
||||
3
completions/completions_00120.parquet
Normal file
3
completions/completions_00120.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d11d282244295c8c553d003ec90504d91d25855b55e6b8260ab930ac1f081ba2
|
||||
size 24570
|
||||
3
completions/completions_00125.parquet
Normal file
3
completions/completions_00125.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:7b8f35966c5bc1c9fd9fe0d3ce6cd8eb9c8b65395f7aae5182d67c4f08f0ab54
|
||||
size 25114
|
||||
3
completions/completions_00130.parquet
Normal file
3
completions/completions_00130.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:88b54d548b96ad67e2cf580dd46b0cee2f707f7d117741ed1e93b9b2cb0e457f
|
||||
size 23209
|
||||
3
completions/completions_00135.parquet
Normal file
3
completions/completions_00135.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:749f829e0806c9f9432492597b18573f0ff93c4f3f30c0745b6354de72518c60
|
||||
size 24387
|
||||
3
completions/completions_00140.parquet
Normal file
3
completions/completions_00140.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b92f197fe307553295b686c5b5bd8a43322139886c0cedef75665de87f66c440
|
||||
size 24489
|
||||
3
completions/completions_00145.parquet
Normal file
3
completions/completions_00145.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e7028e21d8f6dd9e32cb076cf66228d646906fb904c70ed84f80426d786ec5ed
|
||||
size 25681
|
||||
3
completions/completions_00150.parquet
Normal file
3
completions/completions_00150.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:da2ca6a5fe353e3c7e435c775fb487f923367b9a2e5769605628185afc7d3ba0
|
||||
size 25240
|
||||
3
completions/completions_00155.parquet
Normal file
3
completions/completions_00155.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0690b76b28598fa815fe1f1c312144318210684badc5711a60d8e929154616cd
|
||||
size 23230
|
||||
3
completions/completions_00160.parquet
Normal file
3
completions/completions_00160.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4fc270c3a3ed5a7bde9eeb02321c46c616e15042f917a763be2705a1fa80bad9
|
||||
size 24247
|
||||
3
completions/completions_00165.parquet
Normal file
3
completions/completions_00165.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5ae2c4bcabbb25bb7e70be2a3c27fab24a0cd675f11a15fe9361d4e73e75c0f3
|
||||
size 24118
|
||||
3
completions/completions_00170.parquet
Normal file
3
completions/completions_00170.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4e954edee445dfed87e8c114b9c0b52f02ebdb8fbe9096b3b150a21689f391c4
|
||||
size 24839
|
||||
3
completions/completions_00175.parquet
Normal file
3
completions/completions_00175.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:3a8c8730b512d568a457f95a12eaab65511d8bcc5a6fbb6df79e7f9919aaac8d
|
||||
size 25469
|
||||
3
completions/completions_00180.parquet
Normal file
3
completions/completions_00180.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e8d4b6a551b0c66d356a5919397b6cfe8da2453e193cd12cc77c925ad2a7c940
|
||||
size 24879
|
||||
3
completions/completions_00185.parquet
Normal file
3
completions/completions_00185.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:af62c422876a65648f0c8357d7aaea0605ff56d7b7d329e657eeeb49310a8cab
|
||||
size 25086
|
||||
3
completions/completions_00190.parquet
Normal file
3
completions/completions_00190.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:104bdb0177fb32febf5ad69d99c089aef23e0cdf858fef9f51f6837975877e94
|
||||
size 24995
|
||||
3
completions/completions_00195.parquet
Normal file
3
completions/completions_00195.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:79dad7982eb1657e24c2d30b56e8065fbe658ee1bbcbb3825273cadcbd278bf1
|
||||
size 23769
|
||||
3
completions/completions_00200.parquet
Normal file
3
completions/completions_00200.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:03b88568b249769abd964dccb3a029392ff4188a6f2e2e0d61e49e45b8734cb3
|
||||
size 24790
|
||||
3
completions/completions_00205.parquet
Normal file
3
completions/completions_00205.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:3ce7ed5ff6eda7d382bade212a53fcb422b3e192e220ce3758f753cf75259c8d
|
||||
size 23958
|
||||
3
completions/completions_00210.parquet
Normal file
3
completions/completions_00210.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:19b48165efd963ddf763d266c16377a9f1a10ac8d1ea4ab6fec47be41ca39897
|
||||
size 26131
|
||||
3
completions/completions_00215.parquet
Normal file
3
completions/completions_00215.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:3c0ccb348d1be8c1e0a1b9287c1f7be4d87d75b59f14bcce50147d6c0970d71d
|
||||
size 23995
|
||||
3
completions/completions_00220.parquet
Normal file
3
completions/completions_00220.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:09658361715b206e755041e17a00d8be401b27d49b5fb02665d2dd406a7dca14
|
||||
size 25117
|
||||
3
completions/completions_00225.parquet
Normal file
3
completions/completions_00225.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6e73eda9b2dfe5b8e3e99ca7669bf7f028a5afdd65e64b08432831449b4a8d97
|
||||
size 25627
|
||||
3
completions/completions_00230.parquet
Normal file
3
completions/completions_00230.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:14aed69014783ad4eb182afdbae30c06cbe8da270bd87a6a70382d3aa8870fdf
|
||||
size 24619
|
||||
3
completions/completions_00235.parquet
Normal file
3
completions/completions_00235.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:49fcedabb97e8a9db277be5a8442ff7a4bdf53c9dbb09f1081e8f1d656cd7f9c
|
||||
size 25408
|
||||
3
completions/completions_00240.parquet
Normal file
3
completions/completions_00240.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:54a2bc4b721e6d58161fe6f2405d5861c5a7aa33ecad498dc3923ce85a84f6b8
|
||||
size 25454
|
||||
3
completions/completions_00245.parquet
Normal file
3
completions/completions_00245.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6ce3a8098ceb8361a7bdb1d7a3a2f0cbc26e3876257dca9219ae45d2a8cd2eb1
|
||||
size 24477
|
||||
3
completions/completions_00250.parquet
Normal file
3
completions/completions_00250.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2e999027fab4f80f916ee346b91deeab0d7bf5d6c537c2ecbcfe706ea807b7de
|
||||
size 25162
|
||||
63
config.json
Normal file
63
config.json
Normal file
@@ -0,0 +1,63 @@
|
||||
{
|
||||
"architectures": [
|
||||
"Qwen3ForCausalLM"
|
||||
],
|
||||
"attention_bias": false,
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": null,
|
||||
"dtype": "float32",
|
||||
"eos_token_id": 151645,
|
||||
"head_dim": 128,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 1024,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 3072,
|
||||
"layer_types": [
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention"
|
||||
],
|
||||
"max_position_embeddings": 40960,
|
||||
"max_window_layers": 28,
|
||||
"model_type": "qwen3",
|
||||
"num_attention_heads": 16,
|
||||
"num_hidden_layers": 28,
|
||||
"num_key_value_heads": 8,
|
||||
"pad_token_id": 151643,
|
||||
"rms_norm_eps": 1e-06,
|
||||
"rope_parameters": {
|
||||
"rope_theta": 1000000,
|
||||
"rope_type": "default"
|
||||
},
|
||||
"sliding_window": null,
|
||||
"tie_word_embeddings": true,
|
||||
"transformers_version": "5.6.2",
|
||||
"use_cache": false,
|
||||
"use_sliding_window": false,
|
||||
"vocab_size": 151936
|
||||
}
|
||||
151
eval/trained_eval_rows.csv
Normal file
151
eval/trained_eval_rows.csv
Normal file
@@ -0,0 +1,151 @@
|
||||
agent,seed,case_id,difficulty,reward,primary_reward,auxiliary_reward,contradictions_total,contradictions_triggered,contradictions_surfaced,questions_used,evidence_presented,evidence_timing_successes,blind_evidence_count,useless_questions_ratio,avg_question_length,model_repo,invalid_tool_calls
|
||||
random,20260425,timeline_255d67,easy,0.0,0.0,-0.4,1,0,0,6,2,0,2,1.0,5.0,,
|
||||
random,20260426,knowledge_b28f8c,medium,0.0,0.0,-0.4,2,0,0,6,2,0,2,1.0,5.0,,
|
||||
random,20260427,workplace_c98377,easy,0.0,0.0,-0.4,1,0,0,3,5,0,5,1.0,5.0,,
|
||||
random,20260428,motive_66ff59,hard,0.0,0.0,-0.4,3,0,0,7,1,0,1,1.0,5.0,,
|
||||
random,20260429,timeline_19bb78,easy,0.0,0.0,-0.4,1,0,0,4,4,0,4,1.0,5.0,,
|
||||
random,20260430,timeline_a97690,medium,0.0,0.0,-0.4,2,0,0,6,2,0,2,1.0,5.0,,
|
||||
random,20260431,alibi_67ffcd,medium,0.0,0.0,-0.4,2,0,0,6,2,0,2,1.0,5.0,,
|
||||
random,20260432,alibi_423bca,medium,0.0,0.0,-0.4,2,0,0,5,3,0,3,1.0,5.0,,
|
||||
random,20260433,knowledge_960d07,medium,0.0,0.0,-0.4,2,0,0,8,0,0,0,1.0,5.0,,
|
||||
random,20260434,alibi_e829c1,easy,0.0,0.0,-0.4,1,0,0,7,1,0,1,1.0,5.0,,
|
||||
random,20260435,motive_85e25b,hard,0.0,0.0,-0.4,3,0,0,5,3,0,3,1.0,5.0,,
|
||||
random,20260436,knowledge_a599e3,medium,0.0,0.0,-0.4,2,0,0,4,4,0,4,1.0,5.0,,
|
||||
random,20260437,motive_8bca20,easy,0.0,0.0,-0.4,1,0,0,6,2,0,2,1.0,5.0,,
|
||||
random,20260438,corporate_6b1664,medium,0.0,0.0,-0.4,2,0,0,6,2,0,2,1.0,5.0,,
|
||||
random,20260439,alibi_a6c582,easy,0.0,0.0,-0.4,1,0,0,8,0,0,0,1.0,5.0,,
|
||||
random,20260440,workplace_835476,easy,0.0,0.0,-0.4,1,0,0,5,3,0,3,1.0,5.0,,
|
||||
random,20260441,possession_a079c5,hard,0.0,0.0,-0.4,3,0,0,8,0,0,0,1.0,5.0,,
|
||||
random,20260442,possession_9cc45d,hard,0.0,0.0,-0.4,3,0,0,5,3,0,3,1.0,5.0,,
|
||||
random,20260443,possession_259aa5,easy,0.0,0.0,-0.4,1,0,0,4,4,0,4,1.0,5.0,,
|
||||
random,20260444,corporate_76724c,medium,0.0,0.0,-0.4,2,0,0,5,3,0,3,1.0,5.0,,
|
||||
random,20260445,timeline_767821,medium,0.0,0.0,-0.4,2,0,0,5,3,0,3,1.0,5.0,,
|
||||
random,20260446,motive_c0d166,medium,0.0,0.0,-0.4,2,0,0,5,3,0,3,1.0,5.0,,
|
||||
random,20260447,corporate_307934,hard,0.0,0.0,-0.4,3,0,0,7,1,0,1,1.0,5.0,,
|
||||
random,20260448,timeline_592816,hard,0.0,0.0,-0.4,3,0,0,6,2,0,2,1.0,5.0,,
|
||||
random,20260449,knowledge_b26824,medium,0.0,0.0,-0.4,2,0,0,5,3,0,3,1.0,5.0,,
|
||||
random,20260450,knowledge_697785,hard,0.0,0.0,-0.4,3,0,0,7,1,0,1,1.0,5.0,,
|
||||
random,20260451,timeline_81dafd,medium,0.0,0.0,-0.4,2,0,0,4,4,0,4,1.0,5.0,,
|
||||
random,20260452,corporate_8eb7d7,medium,0.0,0.0,-0.4,2,0,0,4,4,0,4,1.0,5.0,,
|
||||
random,20260453,possession_dbb5fe,medium,0.0,0.0,-0.4,2,0,0,5,3,0,3,1.0,5.0,,
|
||||
random,20260454,alibi_a4666f,hard,0.0,0.0,-0.4,3,0,0,8,0,0,0,1.0,5.0,,
|
||||
keyword_spam,20260425,timeline_255d67,easy,0.030000000000000006,0.0,0.15000000000000002,1,1,0,5,0,0,0,0.6,4.2,,
|
||||
keyword_spam,20260426,knowledge_b28f8c,medium,0.12000000000000002,0.0,0.6000000000000001,2,2,0,5,0,0,0,0.4,4.2,,
|
||||
keyword_spam,20260427,workplace_c98377,easy,0.0,0.0,-0.2,1,0,0,5,0,0,0,0.8,4.2,,
|
||||
keyword_spam,20260428,motive_66ff59,hard,0.17000000000000004,0.0,0.8500000000000001,3,3,0,5,0,0,0,0.2,4.2,,
|
||||
keyword_spam,20260429,timeline_19bb78,easy,0.030000000000000006,0.0,0.15000000000000002,1,1,0,5,0,0,0,0.6,4.2,,
|
||||
keyword_spam,20260430,timeline_a97690,medium,0.13,0.0,0.65,2,2,0,5,0,0,0,0.2,4.2,,
|
||||
keyword_spam,20260431,alibi_67ffcd,medium,0.04000000000000001,0.0,0.20000000000000004,2,1,0,5,0,0,0,0.4,4.2,,
|
||||
keyword_spam,20260432,alibi_423bca,medium,0.04000000000000001,0.0,0.20000000000000004,2,1,0,5,0,0,0,0.4,4.2,,
|
||||
keyword_spam,20260433,knowledge_960d07,medium,0.12000000000000002,0.0,0.6000000000000001,2,2,0,5,0,0,0,0.4,4.2,,
|
||||
keyword_spam,20260434,alibi_e829c1,easy,0.04000000000000001,0.0,0.20000000000000004,1,1,0,5,0,0,0,0.4,4.2,,
|
||||
keyword_spam,20260435,motive_85e25b,hard,0.17000000000000004,0.0,0.8500000000000001,3,3,0,5,0,0,0,0.2,4.2,,
|
||||
keyword_spam,20260436,knowledge_a599e3,medium,0.12000000000000002,0.0,0.6000000000000001,2,2,0,5,0,0,0,0.4,4.2,,
|
||||
keyword_spam,20260437,motive_8bca20,easy,0.0,0.0,-0.15000000000000002,1,0,0,5,0,0,0,0.6,4.2,,
|
||||
keyword_spam,20260438,corporate_6b1664,medium,0.020000000000000007,0.0,0.10000000000000003,2,1,0,5,0,0,0,0.8,4.2,,
|
||||
keyword_spam,20260439,alibi_a6c582,easy,0.04000000000000001,0.0,0.20000000000000004,1,1,0,5,0,0,0,0.4,4.2,,
|
||||
keyword_spam,20260440,workplace_835476,easy,0.0,0.0,-0.2,1,0,0,5,0,0,0,0.8,4.2,,
|
||||
keyword_spam,20260441,possession_a079c5,hard,0.030000000000000006,0.0,0.15000000000000002,3,1,0,5,0,0,0,0.6,4.2,,
|
||||
keyword_spam,20260442,possession_9cc45d,hard,0.030000000000000006,0.0,0.15000000000000002,3,1,0,5,0,0,0,0.6,4.2,,
|
||||
keyword_spam,20260443,possession_259aa5,easy,0.0,0.0,-0.2,1,0,0,5,0,0,0,0.8,4.2,,
|
||||
keyword_spam,20260444,corporate_76724c,medium,0.020000000000000007,0.0,0.10000000000000003,2,1,0,5,0,0,0,0.8,4.2,,
|
||||
keyword_spam,20260445,timeline_767821,medium,0.13,0.0,0.65,2,2,0,5,0,0,0,0.2,4.2,,
|
||||
keyword_spam,20260446,motive_c0d166,medium,0.13,0.0,0.65,2,2,0,5,0,0,0,0.2,4.2,,
|
||||
keyword_spam,20260447,corporate_307934,hard,0.020000000000000007,0.0,0.10000000000000003,3,1,0,5,0,0,0,0.8,4.2,,
|
||||
keyword_spam,20260448,timeline_592816,hard,0.19,0.0,0.95,3,3,0,5,0,0,0,0.2,4.2,,
|
||||
keyword_spam,20260449,knowledge_b26824,medium,0.12000000000000002,0.0,0.6000000000000001,2,2,0,5,0,0,0,0.4,4.2,,
|
||||
keyword_spam,20260450,knowledge_697785,hard,0.12000000000000002,0.0,0.6000000000000001,3,2,0,5,0,0,0,0.4,4.2,,
|
||||
keyword_spam,20260451,timeline_81dafd,medium,0.13,0.0,0.65,2,2,0,5,0,0,0,0.2,4.2,,
|
||||
keyword_spam,20260452,corporate_8eb7d7,medium,0.020000000000000007,0.0,0.10000000000000003,2,1,0,5,0,0,0,0.8,4.2,,
|
||||
keyword_spam,20260453,possession_dbb5fe,medium,0.030000000000000006,0.0,0.15000000000000002,2,1,0,5,0,0,0,0.6,4.2,,
|
||||
keyword_spam,20260454,alibi_a4666f,hard,0.15000000000000002,0.0,0.75,3,2,0,5,0,0,0,0.2,4.2,,
|
||||
present_all,20260425,timeline_255d67,easy,0.0,0.0,-0.25,1,0,0,0,5,0,5,0.0,0.0,,
|
||||
present_all,20260426,knowledge_b28f8c,medium,0.0,0.0,-0.2,2,0,0,0,4,0,4,0.0,0.0,,
|
||||
present_all,20260427,workplace_c98377,easy,0.0,0.0,-0.25,1,0,0,0,5,0,5,0.0,0.0,,
|
||||
present_all,20260428,motive_66ff59,hard,0.0,0.0,-0.35000000000000003,3,0,0,0,7,0,7,0.0,0.0,,
|
||||
present_all,20260429,timeline_19bb78,easy,0.0,0.0,-0.25,1,0,0,0,5,0,5,0.0,0.0,,
|
||||
present_all,20260430,timeline_a97690,medium,0.0,0.0,-0.25,2,0,0,0,5,0,5,0.0,0.0,,
|
||||
present_all,20260431,alibi_67ffcd,medium,0.0,0.0,-0.2,2,0,0,0,4,0,4,0.0,0.0,,
|
||||
present_all,20260432,alibi_423bca,medium,0.0,0.0,-0.2,2,0,0,0,4,0,4,0.0,0.0,,
|
||||
present_all,20260433,knowledge_960d07,medium,0.0,0.0,-0.2,2,0,0,0,4,0,4,0.0,0.0,,
|
||||
present_all,20260434,alibi_e829c1,easy,0.0,0.0,-0.2,1,0,0,0,4,0,4,0.0,0.0,,
|
||||
present_all,20260435,motive_85e25b,hard,0.0,0.0,-0.35000000000000003,3,0,0,0,7,0,7,0.0,0.0,,
|
||||
present_all,20260436,knowledge_a599e3,medium,0.0,0.0,-0.2,2,0,0,0,4,0,4,0.0,0.0,,
|
||||
present_all,20260437,motive_8bca20,easy,0.0,0.0,-0.25,1,0,0,0,5,0,5,0.0,0.0,,
|
||||
present_all,20260438,corporate_6b1664,medium,0.0,0.0,-0.25,2,0,0,0,5,0,5,0.0,0.0,,
|
||||
present_all,20260439,alibi_a6c582,easy,0.0,0.0,-0.2,1,0,0,0,4,0,4,0.0,0.0,,
|
||||
present_all,20260440,workplace_835476,easy,0.0,0.0,-0.25,1,0,0,0,5,0,5,0.0,0.0,,
|
||||
present_all,20260441,possession_a079c5,hard,0.0,0.0,-0.30000000000000004,3,0,0,0,6,0,6,0.0,0.0,,
|
||||
present_all,20260442,possession_9cc45d,hard,0.0,0.0,-0.30000000000000004,3,0,0,0,6,0,6,0.0,0.0,,
|
||||
present_all,20260443,possession_259aa5,easy,0.0,0.0,-0.2,1,0,0,0,4,0,4,0.0,0.0,,
|
||||
present_all,20260444,corporate_76724c,medium,0.0,0.0,-0.25,2,0,0,0,5,0,5,0.0,0.0,,
|
||||
present_all,20260445,timeline_767821,medium,0.0,0.0,-0.25,2,0,0,0,5,0,5,0.0,0.0,,
|
||||
present_all,20260446,motive_c0d166,medium,0.0,0.0,-0.25,2,0,0,0,5,0,5,0.0,0.0,,
|
||||
present_all,20260447,corporate_307934,hard,0.0,0.0,-0.35000000000000003,3,0,0,0,7,0,7,0.0,0.0,,
|
||||
present_all,20260448,timeline_592816,hard,0.0,0.0,-0.35000000000000003,3,0,0,0,7,0,7,0.0,0.0,,
|
||||
present_all,20260449,knowledge_b26824,medium,0.0,0.0,-0.2,2,0,0,0,4,0,4,0.0,0.0,,
|
||||
present_all,20260450,knowledge_697785,hard,0.0,0.0,-0.30000000000000004,3,0,0,0,6,0,6,0.0,0.0,,
|
||||
present_all,20260451,timeline_81dafd,medium,0.0,0.0,-0.25,2,0,0,0,5,0,5,0.0,0.0,,
|
||||
present_all,20260452,corporate_8eb7d7,medium,0.0,0.0,-0.25,2,0,0,0,5,0,5,0.0,0.0,,
|
||||
present_all,20260453,possession_dbb5fe,medium,0.0,0.0,-0.2,2,0,0,0,4,0,4,0.0,0.0,,
|
||||
present_all,20260454,alibi_a4666f,hard,0.0,0.0,-0.30000000000000004,3,0,0,0,6,0,6,0.0,0.0,,
|
||||
scripted_oracle,20260425,timeline_255d67,easy,0.8800000000000001,1.0,0.4,1,1,1,1,1,1,0,0.0,1.0,,
|
||||
scripted_oracle,20260426,knowledge_b28f8c,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.5,,
|
||||
scripted_oracle,20260427,workplace_c98377,easy,0.8800000000000001,1.0,0.4,1,1,1,1,1,1,0,0.0,1.0,,
|
||||
scripted_oracle,20260428,motive_66ff59,hard,1.0,1.0,1.2000000000000002,3,3,3,3,3,3,0,0.0,1.0,,
|
||||
scripted_oracle,20260429,timeline_19bb78,easy,0.8800000000000001,1.0,0.4,1,1,1,1,1,1,0,0.0,1.0,,
|
||||
scripted_oracle,20260430,timeline_a97690,medium,0.49000000000000005,0.5,0.45,2,1,1,2,2,1,1,0.0,1.0,,
|
||||
scripted_oracle,20260431,alibi_67ffcd,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,3.0,,
|
||||
scripted_oracle,20260432,alibi_423bca,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,3.0,,
|
||||
scripted_oracle,20260433,knowledge_960d07,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.5,,
|
||||
scripted_oracle,20260434,alibi_e829c1,easy,0.8800000000000001,1.0,0.4,1,1,1,1,1,1,0,0.0,3.0,,
|
||||
scripted_oracle,20260435,motive_85e25b,hard,1.0,1.0,1.2000000000000002,3,3,3,3,3,3,0,0.0,1.0,,
|
||||
scripted_oracle,20260436,knowledge_a599e3,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.5,,
|
||||
scripted_oracle,20260437,motive_8bca20,easy,0.8800000000000001,1.0,0.4,1,1,1,1,1,1,0,0.0,1.0,,
|
||||
scripted_oracle,20260438,corporate_6b1664,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.0,,
|
||||
scripted_oracle,20260439,alibi_a6c582,easy,0.8800000000000001,1.0,0.4,1,1,1,1,1,1,0,0.0,3.0,,
|
||||
scripted_oracle,20260440,workplace_835476,easy,0.8800000000000001,1.0,0.4,1,1,1,1,1,1,0,0.0,1.0,,
|
||||
scripted_oracle,20260441,possession_a079c5,hard,1.0,1.0,1.2000000000000002,3,3,3,3,3,3,0,0.0,1.0,,
|
||||
scripted_oracle,20260442,possession_9cc45d,hard,1.0,1.0,1.2000000000000002,3,3,3,3,3,3,0,0.0,1.0,,
|
||||
scripted_oracle,20260443,possession_259aa5,easy,0.8800000000000001,1.0,0.4,1,1,1,1,1,1,0,0.0,1.0,,
|
||||
scripted_oracle,20260444,corporate_76724c,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.0,,
|
||||
scripted_oracle,20260445,timeline_767821,medium,0.49000000000000005,0.5,0.45,2,1,1,2,2,1,1,0.0,1.0,,
|
||||
scripted_oracle,20260446,motive_c0d166,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.0,,
|
||||
scripted_oracle,20260447,corporate_307934,hard,1.0,1.0,1.2000000000000002,3,3,3,3,3,3,0,0.0,1.0,,
|
||||
scripted_oracle,20260448,timeline_592816,hard,1.0,1.0,1.2000000000000002,3,3,3,3,3,3,0,0.0,1.0,,
|
||||
scripted_oracle,20260449,knowledge_b26824,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.5,,
|
||||
scripted_oracle,20260450,knowledge_697785,hard,1.0,1.0,1.2000000000000002,3,3,3,3,3,3,0,0.0,1.6666666666666667,,
|
||||
scripted_oracle,20260451,timeline_81dafd,medium,0.49000000000000005,0.5,0.45,2,1,1,2,2,1,1,0.0,1.0,,
|
||||
scripted_oracle,20260452,corporate_8eb7d7,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.0,,
|
||||
scripted_oracle,20260453,possession_dbb5fe,medium,0.9600000000000001,1.0,0.8,2,2,2,2,2,2,0,0.0,1.0,,
|
||||
scripted_oracle,20260454,alibi_a4666f,hard,1.0,1.0,1.2000000000000002,3,3,3,3,3,3,0,0.0,2.3333333333333335,,
|
||||
trained_sft_grpo_run2,20260425,timeline_255d67,easy,0.010000000000000004,0.0,0.05000000000000002,1,1,0,7,0,0,0,0.7142857142857143,39.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
|
||||
trained_sft_grpo_run2,20260426,knowledge_b28f8c,medium,0.42000000000000004,0.5,0.09999999999999998,2,1,1,7,1,1,0,0.8571428571428571,45.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
|
||||
trained_sft_grpo_run2,20260427,workplace_c98377,easy,0.8200000000000001,1.0,0.09999999999999998,1,1,1,7,1,1,0,0.8571428571428571,31.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
|
||||
trained_sft_grpo_run2,20260428,motive_66ff59,hard,0.2866666666666667,0.3333333333333333,0.09999999999999998,3,1,1,7,1,1,0,0.8571428571428571,31.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
|
||||
trained_sft_grpo_run2,20260429,timeline_19bb78,easy,0.010000000000000004,0.0,0.05000000000000002,1,1,0,7,0,0,0,0.7142857142857143,31.428571428571427,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
|
||||
trained_sft_grpo_run2,20260430,timeline_a97690,medium,0.010000000000000004,0.0,0.05000000000000002,2,1,0,7,0,0,0,0.7142857142857143,36.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
|
||||
trained_sft_grpo_run2,20260431,alibi_67ffcd,medium,0.42000000000000004,0.5,0.09999999999999998,2,1,1,7,1,1,0,0.8571428571428571,35.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
|
||||
trained_sft_grpo_run2,20260432,alibi_423bca,medium,0.42000000000000004,0.5,0.09999999999999998,2,1,1,7,1,1,0,0.8571428571428571,39.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
|
||||
trained_sft_grpo_run2,20260433,knowledge_960d07,medium,0.42000000000000004,0.5,0.09999999999999998,2,1,1,7,1,1,0,0.8571428571428571,39.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
|
||||
trained_sft_grpo_run2,20260434,alibi_e829c1,easy,0.8200000000000001,1.0,0.09999999999999998,1,1,1,7,1,1,0,0.8571428571428571,37.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
|
||||
trained_sft_grpo_run2,20260435,motive_85e25b,hard,0.2866666666666667,0.3333333333333333,0.09999999999999998,3,1,1,7,1,1,0,0.8571428571428571,32.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
|
||||
trained_sft_grpo_run2,20260436,knowledge_a599e3,medium,0.42000000000000004,0.5,0.09999999999999998,2,1,1,7,1,1,0,0.8571428571428571,39.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
|
||||
trained_sft_grpo_run2,20260437,motive_8bca20,easy,0.8200000000000001,1.0,0.09999999999999998,1,1,1,7,1,1,0,0.8571428571428571,30.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
|
||||
trained_sft_grpo_run2,20260438,corporate_6b1664,medium,0.42000000000000004,0.5,0.09999999999999998,2,1,1,7,1,1,0,0.8571428571428571,27.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
|
||||
trained_sft_grpo_run2,20260439,alibi_a6c582,easy,0.8200000000000001,1.0,0.09999999999999998,1,1,1,7,1,1,0,0.8571428571428571,38.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
|
||||
trained_sft_grpo_run2,20260440,workplace_835476,easy,0.8200000000000001,1.0,0.09999999999999998,1,1,1,7,1,1,0,0.8571428571428571,30.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
|
||||
trained_sft_grpo_run2,20260441,possession_a079c5,hard,0.2866666666666667,0.3333333333333333,0.09999999999999998,3,1,1,7,1,1,0,0.8571428571428571,47.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
|
||||
trained_sft_grpo_run2,20260442,possession_9cc45d,hard,0.2866666666666667,0.3333333333333333,0.09999999999999998,3,1,1,7,1,1,0,0.8571428571428571,43.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
|
||||
trained_sft_grpo_run2,20260443,possession_259aa5,easy,0.8200000000000001,1.0,0.09999999999999998,1,1,1,7,1,1,0,0.8571428571428571,42.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
|
||||
trained_sft_grpo_run2,20260444,corporate_76724c,medium,0.42000000000000004,0.5,0.09999999999999998,2,1,1,7,1,1,0,0.8571428571428571,27.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
|
||||
trained_sft_grpo_run2,20260445,timeline_767821,medium,0.010000000000000004,0.0,0.05000000000000002,2,1,0,7,0,0,0,0.7142857142857143,44.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
|
||||
trained_sft_grpo_run2,20260446,motive_c0d166,medium,0.42000000000000004,0.5,0.09999999999999998,2,1,1,7,1,1,0,0.8571428571428571,35.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
|
||||
trained_sft_grpo_run2,20260447,corporate_307934,hard,0.2866666666666667,0.3333333333333333,0.09999999999999998,3,1,1,7,1,1,0,0.8571428571428571,27.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
|
||||
trained_sft_grpo_run2,20260448,timeline_592816,hard,0.010000000000000004,0.0,0.05000000000000002,3,1,0,7,0,0,0,0.7142857142857143,39.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
|
||||
trained_sft_grpo_run2,20260449,knowledge_b26824,medium,0.42000000000000004,0.5,0.09999999999999998,2,1,1,7,1,1,0,0.8571428571428571,38.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
|
||||
trained_sft_grpo_run2,20260450,knowledge_697785,hard,0.2866666666666667,0.3333333333333333,0.09999999999999998,3,1,1,7,1,1,0,0.8571428571428571,43.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
|
||||
trained_sft_grpo_run2,20260451,timeline_81dafd,medium,0.010000000000000004,0.0,0.05000000000000002,2,1,0,7,0,0,0,0.7142857142857143,38.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
|
||||
trained_sft_grpo_run2,20260452,corporate_8eb7d7,medium,0.42000000000000004,0.5,0.09999999999999998,2,1,1,7,1,1,0,0.8571428571428571,27.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
|
||||
trained_sft_grpo_run2,20260453,possession_dbb5fe,medium,0.42000000000000004,0.5,0.09999999999999998,2,1,1,7,1,1,0,0.8571428571428571,45.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
|
||||
trained_sft_grpo_run2,20260454,alibi_a4666f,hard,0.2866666666666667,0.3333333333333333,0.09999999999999998,3,1,1,7,1,1,0,0.8571428571428571,39.0,heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2,0.0
|
||||
|
150
eval/trained_eval_rows.jsonl
Normal file
150
eval/trained_eval_rows.jsonl
Normal file
@@ -0,0 +1,150 @@
|
||||
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 2, "case_id": "timeline_255d67", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 2, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 6, "reward": 0.0, "seed": 20260425, "useless_questions_ratio": 1.0}
|
||||
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 2, "case_id": "knowledge_b28f8c", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 6, "reward": 0.0, "seed": 20260426, "useless_questions_ratio": 1.0}
|
||||
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 5, "case_id": "workplace_c98377", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 3, "reward": 0.0, "seed": 20260427, "useless_questions_ratio": 1.0}
|
||||
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 1, "case_id": "motive_66ff59", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 1, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 7, "reward": 0.0, "seed": 20260428, "useless_questions_ratio": 1.0}
|
||||
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 4, "case_id": "timeline_19bb78", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 4, "reward": 0.0, "seed": 20260429, "useless_questions_ratio": 1.0}
|
||||
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 2, "case_id": "timeline_a97690", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 6, "reward": 0.0, "seed": 20260430, "useless_questions_ratio": 1.0}
|
||||
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 2, "case_id": "alibi_67ffcd", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 6, "reward": 0.0, "seed": 20260431, "useless_questions_ratio": 1.0}
|
||||
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 3, "case_id": "alibi_423bca", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 3, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260432, "useless_questions_ratio": 1.0}
|
||||
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 0, "case_id": "knowledge_960d07", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 8, "reward": 0.0, "seed": 20260433, "useless_questions_ratio": 1.0}
|
||||
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 1, "case_id": "alibi_e829c1", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 7, "reward": 0.0, "seed": 20260434, "useless_questions_ratio": 1.0}
|
||||
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 3, "case_id": "motive_85e25b", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 3, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260435, "useless_questions_ratio": 1.0}
|
||||
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 4, "case_id": "knowledge_a599e3", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 4, "reward": 0.0, "seed": 20260436, "useless_questions_ratio": 1.0}
|
||||
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 2, "case_id": "motive_8bca20", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 2, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 6, "reward": 0.0, "seed": 20260437, "useless_questions_ratio": 1.0}
|
||||
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 2, "case_id": "corporate_6b1664", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 6, "reward": 0.0, "seed": 20260438, "useless_questions_ratio": 1.0}
|
||||
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 0, "case_id": "alibi_a6c582", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 8, "reward": 0.0, "seed": 20260439, "useless_questions_ratio": 1.0}
|
||||
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 3, "case_id": "workplace_835476", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 3, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260440, "useless_questions_ratio": 1.0}
|
||||
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 0, "case_id": "possession_a079c5", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 8, "reward": 0.0, "seed": 20260441, "useless_questions_ratio": 1.0}
|
||||
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 3, "case_id": "possession_9cc45d", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 3, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260442, "useless_questions_ratio": 1.0}
|
||||
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 4, "case_id": "possession_259aa5", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 4, "reward": 0.0, "seed": 20260443, "useless_questions_ratio": 1.0}
|
||||
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 3, "case_id": "corporate_76724c", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 3, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260444, "useless_questions_ratio": 1.0}
|
||||
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 3, "case_id": "timeline_767821", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 3, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260445, "useless_questions_ratio": 1.0}
|
||||
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 3, "case_id": "motive_c0d166", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 3, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260446, "useless_questions_ratio": 1.0}
|
||||
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 1, "case_id": "corporate_307934", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 1, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 7, "reward": 0.0, "seed": 20260447, "useless_questions_ratio": 1.0}
|
||||
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 2, "case_id": "timeline_592816", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 2, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 6, "reward": 0.0, "seed": 20260448, "useless_questions_ratio": 1.0}
|
||||
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 3, "case_id": "knowledge_b26824", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 3, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260449, "useless_questions_ratio": 1.0}
|
||||
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 1, "case_id": "knowledge_697785", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 1, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 7, "reward": 0.0, "seed": 20260450, "useless_questions_ratio": 1.0}
|
||||
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 4, "case_id": "timeline_81dafd", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 4, "reward": 0.0, "seed": 20260451, "useless_questions_ratio": 1.0}
|
||||
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 4, "case_id": "corporate_8eb7d7", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 4, "reward": 0.0, "seed": 20260452, "useless_questions_ratio": 1.0}
|
||||
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 3, "case_id": "possession_dbb5fe", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 3, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260453, "useless_questions_ratio": 1.0}
|
||||
{"agent": "random", "auxiliary_reward": -0.4, "avg_question_length": 5.0, "blind_evidence_count": 0, "case_id": "alibi_a4666f", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 8, "reward": 0.0, "seed": 20260454, "useless_questions_ratio": 1.0}
|
||||
{"agent": "keyword_spam", "auxiliary_reward": 0.15000000000000002, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "timeline_255d67", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.030000000000000006, "seed": 20260425, "useless_questions_ratio": 0.6}
|
||||
{"agent": "keyword_spam", "auxiliary_reward": 0.6000000000000001, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "knowledge_b28f8c", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.12000000000000002, "seed": 20260426, "useless_questions_ratio": 0.4}
|
||||
{"agent": "keyword_spam", "auxiliary_reward": -0.2, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "workplace_c98377", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260427, "useless_questions_ratio": 0.8}
|
||||
{"agent": "keyword_spam", "auxiliary_reward": 0.8500000000000001, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "motive_66ff59", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 3, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.17000000000000004, "seed": 20260428, "useless_questions_ratio": 0.2}
|
||||
{"agent": "keyword_spam", "auxiliary_reward": 0.15000000000000002, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "timeline_19bb78", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.030000000000000006, "seed": 20260429, "useless_questions_ratio": 0.6}
|
||||
{"agent": "keyword_spam", "auxiliary_reward": 0.65, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "timeline_a97690", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.13, "seed": 20260430, "useless_questions_ratio": 0.2}
|
||||
{"agent": "keyword_spam", "auxiliary_reward": 0.20000000000000004, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "alibi_67ffcd", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.04000000000000001, "seed": 20260431, "useless_questions_ratio": 0.4}
|
||||
{"agent": "keyword_spam", "auxiliary_reward": 0.20000000000000004, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "alibi_423bca", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.04000000000000001, "seed": 20260432, "useless_questions_ratio": 0.4}
|
||||
{"agent": "keyword_spam", "auxiliary_reward": 0.6000000000000001, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "knowledge_960d07", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.12000000000000002, "seed": 20260433, "useless_questions_ratio": 0.4}
|
||||
{"agent": "keyword_spam", "auxiliary_reward": 0.20000000000000004, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "alibi_e829c1", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.04000000000000001, "seed": 20260434, "useless_questions_ratio": 0.4}
|
||||
{"agent": "keyword_spam", "auxiliary_reward": 0.8500000000000001, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "motive_85e25b", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 3, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.17000000000000004, "seed": 20260435, "useless_questions_ratio": 0.2}
|
||||
{"agent": "keyword_spam", "auxiliary_reward": 0.6000000000000001, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "knowledge_a599e3", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.12000000000000002, "seed": 20260436, "useless_questions_ratio": 0.4}
|
||||
{"agent": "keyword_spam", "auxiliary_reward": -0.15000000000000002, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "motive_8bca20", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260437, "useless_questions_ratio": 0.6}
|
||||
{"agent": "keyword_spam", "auxiliary_reward": 0.10000000000000003, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "corporate_6b1664", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.020000000000000007, "seed": 20260438, "useless_questions_ratio": 0.8}
|
||||
{"agent": "keyword_spam", "auxiliary_reward": 0.20000000000000004, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "alibi_a6c582", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.04000000000000001, "seed": 20260439, "useless_questions_ratio": 0.4}
|
||||
{"agent": "keyword_spam", "auxiliary_reward": -0.2, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "workplace_835476", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260440, "useless_questions_ratio": 0.8}
|
||||
{"agent": "keyword_spam", "auxiliary_reward": 0.15000000000000002, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "possession_a079c5", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 1, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.030000000000000006, "seed": 20260441, "useless_questions_ratio": 0.6}
|
||||
{"agent": "keyword_spam", "auxiliary_reward": 0.15000000000000002, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "possession_9cc45d", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 1, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.030000000000000006, "seed": 20260442, "useless_questions_ratio": 0.6}
|
||||
{"agent": "keyword_spam", "auxiliary_reward": -0.2, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "possession_259aa5", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.0, "seed": 20260443, "useless_questions_ratio": 0.8}
|
||||
{"agent": "keyword_spam", "auxiliary_reward": 0.10000000000000003, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "corporate_76724c", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.020000000000000007, "seed": 20260444, "useless_questions_ratio": 0.8}
|
||||
{"agent": "keyword_spam", "auxiliary_reward": 0.65, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "timeline_767821", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.13, "seed": 20260445, "useless_questions_ratio": 0.2}
|
||||
{"agent": "keyword_spam", "auxiliary_reward": 0.65, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "motive_c0d166", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.13, "seed": 20260446, "useless_questions_ratio": 0.2}
|
||||
{"agent": "keyword_spam", "auxiliary_reward": 0.10000000000000003, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "corporate_307934", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 1, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.020000000000000007, "seed": 20260447, "useless_questions_ratio": 0.8}
|
||||
{"agent": "keyword_spam", "auxiliary_reward": 0.95, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "timeline_592816", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 3, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.19, "seed": 20260448, "useless_questions_ratio": 0.2}
|
||||
{"agent": "keyword_spam", "auxiliary_reward": 0.6000000000000001, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "knowledge_b26824", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.12000000000000002, "seed": 20260449, "useless_questions_ratio": 0.4}
|
||||
{"agent": "keyword_spam", "auxiliary_reward": 0.6000000000000001, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "knowledge_697785", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 2, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.12000000000000002, "seed": 20260450, "useless_questions_ratio": 0.4}
|
||||
{"agent": "keyword_spam", "auxiliary_reward": 0.65, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "timeline_81dafd", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.13, "seed": 20260451, "useless_questions_ratio": 0.2}
|
||||
{"agent": "keyword_spam", "auxiliary_reward": 0.10000000000000003, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "corporate_8eb7d7", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.020000000000000007, "seed": 20260452, "useless_questions_ratio": 0.8}
|
||||
{"agent": "keyword_spam", "auxiliary_reward": 0.15000000000000002, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "possession_dbb5fe", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.030000000000000006, "seed": 20260453, "useless_questions_ratio": 0.6}
|
||||
{"agent": "keyword_spam", "auxiliary_reward": 0.75, "avg_question_length": 4.2, "blind_evidence_count": 0, "case_id": "alibi_a4666f", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 2, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 5, "reward": 0.15000000000000002, "seed": 20260454, "useless_questions_ratio": 0.2}
|
||||
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "timeline_255d67", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260425, "useless_questions_ratio": 0.0}
|
||||
{"agent": "present_all", "auxiliary_reward": -0.2, "avg_question_length": 0.0, "blind_evidence_count": 4, "case_id": "knowledge_b28f8c", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260426, "useless_questions_ratio": 0.0}
|
||||
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "workplace_c98377", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260427, "useless_questions_ratio": 0.0}
|
||||
{"agent": "present_all", "auxiliary_reward": -0.35000000000000003, "avg_question_length": 0.0, "blind_evidence_count": 7, "case_id": "motive_66ff59", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 7, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260428, "useless_questions_ratio": 0.0}
|
||||
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "timeline_19bb78", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260429, "useless_questions_ratio": 0.0}
|
||||
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "timeline_a97690", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260430, "useless_questions_ratio": 0.0}
|
||||
{"agent": "present_all", "auxiliary_reward": -0.2, "avg_question_length": 0.0, "blind_evidence_count": 4, "case_id": "alibi_67ffcd", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260431, "useless_questions_ratio": 0.0}
|
||||
{"agent": "present_all", "auxiliary_reward": -0.2, "avg_question_length": 0.0, "blind_evidence_count": 4, "case_id": "alibi_423bca", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260432, "useless_questions_ratio": 0.0}
|
||||
{"agent": "present_all", "auxiliary_reward": -0.2, "avg_question_length": 0.0, "blind_evidence_count": 4, "case_id": "knowledge_960d07", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260433, "useless_questions_ratio": 0.0}
|
||||
{"agent": "present_all", "auxiliary_reward": -0.2, "avg_question_length": 0.0, "blind_evidence_count": 4, "case_id": "alibi_e829c1", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260434, "useless_questions_ratio": 0.0}
|
||||
{"agent": "present_all", "auxiliary_reward": -0.35000000000000003, "avg_question_length": 0.0, "blind_evidence_count": 7, "case_id": "motive_85e25b", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 7, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260435, "useless_questions_ratio": 0.0}
|
||||
{"agent": "present_all", "auxiliary_reward": -0.2, "avg_question_length": 0.0, "blind_evidence_count": 4, "case_id": "knowledge_a599e3", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260436, "useless_questions_ratio": 0.0}
|
||||
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "motive_8bca20", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260437, "useless_questions_ratio": 0.0}
|
||||
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "corporate_6b1664", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260438, "useless_questions_ratio": 0.0}
|
||||
{"agent": "present_all", "auxiliary_reward": -0.2, "avg_question_length": 0.0, "blind_evidence_count": 4, "case_id": "alibi_a6c582", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260439, "useless_questions_ratio": 0.0}
|
||||
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "workplace_835476", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260440, "useless_questions_ratio": 0.0}
|
||||
{"agent": "present_all", "auxiliary_reward": -0.30000000000000004, "avg_question_length": 0.0, "blind_evidence_count": 6, "case_id": "possession_a079c5", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 6, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260441, "useless_questions_ratio": 0.0}
|
||||
{"agent": "present_all", "auxiliary_reward": -0.30000000000000004, "avg_question_length": 0.0, "blind_evidence_count": 6, "case_id": "possession_9cc45d", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 6, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260442, "useless_questions_ratio": 0.0}
|
||||
{"agent": "present_all", "auxiliary_reward": -0.2, "avg_question_length": 0.0, "blind_evidence_count": 4, "case_id": "possession_259aa5", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 0, "difficulty": "easy", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260443, "useless_questions_ratio": 0.0}
|
||||
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "corporate_76724c", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260444, "useless_questions_ratio": 0.0}
|
||||
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "timeline_767821", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260445, "useless_questions_ratio": 0.0}
|
||||
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "motive_c0d166", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260446, "useless_questions_ratio": 0.0}
|
||||
{"agent": "present_all", "auxiliary_reward": -0.35000000000000003, "avg_question_length": 0.0, "blind_evidence_count": 7, "case_id": "corporate_307934", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 7, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260447, "useless_questions_ratio": 0.0}
|
||||
{"agent": "present_all", "auxiliary_reward": -0.35000000000000003, "avg_question_length": 0.0, "blind_evidence_count": 7, "case_id": "timeline_592816", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 7, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260448, "useless_questions_ratio": 0.0}
|
||||
{"agent": "present_all", "auxiliary_reward": -0.2, "avg_question_length": 0.0, "blind_evidence_count": 4, "case_id": "knowledge_b26824", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260449, "useless_questions_ratio": 0.0}
|
||||
{"agent": "present_all", "auxiliary_reward": -0.30000000000000004, "avg_question_length": 0.0, "blind_evidence_count": 6, "case_id": "knowledge_697785", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 6, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260450, "useless_questions_ratio": 0.0}
|
||||
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "timeline_81dafd", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260451, "useless_questions_ratio": 0.0}
|
||||
{"agent": "present_all", "auxiliary_reward": -0.25, "avg_question_length": 0.0, "blind_evidence_count": 5, "case_id": "corporate_8eb7d7", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 5, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260452, "useless_questions_ratio": 0.0}
|
||||
{"agent": "present_all", "auxiliary_reward": -0.2, "avg_question_length": 0.0, "blind_evidence_count": 4, "case_id": "possession_dbb5fe", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 0, "difficulty": "medium", "evidence_presented": 4, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260453, "useless_questions_ratio": 0.0}
|
||||
{"agent": "present_all", "auxiliary_reward": -0.30000000000000004, "avg_question_length": 0.0, "blind_evidence_count": 6, "case_id": "alibi_a4666f", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 0, "difficulty": "hard", "evidence_presented": 6, "evidence_timing_successes": 0, "primary_reward": 0.0, "questions_used": 0, "reward": 0.0, "seed": 20260454, "useless_questions_ratio": 0.0}
|
||||
{"agent": "scripted_oracle", "auxiliary_reward": 0.4, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "timeline_255d67", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "primary_reward": 1.0, "questions_used": 1, "reward": 0.8800000000000001, "seed": 20260425, "useless_questions_ratio": 0.0}
|
||||
{"agent": "scripted_oracle", "auxiliary_reward": 0.8, "avg_question_length": 1.5, "blind_evidence_count": 0, "case_id": "knowledge_b28f8c", "contradictions_surfaced": 2, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 2, "primary_reward": 1.0, "questions_used": 2, "reward": 0.9600000000000001, "seed": 20260426, "useless_questions_ratio": 0.0}
|
||||
{"agent": "scripted_oracle", "auxiliary_reward": 0.4, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "workplace_c98377", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "primary_reward": 1.0, "questions_used": 1, "reward": 0.8800000000000001, "seed": 20260427, "useless_questions_ratio": 0.0}
|
||||
{"agent": "scripted_oracle", "auxiliary_reward": 1.2000000000000002, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "motive_66ff59", "contradictions_surfaced": 3, "contradictions_total": 3, "contradictions_triggered": 3, "difficulty": "hard", "evidence_presented": 3, "evidence_timing_successes": 3, "primary_reward": 1.0, "questions_used": 3, "reward": 1.0, "seed": 20260428, "useless_questions_ratio": 0.0}
|
||||
{"agent": "scripted_oracle", "auxiliary_reward": 0.4, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "timeline_19bb78", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "primary_reward": 1.0, "questions_used": 1, "reward": 0.8800000000000001, "seed": 20260429, "useless_questions_ratio": 0.0}
|
||||
{"agent": "scripted_oracle", "auxiliary_reward": 0.45, "avg_question_length": 1.0, "blind_evidence_count": 1, "case_id": "timeline_a97690", "contradictions_surfaced": 1, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 1, "primary_reward": 0.5, "questions_used": 2, "reward": 0.49000000000000005, "seed": 20260430, "useless_questions_ratio": 0.0}
|
||||
{"agent": "scripted_oracle", "auxiliary_reward": 0.8, "avg_question_length": 3.0, "blind_evidence_count": 0, "case_id": "alibi_67ffcd", "contradictions_surfaced": 2, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 2, "primary_reward": 1.0, "questions_used": 2, "reward": 0.9600000000000001, "seed": 20260431, "useless_questions_ratio": 0.0}
|
||||
{"agent": "scripted_oracle", "auxiliary_reward": 0.8, "avg_question_length": 3.0, "blind_evidence_count": 0, "case_id": "alibi_423bca", "contradictions_surfaced": 2, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 2, "primary_reward": 1.0, "questions_used": 2, "reward": 0.9600000000000001, "seed": 20260432, "useless_questions_ratio": 0.0}
|
||||
{"agent": "scripted_oracle", "auxiliary_reward": 0.8, "avg_question_length": 1.5, "blind_evidence_count": 0, "case_id": "knowledge_960d07", "contradictions_surfaced": 2, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 2, "primary_reward": 1.0, "questions_used": 2, "reward": 0.9600000000000001, "seed": 20260433, "useless_questions_ratio": 0.0}
|
||||
{"agent": "scripted_oracle", "auxiliary_reward": 0.4, "avg_question_length": 3.0, "blind_evidence_count": 0, "case_id": "alibi_e829c1", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "primary_reward": 1.0, "questions_used": 1, "reward": 0.8800000000000001, "seed": 20260434, "useless_questions_ratio": 0.0}
|
||||
{"agent": "scripted_oracle", "auxiliary_reward": 1.2000000000000002, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "motive_85e25b", "contradictions_surfaced": 3, "contradictions_total": 3, "contradictions_triggered": 3, "difficulty": "hard", "evidence_presented": 3, "evidence_timing_successes": 3, "primary_reward": 1.0, "questions_used": 3, "reward": 1.0, "seed": 20260435, "useless_questions_ratio": 0.0}
|
||||
{"agent": "scripted_oracle", "auxiliary_reward": 0.8, "avg_question_length": 1.5, "blind_evidence_count": 0, "case_id": "knowledge_a599e3", "contradictions_surfaced": 2, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 2, "primary_reward": 1.0, "questions_used": 2, "reward": 0.9600000000000001, "seed": 20260436, "useless_questions_ratio": 0.0}
|
||||
{"agent": "scripted_oracle", "auxiliary_reward": 0.4, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "motive_8bca20", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "primary_reward": 1.0, "questions_used": 1, "reward": 0.8800000000000001, "seed": 20260437, "useless_questions_ratio": 0.0}
|
||||
{"agent": "scripted_oracle", "auxiliary_reward": 0.8, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "corporate_6b1664", "contradictions_surfaced": 2, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 2, "primary_reward": 1.0, "questions_used": 2, "reward": 0.9600000000000001, "seed": 20260438, "useless_questions_ratio": 0.0}
|
||||
{"agent": "scripted_oracle", "auxiliary_reward": 0.4, "avg_question_length": 3.0, "blind_evidence_count": 0, "case_id": "alibi_a6c582", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "primary_reward": 1.0, "questions_used": 1, "reward": 0.8800000000000001, "seed": 20260439, "useless_questions_ratio": 0.0}
|
||||
{"agent": "scripted_oracle", "auxiliary_reward": 0.4, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "workplace_835476", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "primary_reward": 1.0, "questions_used": 1, "reward": 0.8800000000000001, "seed": 20260440, "useless_questions_ratio": 0.0}
|
||||
{"agent": "scripted_oracle", "auxiliary_reward": 1.2000000000000002, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "possession_a079c5", "contradictions_surfaced": 3, "contradictions_total": 3, "contradictions_triggered": 3, "difficulty": "hard", "evidence_presented": 3, "evidence_timing_successes": 3, "primary_reward": 1.0, "questions_used": 3, "reward": 1.0, "seed": 20260441, "useless_questions_ratio": 0.0}
|
||||
{"agent": "scripted_oracle", "auxiliary_reward": 1.2000000000000002, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "possession_9cc45d", "contradictions_surfaced": 3, "contradictions_total": 3, "contradictions_triggered": 3, "difficulty": "hard", "evidence_presented": 3, "evidence_timing_successes": 3, "primary_reward": 1.0, "questions_used": 3, "reward": 1.0, "seed": 20260442, "useless_questions_ratio": 0.0}
|
||||
{"agent": "scripted_oracle", "auxiliary_reward": 0.4, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "possession_259aa5", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "primary_reward": 1.0, "questions_used": 1, "reward": 0.8800000000000001, "seed": 20260443, "useless_questions_ratio": 0.0}
|
||||
{"agent": "scripted_oracle", "auxiliary_reward": 0.8, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "corporate_76724c", "contradictions_surfaced": 2, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 2, "primary_reward": 1.0, "questions_used": 2, "reward": 0.9600000000000001, "seed": 20260444, "useless_questions_ratio": 0.0}
|
||||
{"agent": "scripted_oracle", "auxiliary_reward": 0.45, "avg_question_length": 1.0, "blind_evidence_count": 1, "case_id": "timeline_767821", "contradictions_surfaced": 1, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 1, "primary_reward": 0.5, "questions_used": 2, "reward": 0.49000000000000005, "seed": 20260445, "useless_questions_ratio": 0.0}
|
||||
{"agent": "scripted_oracle", "auxiliary_reward": 0.8, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "motive_c0d166", "contradictions_surfaced": 2, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 2, "primary_reward": 1.0, "questions_used": 2, "reward": 0.9600000000000001, "seed": 20260446, "useless_questions_ratio": 0.0}
|
||||
{"agent": "scripted_oracle", "auxiliary_reward": 1.2000000000000002, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "corporate_307934", "contradictions_surfaced": 3, "contradictions_total": 3, "contradictions_triggered": 3, "difficulty": "hard", "evidence_presented": 3, "evidence_timing_successes": 3, "primary_reward": 1.0, "questions_used": 3, "reward": 1.0, "seed": 20260447, "useless_questions_ratio": 0.0}
|
||||
{"agent": "scripted_oracle", "auxiliary_reward": 1.2000000000000002, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "timeline_592816", "contradictions_surfaced": 3, "contradictions_total": 3, "contradictions_triggered": 3, "difficulty": "hard", "evidence_presented": 3, "evidence_timing_successes": 3, "primary_reward": 1.0, "questions_used": 3, "reward": 1.0, "seed": 20260448, "useless_questions_ratio": 0.0}
|
||||
{"agent": "scripted_oracle", "auxiliary_reward": 0.8, "avg_question_length": 1.5, "blind_evidence_count": 0, "case_id": "knowledge_b26824", "contradictions_surfaced": 2, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 2, "primary_reward": 1.0, "questions_used": 2, "reward": 0.9600000000000001, "seed": 20260449, "useless_questions_ratio": 0.0}
|
||||
{"agent": "scripted_oracle", "auxiliary_reward": 1.2000000000000002, "avg_question_length": 1.6666666666666667, "blind_evidence_count": 0, "case_id": "knowledge_697785", "contradictions_surfaced": 3, "contradictions_total": 3, "contradictions_triggered": 3, "difficulty": "hard", "evidence_presented": 3, "evidence_timing_successes": 3, "primary_reward": 1.0, "questions_used": 3, "reward": 1.0, "seed": 20260450, "useless_questions_ratio": 0.0}
|
||||
{"agent": "scripted_oracle", "auxiliary_reward": 0.45, "avg_question_length": 1.0, "blind_evidence_count": 1, "case_id": "timeline_81dafd", "contradictions_surfaced": 1, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 1, "primary_reward": 0.5, "questions_used": 2, "reward": 0.49000000000000005, "seed": 20260451, "useless_questions_ratio": 0.0}
|
||||
{"agent": "scripted_oracle", "auxiliary_reward": 0.8, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "corporate_8eb7d7", "contradictions_surfaced": 2, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 2, "primary_reward": 1.0, "questions_used": 2, "reward": 0.9600000000000001, "seed": 20260452, "useless_questions_ratio": 0.0}
|
||||
{"agent": "scripted_oracle", "auxiliary_reward": 0.8, "avg_question_length": 1.0, "blind_evidence_count": 0, "case_id": "possession_dbb5fe", "contradictions_surfaced": 2, "contradictions_total": 2, "contradictions_triggered": 2, "difficulty": "medium", "evidence_presented": 2, "evidence_timing_successes": 2, "primary_reward": 1.0, "questions_used": 2, "reward": 0.9600000000000001, "seed": 20260453, "useless_questions_ratio": 0.0}
|
||||
{"agent": "scripted_oracle", "auxiliary_reward": 1.2000000000000002, "avg_question_length": 2.3333333333333335, "blind_evidence_count": 0, "case_id": "alibi_a4666f", "contradictions_surfaced": 3, "contradictions_total": 3, "contradictions_triggered": 3, "difficulty": "hard", "evidence_presented": 3, "evidence_timing_successes": 3, "primary_reward": 1.0, "questions_used": 3, "reward": 1.0, "seed": 20260454, "useless_questions_ratio": 0.0}
|
||||
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.05000000000000002, "avg_question_length": 39.0, "blind_evidence_count": 0, "case_id": "timeline_255d67", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.0, "questions_used": 7, "reward": 0.010000000000000004, "seed": 20260425, "useless_questions_ratio": 0.7142857142857143}
|
||||
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 45.0, "blind_evidence_count": 0, "case_id": "knowledge_b28f8c", "contradictions_surfaced": 1, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.5, "questions_used": 7, "reward": 0.42000000000000004, "seed": 20260426, "useless_questions_ratio": 0.8571428571428571}
|
||||
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 31.0, "blind_evidence_count": 0, "case_id": "workplace_c98377", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 1.0, "questions_used": 7, "reward": 0.8200000000000001, "seed": 20260427, "useless_questions_ratio": 0.8571428571428571}
|
||||
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 31.0, "blind_evidence_count": 0, "case_id": "motive_66ff59", "contradictions_surfaced": 1, "contradictions_total": 3, "contradictions_triggered": 1, "difficulty": "hard", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.3333333333333333, "questions_used": 7, "reward": 0.2866666666666667, "seed": 20260428, "useless_questions_ratio": 0.8571428571428571}
|
||||
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.05000000000000002, "avg_question_length": 31.428571428571427, "blind_evidence_count": 0, "case_id": "timeline_19bb78", "contradictions_surfaced": 0, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.0, "questions_used": 7, "reward": 0.010000000000000004, "seed": 20260429, "useless_questions_ratio": 0.7142857142857143}
|
||||
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.05000000000000002, "avg_question_length": 36.0, "blind_evidence_count": 0, "case_id": "timeline_a97690", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.0, "questions_used": 7, "reward": 0.010000000000000004, "seed": 20260430, "useless_questions_ratio": 0.7142857142857143}
|
||||
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 35.0, "blind_evidence_count": 0, "case_id": "alibi_67ffcd", "contradictions_surfaced": 1, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.5, "questions_used": 7, "reward": 0.42000000000000004, "seed": 20260431, "useless_questions_ratio": 0.8571428571428571}
|
||||
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 39.0, "blind_evidence_count": 0, "case_id": "alibi_423bca", "contradictions_surfaced": 1, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.5, "questions_used": 7, "reward": 0.42000000000000004, "seed": 20260432, "useless_questions_ratio": 0.8571428571428571}
|
||||
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 39.0, "blind_evidence_count": 0, "case_id": "knowledge_960d07", "contradictions_surfaced": 1, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.5, "questions_used": 7, "reward": 0.42000000000000004, "seed": 20260433, "useless_questions_ratio": 0.8571428571428571}
|
||||
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 37.0, "blind_evidence_count": 0, "case_id": "alibi_e829c1", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 1.0, "questions_used": 7, "reward": 0.8200000000000001, "seed": 20260434, "useless_questions_ratio": 0.8571428571428571}
|
||||
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 32.0, "blind_evidence_count": 0, "case_id": "motive_85e25b", "contradictions_surfaced": 1, "contradictions_total": 3, "contradictions_triggered": 1, "difficulty": "hard", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.3333333333333333, "questions_used": 7, "reward": 0.2866666666666667, "seed": 20260435, "useless_questions_ratio": 0.8571428571428571}
|
||||
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 39.0, "blind_evidence_count": 0, "case_id": "knowledge_a599e3", "contradictions_surfaced": 1, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.5, "questions_used": 7, "reward": 0.42000000000000004, "seed": 20260436, "useless_questions_ratio": 0.8571428571428571}
|
||||
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 30.0, "blind_evidence_count": 0, "case_id": "motive_8bca20", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 1.0, "questions_used": 7, "reward": 0.8200000000000001, "seed": 20260437, "useless_questions_ratio": 0.8571428571428571}
|
||||
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 27.0, "blind_evidence_count": 0, "case_id": "corporate_6b1664", "contradictions_surfaced": 1, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.5, "questions_used": 7, "reward": 0.42000000000000004, "seed": 20260438, "useless_questions_ratio": 0.8571428571428571}
|
||||
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 38.0, "blind_evidence_count": 0, "case_id": "alibi_a6c582", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 1.0, "questions_used": 7, "reward": 0.8200000000000001, "seed": 20260439, "useless_questions_ratio": 0.8571428571428571}
|
||||
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 30.0, "blind_evidence_count": 0, "case_id": "workplace_835476", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 1.0, "questions_used": 7, "reward": 0.8200000000000001, "seed": 20260440, "useless_questions_ratio": 0.8571428571428571}
|
||||
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 47.0, "blind_evidence_count": 0, "case_id": "possession_a079c5", "contradictions_surfaced": 1, "contradictions_total": 3, "contradictions_triggered": 1, "difficulty": "hard", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.3333333333333333, "questions_used": 7, "reward": 0.2866666666666667, "seed": 20260441, "useless_questions_ratio": 0.8571428571428571}
|
||||
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 43.0, "blind_evidence_count": 0, "case_id": "possession_9cc45d", "contradictions_surfaced": 1, "contradictions_total": 3, "contradictions_triggered": 1, "difficulty": "hard", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.3333333333333333, "questions_used": 7, "reward": 0.2866666666666667, "seed": 20260442, "useless_questions_ratio": 0.8571428571428571}
|
||||
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 42.0, "blind_evidence_count": 0, "case_id": "possession_259aa5", "contradictions_surfaced": 1, "contradictions_total": 1, "contradictions_triggered": 1, "difficulty": "easy", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 1.0, "questions_used": 7, "reward": 0.8200000000000001, "seed": 20260443, "useless_questions_ratio": 0.8571428571428571}
|
||||
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 27.0, "blind_evidence_count": 0, "case_id": "corporate_76724c", "contradictions_surfaced": 1, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.5, "questions_used": 7, "reward": 0.42000000000000004, "seed": 20260444, "useless_questions_ratio": 0.8571428571428571}
|
||||
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.05000000000000002, "avg_question_length": 44.0, "blind_evidence_count": 0, "case_id": "timeline_767821", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.0, "questions_used": 7, "reward": 0.010000000000000004, "seed": 20260445, "useless_questions_ratio": 0.7142857142857143}
|
||||
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 35.0, "blind_evidence_count": 0, "case_id": "motive_c0d166", "contradictions_surfaced": 1, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.5, "questions_used": 7, "reward": 0.42000000000000004, "seed": 20260446, "useless_questions_ratio": 0.8571428571428571}
|
||||
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 27.0, "blind_evidence_count": 0, "case_id": "corporate_307934", "contradictions_surfaced": 1, "contradictions_total": 3, "contradictions_triggered": 1, "difficulty": "hard", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.3333333333333333, "questions_used": 7, "reward": 0.2866666666666667, "seed": 20260447, "useless_questions_ratio": 0.8571428571428571}
|
||||
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.05000000000000002, "avg_question_length": 39.0, "blind_evidence_count": 0, "case_id": "timeline_592816", "contradictions_surfaced": 0, "contradictions_total": 3, "contradictions_triggered": 1, "difficulty": "hard", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.0, "questions_used": 7, "reward": 0.010000000000000004, "seed": 20260448, "useless_questions_ratio": 0.7142857142857143}
|
||||
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 38.0, "blind_evidence_count": 0, "case_id": "knowledge_b26824", "contradictions_surfaced": 1, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.5, "questions_used": 7, "reward": 0.42000000000000004, "seed": 20260449, "useless_questions_ratio": 0.8571428571428571}
|
||||
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 43.0, "blind_evidence_count": 0, "case_id": "knowledge_697785", "contradictions_surfaced": 1, "contradictions_total": 3, "contradictions_triggered": 1, "difficulty": "hard", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.3333333333333333, "questions_used": 7, "reward": 0.2866666666666667, "seed": 20260450, "useless_questions_ratio": 0.8571428571428571}
|
||||
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.05000000000000002, "avg_question_length": 38.0, "blind_evidence_count": 0, "case_id": "timeline_81dafd", "contradictions_surfaced": 0, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 0, "evidence_timing_successes": 0, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.0, "questions_used": 7, "reward": 0.010000000000000004, "seed": 20260451, "useless_questions_ratio": 0.7142857142857143}
|
||||
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 27.0, "blind_evidence_count": 0, "case_id": "corporate_8eb7d7", "contradictions_surfaced": 1, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.5, "questions_used": 7, "reward": 0.42000000000000004, "seed": 20260452, "useless_questions_ratio": 0.8571428571428571}
|
||||
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 45.0, "blind_evidence_count": 0, "case_id": "possession_dbb5fe", "contradictions_surfaced": 1, "contradictions_total": 2, "contradictions_triggered": 1, "difficulty": "medium", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.5, "questions_used": 7, "reward": 0.42000000000000004, "seed": 20260453, "useless_questions_ratio": 0.8571428571428571}
|
||||
{"agent": "trained_sft_grpo_run2", "auxiliary_reward": 0.09999999999999998, "avg_question_length": 39.0, "blind_evidence_count": 0, "case_id": "alibi_a4666f", "contradictions_surfaced": 1, "contradictions_total": 3, "contradictions_triggered": 1, "difficulty": "hard", "evidence_presented": 1, "evidence_timing_successes": 1, "invalid_tool_calls": 0, "model_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "primary_reward": 0.3333333333333333, "questions_used": 7, "reward": 0.2866666666666667, "seed": 20260454, "useless_questions_ratio": 0.8571428571428571}
|
||||
52
eval/trained_eval_summary.json
Normal file
52
eval/trained_eval_summary.json
Normal file
@@ -0,0 +1,52 @@
|
||||
[
|
||||
{
|
||||
"agent": "keyword_spam",
|
||||
"avg_evidence_timing": 0,
|
||||
"avg_primary_reward": 0.0,
|
||||
"avg_reward": 0.07300000000000001,
|
||||
"avg_surface_rate": 0.0,
|
||||
"avg_trigger_rate": 0.6777777777777778,
|
||||
"avg_useless_ratio": 0.48000000000000004,
|
||||
"episodes": 30
|
||||
},
|
||||
{
|
||||
"agent": "present_all",
|
||||
"avg_evidence_timing": 0,
|
||||
"avg_primary_reward": 0.0,
|
||||
"avg_reward": 0.0,
|
||||
"avg_surface_rate": 0.0,
|
||||
"avg_trigger_rate": 0.0,
|
||||
"avg_useless_ratio": 0.0,
|
||||
"episodes": 30
|
||||
},
|
||||
{
|
||||
"agent": "random",
|
||||
"avg_evidence_timing": 0,
|
||||
"avg_primary_reward": 0.0,
|
||||
"avg_reward": 0.0,
|
||||
"avg_surface_rate": 0.0,
|
||||
"avg_trigger_rate": 0.0,
|
||||
"avg_useless_ratio": 1.0,
|
||||
"episodes": 30
|
||||
},
|
||||
{
|
||||
"agent": "scripted_oracle",
|
||||
"avg_evidence_timing": 1.9,
|
||||
"avg_primary_reward": 0.95,
|
||||
"avg_reward": 0.9023333333333334,
|
||||
"avg_surface_rate": 0.95,
|
||||
"avg_trigger_rate": 0.95,
|
||||
"avg_useless_ratio": 0.0,
|
||||
"episodes": 30
|
||||
},
|
||||
{
|
||||
"agent": "trained_sft_grpo_run2",
|
||||
"avg_evidence_timing": 0.8,
|
||||
"avg_primary_reward": 0.4611111111111111,
|
||||
"avg_reward": 0.3868888888888889,
|
||||
"avg_surface_rate": 0.4611111111111111,
|
||||
"avg_trigger_rate": 0.5888888888888889,
|
||||
"avg_useless_ratio": 0.8285714285714285,
|
||||
"episodes": 30
|
||||
}
|
||||
]
|
||||
137
eval/trained_eval_transcripts.md
Normal file
137
eval/trained_eval_transcripts.md
Normal file
@@ -0,0 +1,137 @@
|
||||
# Agent: trained_sft_grpo_run2
|
||||
|
||||
## timeline_255d67 (easy)
|
||||
|
||||
Esme Dubois is accused of assault at the public library on Walnut Street on May 28th. The victim claims the assault occurred at 11:45 PM. The defendant claims they were elsewhere at that time.
|
||||
|
||||
### Transcript
|
||||
- Step 1 `ask_question`: Esme Dubois is accused of assault at the public library on Walnut Street on May 28th. The victim claims the assault occurred at 11:45 PM on the victim's location. The defendant claims the victim was elsewhere at that time.
|
||||
- Witness: The assault happened at 11:00 PM, not 11:45 PM.
|
||||
- Triggered: c1_time_of_assault; Surfaced: none
|
||||
- Step 2 `present_evidence`: victor_statement
|
||||
- Witness: Invalid exhibit ID.
|
||||
- Triggered: c1_time_of_assault; Surfaced: none
|
||||
- Step 3 `ask_question`: Esme Dubois is accused of theft at the public library on Walnut Street on May 28th. The victim claims the theft occurred at 11:45 PM on the victim's location. The defendant claims the victim was elsewhere at that time.
|
||||
- Witness: The assault happened at 11:00 PM, not 11:45 PM.
|
||||
- Triggered: c1_time_of_assault; Surfaced: none
|
||||
- Step 4 `ask_question`: Esme Dubois is accused of theft at the public library on Walnut Street on May 28th. The victim claims the theft occurred at 11:45 PM on the victim's location. The defendant claims the victim was elsewhere at that time.
|
||||
- Witness: You've already asked that question.
|
||||
- Triggered: c1_time_of_assault; Surfaced: none
|
||||
- Step 5 `ask_question`: Esme Dubois is accused of theft at the public library on Walnut Street on May 28th. The victim claims the theft occurred at 11:45 PM on the victim's location. The defendant claims the victim was elsewhere at that time.
|
||||
- Witness: You've already asked that question.
|
||||
- Triggered: c1_time_of_assault; Surfaced: none
|
||||
- Step 6 `ask_question`: Esme Dubois is accused of theft at the public library on Walnut Street on May 28th. The victim claims the theft occurred at 11:45 PM on the victim's location. The defendant claims the victim was elsewhere at that time.
|
||||
- Witness: You've already asked that question.
|
||||
- Triggered: c1_time_of_assault; Surfaced: none
|
||||
- Step 7 `ask_question`: Esme Dubois is accused of theft at the public library on Walnut Street on May 28th. The victim claims the theft occurred at 11:45 PM on the victim's location. The defendant claims the victim was elsewhere at that time.
|
||||
- Witness: You've already asked that question.
|
||||
- Triggered: c1_time_of_assault; Surfaced: none
|
||||
- Step 8 `ask_question`: Esme Dubois is accused of theft at the public library on Walnut Street on May 28th. The victim claims the theft occurred at 11:45 PM on the victim's location. The defendant claims the victim was elsewhere at that time.
|
||||
- Witness: You've already asked that question.
|
||||
- Triggered: c1_time_of_assault; Surfaced: none
|
||||
- Step 9 `rest_case`: rest_case
|
||||
- Witness: [Counsel rests.]
|
||||
- Triggered: c1_time_of_assault; Surfaced: none
|
||||
|
||||
### Reward
|
||||
```json
|
||||
{"primary_reward": 0.0, "auxiliary_reward_raw": 0.05000000000000002, "total_reward": 0.010000000000000004, "contradictions_total": 1.0, "contradictions_triggered": 1.0, "contradictions_surfaced": 0.0, "keyword_questions": 2.0, "evidence_timing_successes": 0.0, "blind_evidence_count": 0.0, "duplicate_questions": 5.0, "irrelevant_questions": 0.0, "inadmissible_actions": 1.0, "useless_questions_ratio": 0.7142857142857143, "avg_question_length": 39.0}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
# Agent: trained_sft_grpo_run2
|
||||
|
||||
## knowledge_b28f8c (medium)
|
||||
|
||||
Priya Sharma is charged with embezzlement from the diner at the corner of 9th and Broad. The state alleges Priya Sharma systematically diverted funds in coordination with the manager, Kavita Iyer, between December 5th and the present. The defendant denies knowing Kavita Iyer.
|
||||
|
||||
### Transcript
|
||||
- Step 1 `ask_question`: CASE BRIEF:
|
||||
Priya Sharma is charged with embezzlement from the diner at the corner of 9th and Broad. The state alleges Priya Sharma systematically diverted funds in coordination with the manager, Kavita Iyer, between December 5th and the present. The defendant denies knowing Kavita Iyer.
|
||||
- Witness: I have never met Kavita Iyer in my life.
|
||||
- Triggered: c1_know_victim; Surfaced: none
|
||||
- Step 2 `present_evidence`: email_chain
|
||||
- Witness: [Witness stammers] I... I'm not sure what to say.
|
||||
- Triggered: c1_know_victim; Surfaced: c1_know_victim
|
||||
- Step 3 `ask_question`: CASE BRIEF:
|
||||
Priya Sharma is charged with embezzlement from the diner at the corner of 9th and Broad. The state alleges Priya Sharma systematically diverted funds in coordination with the manager, Kavita Iyer, between December 5th and the present. The defendant denies knowing Kavita Iyer.
|
||||
- Witness: You've already asked that question.
|
||||
- Triggered: c1_know_victim; Surfaced: c1_know_victim
|
||||
- Step 4 `ask_question`: CASE BRIEF:
|
||||
Priya Sharma is charged with embezzlement from the diner at the corner of 9th and Broad. The state alleges Priya Sharma systematically diverted funds in coordination with the manager, Kavita Iyer, between December 5th and the present. The defendant denies knowing Kavita Iyer.
|
||||
- Witness: You've already asked that question.
|
||||
- Triggered: c1_know_victim; Surfaced: c1_know_victim
|
||||
- Step 5 `ask_question`: CASE BRIEF:
|
||||
Priya Sharma is charged with embezzlement from the diner at the corner of 9th and Broad. The state alleges Priya Sharma systematically diverted funds in coordination with the manager, Kavita Iyer, between December 5th and the present. The defendant denies knowing Kavita Iyer.
|
||||
- Witness: You've already asked that question.
|
||||
- Triggered: c1_know_victim; Surfaced: c1_know_victim
|
||||
- Step 6 `ask_question`: CASE BRIEF:
|
||||
Priya Sharma is charged with embezzlement from the diner at the corner of 9th and Broad. The state alleges Priya Sharma systematically diverted funds in coordination with the manager, Kavita Iyer, between December 5th and the present. The defendant denies knowing Kavita Iyer.
|
||||
- Witness: You've already asked that question.
|
||||
- Triggered: c1_know_victim; Surfaced: c1_know_victim
|
||||
- Step 7 `ask_question`: CASE BRIEF:
|
||||
Priya Sharma is charged with embezzlement from the diner at the corner of 9th and Broad. The state alleges Priya Sharma systematically diverted funds in coordination with the manager, Kavita Iyer, between December 5th and the present. The defendant denies knowing Kavita Iyer.
|
||||
- Witness: You've already asked that question.
|
||||
- Triggered: c1_know_victim; Surfaced: c1_know_victim
|
||||
- Step 8 `ask_question`: CASE BRIEF:
|
||||
Priya Sharma is charged with embezzlement from the diner at the corner of 9th and Broad. The state alleges Priya Sharma systematically diverted funds in coordination with the manager, Kavita Iyer, between December 5th and the present. The defendant denies knowing Kavita Iyer.
|
||||
- Witness: You've already asked that question.
|
||||
- Triggered: c1_know_victim; Surfaced: c1_know_victim
|
||||
- Step 9 `rest_case`: rest_case
|
||||
- Witness: [Counsel rests.]
|
||||
- Triggered: c1_know_victim; Surfaced: c1_know_victim
|
||||
|
||||
### Reward
|
||||
```json
|
||||
{"primary_reward": 0.5, "auxiliary_reward_raw": 0.09999999999999998, "total_reward": 0.42000000000000004, "contradictions_total": 2.0, "contradictions_triggered": 1.0, "contradictions_surfaced": 1.0, "keyword_questions": 1.0, "evidence_timing_successes": 1.0, "blind_evidence_count": 0.0, "duplicate_questions": 6.0, "irrelevant_questions": 0.0, "inadmissible_actions": 0.0, "useless_questions_ratio": 0.8571428571428571, "avg_question_length": 45.0}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
# Agent: trained_sft_grpo_run2
|
||||
|
||||
## workplace_c98377 (easy)
|
||||
|
||||
Priya Sharma is questioned in an HR investigation after Ravi Joshi reported retaliation at the HR office on March 14th. The manager denies sending threatening messages or being nearby.
|
||||
|
||||
### Transcript
|
||||
- Step 1 `ask_question`: CASE BRIEF:
|
||||
Priya Sharma is questioned in an HR investigation after Ravi Joshi reported retaliation at the HR office on March 14th. The manager denies sending threatening messages or being nearby.
|
||||
- Witness: I never sent Ravi Joshi any threatening message.
|
||||
- Triggered: c1_message_denial; Surfaced: none
|
||||
- Step 2 `present_evidence`: message_export
|
||||
- Witness: [Witness stammers] I... I'm not sure what to say.
|
||||
- Triggered: c1_message_denial; Surfaced: c1_message_denial
|
||||
- Step 3 `ask_question`: CASE BRIEF:
|
||||
Priya Sharma is questioned in an HR investigation after Ravi Joshi reported retaliation at the HR office on March 14th. The manager denies sending threatening messages or being nearby.
|
||||
- Witness: You've already asked that question.
|
||||
- Triggered: c1_message_denial; Surfaced: c1_message_denial
|
||||
- Step 4 `ask_question`: CASE BRIEF:
|
||||
Priya Sharma is questioned in an HR investigation after Ravi Joshi reported retaliation at the HR office on March 14th. The manager denies sending threatening messages or being nearby.
|
||||
- Witness: You've already asked that question.
|
||||
- Triggered: c1_message_denial; Surfaced: c1_message_denial
|
||||
- Step 5 `ask_question`: CASE BRIEF:
|
||||
Priya Sharma is questioned in an HR investigation after Ravi Joshi reported retaliation at the HR office on March 14th. The manager denies sending threatening messages or being nearby.
|
||||
- Witness: You've already asked that question.
|
||||
- Triggered: c1_message_denial; Surfaced: c1_message_denial
|
||||
- Step 6 `ask_question`: CASE BRIEF:
|
||||
Priya Sharma is questioned in an HR investigation after Ravi Joshi reported retaliation at the HR office on March 14th. The manager denies sending threatening messages or being nearby.
|
||||
- Witness: You've already asked that question.
|
||||
- Triggered: c1_message_denial; Surfaced: c1_message_denial
|
||||
- Step 7 `ask_question`: CASE BRIEF:
|
||||
Priya Sharma is questioned in an HR investigation after Ravi Joshi reported retaliation at the HR office on March 14th. The manager denies sending threatening messages or being nearby.
|
||||
- Witness: You've already asked that question.
|
||||
- Triggered: c1_message_denial; Surfaced: c1_message_denial
|
||||
- Step 8 `ask_question`: CASE BRIEF:
|
||||
Priya Sharma is questioned in an HR investigation after Ravi Joshi reported retaliation at the HR office on March 14th. The manager denies sending threatening messages or being nearby.
|
||||
- Witness: You've already asked that question.
|
||||
- Triggered: c1_message_denial; Surfaced: c1_message_denial
|
||||
- Step 9 `rest_case`: rest_case
|
||||
- Witness: [Counsel rests.]
|
||||
- Triggered: c1_message_denial; Surfaced: c1_message_denial
|
||||
|
||||
### Reward
|
||||
```json
|
||||
{"primary_reward": 1.0, "auxiliary_reward_raw": 0.09999999999999998, "total_reward": 0.8200000000000001, "contradictions_total": 1.0, "contradictions_triggered": 1.0, "contradictions_surfaced": 1.0, "keyword_questions": 1.0, "evidence_timing_successes": 1.0, "blind_evidence_count": 0.0, "duplicate_questions": 6.0, "irrelevant_questions": 0.0, "inadmissible_actions": 0.0, "useless_questions_ratio": 0.8571428571428571, "avg_question_length": 31.0}
|
||||
```
|
||||
12
generation_config.json
Normal file
12
generation_config.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"do_sample": true,
|
||||
"eos_token_id": [
|
||||
151645,
|
||||
151643
|
||||
],
|
||||
"pad_token_id": 151643,
|
||||
"temperature": 0.6,
|
||||
"top_k": 20,
|
||||
"top_p": 0.95,
|
||||
"transformers_version": "5.6.2"
|
||||
}
|
||||
3
model.safetensors
Normal file
3
model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5eb33e25e043c4103e6cee6aa637be513e7c2c0b7afb8cdd2325c7891721e22f
|
||||
size 2384234968
|
||||
89
sft_warm_start/chat_template.jinja
Normal file
89
sft_warm_start/chat_template.jinja
Normal file
@@ -0,0 +1,89 @@
|
||||
{%- if tools %}
|
||||
{{- '<|im_start|>system\n' }}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- messages[0].content + '\n\n' }}
|
||||
{%- endif %}
|
||||
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
||||
{%- for tool in tools %}
|
||||
{{- "\n" }}
|
||||
{{- tool | tojson }}
|
||||
{%- endfor %}
|
||||
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
||||
{%- else %}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
||||
{%- for message in messages[::-1] %}
|
||||
{%- set index = (messages|length - 1) - loop.index0 %}
|
||||
{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
|
||||
{%- set ns.multi_step_tool = false %}
|
||||
{%- set ns.last_query_index = index %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- for message in messages %}
|
||||
{%- if message.content is string %}
|
||||
{%- set content = message.content %}
|
||||
{%- else %}
|
||||
{%- set content = '' %}
|
||||
{%- endif %}
|
||||
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
|
||||
{%- elif message.role == "assistant" %}
|
||||
{%- set reasoning_content = '' %}
|
||||
{%- if message.reasoning_content is string %}
|
||||
{%- set reasoning_content = message.reasoning_content %}
|
||||
{%- else %}
|
||||
{%- if '</think>' in content %}
|
||||
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
||||
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- if loop.index0 > ns.last_query_index %}
|
||||
{%- if loop.last or (not loop.last and reasoning_content) %}
|
||||
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- if message.tool_calls %}
|
||||
{%- for tool_call in message.tool_calls %}
|
||||
{%- if (loop.first and content) or (not loop.first) %}
|
||||
{{- '\n' }}
|
||||
{%- endif %}
|
||||
{%- if tool_call.function %}
|
||||
{%- set tool_call = tool_call.function %}
|
||||
{%- endif %}
|
||||
{{- '<tool_call>\n{"name": "' }}
|
||||
{{- tool_call.name }}
|
||||
{{- '", "arguments": ' }}
|
||||
{%- if tool_call.arguments is string %}
|
||||
{{- tool_call.arguments }}
|
||||
{%- else %}
|
||||
{{- tool_call.arguments | tojson }}
|
||||
{%- endif %}
|
||||
{{- '}\n</tool_call>' }}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- elif message.role == "tool" %}
|
||||
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
||||
{{- '<|im_start|>user' }}
|
||||
{%- endif %}
|
||||
{{- '\n<tool_response>\n' }}
|
||||
{{- content }}
|
||||
{{- '\n</tool_response>' }}
|
||||
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- if add_generation_prompt %}
|
||||
{{- '<|im_start|>assistant\n' }}
|
||||
{%- if enable_thinking is defined and enable_thinking is false %}
|
||||
{{- '<think>\n\n</think>\n\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
63
sft_warm_start/config.json
Normal file
63
sft_warm_start/config.json
Normal file
@@ -0,0 +1,63 @@
|
||||
{
|
||||
"architectures": [
|
||||
"Qwen3ForCausalLM"
|
||||
],
|
||||
"attention_bias": false,
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": null,
|
||||
"dtype": "bfloat16",
|
||||
"eos_token_id": 151645,
|
||||
"head_dim": 128,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 1024,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 3072,
|
||||
"layer_types": [
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention"
|
||||
],
|
||||
"max_position_embeddings": 40960,
|
||||
"max_window_layers": 28,
|
||||
"model_type": "qwen3",
|
||||
"num_attention_heads": 16,
|
||||
"num_hidden_layers": 28,
|
||||
"num_key_value_heads": 8,
|
||||
"pad_token_id": 151643,
|
||||
"rms_norm_eps": 1e-06,
|
||||
"rope_parameters": {
|
||||
"rope_theta": 1000000,
|
||||
"rope_type": "default"
|
||||
},
|
||||
"sliding_window": null,
|
||||
"tie_word_embeddings": true,
|
||||
"transformers_version": "5.6.2",
|
||||
"use_cache": false,
|
||||
"use_sliding_window": false,
|
||||
"vocab_size": 151936
|
||||
}
|
||||
12
sft_warm_start/generation_config.json
Normal file
12
sft_warm_start/generation_config.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"do_sample": true,
|
||||
"eos_token_id": [
|
||||
151645,
|
||||
151643
|
||||
],
|
||||
"pad_token_id": 151643,
|
||||
"temperature": 0.6,
|
||||
"top_k": 20,
|
||||
"top_p": 0.95,
|
||||
"transformers_version": "5.6.2"
|
||||
}
|
||||
3
sft_warm_start/model.safetensors
Normal file
3
sft_warm_start/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:3aa735583653d63e0b164d0872f4bd2aa4147f31ba523b70fff69fb169b24869
|
||||
size 1192135096
|
||||
3
sft_warm_start/tokenizer.json
Normal file
3
sft_warm_start/tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f3cdba37323e45d3a891de61878a08412fb1d85f4ade068eeffc03740ec9e275
|
||||
size 11422748
|
||||
75
sft_warm_start/tokenizer_config.json
Normal file
75
sft_warm_start/tokenizer_config.json
Normal file
@@ -0,0 +1,75 @@
|
||||
{
|
||||
"add_prefix_space": false,
|
||||
"backend": "tokenizers",
|
||||
"bos_token": null,
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "<|im_end|>",
|
||||
"errors": "replace",
|
||||
"extra_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"is_local": false,
|
||||
"local_files_only": false,
|
||||
"model_max_length": 131072,
|
||||
"pad_token": "<|endoftext|>",
|
||||
"padding_side": "left",
|
||||
"response_schema": {
|
||||
"properties": {
|
||||
"content": {
|
||||
"type": "string"
|
||||
},
|
||||
"reasoning_content": {
|
||||
"type": "string"
|
||||
},
|
||||
"role": {
|
||||
"const": "assistant"
|
||||
},
|
||||
"tool_calls": {
|
||||
"items": {
|
||||
"properties": {
|
||||
"function": {
|
||||
"properties": {
|
||||
"arguments": {
|
||||
"additionalProperties": {},
|
||||
"type": "object"
|
||||
},
|
||||
"name": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"type": "object"
|
||||
},
|
||||
"type": {
|
||||
"const": "function"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"x-parser": "json",
|
||||
"x-parser-args": {
|
||||
"transform": "{type: 'function', function: @}"
|
||||
}
|
||||
},
|
||||
"type": "array",
|
||||
"x-regex-iterator": "<tool_call>\\s*(.+?)\\s*</tool_call>"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"x-regex": "^(?:<think>\\n?(?:(?P<reasoning_content>.*?\\S.*?)\\n?|[\\s]*)</think>\\s*)?(?P<content>.*?)(?:\\n(?=<tool_call>))?(?=(?:<tool_call>|<\\|im_end\\|>|$))(?P<tool_calls>(?:<tool_call>.+?</tool_call>\\s*)+)?\\s*(?:<\\|im_end\\|>|$)"
|
||||
},
|
||||
"split_special_tokens": false,
|
||||
"tokenizer_class": "Qwen2Tokenizer",
|
||||
"truncation_side": "left",
|
||||
"unk_token": null
|
||||
}
|
||||
3
sft_warm_start/training_args.bin
Normal file
3
sft_warm_start/training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:44e78eec69cef56830cc59b94a060abe906ff4571c26fb4566a6bdcb27e4499d
|
||||
size 5265
|
||||
3
tokenizer.json
Normal file
3
tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506
|
||||
size 11422650
|
||||
78
tokenizer_config.json
Normal file
78
tokenizer_config.json
Normal file
@@ -0,0 +1,78 @@
|
||||
{
|
||||
"add_prefix_space": false,
|
||||
"backend": "tokenizers",
|
||||
"bos_token": null,
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "<|im_end|>",
|
||||
"errors": "replace",
|
||||
"extra_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"is_local": true,
|
||||
"local_files_only": false,
|
||||
"max_length": 3072,
|
||||
"model_max_length": 131072,
|
||||
"pad_token": "<|endoftext|>",
|
||||
"padding_side": "left",
|
||||
"response_schema": {
|
||||
"properties": {
|
||||
"content": {
|
||||
"type": "string"
|
||||
},
|
||||
"reasoning_content": {
|
||||
"type": "string"
|
||||
},
|
||||
"role": {
|
||||
"const": "assistant"
|
||||
},
|
||||
"tool_calls": {
|
||||
"items": {
|
||||
"properties": {
|
||||
"function": {
|
||||
"properties": {
|
||||
"arguments": {
|
||||
"additionalProperties": {},
|
||||
"type": "object"
|
||||
},
|
||||
"name": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"type": "object"
|
||||
},
|
||||
"type": {
|
||||
"const": "function"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"x-parser": "json",
|
||||
"x-parser-args": {
|
||||
"transform": "{type: 'function', function: @}"
|
||||
}
|
||||
},
|
||||
"type": "array",
|
||||
"x-regex-iterator": "<tool_call>\\s*(.+?)\\s*</tool_call>"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"x-regex": "^(?:<think>\\n?(?:(?P<reasoning_content>.*?\\S.*?)\\n?|[\\s]*)</think>\\s*)?(?P<content>.*?)(?:\\n(?=<tool_call>))?(?=(?:<tool_call>|<\\|im_end\\|>|$))(?P<tool_calls>(?:<tool_call>.+?</tool_call>\\s*)+)?\\s*(?:<\\|im_end\\|>|$)"
|
||||
},
|
||||
"split_special_tokens": false,
|
||||
"stride": 0,
|
||||
"tokenizer_class": "Qwen2Tokenizer",
|
||||
"truncation_side": "left",
|
||||
"truncation_strategy": "longest_first",
|
||||
"unk_token": null
|
||||
}
|
||||
3
training_args.bin
Normal file
3
training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:623158c464bdeaec4fa82a52817596e39771e95ece6659a737aa9e9d07d03327
|
||||
size 7185
|
||||
23
training_summary.json
Normal file
23
training_summary.json
Normal file
@@ -0,0 +1,23 @@
|
||||
{
|
||||
"artifact_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2",
|
||||
"env_url": "https://heavycoderhh-counsel-env.hf.space",
|
||||
"evidence_pressure": 1.4,
|
||||
"grpo_dataset_size": 160,
|
||||
"grpo_learning_rate": 5e-06,
|
||||
"grpo_max_steps": 250,
|
||||
"max_completion_length": 320,
|
||||
"metrics": {
|
||||
"total_flos": 0.0,
|
||||
"train_loss": -0.00972448107972741,
|
||||
"train_runtime": 4103.8977,
|
||||
"train_samples_per_second": 0.244,
|
||||
"train_steps_per_second": 0.061
|
||||
},
|
||||
"model": "heavycoderhh/counsel-env-qwen3-0.6b-grpo",
|
||||
"num_generations": 4,
|
||||
"sft_dataset_size": 320,
|
||||
"sft_dir": "/tmp/counsel-sft-grpo-output/sft_warm_start",
|
||||
"sft_epochs": 1.0,
|
||||
"sft_learning_rate": 1e-05,
|
||||
"space_repo": "heavycoderhh/counsel-env"
|
||||
}
|
||||
Reference in New Issue
Block a user