commit 85e3919d976e6242b478f7002b3b9f3b39949882 Author: ModelHub XC Date: Mon Jun 15 18:34:12 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: janhq/Jan-v1-edge Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..d2fc868 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,51 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bin.* filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*.tfevents* filter=lfs diff=lfs merge=lfs -text +*.db* filter=lfs diff=lfs merge=lfs -text +*.ark* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.gguf* filter=lfs diff=lfs merge=lfs -text +*.ggml filter=lfs diff=lfs merge=lfs -text +*.llamafile* filter=lfs diff=lfs merge=lfs 
-text +*.pt2 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text + +merges.txt filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text +vocab.json filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..83c87cc --- /dev/null +++ b/README.md @@ -0,0 +1,81 @@ +--- +license: apache-2.0 +language: +- en +base_model: +- Qwen/Qwen3-1.7B +pipeline_tag: text-generation +library_name: transformers +--- + +# Jan-v1-edge: Distilled for Edge, Built for Web Search + +[![GitHub](https://img.shields.io/badge/GitHub-Repository-blue?logo=github)](https://github.com/menloresearch/deep-research) +[![License](https://img.shields.io/badge/License-Apache%202.0-yellow)](https://opensource.org/licenses/Apache-2.0) +[![Jan App](https://img.shields.io/badge/Powered%20by-Jan%20App-purple?style=flat&logo=android)](https://jan.ai/) + +## Overview + +**Jan-v1-edge** is a lightweight agentic model built for fast, reliable on-device execution. As the second release in the **Jan Family**, it is distilled from the larger [`Jan-v1`](https://huggingface.co/janhq/Jan-v1-4B) model, preserving strong reasoning and problem-solving ability in a smaller footprint suitable for resource-constrained environments. + +Jan-v1-edge was developed through a two-phase post-training process. The first phase, **Supervised Fine-Tuning (SFT)**, transferred core capabilities from the `Jan-v1` teacher model to the smaller student. 
The second phase, **Reinforcement Learning with Verifiable Rewards (RLVR)**—the same method used in `Jan-v1` and `Lucy`—further optimized reasoning efficiency, tool use, and correctness. This staged approach delivers reliable results on complex, interactive workloads. + +## Performance + +### Question Answering (SimpleQA) + +Despite having only 1.7B parameters, **Jan-v1-edge** achieves 83% accuracy—nearly matching the larger Jan-nano-128k—demonstrating its efficiency and robustness. + +![image/png](https://cdn-uploads.huggingface.co/production/uploads/655e3b59d5c0d3db5359ca3c/gV6Ph1m3rW6KeYkpj_b4s.png) + +### Chat & Instruction Following + +![image/png](https://cdn-uploads.huggingface.co/production/uploads/655e3b59d5c0d3db5359ca3c/xNWL41L__oULHJkuAaGGt.png) + +Compared with Qwen3-1.7B Thinking, Jan-v1-edge shows a slight degradation on instruction-following and CreativeWriting, while remaining comparable or better on EQBench and recency QA. + +## Quick Start + +### Integration with Jan App + +Jan-v1-edge is optimized for direct integration with the [Jan App](https://jan.ai/). Simply select the model from the Jan App interface for immediate access to its full capabilities. 
+ +### Local Deployment + +**Using vLLM:** +```bash +vllm serve janhq/Jan-v1-edge \ + --host 0.0.0.0 \ + --port 1234 \ + --enable-auto-tool-choice \ + --tool-call-parser hermes + +``` + +**Using llama.cpp:** +```bash +llama-server --model Jan-v1-edge-Q8_0.gguf \ + --host 0.0.0.0 \ + --port 1234 \ + --jinja \ + --no-context-shift +``` + +### Recommended Inference Parameters +```yaml +temperature: 0.6 +top_p: 0.95 +top_k: 20 +min_p: 0.0 +max_tokens: 2048 +``` + +## 🤝 Community & Support + +- **Discussions**: [HuggingFace Community](https://huggingface.co/janhq/Jan-v1-edge/discussions) +- **Jan App**: Discover more about the Jan App at [jan.ai](https://jan.ai/) + +## 📄 Citation +```bibtex +Updated Soon +``` \ No newline at end of file diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000..b54f913 --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,28 @@ +{ + "</think>": 151668, + "</tool_call>": 151658, + "</tool_response>": 151666, + "<think>": 151667, + "<tool_call>": 151657, + "<tool_response>": 151665, + "<|box_end|>": 151649, + "<|box_start|>": 151648, + "<|endoftext|>": 151643, + "<|file_sep|>": 151664, + "<|fim_middle|>": 151660, + "<|fim_pad|>": 151662, + "<|fim_prefix|>": 151659, + "<|fim_suffix|>": 151661, + "<|im_end|>": 151645, + "<|im_start|>": 151644, + "<|image_pad|>": 151655, + "<|object_ref_end|>": 151647, + "<|object_ref_start|>": 151646, + "<|quad_end|>": 151651, + "<|quad_start|>": 151650, + "<|repo_name|>": 151663, + "<|video_pad|>": 151656, + "<|vision_end|>": 151653, + "<|vision_pad|>": 151654, + "<|vision_start|>": 151652 +} diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..5bbc9d9 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,100 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }} + {%- for tool 
in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if message.content is string %} + {%- set content = message.content %} + {%- else %} + {%- set content = '' %} + {%- endif %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is string %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '</think>' in content %} + {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %} + {%- set content = content.split('</think>')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role }} + {% generation %} + {{- '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }} + {% endgeneration %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' }} + {% generation %} + {{- content }} + {% endgeneration %} + {%- endif %} + {%- else %} + {{- 
'<|im_start|>' + message.role + '\n' }} + {% generation %} + {{- content }} + {% endgeneration %} + {%- endif %} + {% generation %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {% endgeneration %} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..26358f4 --- /dev/null +++ b/config.json @@ -0,0 +1,60 @@ +{ + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 151643, + "eos_token_id": 151645, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 6144, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + 
"full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.55.0", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..bbeeda1 --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "text-generation", "allow_remote": true} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..4b23077 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,13 @@ +{ + "bos_token_id": 151643, + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + "temperature": 0.6, + "top_k": 20, + "top_p": 0.95, + "transformers_version": "4.55.0" +} diff --git a/merges.txt b/merges.txt new file mode 100644 index 0000000..80c1a19 --- /dev/null +++ b/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..217baa9 --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15024564b172a09f1deea887d9bb3fd42a3eb629435e2fb92c7d1a6698ef9fca +size 3441185608 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..ac23c0a --- /dev/null +++ 
b/special_tokens_map.json @@ -0,0 +1,31 @@ +{ + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..cd71f61 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4 +size 11422654 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..ddaf698 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,239 @@ +{ + "add_bos_token": false, + "add_prefix_space": false, + "added_tokens_decoder": { + "151643": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151645": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151646": { + "content": "<|object_ref_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|object_ref_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151648": { + "content": "<|box_start|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "151649": { + "content": "<|box_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "<tool_call>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "</tool_call>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151665": { + "content": "<tool_response>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151666": { + "content": "</tool_response>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151667": { + "content": "<think>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151668": { + "content": "</think>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": {}, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..d95c832 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,1069 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.0014534883720930232, + "eval_steps": 500, + "global_step": 60, + "is_hyper_param_search": false, 
+ "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 10086.0, + "completions/max_terminated_length": 10086.0, + "completions/mean_length": 4296.546875, + "completions/mean_terminated_length": 4296.546875, + "completions/min_length": 1720.0, + "completions/min_terminated_length": 1720.0, + "epoch": 2.4224806201550387e-05, + "grad_norm": 0.016954593260394005, + "kl": 0.0009393692016601562, + "learning_rate": 0.0, + "loss": -0.0003, + "num_tokens": 601834.0, + "reward": 0.4602593183517456, + "reward_std": 0.24803586304187775, + "rewards/avg_thinking_length_func": 185.02471923828125, + "rewards/correct_answer_reward_func": 0.453125, + "rewards/efficient_thinking_reward_func": 0.8889554441999474, + "rewards/format_reward_func": 1.0, + "rewards/num_xml_reward_func": 1.7176268100738525, + "rewards/tool_execution_reward_func": 1.9936248064041138, + "rewards/visit_tool_reward_func": 0.9308543801307678, + "step": 1 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 4.8449612403100775e-05, + "grad_norm": 0.016953615886545852, + "kl": 0.0009393692016601562, + "learning_rate": 6.25e-08, + "loss": -0.0003, + "step": 2 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 7.267441860465116e-05, + "grad_norm": 0.016864690676516626, + "kl": 0.0009565353393554688, + "learning_rate": 1.25e-07, + "loss": -0.0003, + "step": 3 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 
9.689922480620155e-05, + "grad_norm": 0.016822420848305722, + "kl": 0.0009622573852539062, + "learning_rate": 1.875e-07, + "loss": -0.0003, + "step": 4 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 9385.0, + "completions/max_terminated_length": 9385.0, + "completions/mean_length": 4270.703125, + "completions/mean_terminated_length": 4270.703125, + "completions/min_length": 1390.0, + "completions/min_terminated_length": 1390.0, + "epoch": 0.00012112403100775194, + "grad_norm": 0.025862550499858347, + "kl": 0.000957489013671875, + "learning_rate": 2.5e-07, + "loss": 0.0031, + "num_tokens": 1199795.0, + "reward": 0.566771388053894, + "reward_std": 0.48137491941452026, + "rewards/avg_thinking_length_func": 182.33303833007812, + "rewards/correct_answer_reward_func": 0.578125, + "rewards/efficient_thinking_reward_func": 0.8707049785861538, + "rewards/format_reward_func": 1.0, + "rewards/num_xml_reward_func": 1.7195165157318115, + "rewards/tool_execution_reward_func": 1.9965277910232544, + "rewards/visit_tool_reward_func": 0.9274243116378784, + "step": 5 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.00014534883720930232, + "grad_norm": 0.025877236026611388, + "kl": 0.0009489059448242188, + "learning_rate": 3.1249999999999997e-07, + "loss": 0.0031, + "step": 6 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0001695736434108527, + "grad_norm": 0.025817236127475232, + "kl": 0.0009660720825195312, + "learning_rate": 3.75e-07, + "loss": 0.0031, + "step": 7 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 
0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0001937984496124031, + "grad_norm": 0.02584169829863559, + "kl": 0.0009441375732421875, + "learning_rate": 4.375e-07, + "loss": 0.0031, + "step": 8 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 7008.0, + "completions/max_terminated_length": 7008.0, + "completions/mean_length": 4088.546875, + "completions/mean_terminated_length": 4088.546875, + "completions/min_length": 1705.0, + "completions/min_terminated_length": 1705.0, + "epoch": 0.00021802325581395349, + "grad_norm": 0.01625597308376849, + "kl": 0.0009918212890625, + "learning_rate": 5e-07, + "loss": 0.0013, + "num_tokens": 1783761.0, + "reward": 0.3732198178768158, + "reward_std": 0.2907864451408386, + "rewards/avg_thinking_length_func": 177.95510864257812, + "rewards/correct_answer_reward_func": 0.390625, + "rewards/efficient_thinking_reward_func": 0.8993925619789238, + "rewards/format_reward_func": 1.0, + "rewards/num_xml_reward_func": 1.6866124868392944, + "rewards/tool_execution_reward_func": 1.950781226158142, + "rewards/visit_tool_reward_func": 0.8574961423873901, + "step": 9 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.00024224806201550387, + "grad_norm": 0.016618535814852814, + "kl": 0.0009899139404296875, + "learning_rate": 5.625e-07, + "loss": 0.0013, + "step": 10 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.00026647286821705426, + "grad_norm": 0.016248156263205492, + "kl": 0.0009660720825195312, + "learning_rate": 6.249999999999999e-07, + "loss": 0.0013, + "step": 11 + }, + { + 
"clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.00029069767441860465, + "grad_norm": 0.016111032400620007, + "kl": 0.0009870529174804688, + "learning_rate": 6.875e-07, + "loss": 0.0013, + "step": 12 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 6572.0, + "completions/max_terminated_length": 6572.0, + "completions/mean_length": 4119.703125, + "completions/mean_terminated_length": 4119.703125, + "completions/min_length": 1356.0, + "completions/min_terminated_length": 1356.0, + "epoch": 0.00031492248062015503, + "grad_norm": 0.019643777904198217, + "kl": 0.0009822845458984375, + "learning_rate": 7.5e-07, + "loss": -0.0008, + "num_tokens": 2367034.0, + "reward": 0.6774564981460571, + "reward_std": 0.3563808798789978, + "rewards/avg_thinking_length_func": 176.69476318359375, + "rewards/correct_answer_reward_func": 0.625, + "rewards/efficient_thinking_reward_func": 0.8704519537344548, + "rewards/format_reward_func": 1.0, + "rewards/num_xml_reward_func": 1.6578426361083984, + "rewards/tool_execution_reward_func": 2.0, + "rewards/visit_tool_reward_func": 0.9361900091171265, + "step": 13 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0003391472868217054, + "grad_norm": 0.0194815826710202, + "kl": 0.0010242462158203125, + "learning_rate": 8.125e-07, + "loss": -0.0008, + "step": 14 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0003633720930232558, + "grad_norm": 0.019402854833833996, + "kl": 0.0010585784912109375, + "learning_rate": 
8.75e-07, + "loss": -0.0008, + "step": 15 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0003875968992248062, + "grad_norm": 0.019438299719581362, + "kl": 0.0011272430419921875, + "learning_rate": 9.374999999999999e-07, + "loss": -0.0008, + "step": 16 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 7597.0, + "completions/max_terminated_length": 7597.0, + "completions/mean_length": 4205.671875, + "completions/mean_terminated_length": 4205.671875, + "completions/min_length": 1507.0, + "completions/min_terminated_length": 1507.0, + "epoch": 0.0004118217054263566, + "grad_norm": 0.014823687168402296, + "kl": 0.0011005401611328125, + "learning_rate": 1e-06, + "loss": 0.0009, + "num_tokens": 2985545.0, + "reward": 0.3260263204574585, + "reward_std": 0.2300996333360672, + "rewards/avg_thinking_length_func": 177.14329528808594, + "rewards/correct_answer_reward_func": 0.375, + "rewards/efficient_thinking_reward_func": 0.8988714947132084, + "rewards/format_reward_func": 1.0, + "rewards/num_xml_reward_func": 1.8095711469650269, + "rewards/tool_execution_reward_func": 1.99609375, + "rewards/visit_tool_reward_func": 0.852025032043457, + "step": 17 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.00043604651162790697, + "grad_norm": 0.014727006858324664, + "kl": 0.0011577606201171875, + "learning_rate": 1.0625e-06, + "loss": 0.0009, + "step": 18 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.00046027131782945736, + "grad_norm": 
0.014837711956269274, + "kl": 0.0012722015380859375, + "learning_rate": 1.125e-06, + "loss": 0.0009, + "step": 19 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.00048449612403100775, + "grad_norm": 0.014894430575329584, + "kl": 0.00146484375, + "learning_rate": 1.1874999999999999e-06, + "loss": 0.0009, + "step": 20 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 7476.0, + "completions/max_terminated_length": 7476.0, + "completions/mean_length": 4097.921875, + "completions/mean_terminated_length": 4097.921875, + "completions/min_length": 1514.0, + "completions/min_terminated_length": 1514.0, + "epoch": 0.0005087209302325581, + "grad_norm": 0.0189498267274778, + "kl": 0.0019931793212890625, + "learning_rate": 1.2499999999999999e-06, + "loss": 0.0003, + "num_tokens": 3561495.0, + "reward": 0.5717383623123169, + "reward_std": 0.33007949590682983, + "rewards/avg_thinking_length_func": 177.5142822265625, + "rewards/correct_answer_reward_func": 0.53125, + "rewards/efficient_thinking_reward_func": 0.8662384906971484, + "rewards/format_reward_func": 0.9937499761581421, + "rewards/num_xml_reward_func": 1.779766321182251, + "rewards/tool_execution_reward_func": 1.979819416999817, + "rewards/visit_tool_reward_func": 0.9004297256469727, + "step": 21 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0005329457364341085, + "grad_norm": 0.019010527717988047, + "kl": 0.00229644775390625, + "learning_rate": 1.3125e-06, + "loss": 0.0003, + "step": 22 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + 
"clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0005571705426356589, + "grad_norm": 0.01910688815244073, + "kl": 0.00276947021484375, + "learning_rate": 1.375e-06, + "loss": 0.0003, + "step": 23 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0005813953488372093, + "grad_norm": 0.019047374161024387, + "kl": 0.00327301025390625, + "learning_rate": 1.4375e-06, + "loss": 0.0003, + "step": 24 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 7779.0, + "completions/max_terminated_length": 7779.0, + "completions/mean_length": 4011.9375, + "completions/mean_terminated_length": 4011.9375, + "completions/min_length": 1884.0, + "completions/min_terminated_length": 1884.0, + "epoch": 0.0006056201550387597, + "grad_norm": 0.01969391991938911, + "kl": 0.0029449462890625, + "learning_rate": 1.5e-06, + "loss": 0.0003, + "num_tokens": 4148002.0, + "reward": 0.4466557502746582, + "reward_std": 0.2478387951850891, + "rewards/avg_thinking_length_func": 174.6974639892578, + "rewards/correct_answer_reward_func": 0.40625, + "rewards/efficient_thinking_reward_func": 0.9054659197504085, + "rewards/format_reward_func": 1.0, + "rewards/num_xml_reward_func": 1.806973934173584, + "rewards/tool_execution_reward_func": 1.9922122955322266, + "rewards/visit_tool_reward_func": 0.871803879737854, + "step": 25 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0006298449612403101, + "grad_norm": 0.01979038843755439, + "kl": 0.003414154052734375, + "learning_rate": 1.5624999999999999e-06, + "loss": 0.0003, + "step": 26 + }, + { + "clip_ratio/high_max": 0.0, 
+ "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0006540697674418605, + "grad_norm": 0.019676702255338734, + "kl": 0.004245758056640625, + "learning_rate": 1.625e-06, + "loss": 0.0003, + "step": 27 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0006782945736434108, + "grad_norm": 0.0198896583655868, + "kl": 0.00508880615234375, + "learning_rate": 1.6875e-06, + "loss": 0.0003, + "step": 28 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 7881.0, + "completions/max_terminated_length": 7881.0, + "completions/mean_length": 4278.0, + "completions/mean_terminated_length": 4278.0, + "completions/min_length": 1269.0, + "completions/min_terminated_length": 1269.0, + "epoch": 0.0007025193798449612, + "grad_norm": 0.02473412222614823, + "kl": 0.00722503662109375, + "learning_rate": 1.75e-06, + "loss": 0.0005, + "num_tokens": 4732732.0, + "reward": 0.639769971370697, + "reward_std": 0.3489268720149994, + "rewards/avg_thinking_length_func": 183.79090881347656, + "rewards/correct_answer_reward_func": 0.640625, + "rewards/efficient_thinking_reward_func": 0.8433743364598003, + "rewards/format_reward_func": 0.9991071224212646, + "rewards/num_xml_reward_func": 1.686936616897583, + "rewards/tool_execution_reward_func": 1.9818710088729858, + "rewards/visit_tool_reward_func": 0.923589289188385, + "step": 29 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0007267441860465116, + "grad_norm": 0.024757116664213524, + "kl": 0.0076904296875, + "learning_rate": 1.8125e-06, + "loss": 
0.0005, + "step": 30 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.000750968992248062, + "grad_norm": 0.02444644630643307, + "kl": 0.0073394775390625, + "learning_rate": 1.8749999999999998e-06, + "loss": 0.0005, + "step": 31 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0007751937984496124, + "grad_norm": 0.024210451469423133, + "kl": 0.007171630859375, + "learning_rate": 1.9375e-06, + "loss": 0.0005, + "step": 32 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 7912.0, + "completions/max_terminated_length": 7912.0, + "completions/mean_length": 4317.890625, + "completions/mean_terminated_length": 4317.890625, + "completions/min_length": 1736.0, + "completions/min_terminated_length": 1736.0, + "epoch": 0.0007994186046511628, + "grad_norm": 0.020658762871057952, + "kl": 0.007049560546875, + "learning_rate": 2e-06, + "loss": -0.0, + "num_tokens": 5347783.0, + "reward": 0.33683592081069946, + "reward_std": 0.32624948024749756, + "rewards/avg_thinking_length_func": 177.01129150390625, + "rewards/correct_answer_reward_func": 0.375, + "rewards/efficient_thinking_reward_func": 0.8817601664392056, + "rewards/format_reward_func": 1.0, + "rewards/num_xml_reward_func": 1.5408036708831787, + "rewards/tool_execution_reward_func": 1.9917367696762085, + "rewards/visit_tool_reward_func": 0.9276807308197021, + "step": 33 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0008236434108527132, + "grad_norm": 0.02072632567074888, + "kl": 
0.0077972412109375, + "learning_rate": 2e-06, + "loss": -0.0, + "step": 34 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0008478682170542636, + "grad_norm": 0.020770020029080613, + "kl": 0.0087432861328125, + "learning_rate": 2e-06, + "loss": -0.0, + "step": 35 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0008720930232558139, + "grad_norm": 0.020487067102301602, + "kl": 0.0097198486328125, + "learning_rate": 2e-06, + "loss": -0.0, + "step": 36 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 7378.0, + "completions/max_terminated_length": 7378.0, + "completions/mean_length": 4152.5, + "completions/mean_terminated_length": 4152.5, + "completions/min_length": 1423.0, + "completions/min_terminated_length": 1423.0, + "epoch": 0.0008963178294573643, + "grad_norm": 0.022364107178309313, + "kl": 0.0112152099609375, + "learning_rate": 2e-06, + "loss": -0.0001, + "num_tokens": 5921090.0, + "reward": 0.6556656360626221, + "reward_std": 0.5008378028869629, + "rewards/avg_thinking_length_func": 170.4791259765625, + "rewards/correct_answer_reward_func": 0.625, + "rewards/efficient_thinking_reward_func": 0.8892575272805912, + "rewards/format_reward_func": 0.987500011920929, + "rewards/num_xml_reward_func": 1.5408031940460205, + "rewards/tool_execution_reward_func": 1.96875, + "rewards/visit_tool_reward_func": 0.9249746799468994, + "step": 37 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0009205426356589147, + "grad_norm": 
0.022597206540891295, + "kl": 0.0123443603515625, + "learning_rate": 2e-06, + "loss": -0.0001, + "step": 38 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0009447674418604651, + "grad_norm": 0.02246679376217943, + "kl": 0.013580322265625, + "learning_rate": 2e-06, + "loss": -0.0001, + "step": 39 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0009689922480620155, + "grad_norm": 0.022296105800735398, + "kl": 0.015106201171875, + "learning_rate": 2e-06, + "loss": -0.0001, + "step": 40 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 7494.0, + "completions/max_terminated_length": 7494.0, + "completions/mean_length": 4562.296875, + "completions/mean_terminated_length": 4562.296875, + "completions/min_length": 2143.0, + "completions/min_terminated_length": 2143.0, + "epoch": 0.0009932170542635659, + "grad_norm": 0.021503135345542313, + "kl": 0.015594482421875, + "learning_rate": 2e-06, + "loss": 0.0007, + "num_tokens": 6556719.0, + "reward": 0.47225743532180786, + "reward_std": 0.3904932141304016, + "rewards/avg_thinking_length_func": 169.57839965820312, + "rewards/correct_answer_reward_func": 0.4375, + "rewards/efficient_thinking_reward_func": 0.917264621947748, + "rewards/format_reward_func": 1.0, + "rewards/num_xml_reward_func": 1.817958116531372, + "rewards/tool_execution_reward_func": 1.9884111881256104, + "rewards/visit_tool_reward_func": 0.9651369452476501, + "step": 41 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 
0.0010174418604651163, + "grad_norm": 0.02149252867250571, + "kl": 0.01715087890625, + "learning_rate": 2e-06, + "loss": 0.0007, + "step": 42 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0010416666666666667, + "grad_norm": 0.02173596902997293, + "kl": 0.018798828125, + "learning_rate": 2e-06, + "loss": 0.0007, + "step": 43 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.001065891472868217, + "grad_norm": 0.02188237517399594, + "kl": 0.020751953125, + "learning_rate": 2e-06, + "loss": 0.0007, + "step": 44 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 9017.0, + "completions/max_terminated_length": 9017.0, + "completions/mean_length": 4664.796875, + "completions/mean_terminated_length": 4664.796875, + "completions/min_length": 1910.0, + "completions/min_terminated_length": 1910.0, + "epoch": 0.0010901162790697674, + "grad_norm": 0.02354857583102173, + "kl": 0.020477294921875, + "learning_rate": 2e-06, + "loss": -0.0014, + "num_tokens": 7181732.0, + "reward": 0.7991669178009033, + "reward_std": 0.36247026920318604, + "rewards/avg_thinking_length_func": 171.8461151123047, + "rewards/correct_answer_reward_func": 0.703125, + "rewards/efficient_thinking_reward_func": 0.8915984372821139, + "rewards/format_reward_func": 0.9998437166213989, + "rewards/num_xml_reward_func": 1.8501074314117432, + "rewards/tool_execution_reward_func": 1.9971354007720947, + "rewards/visit_tool_reward_func": 1.071668028831482, + "step": 45 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + 
"clip_ratio/region_mean": 0.0, + "epoch": 0.0011143410852713178, + "grad_norm": 0.023994471938115103, + "kl": 0.0224609375, + "learning_rate": 2e-06, + "loss": -0.0014, + "step": 46 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0011385658914728682, + "grad_norm": 0.026516939220345738, + "kl": 0.02508544921875, + "learning_rate": 2e-06, + "loss": -0.0014, + "step": 47 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0011627906976744186, + "grad_norm": 0.024485287814160223, + "kl": 0.0262451171875, + "learning_rate": 2e-06, + "loss": -0.0014, + "step": 48 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 8522.0, + "completions/max_terminated_length": 8522.0, + "completions/mean_length": 4866.125, + "completions/mean_terminated_length": 4866.125, + "completions/min_length": 1959.0, + "completions/min_terminated_length": 1959.0, + "epoch": 0.001187015503875969, + "grad_norm": 0.02407332594201, + "kl": 0.032012939453125, + "learning_rate": 2e-06, + "loss": 0.0014, + "num_tokens": 7868034.0, + "reward": 0.39128515124320984, + "reward_std": 0.3533371090888977, + "rewards/avg_thinking_length_func": 164.74734497070312, + "rewards/correct_answer_reward_func": 0.359375, + "rewards/efficient_thinking_reward_func": 0.9209367558816545, + "rewards/format_reward_func": 1.0, + "rewards/num_xml_reward_func": 1.6406757831573486, + "rewards/tool_execution_reward_func": 1.98927903175354, + "rewards/visit_tool_reward_func": 1.0120830535888672, + "step": 49 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + 
"clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0012112403100775194, + "grad_norm": 0.02479690454991753, + "kl": 0.035888671875, + "learning_rate": 2e-06, + "loss": 0.0014, + "step": 50 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0012354651162790698, + "grad_norm": 0.027012142633289393, + "kl": 0.04046630859375, + "learning_rate": 2e-06, + "loss": 0.0014, + "step": 51 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0012596899224806201, + "grad_norm": 0.026499465739179152, + "kl": 0.04803466796875, + "learning_rate": 2e-06, + "loss": 0.0014, + "step": 52 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 7622.0, + "completions/max_terminated_length": 7622.0, + "completions/mean_length": 4509.75, + "completions/mean_terminated_length": 4509.75, + "completions/min_length": 1816.0, + "completions/min_terminated_length": 1816.0, + "epoch": 0.0012839147286821705, + "grad_norm": 0.019741394516818018, + "kl": 0.04510498046875, + "learning_rate": 2e-06, + "loss": 0.0, + "num_tokens": 8481102.0, + "reward": 0.7655854225158691, + "reward_std": 0.27847254276275635, + "rewards/avg_thinking_length_func": 158.9434051513672, + "rewards/correct_answer_reward_func": 0.671875, + "rewards/efficient_thinking_reward_func": 0.884494477975468, + "rewards/format_reward_func": 1.0, + "rewards/num_xml_reward_func": 1.8834664821624756, + "rewards/tool_execution_reward_func": 2.0, + "rewards/visit_tool_reward_func": 1.1049017906188965, + "step": 53 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 
0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.001308139534883721, + "grad_norm": 0.028517188784132036, + "kl": 0.06060791015625, + "learning_rate": 2e-06, + "loss": 0.0001, + "step": 54 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0013323643410852713, + "grad_norm": 0.02643367822401968, + "kl": 0.06280517578125, + "learning_rate": 2e-06, + "loss": 0.0001, + "step": 55 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0013565891472868217, + "grad_norm": 0.020594752118506976, + "kl": 0.056884765625, + "learning_rate": 2e-06, + "loss": 0.0001, + "step": 56 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 7375.0, + "completions/max_terminated_length": 7375.0, + "completions/mean_length": 4285.046875, + "completions/mean_terminated_length": 4285.046875, + "completions/min_length": 2418.0, + "completions/min_terminated_length": 2418.0, + "epoch": 0.001380813953488372, + "grad_norm": 0.019100627823517295, + "kl": 0.06195068359375, + "learning_rate": 2e-06, + "loss": 0.0005, + "num_tokens": 9112297.0, + "reward": 0.5274717807769775, + "reward_std": 0.2380232810974121, + "rewards/avg_thinking_length_func": 145.75924682617188, + "rewards/correct_answer_reward_func": 0.453125, + "rewards/efficient_thinking_reward_func": 0.9274070198828231, + "rewards/format_reward_func": 1.0, + "rewards/num_xml_reward_func": 1.7929463386535645, + "rewards/tool_execution_reward_func": 1.9959805011749268, + "rewards/visit_tool_reward_func": 1.0335674285888672, + "step": 57 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 
0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0014050387596899225, + "grad_norm": 0.019834849658967178, + "kl": 0.06695556640625, + "learning_rate": 2e-06, + "loss": 0.0005, + "step": 58 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0014292635658914728, + "grad_norm": 0.020359737753586633, + "kl": 0.0740966796875, + "learning_rate": 2e-06, + "loss": 0.0005, + "step": 59 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0014534883720930232, + "grad_norm": 0.020904893352951728, + "kl": 0.085693359375, + "learning_rate": 2e-06, + "loss": 0.0005, + "step": 60 + } + ], + "logging_steps": 1, + "max_steps": 640, + "num_input_tokens_seen": 9112297, + "num_train_epochs": 1, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..3c8412c --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ae74d09b5b242d5ca59c2266e1297852a0f23aabeea82e2a73b716a08ef1d73 +size 8465 diff --git a/vocab.json b/vocab.json new file mode 100644 index 0000000..6c49fc6 --- /dev/null +++ b/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910 +size 2776833