From abb6781573a86c7e7b22e41fd2924094a7d4a135 Mon Sep 17 00:00:00 2001 From: Yuxuan Zhang <2448370773@qq.com> Date: Mon, 29 Sep 2025 02:21:27 +0800 Subject: [PATCH] Update GLM-4.5 Model Doc (#11017) --- python/sglang/srt/function_call/glm4_moe_detector.py | 6 +++--- python/sglang/srt/models/glm4_moe.py | 6 +++--- python/sglang/srt/models/glm4_moe_nextn.py | 4 ++-- sgl-router/src/reasoning_parser/README.md | 2 +- sgl-router/src/tool_parser/registry.rs | 5 ++--- 5 files changed, 11 insertions(+), 12 deletions(-) diff --git a/python/sglang/srt/function_call/glm4_moe_detector.py b/python/sglang/srt/function_call/glm4_moe_detector.py index 6e89fe0a1..845b5d41f 100644 --- a/python/sglang/srt/function_call/glm4_moe_detector.py +++ b/python/sglang/srt/function_call/glm4_moe_detector.py @@ -39,7 +39,7 @@ def parse_arguments(json_value): class Glm4MoeDetector(BaseFormatDetector): """ - Detector for GLM-4.5 models. + Detector for GLM-4.5 and GLM-4.6 models. Assumes function call format: get_weather\ncity\n北京\ndate\n2024-06-27\n\nget_weather\ncity\n上海\ndate\n2024-06-27\n """ @@ -53,7 +53,7 @@ class Glm4MoeDetector(BaseFormatDetector): self.func_arg_regex = r"(.*?)\s*(.*?)" def has_tool_call(self, text: str) -> bool: - """Check if the text contains a glm-4.5 format tool call.""" + """Check if the text contains a glm-4.5 / glm-4.6 format tool call.""" return self.bot_token in text def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult: @@ -102,7 +102,7 @@ class Glm4MoeDetector(BaseFormatDetector): self, new_text: str, tools: List[Tool] ) -> StreamingParseResult: """ - Streaming incremental parsing tool calls for GLM-4.5 format. + Streaming incremental parsing tool calls for GLM-4.5 and GLM-4.6 format. 
""" self._buffer += new_text current_text = self._buffer diff --git a/python/sglang/srt/models/glm4_moe.py b/python/sglang/srt/models/glm4_moe.py index 867ffe91b..d4cc9e1e6 100644 --- a/python/sglang/srt/models/glm4_moe.py +++ b/python/sglang/srt/models/glm4_moe.py @@ -12,7 +12,7 @@ # limitations under the License. # ============================================================================== -"""Inference-only GLM-4.5 model compatible with HuggingFace weights""" +"""Inference-only GLM-4.5, GLM-4.6 model compatible with HuggingFace weights""" import logging from typing import Any, Dict, Iterable, Optional, Tuple @@ -785,9 +785,9 @@ class Glm4MoeForCausalLM(DeepseekV2ForCausalLM): or self.config.architectures[0] != architecture or self.config.n_shared_experts != 1 ): - disable_reason = "Only GLM-4.5 on NV-platform with capability >= 80 can use shared experts fusion optimization." + disable_reason = "Only GLM-4.5 or GLM-4.6 on NV-platform with capability >= 80 can use shared experts fusion optimization." elif get_moe_expert_parallel_world_size() > 1: - disable_reason = "Deepseek and GLM-4.5 can not use shared experts fusion optimization under expert parallelism." + disable_reason = "Deepseek and GLM-4.5 or GLM-4.6 can not use shared experts fusion optimization under expert parallelism." if disable_reason is not None: global_server_args_dict["disable_shared_experts_fusion"] = True diff --git a/python/sglang/srt/models/glm4_moe_nextn.py b/python/sglang/srt/models/glm4_moe_nextn.py index 399f0f4e0..4816f5775 100644 --- a/python/sglang/srt/models/glm4_moe_nextn.py +++ b/python/sglang/srt/models/glm4_moe_nextn.py @@ -12,7 +12,7 @@ # limitations under the License. 
# ============================================================================== -"""Inference-only GLM-4.5 NextN Speculative Decoding.""" +"""Inference-only GLM-4.5, GLM-4.6 NextN Speculative Decoding.""" import logging from typing import Iterable, Optional, Tuple @@ -48,7 +48,7 @@ class Glm4MoeModelNextN(nn.Module): super().__init__() if quant_config is not None and quant_config.get_name() == "modelopt_fp4": logger.warning( - "Overriding Glm4MoeForCausalLMNextN quant config for modelopt_fp4 GLM-4.5 model." + "Overriding Glm4MoeForCausalLMNextN quant config for modelopt_fp4 GLM-4.5 / GLM-4.6 model." ) quant_config = None diff --git a/sgl-router/src/reasoning_parser/README.md b/sgl-router/src/reasoning_parser/README.md index 92a6ffce7..763028f0c 100644 --- a/sgl-router/src/reasoning_parser/README.md +++ b/sgl-router/src/reasoning_parser/README.md @@ -325,7 +325,7 @@ classDiagram - `qwen3`: Qwen3 base model (initial_in_reasoning=false) - `qwen3_thinking`: Qwen3 thinking variant (initial_in_reasoning=true) - `kimi`: Kimi with Unicode tokens -- `glm45`: GLM-4.5 parser +- `glm45`: GLM-4.5 / GLM-4.6 parser - `step3`: Step3 parser - `passthrough`: No-op fallback parser diff --git a/sgl-router/src/tool_parser/registry.rs b/sgl-router/src/tool_parser/registry.rs index f694d680c..a21640443 100644 --- a/sgl-router/src/tool_parser/registry.rs +++ b/sgl-router/src/tool_parser/registry.rs @@ -180,10 +180,9 @@ impl ParserRegistry { self.map_model("deepseek-*", "pythonic"); // GLM models - // GLM-4 MoE uses XML-style format - self.map_model("glm-4-moe*", "glm4_moe"); - self.map_model("THUDM/glm-4-moe*", "glm4_moe"); + // GLM-4.5 and GLM-4.6 use the XML-style format self.map_model("glm-4.5*", "glm4_moe"); + self.map_model("glm-4.6*", "glm4_moe"); // Other GLM models may use JSON self.map_model("glm-*", "json");