From abb6781573a86c7e7b22e41fd2924094a7d4a135 Mon Sep 17 00:00:00 2001 From: Yuxuan Zhang <2448370773@qq.com> Date: Mon, 29 Sep 2025 02:21:27 +0800 Subject: [PATCH] Update GLM-4.5 Model Doc (#11017) --- python/sglang/srt/function_call/glm4_moe_detector.py | 6 +++--- python/sglang/srt/models/glm4_moe.py | 6 +++--- python/sglang/srt/models/glm4_moe_nextn.py | 4 ++-- sgl-router/src/reasoning_parser/README.md | 2 +- sgl-router/src/tool_parser/registry.rs | 5 ++--- 5 files changed, 11 insertions(+), 12 deletions(-) diff --git a/python/sglang/srt/function_call/glm4_moe_detector.py b/python/sglang/srt/function_call/glm4_moe_detector.py index 6e89fe0a1..845b5d41f 100644 --- a/python/sglang/srt/function_call/glm4_moe_detector.py +++ b/python/sglang/srt/function_call/glm4_moe_detector.py @@ -39,7 +39,7 @@ def parse_arguments(json_value): class Glm4MoeDetector(BaseFormatDetector): """ - Detector for GLM-4.5 models. + Detector for GLM-4.5 and GLM-4.6 models. Assumes function call format: get_weather\ncity\n北京\ndate\n2024-06-27\n\nget_weather\ncity\n上海\ndate\n2024-06-27\n """ @@ -53,7 +53,7 @@ class Glm4MoeDetector(BaseFormatDetector): self.func_arg_regex = r"(.*?)\s*(.*?)" def has_tool_call(self, text: str) -> bool: - """Check if the text contains a glm-4.5 format tool call.""" + """Check if the text contains a glm-4.5 / glm-4.6 format tool call.""" return self.bot_token in text def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult: @@ -102,7 +102,7 @@ class Glm4MoeDetector(BaseFormatDetector): self, new_text: str, tools: List[Tool] ) -> StreamingParseResult: """ - Streaming incremental parsing tool calls for GLM-4.5 format. + Streaming incremental parsing tool calls for GLM-4.5 and GLM-4.6 format. 
""" self._buffer += new_text current_text = self._buffer diff --git a/python/sglang/srt/models/glm4_moe.py b/python/sglang/srt/models/glm4_moe.py index 867ffe91b..d4cc9e1e6 100644 --- a/python/sglang/srt/models/glm4_moe.py +++ b/python/sglang/srt/models/glm4_moe.py @@ -12,7 +12,7 @@ # limitations under the License. # ============================================================================== -"""Inference-only GLM-4.5 model compatible with HuggingFace weights""" +"""Inference-only GLM-4.5, GLM-4.6 model compatible with HuggingFace weights""" import logging from typing import Any, Dict, Iterable, Optional, Tuple @@ -785,9 +785,9 @@ class Glm4MoeForCausalLM(DeepseekV2ForCausalLM): or self.config.architectures[0] != architecture or self.config.n_shared_experts != 1 ): - disable_reason = "Only GLM-4.5 on NV-platform with capability >= 80 can use shared experts fusion optimization." + disable_reason = "Only GLM-4.5 or GLM-4.6 on NV-platform with capability >= 80 can use shared experts fusion optimization." elif get_moe_expert_parallel_world_size() > 1: - disable_reason = "Deepseek and GLM-4.5 can not use shared experts fusion optimization under expert parallelism." + disable_reason = "Deepseek and GLM-4.5 or GLM-4.6 can not use shared experts fusion optimization under expert parallelism." if disable_reason is not None: global_server_args_dict["disable_shared_experts_fusion"] = True diff --git a/python/sglang/srt/models/glm4_moe_nextn.py b/python/sglang/srt/models/glm4_moe_nextn.py index 399f0f4e0..4816f5775 100644 --- a/python/sglang/srt/models/glm4_moe_nextn.py +++ b/python/sglang/srt/models/glm4_moe_nextn.py @@ -12,7 +12,7 @@ # limitations under the License. 
# ============================================================================== -"""Inference-only GLM-4.5 NextN Speculative Decoding.""" +"""Inference-only GLM-4.5, GLM-4.6 NextN Speculative Decoding.""" import logging from typing import Iterable, Optional, Tuple @@ -48,7 +48,7 @@ class Glm4MoeModelNextN(nn.Module): super().__init__() if quant_config is not None and quant_config.get_name() == "modelopt_fp4": logger.warning( - "Overriding Glm4MoeForCausalLMNextN quant config for modelopt_fp4 GLM-4.5 model." + "Overriding Glm4MoeForCausalLMNextN quant config for modelopt_fp4 GLM-4.5 / GLM-4.6 model." ) quant_config = None diff --git a/sgl-router/src/reasoning_parser/README.md b/sgl-router/src/reasoning_parser/README.md index 92a6ffce7..763028f0c 100644 --- a/sgl-router/src/reasoning_parser/README.md +++ b/sgl-router/src/reasoning_parser/README.md @@ -325,7 +325,7 @@ classDiagram - `qwen3`: Qwen3 base model (initial_in_reasoning=false) - `qwen3_thinking`: Qwen3 thinking variant (initial_in_reasoning=true) - `kimi`: Kimi with Unicode tokens -- `glm45`: GLM-4.5 parser +- `glm45`: GLM-4.5 / GLM-4.6 parser - `step3`: Step3 parser - `passthrough`: No-op fallback parser diff --git a/sgl-router/src/tool_parser/registry.rs b/sgl-router/src/tool_parser/registry.rs index f694d680c..a21640443 100644 --- a/sgl-router/src/tool_parser/registry.rs +++ b/sgl-router/src/tool_parser/registry.rs @@ -180,10 +180,9 @@ impl ParserRegistry { self.map_model("deepseek-*", "pythonic"); // GLM models - // GLM-4 MoE uses XML-style format - self.map_model("glm-4-moe*", "glm4_moe"); - self.map_model("THUDM/glm-4-moe*", "glm4_moe"); + // GLM-4.5 and GLM-4.6 use the XML-style format self.map_model("glm-4.5*", "glm4_moe"); + self.map_model("glm-4.6*", "glm4_moe"); // Other GLM models may use JSON self.map_model("glm-*", "json");