Update GLM-4.5 Model Doc (#11017)
This commit is contained in:
@@ -39,7 +39,7 @@ def parse_arguments(json_value):
|
|||||||
|
|
||||||
class Glm4MoeDetector(BaseFormatDetector):
|
class Glm4MoeDetector(BaseFormatDetector):
|
||||||
"""
|
"""
|
||||||
Detector for GLM-4.5 models.
|
Detector for GLM-4.5 and GLM-4.6 models.
|
||||||
Assumes function call format:
|
Assumes function call format:
|
||||||
<tool_call>get_weather\n<arg_key>city</arg_key>\n<arg_value>北京</arg_value>\n<arg_key>date</arg_key>\n<arg_value>2024-06-27</arg_value>\n</tool_call>\n<tool_call>get_weather\n<arg_key>city</arg_key>\n<arg_value>上海</arg_value>\n<arg_key>date</arg_key>\n<arg_value>2024-06-27</arg_value>\n</tool_call>
|
<tool_call>get_weather\n<arg_key>city</arg_key>\n<arg_value>北京</arg_value>\n<arg_key>date</arg_key>\n<arg_value>2024-06-27</arg_value>\n</tool_call>\n<tool_call>get_weather\n<arg_key>city</arg_key>\n<arg_value>上海</arg_value>\n<arg_key>date</arg_key>\n<arg_value>2024-06-27</arg_value>\n</tool_call>
|
||||||
"""
|
"""
|
||||||
@@ -53,7 +53,7 @@ class Glm4MoeDetector(BaseFormatDetector):
|
|||||||
self.func_arg_regex = r"<arg_key>(.*?)</arg_key>\s*<arg_value>(.*?)</arg_value>"
|
self.func_arg_regex = r"<arg_key>(.*?)</arg_key>\s*<arg_value>(.*?)</arg_value>"
|
||||||
|
|
||||||
def has_tool_call(self, text: str) -> bool:
|
def has_tool_call(self, text: str) -> bool:
|
||||||
"""Check if the text contains a glm-4.5 format tool call."""
|
"""Check if the text contains a glm-4.5 / glm-4.6 format tool call."""
|
||||||
return self.bot_token in text
|
return self.bot_token in text
|
||||||
|
|
||||||
def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult:
|
def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult:
|
||||||
@@ -102,7 +102,7 @@ class Glm4MoeDetector(BaseFormatDetector):
|
|||||||
self, new_text: str, tools: List[Tool]
|
self, new_text: str, tools: List[Tool]
|
||||||
) -> StreamingParseResult:
|
) -> StreamingParseResult:
|
||||||
"""
|
"""
|
||||||
Streaming incremental parsing tool calls for GLM-4.5 format.
|
Incrementally parse streaming tool calls in the GLM-4.5 / GLM-4.6 format.
|
||||||
"""
|
"""
|
||||||
self._buffer += new_text
|
self._buffer += new_text
|
||||||
current_text = self._buffer
|
current_text = self._buffer
|
||||||
|
|||||||
@@ -12,7 +12,7 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
|
||||||
"""Inference-only GLM-4.5 model compatible with HuggingFace weights"""
|
"""Inference-only GLM-4.5 and GLM-4.6 models compatible with HuggingFace weights"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Any, Dict, Iterable, Optional, Tuple
|
from typing import Any, Dict, Iterable, Optional, Tuple
|
||||||
@@ -785,9 +785,9 @@ class Glm4MoeForCausalLM(DeepseekV2ForCausalLM):
|
|||||||
or self.config.architectures[0] != architecture
|
or self.config.architectures[0] != architecture
|
||||||
or self.config.n_shared_experts != 1
|
or self.config.n_shared_experts != 1
|
||||||
):
|
):
|
||||||
disable_reason = "Only GLM-4.5 on NV-platform with capability >= 80 can use shared experts fusion optimization."
|
disable_reason = "Only GLM-4.5 or GLM-4.6 on NV-platform with capability >= 80 can use shared experts fusion optimization."
|
||||||
elif get_moe_expert_parallel_world_size() > 1:
|
elif get_moe_expert_parallel_world_size() > 1:
|
||||||
disable_reason = "Deepseek and GLM-4.5 can not use shared experts fusion optimization under expert parallelism."
|
disable_reason = "Deepseek and GLM-4.5 or GLM-4.6 can not use shared experts fusion optimization under expert parallelism."
|
||||||
|
|
||||||
if disable_reason is not None:
|
if disable_reason is not None:
|
||||||
global_server_args_dict["disable_shared_experts_fusion"] = True
|
global_server_args_dict["disable_shared_experts_fusion"] = True
|
||||||
|
|||||||
@@ -12,7 +12,7 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
|
||||||
"""Inference-only GLM-4.5 NextN Speculative Decoding."""
|
"""Inference-only GLM-4.5 and GLM-4.6 NextN Speculative Decoding."""
|
||||||
import logging
|
import logging
|
||||||
from typing import Iterable, Optional, Tuple
|
from typing import Iterable, Optional, Tuple
|
||||||
|
|
||||||
@@ -48,7 +48,7 @@ class Glm4MoeModelNextN(nn.Module):
|
|||||||
super().__init__()
|
super().__init__()
|
||||||
if quant_config is not None and quant_config.get_name() == "modelopt_fp4":
|
if quant_config is not None and quant_config.get_name() == "modelopt_fp4":
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Overriding Glm4MoeForCausalLMNextN quant config for modelopt_fp4 GLM-4.5 model."
|
"Overriding Glm4MoeForCausalLMNextN quant config for modelopt_fp4 GLM-4.5 / GLM-4.6 model."
|
||||||
)
|
)
|
||||||
quant_config = None
|
quant_config = None
|
||||||
|
|
||||||
|
|||||||
@@ -325,7 +325,7 @@ classDiagram
|
|||||||
- `qwen3`: Qwen3 base model (initial_in_reasoning=false)
|
- `qwen3`: Qwen3 base model (initial_in_reasoning=false)
|
||||||
- `qwen3_thinking`: Qwen3 thinking variant (initial_in_reasoning=true)
|
- `qwen3_thinking`: Qwen3 thinking variant (initial_in_reasoning=true)
|
||||||
- `kimi`: Kimi with Unicode tokens
|
- `kimi`: Kimi with Unicode tokens
|
||||||
- `glm45`: GLM-4.5 parser
|
- `glm45`: GLM-4.5 / GLM-4.6 parser
|
||||||
- `step3`: Step3 parser
|
- `step3`: Step3 parser
|
||||||
- `passthrough`: No-op fallback parser
|
- `passthrough`: No-op fallback parser
|
||||||
|
|
||||||
|
|||||||
@@ -180,10 +180,9 @@ impl ParserRegistry {
|
|||||||
self.map_model("deepseek-*", "pythonic");
|
self.map_model("deepseek-*", "pythonic");
|
||||||
|
|
||||||
// GLM models
|
// GLM models
|
||||||
// GLM-4 MoE uses XML-style format
|
// GLM-4.5 and GLM-4.6 use an XML-style format
|
||||||
self.map_model("glm-4-moe*", "glm4_moe");
|
|
||||||
self.map_model("THUDM/glm-4-moe*", "glm4_moe");
|
|
||||||
self.map_model("glm-4.5*", "glm4_moe");
|
self.map_model("glm-4.5*", "glm4_moe");
|
||||||
|
self.map_model("glm-4.6*", "glm4_moe");
|
||||||
// Other GLM models may use JSON
|
// Other GLM models may use JSON
|
||||||
self.map_model("glm-*", "json");
|
self.map_model("glm-*", "json");
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user