Iluvatar-mrv100 SDK 4.3.0

2025-09-15 14:58:11 +08:00
parent 9efe891f99
commit 8af8290b1d
1052 changed files with 294967 additions and 1 deletions
--- a/vllm/model_executor/models/glm.py
+++ b/vllm/model_executor/models/glm.py
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: Apache-2.0
+"""Inference-only HF format GLM-4 model compatible with THUDM weights."""
+from vllm.config import VllmConfig
+from vllm.model_executor.models.llama import LlamaForCausalLM
+
+from .interfaces import SupportsV0Only
+from .utils import PPMissingLayer
+
+
+class GlmForCausalLM(LlamaForCausalLM, SupportsV0Only):
+
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__(vllm_config=vllm_config, prefix=prefix)
+        # Hack Llama model to fit HF format GLM implementation
+        # Attention difference between GLM and Llama:
+        # 1. Half partial rotary_dim and no Neox style.
+        # 2. There is no bias for o_proj in attention
+        for layer in self.model.layers:
+            if not isinstance(layer, PPMissingLayer):
+                layer.self_attn.rotary_emb.rotary_dim //= 2
+                layer.self_attn.rotary_emb.is_neox_style = False
+                layer.self_attn.o_proj.bias = None
+                layer.self_attn.o_proj.skip_bias_add = True