diff --git a/README.md b/README.md
index de2b05fcc..b41389671 100644
--- a/README.md
+++ b/README.md
@@ -303,6 +303,7 @@ You can view the full example [here](https://github.com/sgl-project/sglang/tree/
 - MiniCPM / MiniCPM 3
 - XVERSE / XVERSE MoE
 - SmolLM
+- GLM-4
 
 **Embedding Models**
 
diff --git a/python/sglang/srt/models/chatglm.py b/python/sglang/srt/models/chatglm.py
index e9110d067..c564b6cf5 100644
--- a/python/sglang/srt/models/chatglm.py
+++ b/python/sglang/srt/models/chatglm.py
@@ -303,7 +303,7 @@ class GLMTransformer(nn.Module):
         return hidden_states
 
 
-class ChatGLMModel(nn.Module):
+class ChatGLMM(nn.Module):
     def __init__(
         self,
         config,
@@ -366,7 +366,7 @@ class ChatGLMForCausalLM(nn.Module):
         self.config: ChatGLMConfig = config
         self.quant_config = quant_config
         self.max_position_embeddings = getattr(config, "max_sequence_length", 8192)
-        self.transformer = ChatGLMModel(config, cache_config, quant_config)
+        self.transformer = ChatGLMM(config, cache_config, quant_config)
         self.lm_head = self.transformer.output_layer
         self.logits_processor = LogitsProcessor(config)
 
@@ -401,4 +401,4 @@ class ChatGLMModel(ChatGLMForCausalLM):
     pass
 
 
-EntryClass = [ChatGLMForCausalLM, ChatGLMModel]
+EntryClass = [ChatGLMModel]
diff --git a/test/srt/models/test_generation_models.py b/test/srt/models/test_generation_models.py
index 4d05eab8d..ba4c05ee4 100755
--- a/test/srt/models/test_generation_models.py
+++ b/test/srt/models/test_generation_models.py
@@ -57,6 +57,7 @@ ALL_OTHER_MODELS = [
     ModelCase("Qwen/Qwen2.5-14B-Instruct"),
     ModelCase("HuggingFaceTB/SmolLM-135M-Instruct", skip_long_prompt=True),
     ModelCase("allenai/OLMo-1B-0724-hf", decode_tolerance=8e-2, skip_long_prompt=True),
+    ModelCase("THUDM/glm-4-9b-chat"),
 ]
 
 TORCH_DTYPES = [torch.float16]