Add GLM-4 TextGeneration Model support for SGLang (#1736)
This commit is contained in:
@@ -303,6 +303,7 @@ You can view the full example [here](https://github.com/sgl-project/sglang/tree/
|
|||||||
- MiniCPM / MiniCPM 3
|
- MiniCPM / MiniCPM 3
|
||||||
- XVERSE / XVERSE MoE
|
- XVERSE / XVERSE MoE
|
||||||
- SmolLM
|
- SmolLM
|
||||||
|
- GLM-4
|
||||||
|
|
||||||
**Embedding Models**
|
**Embedding Models**
|
||||||
|
|
||||||
|
|||||||
@@ -303,7 +303,7 @@ class GLMTransformer(nn.Module):
|
|||||||
return hidden_states
|
return hidden_states
|
||||||
|
|
||||||
|
|
||||||
class ChatGLMModel(nn.Module):
|
class ChatGLMM(nn.Module):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
config,
|
config,
|
||||||
@@ -366,7 +366,7 @@ class ChatGLMForCausalLM(nn.Module):
|
|||||||
self.config: ChatGLMConfig = config
|
self.config: ChatGLMConfig = config
|
||||||
self.quant_config = quant_config
|
self.quant_config = quant_config
|
||||||
self.max_position_embeddings = getattr(config, "max_sequence_length", 8192)
|
self.max_position_embeddings = getattr(config, "max_sequence_length", 8192)
|
||||||
self.transformer = ChatGLMModel(config, cache_config, quant_config)
|
self.transformer = ChatGLMM(config, cache_config, quant_config)
|
||||||
self.lm_head = self.transformer.output_layer
|
self.lm_head = self.transformer.output_layer
|
||||||
self.logits_processor = LogitsProcessor(config)
|
self.logits_processor = LogitsProcessor(config)
|
||||||
|
|
||||||
@@ -401,4 +401,4 @@ class ChatGLMModel(ChatGLMForCausalLM):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
EntryClass = [ChatGLMForCausalLM, ChatGLMModel]
|
EntryClass = [ChatGLMModel]
|
||||||
|
|||||||
@@ -57,6 +57,7 @@ ALL_OTHER_MODELS = [
|
|||||||
ModelCase("Qwen/Qwen2.5-14B-Instruct"),
|
ModelCase("Qwen/Qwen2.5-14B-Instruct"),
|
||||||
ModelCase("HuggingFaceTB/SmolLM-135M-Instruct", skip_long_prompt=True),
|
ModelCase("HuggingFaceTB/SmolLM-135M-Instruct", skip_long_prompt=True),
|
||||||
ModelCase("allenai/OLMo-1B-0724-hf", decode_tolerance=8e-2, skip_long_prompt=True),
|
ModelCase("allenai/OLMo-1B-0724-hf", decode_tolerance=8e-2, skip_long_prompt=True),
|
||||||
|
ModelCase("THUDM/glm-4-9b-chat"),
|
||||||
]
|
]
|
||||||
|
|
||||||
TORCH_DTYPES = [torch.float16]
|
TORCH_DTYPES = [torch.float16]
|
||||||
|
|||||||
Reference in New Issue
Block a user