From 400d3b97aebc5f923fc9607ebea049cd58a812bd Mon Sep 17 00:00:00 2001
From: kk <43161300+kkHuang-amd@users.noreply.github.com>
Date: Mon, 8 Sep 2025 11:45:17 +0800
Subject: [PATCH] Fix runtime error in dsv3-fp8 model on mi35x (#10104)

Co-authored-by: wunhuang <wunhuang@amd.com>
Co-authored-by: HaiShaw <hixiao@gmail.com>
Co-authored-by: Lianmin Zheng <lianminzheng@gmail.com>
---
 python/sglang/srt/models/deepseek_v2.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py
index 05b5490f8..252d08d8b 100644
--- a/python/sglang/srt/models/deepseek_v2.py
+++ b/python/sglang/srt/models/deepseek_v2.py
@@ -249,7 +249,11 @@ class DeepseekV2MLP(nn.Module):
         if (self.tp_size == 1) and x.shape[0] == 0:
             return x
 
-        if gemm_output_zero_allocator != None and x.shape[0] <= 256:
+        if (
+            gemm_output_zero_allocator is not None
+            and x.shape[0] <= 256
+            and self.gate_up_proj.weight.dtype == torch.uint8
+        ):
             y = gemm_output_zero_allocator.allocate(
                 x.shape[0] * self.gate_up_proj.output_size_per_partition
             ).view(x.shape[0], self.gate_up_proj.output_size_per_partition)