From afb752bcbeb192c71effaf8bd84b0314e69a6584 Mon Sep 17 00:00:00 2001 From: Hubert Lu <55214931+hubertlu-tw@users.noreply.github.com> Date: Mon, 7 Apr 2025 22:38:25 -0700 Subject: [PATCH] [AMD] Fix missing per_token_group_quant_fp8 for ROCm (#5140) --- python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py b/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py index 588173cec..c9e7547bf 100644 --- a/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +++ b/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py @@ -765,6 +765,8 @@ def invoke_fused_moe_kernel( from sglang.srt.layers.quantization.fp8_kernel import ( sglang_per_token_group_quant_fp8, ) + else: + from sglang.srt.layers.quantization.fp8_kernel import per_token_group_quant_fp8 assert topk_weights.stride(1) == 1 assert sorted_token_ids.stride(0) == 1