[AMD] Fix missing per_token_group_quant_fp8 for ROCm (#5140)

This commit is contained in:
Hubert Lu
2025-04-07 22:38:25 -07:00
committed by GitHub
parent 9731eca77b
commit afb752bcbe

View File

@@ -765,6 +765,8 @@ def invoke_fused_moe_kernel(
from sglang.srt.layers.quantization.fp8_kernel import (
sglang_per_token_group_quant_fp8,
)
else:
from sglang.srt.layers.quantization.fp8_kernel import per_token_group_quant_fp8
assert topk_weights.stride(1) == 1
assert sorted_token_ids.stride(0) == 1