[Fix] MoE: fix w8a8_fp8 MoE and add tests to cover this code path (#10429)

This commit is contained in:
Cheng Wan
2025-09-14 17:34:28 -07:00
committed by GitHub
parent 7ce6c10eb6
commit 2f8ba6fe82
2 changed files with 44 additions and 8 deletions

View File

@@ -5,6 +5,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional
import torch
from torch.nn.parameter import Parameter
+from sglang.srt.layers.moe import MoeRunner, MoeRunnerBackend, MoeRunnerConfig
from sglang.srt.layers.moe.moe_runner.triton import TritonMoeQuantInfo
from sglang.srt.layers.parameter import ChannelQuantScaleParameter, ModelWeightParameter
from sglang.srt.layers.quantization.base_config import (
@@ -27,7 +28,6 @@ from sglang.srt.layers.quantization.fp8_utils import (
from sglang.srt.utils import set_weight_attrs
if TYPE_CHECKING:
-from sglang.srt.layers.moe import MoeRunner, MoeRunnerBackend, MoeRunnerConfig
from sglang.srt.layers.moe.token_dispatcher import (
CombineInput,
StandardDispatchOutput,