[Bugfix] Qwen3MoE aclrtMemcpy failed with NPUGraph (#10013)

This commit is contained in:
Even Zhou
2025-09-08 12:50:49 +08:00
committed by GitHub
parent 8116804e4f
commit b67c277f86
5 changed files with 180 additions and 10 deletions

View File

@@ -17,7 +17,11 @@ from sglang.srt.layers.quantization.base_config import (
from sglang.srt.layers.quantization.fp8 import Fp8LinearMethod
from sglang.srt.layers.quantization.unquant import UnquantizedLinearMethod
from sglang.srt.layers.quantization.utils import is_layer_skipped
-from sglang.srt.utils import set_weight_attrs
+from sglang.srt.utils import is_npu, set_weight_attrs
+
+_is_npu = is_npu()
+if not _is_npu:
+    from sglang.srt.layers.moe.cutlass_w4a8_moe import cutlass_w4a8_moe

if TYPE_CHECKING:
    from sglang.srt.layers.moe import MoeRunnerConfig