[Bugfix] Qwen3MoE aclrtMemcpy failed with NPUGraph (#10013)
This commit is contained in:
@@ -17,7 +17,11 @@ from sglang.srt.layers.quantization.base_config import (
 from sglang.srt.layers.quantization.fp8 import Fp8LinearMethod
 from sglang.srt.layers.quantization.unquant import UnquantizedLinearMethod
 from sglang.srt.layers.quantization.utils import is_layer_skipped
-from sglang.srt.utils import set_weight_attrs
+from sglang.srt.utils import is_npu, set_weight_attrs
+
+_is_npu = is_npu()
+if not _is_npu:
+    from sglang.srt.layers.moe.cutlass_w4a8_moe import cutlass_w4a8_moe
 
 if TYPE_CHECKING:
     from sglang.srt.layers.moe import MoeRunnerConfig
Reference in New Issue
Block a user