diff --git a/python/sglang/srt/layers/quantization/fp8.py b/python/sglang/srt/layers/quantization/fp8.py
index f2c0d6139..fa67bba4d 100644
--- a/python/sglang/srt/layers/quantization/fp8.py
+++ b/python/sglang/srt/layers/quantization/fp8.py
@@ -88,7 +88,7 @@ _is_fp8_fnuz = is_fp8_fnuz()
 _use_hip_int4 = get_bool_env_var("SGLANG_INT4_WEIGHT")
 _use_aiter = get_bool_env_var("SGLANG_USE_AITER") and _is_hip
 
-if _is_hip:
+if _is_hip and (_use_aiter or _use_hip_int4):
     from aiter import ActivationType, QuantType
     from aiter.fused_moe import fused_moe
     from aiter.fused_moe_bf16_asm import asm_moe, ck_moe_2stages