support qwen2 running on ascend npu device (#7022)
Co-authored-by: 刁莹煜 <diaoyingyu1@hisilicon.com>
This commit is contained in:
@@ -17,11 +17,12 @@ from sglang.srt.layers.quantization.utils import (
     per_tensor_dequantize,
     replace_parameter,
 )
-from sglang.srt.utils import is_cuda, set_weight_attrs
+from sglang.srt.utils import is_cuda, is_npu, set_weight_attrs

 _is_cuda = is_cuda()
+_is_npu = is_npu()

-if not _is_cuda:
+if not _is_cuda and not _is_npu:
     from vllm import _custom_ops as vllm_ops
     from vllm._custom_ops import scaled_fp8_quant
Reference in New Issue
Block a user