[Feat][BugFix] Support the Qwen3-Next-80B-A3B-Instruct quantization model & fix the NZ issue (#4245)
### What this PR does / why we need it?
Support the Qwen3-Next-80B-A3B-Instruct quantization model and fix the
NZ issue. The Triton kernel doesn't support the NZ data format, so we
skip converting the weight to NZ for the `conv1d` layer.
- vLLM version: v0.11.0
- vLLM main: 2918c1b49c
---------
Signed-off-by: IncSec <1790766300@qq.com>
This commit is contained in:
@@ -46,18 +46,12 @@ class TestUtils(TestBase):
|
||||
self.assertFalse(utils.is_310p())
|
||||
|
||||
def test_is_enable_nz(self):
|
||||
# Case when _ENABLE_NZ is already set
|
||||
utils._ENABLE_NZ = True
|
||||
self.assertTrue(utils.is_enable_nz())
|
||||
|
||||
utils._ENABLE_NZ = False
|
||||
self.assertFalse(utils.is_enable_nz())
|
||||
|
||||
# Case when _ENABLE_NZ is None and vllm_config is not provided
|
||||
utils._ENABLE_NZ = None
|
||||
with self.assertRaises(ValueError) as context:
|
||||
utils.is_enable_nz()
|
||||
self.assertIn("vllm_config must be provided", str(context.exception))
|
||||
with mock.patch("vllm_ascend.utils.envs_ascend.VLLM_ASCEND_ENABLE_NZ",
|
||||
1):
|
||||
self.assertTrue(utils.is_enable_nz())
|
||||
with mock.patch("vllm_ascend.utils.envs_ascend.VLLM_ASCEND_ENABLE_NZ",
|
||||
0):
|
||||
self.assertFalse(utils.is_enable_nz())
|
||||
|
||||
def test_sleep_mode_enabled(self):
|
||||
utils._SLEEP_MODE_ENABLED = None
|
||||
|
||||
Reference in New Issue
Block a user