[Feat][BugFix] Support the Qwen3-Next-80B-A3B-Instruct quantization model & fix the NZ issue (#4245)

### What this PR does / why we need it? Supports the Qwen3-Next-80B-A3B-Instruct quantized model and fixes the NZ issue: the Triton kernel does not support the NZ data format, so we skip converting the weight to NZ for the `conv1d` layer. - vLLM version: v0.11.0 - vLLM main: 2918c1b49c --------- Signed-off-by: IncSec <1790766300@qq.com>
2025-11-21 10:42:56 +08:00
parent cbb27feaf2
commit 5a4e8cdeba
10 changed files with 39 additions and 30 deletions
--- a/tests/ut/test_utils.py
+++ b/tests/ut/test_utils.py
@@ -46,18 +46,12 @@ class TestUtils(TestBase):
            self.assertFalse(utils.is_310p())

    def test_is_enable_nz(self):
-        # Case when _ENABLE_NZ is already set
-        utils._ENABLE_NZ = True
-        self.assertTrue(utils.is_enable_nz())
-
-        utils._ENABLE_NZ = False
-        self.assertFalse(utils.is_enable_nz())
-
-        # Case when _ENABLE_NZ is None and vllm_config is not provided
-        utils._ENABLE_NZ = None
-        with self.assertRaises(ValueError) as context:
-            utils.is_enable_nz()
-        self.assertIn("vllm_config must be provided", str(context.exception))
+        with mock.patch("vllm_ascend.utils.envs_ascend.VLLM_ASCEND_ENABLE_NZ",
+                        1):
+            self.assertTrue(utils.is_enable_nz())
+        with mock.patch("vllm_ascend.utils.envs_ascend.VLLM_ASCEND_ENABLE_NZ",
+                        0):
+            self.assertFalse(utils.is_enable_nz())

    def test_sleep_mode_enabled(self):
        utils._SLEEP_MODE_ENABLED = None