init v0.11.0rc0

2025-10-14 10:38:28 +08:00
parent 67afd0ea78
commit 66dc16f966
278 changed files with 28130 additions and 11708 deletions
--- a/tests/ut/test_ascend_config.py
+++ b/tests/ut/test_ascend_config.py
@@ -43,6 +43,7 @@ class TestAscendConfig(TestBase):
        # No additional config given, check the default value here.
        ascend_config = init_ascend_config(test_vllm_config)
        self.assertIsNone(ascend_config.expert_map_path)
+        self.assertFalse(ascend_config.multistream_overlap_shared_expert)

        torchair_graph_config = ascend_config.torchair_graph_config
        self.assertFalse(torchair_graph_config.enabled)
@@ -51,8 +52,8 @@ class TestAscendConfig(TestBase):
        self.assertEqual(torchair_graph_config.graph_batch_sizes, [])
        self.assertFalse(torchair_graph_config.graph_batch_sizes_init)
        self.assertFalse(torchair_graph_config.enable_multistream_mla)
-        self.assertFalse(torchair_graph_config.enable_multistream_moe)
        self.assertTrue(torchair_graph_config.enable_view_optimize)
+        self.assertTrue(torchair_graph_config.enable_frozen_parameter)
        self.assertFalse(torchair_graph_config.enable_kv_nz)

        ascend_scheduler_config = ascend_config.ascend_scheduler_config
@@ -68,10 +69,11 @@ class TestAscendConfig(TestBase):
                "graph_batch_sizes": [1, 2, 4],
                "graph_batch_sizes_init": False,
                "enable_multistream_mla": True,
-                "enable_multistream_moe": True,
                "enable_view_optimize": True,
+                "enable_frozen_parameter": True,
                "enable_kv_nz": True
            },
+            "multistream_overlap_shared_expert": True,
            "ascend_scheduler_config": {
                "enabled": True
            },
@@ -80,6 +82,7 @@ class TestAscendConfig(TestBase):
        }
        ascend_config = init_ascend_config(test_vllm_config)
        self.assertEqual(ascend_config.expert_map_path, "test_expert_map_path")
+        self.assertTrue(ascend_config.multistream_overlap_shared_expert)

        torchair_graph_config = ascend_config.torchair_graph_config
        self.assertTrue(torchair_graph_config.enabled)
@@ -87,8 +90,8 @@ class TestAscendConfig(TestBase):
        self.assertEqual(torchair_graph_config.graph_batch_sizes, [1, 2, 4])
        self.assertFalse(torchair_graph_config.graph_batch_sizes_init)
        self.assertTrue(torchair_graph_config.enable_multistream_mla)
-        self.assertTrue(torchair_graph_config.enable_multistream_moe)
        self.assertTrue(torchair_graph_config.enable_view_optimize)
+        self.assertTrue(torchair_graph_config.enable_frozen_parameter)
        self.assertTrue(torchair_graph_config.enable_kv_nz)

        ascend_scheduler_config = ascend_config.ascend_scheduler_config
@@ -215,21 +218,6 @@ class TestAscendConfig(TestBase):
            test_vllm_config.model_config = fake_model_config
            init_ascend_config(test_vllm_config)
            check_ascend_config(test_vllm_config, False)
-        # aclgraph + deepseek model
-        with self.assertRaises(NotImplementedError):
-            test_vllm_config.additional_config = {
-                "torchair_graph_config": {
-                    "enabled": False,
-                },
-                "refresh": True
-            }
-            model_path = os.path.join(os.path.dirname(__file__), "fake_weight")
-            fake_model_config = ModelConfig(model=model_path)
-            fake_model_config.hf_config = PretrainedConfig()
-            fake_model_config.hf_config.model_type = "deepseek"
-            test_vllm_config.model_config = fake_model_config
-            init_ascend_config(test_vllm_config)
-            check_ascend_config(test_vllm_config, False)

    def test_check_torchair_supported(self):
        test_cases = [('deepseek_v3', True), ('PanguProMoE', True),
@@ -318,17 +306,6 @@ class TestAscendConfig(TestBase):
            }
            init_ascend_config(test_vllm_config)

-        # enable_multistream_moe should not be enabled without torchair graph mode
-        with self.assertRaises(RuntimeError):
-            test_vllm_config.additional_config = {
-                "torchair_graph_config": {
-                    "enabled": False,
-                    "enable_multistream_moe": True,
-                },
-                "refresh": True
-            }
-            init_ascend_config(test_vllm_config)
-
        # mode should not be configured without torchair graph mode
        with self.assertRaises(RuntimeError):
            test_vllm_config.additional_config = {
@@ -359,3 +336,27 @@ class TestAscendConfig(TestBase):
            test_vllm_config.parallel_config = ParallelConfig(
                data_parallel_size=4, tensor_parallel_size=2)
            init_ascend_config(test_vllm_config)
+
+        with self.assertRaises(AssertionError):
+            test_vllm_config.additional_config = {
+                "torchair_graph_config": {
+                    "enabled": True,
+                },
+                "oproj_tensor_parallel_size": 2,
+                "refresh": True
+            }
+            test_vllm_config.parallel_config = ParallelConfig(
+                data_parallel_size=4, tensor_parallel_size=2)
+            init_ascend_config(test_vllm_config)
+
+        with self.assertRaises(AssertionError):
+            test_vllm_config.additional_config = {
+                "torchair_graph_config": {
+                    "enabled": False,
+                },
+                "oproj_tensor_parallel_size": 2,
+                "refresh": True
+            }
+            test_vllm_config.parallel_config = ParallelConfig(
+                data_parallel_size=4, tensor_parallel_size=1)
+            init_ascend_config(test_vllm_config)