[BugFix] Fix a bug of running chunked-prefill with torchair. (#1378) (#1844)

This PR fixes the error `local variable 'decode_hs_or_q_c' referenced before assignment` that is raised when running chunked-prefill with torchair. `decode_hs_or_q_c` must be calculated whether or not torchair graph mode is enabled. Backport of #1378; fixes https://github.com/vllm-project/vllm-ascend/issues/1369. - vLLM version: v0.10.0 - vLLM main: 0e36abf993 --------- Signed-off-by: whx-sjtu <2952154980@qq.com> Signed-off-by: MengqingCao <cmq0113@163.com> Co-authored-by: whx-sjtu <2952154980@qq.com>
2025-07-31 20:08:45 +08:00
parent db310c6ec9
commit 4c8842da65
2 changed files with 32 additions and 22 deletions
--- a/tests/e2e/multicard/test_torchair_graph_mode.py
+++ b/tests/e2e/multicard/test_torchair_graph_mode.py
@@ -31,6 +31,7 @@ def _deepseek_torchair_test_fixture(
    additional_config: Dict,
    *,
    tensor_parallel_size=2,
+    use_v1_schduler=False,
 ):
    example_prompts = [
        "Hello, my name is",
@@ -38,14 +39,14 @@ def _deepseek_torchair_test_fixture(
        "The capital of France is",
        "The future of AI is",
    ]
-
-    # torchair is only work without chunked-prefill now
-    kwargs = {
-        "ascend_scheduler_config": {
-            "enabled": True,
-        },
-        "refresh": True,
-    }
+    kwargs = {}
+    if not use_v1_schduler:
+        kwargs = {
+            "ascend_scheduler_config": {
+                "enabled": True,
+            },
+            "refresh": True,
+        }
    additional_config.update(**kwargs)

    with VllmRunner(
@@ -95,6 +96,15 @@ def test_e2e_deepseekv3_with_torchair_ms_mla():
    _deepseek_torchair_test_fixture(additional_config)


+def test_e2e_deepseekv3_with_torchair_v1scheduler():
+    additional_config = {
+        "torchair_graph_config": {
+            "enabled": True,
+        },
+    }
+    _deepseek_torchair_test_fixture(additional_config, use_v1_schduler=True)
+
+
 def _pangu_torchair_test_fixture(
    additional_config: Dict,
    *,