From 8d00775fcedcee8e652c424be336943ddfb9a38a Mon Sep 17 00:00:00 2001
From: Yikun Jiang
Date: Sat, 7 Jun 2025 11:23:30 +0800
Subject: [PATCH] [SpecDecode][CI] Set default values to fix spec decode and
 fix multicard CI (#1109)

### What this PR does / why we need it?
- Set default values to fix spec decode
- To avoid OOM, we need to run the tests in separate pytest processes

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
- CI passed, especially multicard CI
- For the spec decode test, long-term CI passed

Closes: https://github.com/vllm-project/vllm-ascend/pull/1105

---------

Signed-off-by: Yizhou Liu
Signed-off-by: Yikun Jiang
Co-authored-by: Yizhou Liu
Co-authored-by: mengwei805
---
 .github/workflows/vllm_ascend_test.yaml                   | 8 +++++++-
 .../patch/worker/patch_common/patch_spec_decode_worker.py | 6 ++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml
index 96aa0d5..1334328 100644
--- a/.github/workflows/vllm_ascend_test.yaml
+++ b/.github/workflows/vllm_ascend_test.yaml
@@ -123,7 +123,11 @@ jobs:
             --ignore=tests/singlecard/test_camem.py
           else
             pytest -sv tests/multicard/test_ilama_lora_tp2.py
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py
+            # To avoid oom, we need to run the test in a single process.
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py
           fi
 
       - name: Run vllm-project/vllm-ascend test on V0 engine
@@ -149,7 +153,9 @@
           else
             pytest -sv tests/multicard/test_ilama_lora_tp2.py
             # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py will raise error.
+            # To avoid oom, we need to run the test in a single process.
             VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
             VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
             VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py
           fi

diff --git a/vllm_ascend/patch/worker/patch_common/patch_spec_decode_worker.py b/vllm_ascend/patch/worker/patch_common/patch_spec_decode_worker.py
index 8af68c1..66e7aa5 100644
--- a/vllm_ascend/patch/worker/patch_common/patch_spec_decode_worker.py
+++ b/vllm_ascend/patch/worker/patch_common/patch_spec_decode_worker.py
@@ -56,6 +56,12 @@ def create_worker(
         draft_worker_kwargs.pop("ngram_prompt_lookup_max"))
     ngram_prompt_lookup_min = (
         draft_worker_kwargs.pop("ngram_prompt_lookup_min"))
+
+    # TODO(Yizhou): A quick fix, must be refactored ASAP
+    draft_worker_kwargs["vllm_config"].parallel_config.expert_parallel_size = 1
+    draft_worker_kwargs[
+        "vllm_config"].parallel_config.expert_tensor_parallel_size = 1
+
     draft_model_config = draft_worker_kwargs["vllm_config"].model_config
     draft_parallel_config: ParallelConfig = draft_worker_kwargs[
         'vllm_config'].parallel_config
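
--

The quick fix in create_worker works by mutating the parallel_config on the
vllm_config object carried inside draft_worker_kwargs before the draft worker
is constructed, pinning both expert-parallel sizes to 1 so the draft model
never attempts expert parallelism. A minimal, self-contained sketch of this
pattern follows; ParallelConfig and VllmConfig here are hypothetical
stand-ins, not vllm's real config classes:

# Minimal sketch of the quick-fix pattern above. ParallelConfig and
# VllmConfig are hypothetical stand-ins, not vllm's real classes.
from dataclasses import dataclass, field


@dataclass
class ParallelConfig:
    expert_parallel_size: int = 0         # 0 means "unset" in this sketch
    expert_tensor_parallel_size: int = 0


@dataclass
class VllmConfig:
    parallel_config: ParallelConfig = field(default_factory=ParallelConfig)


def apply_spec_decode_defaults(draft_worker_kwargs: dict) -> None:
    # Mirror of the patched step: force safe defaults on the config carried
    # in the kwargs before the draft worker is built.
    parallel_config = draft_worker_kwargs["vllm_config"].parallel_config
    parallel_config.expert_parallel_size = 1
    parallel_config.expert_tensor_parallel_size = 1


kwargs = {"vllm_config": VllmConfig()}
apply_spec_decode_defaults(kwargs)
assert kwargs["vllm_config"].parallel_config.expert_parallel_size == 1
assert kwargs["vllm_config"].parallel_config.expert_tensor_parallel_size == 1

Because the overrides mutate the config object in place, any later reader of
that parallel_config sees the forced values as well, which is likely why the
change is flagged with a TODO to be refactored.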