diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml index 52ef401..7f8ac0e 100644 --- a/.github/workflows/vllm_ascend_test.yaml +++ b/.github/workflows/vllm_ascend_test.yaml @@ -115,7 +115,7 @@ jobs: pytest -sv tests/ops pytest -sv tests/compile else - pytest -sv tests/multicard/test_offline_inference_distributed.py + pytest -sv -k "QwQ" tests/multicard/test_offline_inference_distributed.py pytest -sv tests/ops pytest -sv tests/compile fi @@ -128,7 +128,8 @@ jobs: pytest -sv tests/singlecard/test_offline_inference.py pytest -sv tests/ops else - pytest -sv tests/multicard/test_offline_inference_distributed.py + pytest -sv -k "QwQ" tests/multicard/test_offline_inference_distributed.py + pytest -sv -k "DeepSeek" tests/multicard/test_offline_inference_distributed.py pytest -sv tests/ops fi diff --git a/tests/multicard/test_offline_inference_distributed.py b/tests/multicard/test_offline_inference_distributed.py index dfc6675..26a3de5 100644 --- a/tests/multicard/test_offline_inference_distributed.py +++ b/tests/multicard/test_offline_inference_distributed.py @@ -28,10 +28,12 @@ import vllm # noqa: F401 from tests.conftest import VllmRunner os.environ["PYTORCH_NPU_ALLOC_CONF"] = "max_split_size_mb:256" +os.environ["VLLM_USE_MODELSCOPE"] = "True" @pytest.mark.parametrize("model, distributed_executor_backend", [ ("Qwen/QwQ-32B", "mp"), + ("deepseek-ai/DeepSeek-V2-Lite", "mp"), ]) def test_models_distributed(model: str, distributed_executor_backend: str) -> None: