[CI] Add unit test framework (#1201)

This PR adds a unit test framework to enable unit tests (UT) for vLLM Ascend. Unit tests run on CPU machines. They are run once the lint check has passed, the same as the e2e tests. For unit tests, this PR creates a new folder called `ut` under the `tests` module. The test files in `ut` should mirror the structure of the code in `vllm-ascend`, and each file name should start with the `test_` prefix. For example, in this PR `test_ascend_config.py` is added to test `ascend_config.py`. A new file `worker/test_worker_v1.py` is also added as a placeholder; it should become the unit test for `vllm-ascend/worker/worker_v1.py`. Additionally, a new `fake_weight` folder is added; it contains the config.json from `facebook/opt-125m`, so that the tests do not need to access Hugging Face on every run. TODO: We should add all the unit test files one by one in the future. Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
2025-06-16 18:32:28 +08:00
parent 966557a2a3
commit 69b817ed65
57 changed files with 396 additions and 267 deletions
--- a/vllm_ascend/ascend_config.py
+++ b/vllm_ascend/ascend_config.py
@@ -138,12 +138,6 @@ def check_ascend_config(vllm_config, enforce_eager):
        else:
            # torchair_graph case
            if ascend_config.torchair_graph_config.enabled:
-                # torchair_graph is not supported for V1 without mla currently.
-                if envs.VLLM_MLA_DISABLE:
-                    logger.warning(
-                        "Torchair graph mode is still experimental and not supported for V1 without mla currently, "
-                        "it has been disabled automatically.")
-                    ascend_config.torchair_graph_config.enabled = False
                # torchair_graph is supported for deepseek model only currently.
                if vllm_config.model_config:
                    model_type = vllm_config.model_config.hf_config.model_type
--- a/vllm_ascend/platform.py
+++ b/vllm_ascend/platform.py
@@ -164,6 +164,14 @@ class NPUPlatform(Platform):
        else:
            enforce_eager = getattr(model_config, "enforce_eager", False)

+        if ascend_config.torchair_graph_config.enabled and envs.VLLM_MLA_DISABLE:
+            # torchair_graph is not supported for V1 without mla currently.
+            logger.warning(
+                "Torchair graph mode is still experimental and not supported for V1 without mla currently, "
+                "Fallback to eager mode.")
+            ascend_config.torchair_graph_config.enabled = False
+            enforce_eager = True
+
        check_ascend_config(vllm_config, enforce_eager)

        if enforce_eager or compilation_config.level == CompilationLevel.NO_COMPILATION:
--- a/vllm_ascend/utils.py
+++ b/vllm_ascend/utils.py
@@ -24,6 +24,7 @@ from threading import Lock
 from typing import TYPE_CHECKING, List, Tuple

 import torch
+import torch_npu  # noqa: F401
 import torchair  # type: ignore[import]  # noqa: F401
 from packaging.version import InvalidVersion, Version
 from torch_npu.npu.streams import Event