[CI] speed up ut (#4901)

Avoid downloading models to speed up the unit tests (UT). - vLLM version: v0.12.0 - vLLM main: ad32e3e19c Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
2025-12-11 18:45:43 +08:00
parent 3fade30275
commit b89763f1ed
6 changed files with 35 additions and 36 deletions
--- a/tests/e2e/singlecard/test_aclgraph_accuracy.py
+++ b/tests/e2e/singlecard/test_aclgraph_accuracy.py
@@ -192,3 +192,22 @@ def test_output_between_eager_and_full_decode_only(
        name_0="vllm_eager_outputs",
        name_1="vllm_aclgraph_outputs",
    )
+
+
+def test_aclgraph_enable():
+    """Check that aclgraph is still enabled once the NPU platform updates the config."""
+    # Not strictly an e2e case, but it is only meaningful in a real environment.
+    from vllm.config.compilation import CompilationMode, CUDAGraphMode
+    from vllm.engine.arg_utils import EngineArgs

+    from vllm_ascend.platform import NPUPlatform

+    # A default-constructed vLLM engine config is expected to use piecewise cudagraph.
+    vllm_config = EngineArgs().create_engine_config()
+    default_compilation = vllm_config.compilation_config
+    assert default_compilation.cudagraph_mode == CUDAGraphMode.PIECEWISE

+    # The platform hook must select VLLM_COMPILE and keep piecewise cudagraph.
+    NPUPlatform.check_and_update_config(vllm_config)
+    assert vllm_config.compilation_config.mode == CompilationMode.VLLM_COMPILE
+    assert vllm_config.compilation_config.cudagraph_mode == CUDAGraphMode.PIECEWISE