[UT] fix skip ut test for test_utils (#3803)

### What this PR does / why we need it?
[UT] fix ut test for test_utils that
https://github.com/vllm-project/vllm-ascend/pull/3612 skipped.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
vLLM version: v0.11.0rc3
vLLM main:
17c540a993

- vLLM version: v0.11.0rc3
- vLLM main:
83f478bb19

---------

Signed-off-by: Meihan-chen <jcccx.cmh@gmail.com>
This commit is contained in:
Meihan-chen
2025-10-30 15:52:53 +08:00
committed by GitHub
parent eed1957f03
commit 67dd3a4581
3 changed files with 21 additions and 12 deletions

View File

@@ -29,6 +29,12 @@ from vllm_ascend.utils import REGISTERED_ASCEND_OPS
class TestUtils(TestBase):
def setUp(self):
import importlib
from vllm_ascend import platform
importlib.reload(platform)
def test_is_310p(self):
utils._IS_310P = None
with mock.patch("vllm_ascend._build_info.__soc_version__",
@@ -252,16 +258,12 @@ class TestUtils(TestBase):
self.assertIn("num_hidden_layers", str(context.exception))
def test_update_aclgraph_sizes(self):
# max_num_batch_sizes < len(original_sizes)
test_compilation_config = CompilationConfig(
cudagraph_capture_sizes=[i for i in range(150)])
model_path = os.path.join(os.path.dirname(__file__), "fake_weight")
test_model_config = ModelConfig(model=model_path, enforce_eager=True)
test_parallel_config = ParallelConfig()
ascend_config = mock.MagicMock()
ascend_config.max_num_batched_tokens = 2048
ascend_config.max_model_len = 1024
ascend_config.ascend_scheduler_config.enabled = False
ascend_config = {"ascend_scheduler_config": {"enabled": False}}
test_vllm_config = VllmConfig(
model_config=test_model_config,
compilation_config=test_compilation_config,
@@ -271,9 +273,18 @@ class TestUtils(TestBase):
os.environ['HCCL_OP_EXPANSION_MODE'] = 'AIV'
utils.update_aclgraph_sizes(test_vllm_config)
del os.environ['HCCL_OP_EXPANSION_MODE']
self.assertEqual(
137,
len(test_vllm_config.compilation_config.cudagraph_capture_sizes))
if utils.vllm_version_is("0.11.0"):
self.assertEqual(
137,
len(test_vllm_config.compilation_config.cudagraph_capture_sizes
))
else:
self.assertEqual(
0,
len(test_vllm_config.compilation_config.cudagraph_capture_sizes
))
return
test_vllm_config.speculative_config = mock.MagicMock()
test_vllm_config.speculative_config.num_speculative_tokens = 2