Upgrade vLLM to v0.10.0 (#1927)

### What this PR does / why we need it?
- Upgrade to v0.10.0
- Drop v0.9.2 version compatibility
- Add patch for
`vllm_ascend/patch/worker/patch_common/patch_sampler_gather_logprobs.py`
as a workaround for
f3a683b7c9
for v0.10.0 and also add e2e test `test_models_prompt_logprobs`
- Pin transformers<4.54.0 as a workaround for
https://github.com/vllm-project/vllm-ascend/issues/2034

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
- Test locally:
`VLLM_USE_MODELSCOPE=true pytest -sv
tests/e2e/singlecard/test_offline_inference.py::test_models_prompt_logprobs`
- CI passed

- vLLM version: v0.9.2
- vLLM main:
7728dd77bb

---------

Signed-off-by: Yikun Jiang <yikunkero@gmail.com>
This commit is contained in:
Yikun Jiang
2025-07-26 15:43:29 +08:00
committed by GitHub
parent 2f50304c19
commit 17a430f7b8
29 changed files with 198 additions and 251 deletions

View File

@@ -127,3 +127,19 @@ def test_models_topk() -> None:
enforce_eager=True,
gpu_memory_utilization=0.7) as vllm_model:
vllm_model.generate(example_prompts, sampling_params)
def test_models_prompt_logprobs() -> None:
    """Smoke-test greedy decoding with prompt logprobs on a small model."""
    prompts = ["Hello, my name is"]
    # enforce_eager avoids graph compilation; a low memory fraction keeps the
    # test lightweight on shared CI hardware.
    with VllmRunner("Qwen/Qwen2.5-0.5B-Instruct",
                    max_model_len=8192,
                    dtype="float16",
                    enforce_eager=True,
                    gpu_memory_utilization=0.7) as runner:
        # A single short completion is enough to exercise the logprobs path.
        runner.generate_greedy_logprobs(prompts, max_tokens=5, num_logprobs=1)

View File

@@ -3,15 +3,12 @@ from unittest.mock import MagicMock, patch
import torch
from tests.ut.base import TestBase
from vllm_ascend.attention.attention_v1 import \
AscendAttentionBackendImpl092 # isort: skip
from vllm_ascend.attention.attention_v1 import (AscendAttentionBackend,
AscendAttentionBackendImpl,
AscendAttentionMetadataBuilder,
AscendAttentionState,
AscendMetadata,
CommonAttentionState)
from vllm_ascend.utils import vllm_version_is
class TestAscendAttentionBackend(TestBase):
@@ -20,12 +17,8 @@ class TestAscendAttentionBackend(TestBase):
self.assertEqual(AscendAttentionBackend.get_name(), "ASCEND")
def test_get_impl_cls(self):
    """get_impl_cls() must resolve to the single Ascend attention impl.

    The vLLM v0.9.2 compatibility branch (AscendAttentionBackendImpl092 /
    vllm_version_is) was dropped, so the old version check followed by an
    unconditional duplicate assertion is dead code; one assertion suffices.
    """
    self.assertEqual(AscendAttentionBackend.get_impl_cls(),
                     AscendAttentionBackendImpl)
def test_get_metadata_cls(self):
self.assertEqual(AscendAttentionBackend.get_metadata_cls(),