From b8b68b3dfe415f0f10a5e9407b5b60ddbf6b9c9d Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Sun, 21 Sep 2025 09:49:17 +0800 Subject: [PATCH] [CI] Upgrade vLLM to 20250920 (c60e613) and address config break (#3067) ### What this PR does / why we need it? Bump main to https://github.com/vllm-project/vllm/commit/c60e6137f0bf2034853919b3a9d705d7e06b93cf - Updated imports from `vllm.config` to `vllm.config.model` (https://github.com/vllm-project/vllm/commit/aed16879a9191a58adc5b8ac3973454dddefe018) https://github.com/vllm-project/vllm/pull/25252 - Refactored `vllm_ascend/sample/sampler.py` to use string values for `logprobs_mode` instead of the `LogprobsMode` enum, simplifying logprobs mode handling and improving compatibility with recent vLLM changes (https://github.com/vllm-project/vllm/commit/aed16879a9191a58adc5b8ac3973454dddefe018) https://github.com/vllm-project/vllm/pull/25252 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? CI passed - vLLM version: v0.10.2 - vLLM main: https://github.com/vllm-project/vllm/commit/6d8246aaffff3ebec84767e373212a7b8da328e2 --------- Signed-off-by: Yikun Jiang --- .github/workflows/format_pr_body.yaml | 2 +- .github/workflows/vllm_ascend_test.yaml | 6 ++--- .github/workflows/vllm_ascend_test_full.yaml | 4 ++-- tests/e2e/conftest.py | 9 ++++++- vllm_ascend/sample/sampler.py | 25 ++++++++++++++------ 5 files changed, 32 insertions(+), 14 deletions(-) diff --git a/.github/workflows/format_pr_body.yaml b/.github/workflows/format_pr_body.yaml index 407ce22..8b0661a 100644 --- a/.github/workflows/format_pr_body.yaml +++ b/.github/workflows/format_pr_body.yaml @@ -36,7 +36,7 @@ jobs: - name: Get vLLM version run: | - VLLM_COMMIT=6d8246aaffff3ebec84767e373212a7b8da328e2 + VLLM_COMMIT=c60e6137f0bf2034853919b3a9d705d7e06b93cf echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV - name: Checkout repository diff --git a/.github/workflows/vllm_ascend_test.yaml 
b/.github/workflows/vllm_ascend_test.yaml index 7ffff02..c406907 100644 --- a/.github/workflows/vllm_ascend_test.yaml +++ b/.github/workflows/vllm_ascend_test.yaml @@ -82,7 +82,7 @@ jobs: VLLM_USE_MODELSCOPE: True strategy: matrix: - vllm_version: [6d8246aaffff3ebec84767e373212a7b8da328e2, v0.10.2] + vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2] steps: - name: Install packages run: | @@ -140,7 +140,7 @@ jobs: max-parallel: 2 matrix: os: [linux-aarch64-a2-1] - vllm_version: [6d8246aaffff3ebec84767e373212a7b8da328e2, v0.10.2] + vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2] name: singlecard e2e test - light runs-on: ${{ matrix.os }} container: @@ -206,7 +206,7 @@ jobs: max-parallel: 2 matrix: os: [linux-aarch64-a2-2] - vllm_version: [6d8246aaffff3ebec84767e373212a7b8da328e2, v0.10.2] + vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2] name: multicard e2e test - light runs-on: ${{ matrix.os }} container: diff --git a/.github/workflows/vllm_ascend_test_full.yaml b/.github/workflows/vllm_ascend_test_full.yaml index ab9992f..0c389a5 100644 --- a/.github/workflows/vllm_ascend_test_full.yaml +++ b/.github/workflows/vllm_ascend_test_full.yaml @@ -72,7 +72,7 @@ jobs: max-parallel: 2 matrix: os: [linux-aarch64-a2-1] - vllm_version: [6d8246aaffff3ebec84767e373212a7b8da328e2, v0.10.2] + vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2] name: singlecard e2e test - full runs-on: ${{ matrix.os }} container: @@ -156,7 +156,7 @@ jobs: max-parallel: 2 matrix: os: [linux-aarch64-a2-2] - vllm_version: [6d8246aaffff3ebec84767e373212a7b8da328e2, v0.10.2] + vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2] name: multicard e2e test - full runs-on: ${{ matrix.os }} container: diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py index 430153a..d0f1b76 100644 --- a/tests/e2e/conftest.py +++ b/tests/e2e/conftest.py @@ -32,7 +32,14 @@ from transformers import (AutoConfig, AutoModelForCausalLM, 
AutoTokenizer, BatchEncoding, BatchFeature) from transformers.models.auto.auto_factory import _BaseAutoModelClass from vllm import LLM, SamplingParams -from vllm.config import TaskOption, _get_and_verify_dtype + +from vllm_ascend.utils import vllm_version_is + +if vllm_version_is("0.10.2"): + from vllm.config import TaskOption, _get_and_verify_dtype +else: + from vllm.config.model import TaskOption, _get_and_verify_dtype + from vllm.inputs import TextPrompt from vllm.outputs import RequestOutput from vllm.transformers_utils.utils import maybe_model_redirect diff --git a/vllm_ascend/sample/sampler.py b/vllm_ascend/sample/sampler.py index e009e4c..6a5c130 100644 --- a/vllm_ascend/sample/sampler.py +++ b/vllm_ascend/sample/sampler.py @@ -1,12 +1,15 @@ import torch import torch_npu -from vllm.config import LogprobsMode from vllm.v1.sample.ops.topk_topp_sampler import TopKTopPSampler, random_sample from vllm.v1.sample.sampler import Sampler -from vllm_ascend.utils import is_310p +from vllm_ascend.utils import is_310p, vllm_version_is -DEFAULT_LOGPROBS_MODE = LogprobsMode.RAW_LOGPROBS +if vllm_version_is("0.10.2"): + from vllm.config import LogprobsMode + DEFAULT_LOGPROBS_MODE = LogprobsMode.RAW_LOGPROBS +else: + DEFAULT_LOGPROBS_MODE = "raw_logprobs" class AscendSampler(Sampler): @@ -65,10 +68,18 @@ class AscendTopKTopPSampler(TopKTopPSampler): """Override pytorch native implementation to torch_npu""" logits = self._apply_top_k_top_p(logits, k, p) logits_to_return = None - if self.logprobs_mode == LogprobsMode.PROCESSED_LOGITS: - logits_to_return = logits - elif self.logprobs_mode == LogprobsMode.PROCESSED_LOGPROBS: - logits_to_return = logits.log_softmax(dim=-1, dtype=torch.float32) + if vllm_version_is("0.10.2"): + if self.logprobs_mode == LogprobsMode.PROCESSED_LOGITS: + logits_to_return = logits + elif self.logprobs_mode == LogprobsMode.PROCESSED_LOGPROBS: + logits_to_return = logits.log_softmax(dim=-1, + dtype=torch.float32) + else: + if self.logprobs_mode == 
"processed_logits": + logits_to_return = logits + elif self.logprobs_mode == "processed_logprobs": + logits_to_return = logits.log_softmax(dim=-1, + dtype=torch.float32) probs = logits.softmax(dim=-1, dtype=torch.float32) return random_sample(probs, generators), logits_to_return