[CI] Upgrade vLLM to 20250920 (c60e613) and address config break (#3067)
### What this PR does / why we need it? Bump main to c60e6137f0. - Updated imports in `vllm.config` to `vllm.config.model` (aed16879a9) https://github.com/vllm-project/vllm/pull/25252 - Refactored `vllm_ascend/sample/sampler.py` to use string values for `logprobs_mode` instead of the `LogprobsMode` enum, simplifying logprobs mode handling and improving compatibility with recent vLLM changes (aed16879a9) https://github.com/vllm-project/vllm/pull/25252 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? CI passed - vLLM version: v0.10.2 - vLLM main: 6d8246aaff --------- Signed-off-by: Yikun Jiang <yikunkero@gmail.com>
This commit is contained in:
2
.github/workflows/format_pr_body.yaml
vendored
2
.github/workflows/format_pr_body.yaml
vendored
@@ -36,7 +36,7 @@ jobs:
|
||||
|
||||
- name: Get vLLM version
|
||||
run: |
|
||||
VLLM_COMMIT=6d8246aaffff3ebec84767e373212a7b8da328e2
|
||||
VLLM_COMMIT=c60e6137f0bf2034853919b3a9d705d7e06b93cf
|
||||
echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
|
||||
|
||||
- name: Checkout repository
|
||||
|
||||
6
.github/workflows/vllm_ascend_test.yaml
vendored
6
.github/workflows/vllm_ascend_test.yaml
vendored
@@ -82,7 +82,7 @@ jobs:
|
||||
VLLM_USE_MODELSCOPE: True
|
||||
strategy:
|
||||
matrix:
|
||||
vllm_version: [6d8246aaffff3ebec84767e373212a7b8da328e2, v0.10.2]
|
||||
vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2]
|
||||
steps:
|
||||
- name: Install packages
|
||||
run: |
|
||||
@@ -140,7 +140,7 @@ jobs:
|
||||
max-parallel: 2
|
||||
matrix:
|
||||
os: [linux-aarch64-a2-1]
|
||||
vllm_version: [6d8246aaffff3ebec84767e373212a7b8da328e2, v0.10.2]
|
||||
vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2]
|
||||
name: singlecard e2e test - light
|
||||
runs-on: ${{ matrix.os }}
|
||||
container:
|
||||
@@ -206,7 +206,7 @@ jobs:
|
||||
max-parallel: 2
|
||||
matrix:
|
||||
os: [linux-aarch64-a2-2]
|
||||
vllm_version: [6d8246aaffff3ebec84767e373212a7b8da328e2, v0.10.2]
|
||||
vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2]
|
||||
name: multicard e2e test - light
|
||||
runs-on: ${{ matrix.os }}
|
||||
container:
|
||||
|
||||
4
.github/workflows/vllm_ascend_test_full.yaml
vendored
4
.github/workflows/vllm_ascend_test_full.yaml
vendored
@@ -72,7 +72,7 @@ jobs:
|
||||
max-parallel: 2
|
||||
matrix:
|
||||
os: [linux-aarch64-a2-1]
|
||||
vllm_version: [6d8246aaffff3ebec84767e373212a7b8da328e2, v0.10.2]
|
||||
vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2]
|
||||
name: singlecard e2e test - full
|
||||
runs-on: ${{ matrix.os }}
|
||||
container:
|
||||
@@ -156,7 +156,7 @@ jobs:
|
||||
max-parallel: 2
|
||||
matrix:
|
||||
os: [linux-aarch64-a2-2]
|
||||
vllm_version: [6d8246aaffff3ebec84767e373212a7b8da328e2, v0.10.2]
|
||||
vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2]
|
||||
name: multicard e2e test - full
|
||||
runs-on: ${{ matrix.os }}
|
||||
container:
|
||||
|
||||
@@ -32,7 +32,14 @@ from transformers import (AutoConfig, AutoModelForCausalLM, AutoTokenizer,
|
||||
BatchEncoding, BatchFeature)
|
||||
from transformers.models.auto.auto_factory import _BaseAutoModelClass
|
||||
from vllm import LLM, SamplingParams
|
||||
from vllm.config import TaskOption, _get_and_verify_dtype
|
||||
|
||||
from vllm_ascend.utils import vllm_version_is
|
||||
|
||||
if vllm_version_is("0.10.2"):
|
||||
from vllm.config import TaskOption, _get_and_verify_dtype
|
||||
else:
|
||||
from vllm.config.model import TaskOption, _get_and_verify_dtype
|
||||
|
||||
from vllm.inputs import TextPrompt
|
||||
from vllm.outputs import RequestOutput
|
||||
from vllm.transformers_utils.utils import maybe_model_redirect
|
||||
|
||||
@@ -1,12 +1,15 @@
|
||||
import torch
|
||||
import torch_npu
|
||||
from vllm.config import LogprobsMode
|
||||
from vllm.v1.sample.ops.topk_topp_sampler import TopKTopPSampler, random_sample
|
||||
from vllm.v1.sample.sampler import Sampler
|
||||
|
||||
from vllm_ascend.utils import is_310p
|
||||
from vllm_ascend.utils import is_310p, vllm_version_is
|
||||
|
||||
DEFAULT_LOGPROBS_MODE = LogprobsMode.RAW_LOGPROBS
|
||||
if vllm_version_is("0.10.2"):
|
||||
from vllm.config import LogprobsMode
|
||||
DEFAULT_LOGPROBS_MODE = LogprobsMode.RAW_LOGPROBS
|
||||
else:
|
||||
DEFAULT_LOGPROBS_MODE = "raw_logprobs"
|
||||
|
||||
|
||||
class AscendSampler(Sampler):
|
||||
@@ -65,10 +68,18 @@ class AscendTopKTopPSampler(TopKTopPSampler):
|
||||
"""Override pytorch native implementation to torch_npu"""
|
||||
logits = self._apply_top_k_top_p(logits, k, p)
|
||||
logits_to_return = None
|
||||
if self.logprobs_mode == LogprobsMode.PROCESSED_LOGITS:
|
||||
logits_to_return = logits
|
||||
elif self.logprobs_mode == LogprobsMode.PROCESSED_LOGPROBS:
|
||||
logits_to_return = logits.log_softmax(dim=-1, dtype=torch.float32)
|
||||
if vllm_version_is("0.10.2"):
|
||||
if self.logprobs_mode == LogprobsMode.PROCESSED_LOGITS:
|
||||
logits_to_return = logits
|
||||
elif self.logprobs_mode == LogprobsMode.PROCESSED_LOGPROBS:
|
||||
logits_to_return = logits.log_softmax(dim=-1,
|
||||
dtype=torch.float32)
|
||||
else:
|
||||
if self.logprobs_mode == "processed_logits":
|
||||
logits_to_return = logits
|
||||
elif self.logprobs_mode == "processed_logprobs":
|
||||
logits_to_return = logits.log_softmax(dim=-1,
|
||||
dtype=torch.float32)
|
||||
|
||||
probs = logits.softmax(dim=-1, dtype=torch.float32)
|
||||
return random_sample(probs, generators), logits_to_return
|
||||
|
||||
Reference in New Issue
Block a user