[CI] Upgrade vLLM to 20250920 (c60e613) and address config break (#3067)
### What this PR does / why we need it? Bump main to c60e6137f0. - Updated imports in `vllm.config` to `vllm.config.model` (aed16879a9) https://github.com/vllm-project/vllm/pull/25252 - Refactored `vllm_ascend/sample/sampler.py` to use string values for `logprobs_mode` instead of the `LogprobsMode` enum, simplifying logprobs mode handling and improving compatibility with recent vLLM changes (aed16879a9) https://github.com/vllm-project/vllm/pull/25252 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? CI passed - vLLM version: v0.10.2 - vLLM main: 6d8246aaff --------- Signed-off-by: Yikun Jiang <yikunkero@gmail.com>
This commit is contained in:
2
.github/workflows/format_pr_body.yaml
vendored
2
.github/workflows/format_pr_body.yaml
vendored
@@ -36,7 +36,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Get vLLM version
|
- name: Get vLLM version
|
||||||
run: |
|
run: |
|
||||||
VLLM_COMMIT=6d8246aaffff3ebec84767e373212a7b8da328e2
|
VLLM_COMMIT=c60e6137f0bf2034853919b3a9d705d7e06b93cf
|
||||||
echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
|
echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
|
||||||
|
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
|
|||||||
6
.github/workflows/vllm_ascend_test.yaml
vendored
6
.github/workflows/vllm_ascend_test.yaml
vendored
@@ -82,7 +82,7 @@ jobs:
|
|||||||
VLLM_USE_MODELSCOPE: True
|
VLLM_USE_MODELSCOPE: True
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
vllm_version: [6d8246aaffff3ebec84767e373212a7b8da328e2, v0.10.2]
|
vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2]
|
||||||
steps:
|
steps:
|
||||||
- name: Install packages
|
- name: Install packages
|
||||||
run: |
|
run: |
|
||||||
@@ -140,7 +140,7 @@ jobs:
|
|||||||
max-parallel: 2
|
max-parallel: 2
|
||||||
matrix:
|
matrix:
|
||||||
os: [linux-aarch64-a2-1]
|
os: [linux-aarch64-a2-1]
|
||||||
vllm_version: [6d8246aaffff3ebec84767e373212a7b8da328e2, v0.10.2]
|
vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2]
|
||||||
name: singlecard e2e test - light
|
name: singlecard e2e test - light
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
container:
|
container:
|
||||||
@@ -206,7 +206,7 @@ jobs:
|
|||||||
max-parallel: 2
|
max-parallel: 2
|
||||||
matrix:
|
matrix:
|
||||||
os: [linux-aarch64-a2-2]
|
os: [linux-aarch64-a2-2]
|
||||||
vllm_version: [6d8246aaffff3ebec84767e373212a7b8da328e2, v0.10.2]
|
vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2]
|
||||||
name: multicard e2e test - light
|
name: multicard e2e test - light
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
container:
|
container:
|
||||||
|
|||||||
4
.github/workflows/vllm_ascend_test_full.yaml
vendored
4
.github/workflows/vllm_ascend_test_full.yaml
vendored
@@ -72,7 +72,7 @@ jobs:
|
|||||||
max-parallel: 2
|
max-parallel: 2
|
||||||
matrix:
|
matrix:
|
||||||
os: [linux-aarch64-a2-1]
|
os: [linux-aarch64-a2-1]
|
||||||
vllm_version: [6d8246aaffff3ebec84767e373212a7b8da328e2, v0.10.2]
|
vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2]
|
||||||
name: singlecard e2e test - full
|
name: singlecard e2e test - full
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
container:
|
container:
|
||||||
@@ -156,7 +156,7 @@ jobs:
|
|||||||
max-parallel: 2
|
max-parallel: 2
|
||||||
matrix:
|
matrix:
|
||||||
os: [linux-aarch64-a2-2]
|
os: [linux-aarch64-a2-2]
|
||||||
vllm_version: [6d8246aaffff3ebec84767e373212a7b8da328e2, v0.10.2]
|
vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2]
|
||||||
name: multicard e2e test - full
|
name: multicard e2e test - full
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
container:
|
container:
|
||||||
|
|||||||
@@ -32,7 +32,14 @@ from transformers import (AutoConfig, AutoModelForCausalLM, AutoTokenizer,
|
|||||||
BatchEncoding, BatchFeature)
|
BatchEncoding, BatchFeature)
|
||||||
from transformers.models.auto.auto_factory import _BaseAutoModelClass
|
from transformers.models.auto.auto_factory import _BaseAutoModelClass
|
||||||
from vllm import LLM, SamplingParams
|
from vllm import LLM, SamplingParams
|
||||||
|
|
||||||
|
from vllm_ascend.utils import vllm_version_is
|
||||||
|
|
||||||
|
if vllm_version_is("0.10.2"):
|
||||||
from vllm.config import TaskOption, _get_and_verify_dtype
|
from vllm.config import TaskOption, _get_and_verify_dtype
|
||||||
|
else:
|
||||||
|
from vllm.config.model import TaskOption, _get_and_verify_dtype
|
||||||
|
|
||||||
from vllm.inputs import TextPrompt
|
from vllm.inputs import TextPrompt
|
||||||
from vllm.outputs import RequestOutput
|
from vllm.outputs import RequestOutput
|
||||||
from vllm.transformers_utils.utils import maybe_model_redirect
|
from vllm.transformers_utils.utils import maybe_model_redirect
|
||||||
|
|||||||
@@ -1,12 +1,15 @@
|
|||||||
import torch
|
import torch
|
||||||
import torch_npu
|
import torch_npu
|
||||||
from vllm.config import LogprobsMode
|
|
||||||
from vllm.v1.sample.ops.topk_topp_sampler import TopKTopPSampler, random_sample
|
from vllm.v1.sample.ops.topk_topp_sampler import TopKTopPSampler, random_sample
|
||||||
from vllm.v1.sample.sampler import Sampler
|
from vllm.v1.sample.sampler import Sampler
|
||||||
|
|
||||||
from vllm_ascend.utils import is_310p
|
from vllm_ascend.utils import is_310p, vllm_version_is
|
||||||
|
|
||||||
|
if vllm_version_is("0.10.2"):
|
||||||
|
from vllm.config import LogprobsMode
|
||||||
DEFAULT_LOGPROBS_MODE = LogprobsMode.RAW_LOGPROBS
|
DEFAULT_LOGPROBS_MODE = LogprobsMode.RAW_LOGPROBS
|
||||||
|
else:
|
||||||
|
DEFAULT_LOGPROBS_MODE = "raw_logprobs"
|
||||||
|
|
||||||
|
|
||||||
class AscendSampler(Sampler):
|
class AscendSampler(Sampler):
|
||||||
@@ -65,10 +68,18 @@ class AscendTopKTopPSampler(TopKTopPSampler):
|
|||||||
"""Override pytorch native implementation to torch_npu"""
|
"""Override pytorch native implementation to torch_npu"""
|
||||||
logits = self._apply_top_k_top_p(logits, k, p)
|
logits = self._apply_top_k_top_p(logits, k, p)
|
||||||
logits_to_return = None
|
logits_to_return = None
|
||||||
|
if vllm_version_is("0.10.2"):
|
||||||
if self.logprobs_mode == LogprobsMode.PROCESSED_LOGITS:
|
if self.logprobs_mode == LogprobsMode.PROCESSED_LOGITS:
|
||||||
logits_to_return = logits
|
logits_to_return = logits
|
||||||
elif self.logprobs_mode == LogprobsMode.PROCESSED_LOGPROBS:
|
elif self.logprobs_mode == LogprobsMode.PROCESSED_LOGPROBS:
|
||||||
logits_to_return = logits.log_softmax(dim=-1, dtype=torch.float32)
|
logits_to_return = logits.log_softmax(dim=-1,
|
||||||
|
dtype=torch.float32)
|
||||||
|
else:
|
||||||
|
if self.logprobs_mode == "processed_logits":
|
||||||
|
logits_to_return = logits
|
||||||
|
elif self.logprobs_mode == "processed_logprobs":
|
||||||
|
logits_to_return = logits.log_softmax(dim=-1,
|
||||||
|
dtype=torch.float32)
|
||||||
|
|
||||||
probs = logits.softmax(dim=-1, dtype=torch.float32)
|
probs = logits.softmax(dim=-1, dtype=torch.float32)
|
||||||
return random_sample(probs, generators), logits_to_return
|
return random_sample(probs, generators), logits_to_return
|
||||||
|
|||||||
Reference in New Issue
Block a user