[CI] Pin vLLM to releases/v0.11.0 (#3211)

### What this PR does / why we need it?
- Pin vLLM commit to releases/v0.11.0 branch.
- Fix the breaking change introduced by vLLM commit
d4d9899860

### Does this PR introduce _any_ user-facing change?
no
### How was this patch tested?

- vLLM version: v0.10.2
- vLLM main:
17b4c6685c

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
wangxiyuan
2025-09-27 10:41:48 +08:00
committed by GitHub
parent 9caf6fbaf5
commit e9359bd8fa
6 changed files with 83 additions and 38 deletions

View File

@@ -36,7 +36,7 @@ jobs:
       - name: Get vLLM version
         run: |
-          VLLM_COMMIT=17b4c6685ce62d5652654784d6771a3d38e4273e
+          VLLM_COMMIT=releases/v0.11.0
           echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
       - name: Checkout repository

View File

@@ -42,7 +42,7 @@ jobs:
   lint:
     uses: ./.github/workflows/pre-commit.yml
     with:
-      vllm: 17b4c6685ce62d5652654784d6771a3d38e4273e
+      vllm: releases/v0.11.0
   changes:
     runs-on: ubuntu-latest
@@ -83,7 +83,7 @@ jobs:
       VLLM_USE_MODELSCOPE: True
     strategy:
       matrix:
-        vllm_version: [17b4c6685ce62d5652654784d6771a3d38e4273e, v0.10.2]
+        vllm_version: [releases/v0.11.0, v0.10.2]
     steps:
       - name: Install packages
         run: |
@@ -138,7 +138,7 @@ jobs:
     name: e2e-light
     strategy:
       matrix:
-        vllm_version: [17b4c6685ce62d5652654784d6771a3d38e4273e, v0.10.2]
+        vllm_version: [releases/v0.11.0, v0.10.2]
     # Note (yikun): If CI resource are limited we can split job into two chain jobs
     needs: [lint, changes]
     # only trigger e2e test after lint passed and the change is e2e related with pull request.

View File

@@ -68,7 +68,7 @@ jobs:
     name: e2e-full
     strategy:
       matrix:
-        vllm_version: [17b4c6685ce62d5652654784d6771a3d38e4273e, v0.10.2]
+        vllm_version: [releases/v0.11.0, v0.10.2]
     needs: [changes]
     if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
     uses: ./.github/workflows/_e2e_test.yaml

View File

@@ -42,6 +42,8 @@ from vllm.model_executor.models.qwen2_5_vl import (
 from vllm.model_executor.models.utils import maybe_prefix
 from vllm.multimodal import MULTIMODAL_REGISTRY

+from vllm_ascend.utils import vllm_version_is
+
 MIN_PAD_SIZE = 64  # min_size to pad weight
 MAX_PAD_SIZE = 128  # max_size to pad weight
@@ -496,12 +498,20 @@ class AscendQwen2_5_VLForConditionalGeneration(
         super().__init__(vllm_config=vllm_config, prefix=prefix)
         config: Qwen2_5_VLConfig = vllm_config.model_config.hf_config
         quant_config = vllm_config.quant_config
-        self.visual = AscendQwen2_5_VisionTransformer(
-            vision_config=config.vision_config,
-            norm_eps=getattr(config, "rms_norm_eps", 1e-6),
-            quant_config=self._maybe_ignore_quant_config(quant_config),
-            prefix=maybe_prefix(prefix, "visual"),
-        )
+        if vllm_version_is("0.10.2"):
+            self.visual = AscendQwen2_5_VisionTransformer(
+                vision_config=config.vision_config,
+                norm_eps=getattr(config, "rms_norm_eps", 1e-6),
+                quant_config=self._maybe_ignore_quant_config(quant_config),
+                prefix=maybe_prefix(prefix, "visual"),
+            )
+        else:
+            self.visual = AscendQwen2_5_VisionTransformer(
+                vision_config=config.vision_config,
+                norm_eps=getattr(config, "rms_norm_eps", 1e-6),
+                quant_config=self.quant_config,
+                prefix=maybe_prefix(prefix, "visual"),
+            )

     def _process_image_input(self, image_input) -> tuple[torch.Tensor, ...]:

View File

@@ -68,6 +68,7 @@ from vllm.model_executor.models.utils import WeightsMapper, maybe_prefix
 from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm_ascend.models.qwen2_5_vl import AscendQwen2_5_VisionRotaryEmbedding
+from vllm_ascend.utils import vllm_version_is

 class AscendQwen2_5_VisionAttention_Without_Padding(Qwen2_5_VisionAttention):
@@ -483,12 +484,20 @@ class AscendQwen2_5_VLForConditionalGeneration_Without_Padding(
         super().__init__(vllm_config=vllm_config, prefix=prefix)
         config: Qwen2_5_VLConfig = vllm_config.model_config.hf_config
         quant_config = vllm_config.quant_config
-        self.visual = AscendQwen2_5_VisionTransformer_Without_Padding(
-            vision_config=config.vision_config,
-            norm_eps=getattr(config, "rms_norm_eps", 1e-6),
-            quant_config=self._maybe_ignore_quant_config(quant_config),
-            prefix=maybe_prefix(prefix, "visual"),
-        )
+        if vllm_version_is("0.10.2"):
+            self.visual = AscendQwen2_5_VisionTransformer_Without_Padding(
+                vision_config=config.vision_config,
+                norm_eps=getattr(config, "rms_norm_eps", 1e-6),
+                quant_config=self._maybe_ignore_quant_config(quant_config),
+                prefix=maybe_prefix(prefix, "visual"),
+            )
+        else:
+            self.visual = AscendQwen2_5_VisionTransformer_Without_Padding(
+                vision_config=config.vision_config,
+                norm_eps=getattr(config, "rms_norm_eps", 1e-6),
+                quant_config=self.quant_config,
+                prefix=maybe_prefix(prefix, "visual"),
+            )

     def _process_image_input(self, image_input) -> tuple[torch.Tensor, ...]:
@@ -554,12 +563,20 @@ class AscendQwen3VLForConditionalGeneration(Qwen3VLForConditionalGeneration):
         super().__init__(vllm_config=vllm_config, prefix=prefix)
         config: Qwen3VLConfig = vllm_config.model_config.hf_config
         quant_config = vllm_config.quant_config
-        self.visual = AscendQwen3_VisionTransformer(
-            config.vision_config,
-            norm_eps=getattr(config, "rms_norm_eps", 1e-6),
-            quant_config=self._maybe_ignore_quant_config(quant_config),
-            prefix=maybe_prefix(prefix, "visual"),
-            use_data_parallel=self.use_data_parallel)
+        if vllm_version_is("0.10.2"):
+            self.visual = AscendQwen3_VisionTransformer(
+                config.vision_config,
+                norm_eps=getattr(config, "rms_norm_eps", 1e-6),
+                quant_config=self._maybe_ignore_quant_config(quant_config),
+                prefix=maybe_prefix(prefix, "visual"),
+                use_data_parallel=self.use_data_parallel)
+        else:
+            self.visual = AscendQwen3_VisionTransformer(
+                config.vision_config,
+                norm_eps=getattr(config, "rms_norm_eps", 1e-6),
+                quant_config=self.quant_config,
+                prefix=maybe_prefix(prefix, "visual"),
+                use_data_parallel=self.use_data_parallel)

 @MULTIMODAL_REGISTRY.register_processor(Qwen3VLMultiModalProcessor,
@@ -596,11 +613,19 @@ class AscendQwen3VLMoeForConditionalGeneration(
         multimodal_config = vllm_config.model_config.multimodal_config
         self.multimodal_config = multimodal_config
         self.use_data_parallel = multimodal_config.mm_encoder_tp_mode == "data"
-        self.visual = AscendQwen3_VisionTransformer(
-            config.vision_config,
-            norm_eps=getattr(config, "rms_norm_eps", 1e-6),
-            quant_config=self._maybe_ignore_quant_config(quant_config),
-            prefix=maybe_prefix(prefix, "visual"),
-            use_data_parallel=self.use_data_parallel,
-        )
+        if vllm_version_is("0.10.2"):
+            self.visual = AscendQwen3_VisionTransformer(
+                config.vision_config,
+                norm_eps=getattr(config, "rms_norm_eps", 1e-6),
+                quant_config=self._maybe_ignore_quant_config(quant_config),
+                prefix=maybe_prefix(prefix, "visual"),
+                use_data_parallel=self.use_data_parallel,
+            )
+        else:
+            self.visual = AscendQwen3_VisionTransformer(
+                config.vision_config,
+                norm_eps=getattr(config, "rms_norm_eps", 1e-6),
+                quant_config=self.quant_config,
+                prefix=maybe_prefix(prefix, "visual"),
+                use_data_parallel=self.use_data_parallel,
+            )

View File

@@ -40,6 +40,8 @@ from vllm.model_executor.models.qwen2_vl import (
 from vllm.model_executor.models.utils import maybe_prefix
 from vllm.multimodal import MULTIMODAL_REGISTRY

+from vllm_ascend.utils import vllm_version_is
+
 MIN_PAD_SIZE = 64  # min_size to pad weight
 MAX_PAD_SIZE = 128  # max_size to pad weight
@@ -343,10 +345,18 @@ class AscendQwen2VLForConditionalGeneration(Qwen2VLForConditionalGeneration):
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__(vllm_config=vllm_config, prefix=prefix)
-        self.visual = AscendQwen2VisionTransformer(
-            self.config.vision_config,
-            norm_eps=getattr(self.config, "rms_norm_eps", 1e-6),
-            quant_config=self._maybe_ignore_quant_config(
-                vllm_config.quant_config),
-            prefix=maybe_prefix(prefix, "visual"),
-        )
+        if vllm_version_is("0.10.2"):
+            self.visual = AscendQwen2VisionTransformer(
+                self.config.vision_config,
+                norm_eps=getattr(self.config, "rms_norm_eps", 1e-6),
+                quant_config=self._maybe_ignore_quant_config(
+                    vllm_config.quant_config),
+                prefix=maybe_prefix(prefix, "visual"),
+            )
+        else:
+            self.visual = AscendQwen2VisionTransformer(
+                self.config.vision_config,
+                norm_eps=getattr(self.config, "rms_norm_eps", 1e-6),
+                quant_config=self.vllm_config.quant_config,
+                prefix=maybe_prefix(prefix, "visual"),
+            )