[CI] Pin vLLM to releases/v0.11.0 (#3211)
### What this PR does / why we need it? - Pin vLLM commit to releases/v0.11.0 branch. - Fix the breaking change introduced by vLLM commit d4d9899860. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? - vLLM version: v0.10.2 - vLLM main: 17b4c6685c Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
2
.github/workflows/format_pr_body.yaml
vendored
2
.github/workflows/format_pr_body.yaml
vendored
@@ -36,7 +36,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Get vLLM version
|
- name: Get vLLM version
|
||||||
run: |
|
run: |
|
||||||
VLLM_COMMIT=17b4c6685ce62d5652654784d6771a3d38e4273e
|
VLLM_COMMIT=releases/v0.11.0
|
||||||
echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
|
echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
|
||||||
|
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
|
|||||||
6
.github/workflows/vllm_ascend_test.yaml
vendored
6
.github/workflows/vllm_ascend_test.yaml
vendored
@@ -42,7 +42,7 @@ jobs:
|
|||||||
lint:
|
lint:
|
||||||
uses: ./.github/workflows/pre-commit.yml
|
uses: ./.github/workflows/pre-commit.yml
|
||||||
with:
|
with:
|
||||||
vllm: 17b4c6685ce62d5652654784d6771a3d38e4273e
|
vllm: releases/v0.11.0
|
||||||
|
|
||||||
changes:
|
changes:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
@@ -83,7 +83,7 @@ jobs:
|
|||||||
VLLM_USE_MODELSCOPE: True
|
VLLM_USE_MODELSCOPE: True
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
vllm_version: [17b4c6685ce62d5652654784d6771a3d38e4273e, v0.10.2]
|
vllm_version: [releases/v0.11.0, v0.10.2]
|
||||||
steps:
|
steps:
|
||||||
- name: Install packages
|
- name: Install packages
|
||||||
run: |
|
run: |
|
||||||
@@ -138,7 +138,7 @@ jobs:
|
|||||||
name: e2e-light
|
name: e2e-light
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
vllm_version: [17b4c6685ce62d5652654784d6771a3d38e4273e, v0.10.2]
|
vllm_version: [releases/v0.11.0, v0.10.2]
|
||||||
# Note (yikun): If CI resource are limited we can split job into two chain jobs
|
# Note (yikun): If CI resource are limited we can split job into two chain jobs
|
||||||
needs: [lint, changes]
|
needs: [lint, changes]
|
||||||
# only trigger e2e test after lint passed and the change is e2e related with pull request.
|
# only trigger e2e test after lint passed and the change is e2e related with pull request.
|
||||||
|
|||||||
2
.github/workflows/vllm_ascend_test_full.yaml
vendored
2
.github/workflows/vllm_ascend_test_full.yaml
vendored
@@ -68,7 +68,7 @@ jobs:
|
|||||||
name: e2e-full
|
name: e2e-full
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
vllm_version: [17b4c6685ce62d5652654784d6771a3d38e4273e, v0.10.2]
|
vllm_version: [releases/v0.11.0, v0.10.2]
|
||||||
needs: [changes]
|
needs: [changes]
|
||||||
if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
|
if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
|
||||||
uses: ./.github/workflows/_e2e_test.yaml
|
uses: ./.github/workflows/_e2e_test.yaml
|
||||||
|
|||||||
@@ -42,6 +42,8 @@ from vllm.model_executor.models.qwen2_5_vl import (
|
|||||||
from vllm.model_executor.models.utils import maybe_prefix
|
from vllm.model_executor.models.utils import maybe_prefix
|
||||||
from vllm.multimodal import MULTIMODAL_REGISTRY
|
from vllm.multimodal import MULTIMODAL_REGISTRY
|
||||||
|
|
||||||
|
from vllm_ascend.utils import vllm_version_is
|
||||||
|
|
||||||
MIN_PAD_SIZE = 64 # min_size to pad weight
|
MIN_PAD_SIZE = 64 # min_size to pad weight
|
||||||
MAX_PAD_SIZE = 128 # max_size to pad weight
|
MAX_PAD_SIZE = 128 # max_size to pad weight
|
||||||
|
|
||||||
@@ -496,12 +498,20 @@ class AscendQwen2_5_VLForConditionalGeneration(
|
|||||||
super().__init__(vllm_config=vllm_config, prefix=prefix)
|
super().__init__(vllm_config=vllm_config, prefix=prefix)
|
||||||
config: Qwen2_5_VLConfig = vllm_config.model_config.hf_config
|
config: Qwen2_5_VLConfig = vllm_config.model_config.hf_config
|
||||||
quant_config = vllm_config.quant_config
|
quant_config = vllm_config.quant_config
|
||||||
self.visual = AscendQwen2_5_VisionTransformer(
|
if vllm_version_is("0.10.2"):
|
||||||
vision_config=config.vision_config,
|
self.visual = AscendQwen2_5_VisionTransformer(
|
||||||
norm_eps=getattr(config, "rms_norm_eps", 1e-6),
|
vision_config=config.vision_config,
|
||||||
quant_config=self._maybe_ignore_quant_config(quant_config),
|
norm_eps=getattr(config, "rms_norm_eps", 1e-6),
|
||||||
prefix=maybe_prefix(prefix, "visual"),
|
quant_config=self._maybe_ignore_quant_config(quant_config),
|
||||||
)
|
prefix=maybe_prefix(prefix, "visual"),
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
self.visual = AscendQwen2_5_VisionTransformer(
|
||||||
|
vision_config=config.vision_config,
|
||||||
|
norm_eps=getattr(config, "rms_norm_eps", 1e-6),
|
||||||
|
quant_config=self.quant_config,
|
||||||
|
prefix=maybe_prefix(prefix, "visual"),
|
||||||
|
)
|
||||||
|
|
||||||
def _process_image_input(self, image_input) -> tuple[torch.Tensor, ...]:
|
def _process_image_input(self, image_input) -> tuple[torch.Tensor, ...]:
|
||||||
|
|
||||||
|
|||||||
@@ -68,6 +68,7 @@ from vllm.model_executor.models.utils import WeightsMapper, maybe_prefix
|
|||||||
from vllm.multimodal import MULTIMODAL_REGISTRY
|
from vllm.multimodal import MULTIMODAL_REGISTRY
|
||||||
|
|
||||||
from vllm_ascend.models.qwen2_5_vl import AscendQwen2_5_VisionRotaryEmbedding
|
from vllm_ascend.models.qwen2_5_vl import AscendQwen2_5_VisionRotaryEmbedding
|
||||||
|
from vllm_ascend.utils import vllm_version_is
|
||||||
|
|
||||||
|
|
||||||
class AscendQwen2_5_VisionAttention_Without_Padding(Qwen2_5_VisionAttention):
|
class AscendQwen2_5_VisionAttention_Without_Padding(Qwen2_5_VisionAttention):
|
||||||
@@ -483,12 +484,20 @@ class AscendQwen2_5_VLForConditionalGeneration_Without_Padding(
|
|||||||
super().__init__(vllm_config=vllm_config, prefix=prefix)
|
super().__init__(vllm_config=vllm_config, prefix=prefix)
|
||||||
config: Qwen2_5_VLConfig = vllm_config.model_config.hf_config
|
config: Qwen2_5_VLConfig = vllm_config.model_config.hf_config
|
||||||
quant_config = vllm_config.quant_config
|
quant_config = vllm_config.quant_config
|
||||||
self.visual = AscendQwen2_5_VisionTransformer_Without_Padding(
|
if vllm_version_is("0.10.2"):
|
||||||
vision_config=config.vision_config,
|
self.visual = AscendQwen2_5_VisionTransformer_Without_Padding(
|
||||||
norm_eps=getattr(config, "rms_norm_eps", 1e-6),
|
vision_config=config.vision_config,
|
||||||
quant_config=self._maybe_ignore_quant_config(quant_config),
|
norm_eps=getattr(config, "rms_norm_eps", 1e-6),
|
||||||
prefix=maybe_prefix(prefix, "visual"),
|
quant_config=self._maybe_ignore_quant_config(quant_config),
|
||||||
)
|
prefix=maybe_prefix(prefix, "visual"),
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
self.visual = AscendQwen2_5_VisionTransformer_Without_Padding(
|
||||||
|
vision_config=config.vision_config,
|
||||||
|
norm_eps=getattr(config, "rms_norm_eps", 1e-6),
|
||||||
|
quant_config=self.quant_config,
|
||||||
|
prefix=maybe_prefix(prefix, "visual"),
|
||||||
|
)
|
||||||
|
|
||||||
def _process_image_input(self, image_input) -> tuple[torch.Tensor, ...]:
|
def _process_image_input(self, image_input) -> tuple[torch.Tensor, ...]:
|
||||||
|
|
||||||
@@ -554,12 +563,20 @@ class AscendQwen3VLForConditionalGeneration(Qwen3VLForConditionalGeneration):
|
|||||||
super().__init__(vllm_config=vllm_config, prefix=prefix)
|
super().__init__(vllm_config=vllm_config, prefix=prefix)
|
||||||
config: Qwen3VLConfig = vllm_config.model_config.hf_config
|
config: Qwen3VLConfig = vllm_config.model_config.hf_config
|
||||||
quant_config = vllm_config.quant_config
|
quant_config = vllm_config.quant_config
|
||||||
self.visual = AscendQwen3_VisionTransformer(
|
if vllm_version_is("0.10.2"):
|
||||||
config.vision_config,
|
self.visual = AscendQwen3_VisionTransformer(
|
||||||
norm_eps=getattr(config, "rms_norm_eps", 1e-6),
|
config.vision_config,
|
||||||
quant_config=self._maybe_ignore_quant_config(quant_config),
|
norm_eps=getattr(config, "rms_norm_eps", 1e-6),
|
||||||
prefix=maybe_prefix(prefix, "visual"),
|
quant_config=self._maybe_ignore_quant_config(quant_config),
|
||||||
use_data_parallel=self.use_data_parallel)
|
prefix=maybe_prefix(prefix, "visual"),
|
||||||
|
use_data_parallel=self.use_data_parallel)
|
||||||
|
else:
|
||||||
|
self.visual = AscendQwen3_VisionTransformer(
|
||||||
|
config.vision_config,
|
||||||
|
norm_eps=getattr(config, "rms_norm_eps", 1e-6),
|
||||||
|
quant_config=self.quant_config,
|
||||||
|
prefix=maybe_prefix(prefix, "visual"),
|
||||||
|
use_data_parallel=self.use_data_parallel)
|
||||||
|
|
||||||
|
|
||||||
@MULTIMODAL_REGISTRY.register_processor(Qwen3VLMultiModalProcessor,
|
@MULTIMODAL_REGISTRY.register_processor(Qwen3VLMultiModalProcessor,
|
||||||
@@ -596,11 +613,19 @@ class AscendQwen3VLMoeForConditionalGeneration(
|
|||||||
multimodal_config = vllm_config.model_config.multimodal_config
|
multimodal_config = vllm_config.model_config.multimodal_config
|
||||||
self.multimodal_config = multimodal_config
|
self.multimodal_config = multimodal_config
|
||||||
self.use_data_parallel = multimodal_config.mm_encoder_tp_mode == "data"
|
self.use_data_parallel = multimodal_config.mm_encoder_tp_mode == "data"
|
||||||
|
if vllm_version_is("0.10.2"):
|
||||||
self.visual = AscendQwen3_VisionTransformer(
|
self.visual = AscendQwen3_VisionTransformer(
|
||||||
config.vision_config,
|
config.vision_config,
|
||||||
norm_eps=getattr(config, "rms_norm_eps", 1e-6),
|
norm_eps=getattr(config, "rms_norm_eps", 1e-6),
|
||||||
quant_config=self._maybe_ignore_quant_config(quant_config),
|
quant_config=self._maybe_ignore_quant_config(quant_config),
|
||||||
prefix=maybe_prefix(prefix, "visual"),
|
prefix=maybe_prefix(prefix, "visual"),
|
||||||
use_data_parallel=self.use_data_parallel,
|
use_data_parallel=self.use_data_parallel,
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
self.visual = AscendQwen3_VisionTransformer(
|
||||||
|
config.vision_config,
|
||||||
|
norm_eps=getattr(config, "rms_norm_eps", 1e-6),
|
||||||
|
quant_config=self.quant_config,
|
||||||
|
prefix=maybe_prefix(prefix, "visual"),
|
||||||
|
use_data_parallel=self.use_data_parallel,
|
||||||
|
)
|
||||||
|
|||||||
@@ -40,6 +40,8 @@ from vllm.model_executor.models.qwen2_vl import (
|
|||||||
from vllm.model_executor.models.utils import maybe_prefix
|
from vllm.model_executor.models.utils import maybe_prefix
|
||||||
from vllm.multimodal import MULTIMODAL_REGISTRY
|
from vllm.multimodal import MULTIMODAL_REGISTRY
|
||||||
|
|
||||||
|
from vllm_ascend.utils import vllm_version_is
|
||||||
|
|
||||||
MIN_PAD_SIZE = 64 # min_size to pad weight
|
MIN_PAD_SIZE = 64 # min_size to pad weight
|
||||||
MAX_PAD_SIZE = 128 # max_size to pad weight
|
MAX_PAD_SIZE = 128 # max_size to pad weight
|
||||||
|
|
||||||
@@ -343,10 +345,18 @@ class AscendQwen2VLForConditionalGeneration(Qwen2VLForConditionalGeneration):
|
|||||||
|
|
||||||
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
|
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
|
||||||
super().__init__(vllm_config=vllm_config, prefix=prefix)
|
super().__init__(vllm_config=vllm_config, prefix=prefix)
|
||||||
self.visual = AscendQwen2VisionTransformer(
|
if vllm_version_is("0.10.2"):
|
||||||
self.config.vision_config,
|
self.visual = AscendQwen2VisionTransformer(
|
||||||
norm_eps=getattr(self.config, "rms_norm_eps", 1e-6),
|
self.config.vision_config,
|
||||||
quant_config=self._maybe_ignore_quant_config(
|
norm_eps=getattr(self.config, "rms_norm_eps", 1e-6),
|
||||||
vllm_config.quant_config),
|
quant_config=self._maybe_ignore_quant_config(
|
||||||
prefix=maybe_prefix(prefix, "visual"),
|
vllm_config.quant_config),
|
||||||
)
|
prefix=maybe_prefix(prefix, "visual"),
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
self.visual = AscendQwen2VisionTransformer(
|
||||||
|
self.config.vision_config,
|
||||||
|
norm_eps=getattr(self.config, "rms_norm_eps", 1e-6),
|
||||||
|
quant_config=self.vllm_config.quant_config,
|
||||||
|
prefix=maybe_prefix(prefix, "visual"),
|
||||||
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user