[CI] Upgrade vllm version - 0925 (#3167)
Upgrade vLLM to the newest commit. 1. Remove the useless function get_state_cls; it has already been removed from vLLM (e6750d0b18). 2. Fix unit tests broken by 6160ba4151. - vLLM version: v0.10.2 - vLLM main: b1068903fd --------- Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
2
.github/workflows/format_pr_body.yaml
vendored
2
.github/workflows/format_pr_body.yaml
vendored
@@ -36,7 +36,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Get vLLM version
|
- name: Get vLLM version
|
||||||
run: |
|
run: |
|
||||||
VLLM_COMMIT=b1068903fdca26cf6b4a1a51a32c3365ce3ac636
|
VLLM_COMMIT=52d0cb845866869d587fc013a7c59e60a86ebcf2
|
||||||
echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
|
echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
|
||||||
|
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
|
|||||||
6
.github/workflows/vllm_ascend_test.yaml
vendored
6
.github/workflows/vllm_ascend_test.yaml
vendored
@@ -42,7 +42,7 @@ jobs:
|
|||||||
lint:
|
lint:
|
||||||
uses: ./.github/workflows/pre-commit.yml
|
uses: ./.github/workflows/pre-commit.yml
|
||||||
with:
|
with:
|
||||||
vllm: b1068903fdca26cf6b4a1a51a32c3365ce3ac636
|
vllm: 52d0cb845866869d587fc013a7c59e60a86ebcf2
|
||||||
|
|
||||||
changes:
|
changes:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
@@ -83,7 +83,7 @@ jobs:
|
|||||||
VLLM_USE_MODELSCOPE: True
|
VLLM_USE_MODELSCOPE: True
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
vllm_version: [b1068903fdca26cf6b4a1a51a32c3365ce3ac636, v0.10.2]
|
vllm_version: [52d0cb845866869d587fc013a7c59e60a86ebcf2, v0.10.2]
|
||||||
steps:
|
steps:
|
||||||
- name: Install packages
|
- name: Install packages
|
||||||
run: |
|
run: |
|
||||||
@@ -138,7 +138,7 @@ jobs:
|
|||||||
name: e2e-light
|
name: e2e-light
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
vllm_version: [b1068903fdca26cf6b4a1a51a32c3365ce3ac636, v0.10.2]
|
vllm_version: [52d0cb845866869d587fc013a7c59e60a86ebcf2, v0.10.2]
|
||||||
# Note (yikun): If CI resource are limited we can split job into two chain jobs
|
# Note (yikun): If CI resource are limited we can split job into two chain jobs
|
||||||
needs: [lint, changes]
|
needs: [lint, changes]
|
||||||
# only trigger e2e test after lint passed and the change is e2e related with pull request.
|
# only trigger e2e test after lint passed and the change is e2e related with pull request.
|
||||||
|
|||||||
2
.github/workflows/vllm_ascend_test_full.yaml
vendored
2
.github/workflows/vllm_ascend_test_full.yaml
vendored
@@ -68,7 +68,7 @@ jobs:
|
|||||||
name: e2e-full
|
name: e2e-full
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
vllm_version: [b1068903fdca26cf6b4a1a51a32c3365ce3ac636, v0.10.2]
|
vllm_version: [52d0cb845866869d587fc013a7c59e60a86ebcf2, v0.10.2]
|
||||||
needs: [changes]
|
needs: [changes]
|
||||||
if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
|
if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
|
||||||
uses: ./.github/workflows/_e2e_test.yaml
|
uses: ./.github/workflows/_e2e_test.yaml
|
||||||
|
|||||||
@@ -7,8 +7,7 @@ from vllm_ascend.attention.attention_v1 import (AscendAttentionBackend,
|
|||||||
AscendAttentionBackendImpl,
|
AscendAttentionBackendImpl,
|
||||||
AscendAttentionMetadataBuilder,
|
AscendAttentionMetadataBuilder,
|
||||||
AscendAttentionState,
|
AscendAttentionState,
|
||||||
AscendMetadata,
|
AscendMetadata)
|
||||||
CommonAttentionState)
|
|
||||||
from vllm_ascend.attention.utils import AscendCommonAttentionMetadata
|
from vllm_ascend.attention.utils import AscendCommonAttentionMetadata
|
||||||
|
|
||||||
|
|
||||||
@@ -25,10 +24,6 @@ class TestAscendAttentionBackend(TestBase):
|
|||||||
self.assertEqual(AscendAttentionBackend.get_metadata_cls(),
|
self.assertEqual(AscendAttentionBackend.get_metadata_cls(),
|
||||||
AscendMetadata)
|
AscendMetadata)
|
||||||
|
|
||||||
def test_get_state_cls(self):
|
|
||||||
self.assertEqual(AscendAttentionBackend.get_state_cls(),
|
|
||||||
CommonAttentionState)
|
|
||||||
|
|
||||||
def test_get_builder_cls(self):
|
def test_get_builder_cls(self):
|
||||||
self.assertEqual(AscendAttentionBackend.get_builder_cls(),
|
self.assertEqual(AscendAttentionBackend.get_builder_cls(),
|
||||||
AscendAttentionMetadataBuilder)
|
AscendAttentionMetadataBuilder)
|
||||||
|
|||||||
@@ -201,6 +201,8 @@ def default_moe_config():
|
|||||||
def moe_method(mock_dist_env):
|
def moe_method(mock_dist_env):
|
||||||
moe = MagicMock()
|
moe = MagicMock()
|
||||||
moe.moe_parallel_config.return_value = MagicMock(ep_size=4)
|
moe.moe_parallel_config.return_value = MagicMock(ep_size=4)
|
||||||
|
moe.moe_parallel_config.use_ep = False
|
||||||
|
moe.moe_parallel_config.dp_size = 1
|
||||||
return AscendUnquantizedFusedMoEMethod(moe)
|
return AscendUnquantizedFusedMoEMethod(moe)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -153,6 +153,8 @@ def default_moe_config():
|
|||||||
def moe_method(mock_dist_env):
|
def moe_method(mock_dist_env):
|
||||||
moe = MagicMock()
|
moe = MagicMock()
|
||||||
moe.moe_parallel_config.return_value = MagicMock(ep_size=4)
|
moe.moe_parallel_config.return_value = MagicMock(ep_size=4)
|
||||||
|
moe.moe_parallel_config.use_ep = False
|
||||||
|
moe.moe_parallel_config.dp_size = 1
|
||||||
return TorchairAscendUnquantizedFusedMoEMethod(moe)
|
return TorchairAscendUnquantizedFusedMoEMethod(moe)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -24,7 +24,6 @@ import torch.nn as nn
|
|||||||
import torch_npu
|
import torch_npu
|
||||||
from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
|
from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
|
||||||
AttentionLayer, AttentionType)
|
AttentionLayer, AttentionType)
|
||||||
from vllm.attention.backends.utils import CommonAttentionState
|
|
||||||
from vllm.config import VllmConfig
|
from vllm.config import VllmConfig
|
||||||
from vllm.forward_context import ForwardContext, get_forward_context
|
from vllm.forward_context import ForwardContext, get_forward_context
|
||||||
from vllm.utils import cdiv, direct_register_custom_op
|
from vllm.utils import cdiv, direct_register_custom_op
|
||||||
@@ -56,10 +55,6 @@ class AscendAttentionBackend(AttentionBackend):
|
|||||||
def get_metadata_cls() -> Type["AscendMetadata"]:
|
def get_metadata_cls() -> Type["AscendMetadata"]:
|
||||||
return AscendMetadata
|
return AscendMetadata
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def get_state_cls() -> Type["CommonAttentionState"]:
|
|
||||||
return CommonAttentionState
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_builder_cls() -> type["AscendAttentionMetadataBuilder"]:
|
def get_builder_cls() -> type["AscendAttentionMetadataBuilder"]:
|
||||||
return AscendAttentionMetadataBuilder
|
return AscendAttentionMetadataBuilder
|
||||||
|
|||||||
Reference in New Issue
Block a user