Upgrade to new vllm commit (#3719)

### What this PR does / why we need it?
Upgrade to new vllm commit:
c9461e05a4

- Fix many imports (see the version-gated import shim sketched after this list) broken by
https://github.com/vllm-project/vllm/pull/26908
- Fix the ```sha256``` import broken by
https://github.com/vllm-project/vllm/pull/27169
- Remove ```SchedulerConfig.send_delta_data```, dropped upstream in
https://github.com/vllm-project/vllm/pull/27142
- Fix ```FusedMoE``` for the dual-stream execution introduced in
https://github.com/vllm-project/vllm/pull/26440
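
All of the import fixes follow the same pattern, visible in the diffs below: keep the old ```vllm.utils``` path when running against v0.11.0, and use the new split-out submodules otherwise. A minimal sketch of that shim, using the ```vllm_version_is``` helper from ```vllm_ascend.utils``` (the symbols shown are the ones actually touched in this PR):

```python
# Minimal sketch of the version-gated import shim applied across the
# touched test files. vllm_version_is() comes from vllm_ascend.utils.
from vllm_ascend.utils import vllm_version_is

if vllm_version_is("0.11.0"):
    # v0.11.0: helpers still live directly in the vllm.utils module.
    from vllm.utils import GiB_bytes, get_open_port
else:
    # Newer vLLM main: vllm.utils is split into topical submodules.
    from vllm.utils.mem_constants import GiB_bytes
    from vllm.utils.network_utils import get_open_port
```

This keeps vllm-ascend importable against both the pinned v0.11.0 release and current vLLM main without forking the test code.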

### Does this PR introduce _any_ user-facing change?
N/A

### How was this patch tested?
CI passed with newly added and existing tests.


- vLLM version: v0.11.0rc3
- vLLM main: 17c540a993

---------

Signed-off-by: MengqingCao <cmq0113@163.com>
Signed-off-by: Icey <1790571317@qq.com>
Co-authored-by: MengqingCao <cmq0113@163.com>
Commit: d9cdc65854 (parent: 226f832c0b)
Committed by: Icey (via GitHub)
Date: 2025-10-25 15:36:32 +08:00
37 changed files with 229 additions and 71 deletions


```diff
@@ -45,7 +45,6 @@ from vllm.inputs import TextPrompt
 from vllm.outputs import RequestOutput
 from vllm.platforms import current_platform
 from vllm.transformers_utils.utils import maybe_model_redirect
-from vllm.utils import get_open_port
 
 from tests.e2e.model_utils import (TokensTextLogprobs,
                                    TokensTextLogprobsPromptLogprobs)
@@ -55,6 +54,12 @@ from vllm_ascend.ascend_config import clear_ascend_config
 # we not explicitly patch here, some of them might be effectiveless
 # in pytest scenario
 from vllm_ascend.utils import adapt_patch  # noqa E402
+from vllm_ascend.utils import vllm_version_is
+
+if vllm_version_is("0.11.0"):
+    from vllm.utils import get_open_port
+else:
+    from vllm.utils.network_utils import get_open_port
 
 adapt_patch(True)
 adapt_patch(False)
```


```diff
@@ -19,9 +19,14 @@ from typing import Any
 import openai
 import pytest
-from vllm.utils import get_open_port
 
 from tests.e2e.conftest import RemoteOpenAIServer
+from vllm_ascend.utils import vllm_version_is
+
+if vllm_version_is("0.11.0"):
+    from vllm.utils import get_open_port
+else:
+    from vllm.utils.network_utils import get_open_port
 
 MODELS = [
     "Qwen/Qwen3-30B-A3B",
```


```diff
@@ -18,10 +18,15 @@ from typing import Any
 import openai
 import pytest
-from vllm.utils import get_open_port
 
 from tests.e2e.conftest import RemoteOpenAIServer
 from tools.aisbench import run_aisbench_cases
+from vllm_ascend.utils import vllm_version_is
+
+if vllm_version_is("0.11.0"):
+    from vllm.utils import get_open_port
+else:
+    from vllm.utils.network_utils import get_open_port
 
 MODELS = [
     "Qwen/Qwen3-32B",
```


```diff
@@ -21,11 +21,16 @@ import gc
 import torch
 from vllm import SamplingParams
-from vllm.utils import GiB_bytes
 
 from tests.e2e.conftest import VllmRunner
 from tests.e2e.utils import fork_new_process_for_each_test
 from vllm_ascend.device_allocator.camem import CaMemAllocator
+from vllm_ascend.utils import vllm_version_is
+
+if vllm_version_is("0.11.0"):
+    from vllm.utils import GiB_bytes
+else:
+    from vllm.utils.mem_constants import GiB_bytes
 
 
 @fork_new_process_for_each_test
```