[CI] Upgrade vLLM to 20250919 (6d8246aa) and fix several broken issues (#2907)

### What this PR does / why we need it? 1. This PR bumps the vLLM commit to 6d8246aaff. 2. Fix breakage from the upstream change https://github.com/vllm-project/vllm/pull/24548 (abort multi-modal kwargs), so that both vLLM main and `v0.10.2` are supported. 3. Fix the metadata_builder changes introduced by https://github.com/vllm-project/vllm/pull/23693. 4. Fix the `structured_outputs_config` changes introduced by https://github.com/vllm-project/vllm/pull/22772. 5. Fix the `moe_config` changes introduced by https://github.com/vllm-project/vllm/pull/22537. Co-authored-by: MengqingCao <cmq0113@163.com> Co-authored-by: Yikun Jiang <yikunkero@gmail.com> - vLLM version: v0.10.2 - vLLM main: c60e6137f0 --------- Signed-off-by: wangli <wangli858794774@gmail.com> Signed-off-by: MengqingCao <cmq0113@163.com> Co-authored-by: MengqingCao <cmq0113@163.com>
2025-09-20 17:37:57 +08:00
parent 53ecd89e8f
commit 12bcbd02bb
14 changed files with 359 additions and 143 deletions
--- a/tests/e2e/singlecard/test_guided_decoding.py
+++ b/tests/e2e/singlecard/test_guided_decoding.py
@@ -18,12 +18,20 @@
 #
 import json
 import os
+from typing import Any, Dict

 import jsonschema
 import pytest
 import regex as re
+
+from vllm_ascend.utils import vllm_version_is
+
+if vllm_version_is("0.10.2"):
+    from vllm.sampling_params import GuidedDecodingParams, SamplingParams
+else:
+    from vllm.sampling_params import SamplingParams, StructuredOutputsParams
+
 from vllm.outputs import RequestOutput
-from vllm.sampling_params import GuidedDecodingParams, SamplingParams

 from tests.e2e.conftest import VllmRunner

@@ -84,16 +92,29 @@ def sample_json_schema():
@pytest.mark.parametrize("guided_decoding_backend", GuidedDecodingBackend)
 def test_guided_json_completion(guided_decoding_backend: str,
                                sample_json_schema):
-    sampling_params = SamplingParams(
-        temperature=1.0,
-        max_tokens=500,
-        guided_decoding=GuidedDecodingParams(json=sample_json_schema))
-
-    with VllmRunner(
-            MODEL_NAME,
-            seed=0,
-            guided_decoding_backend=guided_decoding_backend,
-    ) as vllm_model:
+    runner_kwargs: Dict[str, Any] = {}
+    if vllm_version_is("0.10.2"):
+        sampling_params = SamplingParams(
+            temperature=1.0,
+            max_tokens=500,
+            guided_decoding=GuidedDecodingParams(json=sample_json_schema))
+        runner_kwargs = {
+            "seed": 0,
+            "guided_decoding_backend": guided_decoding_backend,
+        }
+    else:
+        sampling_params = SamplingParams(
+            temperature=1.0,
+            max_tokens=500,
+            structured_outputs=StructuredOutputsParams(
+                json=sample_json_schema))
+        runner_kwargs = {
+            "seed": 0,
+            "structured_outputs_config": {
+                "backend": guided_decoding_backend
+            },
+        }
+    with VllmRunner(MODEL_NAME, **runner_kwargs) as vllm_model:
        prompts = [
            f"Give an example JSON for an employee profile "
            f"that fits this schema: {sample_json_schema}"
@@ -121,17 +142,29 @@ def test_guided_json_completion(guided_decoding_backend: str,
 def test_guided_regex(guided_decoding_backend: str, sample_regex):
    if guided_decoding_backend == "outlines":
        pytest.skip("Outlines doesn't support regex-based guided decoding.")
+    runner_kwargs: Dict[str, Any] = {}
+    if vllm_version_is("0.10.2"):
+        sampling_params = SamplingParams(
+            temperature=0.8,
+            top_p=0.95,
+            guided_decoding=GuidedDecodingParams(regex=sample_regex))
+        runner_kwargs = {
+            "seed": 0,
+            "guided_decoding_backend": guided_decoding_backend,
+        }
+    else:
+        sampling_params = SamplingParams(
+            temperature=0.8,
+            top_p=0.95,
+            structured_outputs=StructuredOutputsParams(regex=sample_regex))
+        runner_kwargs = {
+            "seed": 0,
+            "structured_outputs_config": {
+                "backend": guided_decoding_backend
+            },
+        }

-    sampling_params = SamplingParams(
-        temperature=0.8,
-        top_p=0.95,
-        guided_decoding=GuidedDecodingParams(regex=sample_regex))
-
-    with VllmRunner(
-            MODEL_NAME,
-            seed=0,
-            guided_decoding_backend=guided_decoding_backend,
-    ) as vllm_model:
+    with VllmRunner(MODEL_NAME, **runner_kwargs) as vllm_model:
        prompts = [
            f"Give an example IPv4 address with this regex: {sample_regex}"
        ] * 2
--- a/tests/ut/ops/test_fused_ops.py
+++ b/tests/ut/ops/test_fused_ops.py
@@ -231,6 +231,9 @@ class MockFusedMoEMethod(FusedMoEMethodBase):
              expert_weights: torch.Tensor) -> torch.Tensor:
        pass

+    def get_fused_moe_quant_config(self, layer: torch.nn.Module):
+        pass
+

 class TestAscendFusedMoe:

--- a/tests/ut/torchair/ops/test_torchair_fused_moe.py
+++ b/tests/ut/torchair/ops/test_torchair_fused_moe.py
@@ -197,6 +197,9 @@ class MockFusedMoEMethod(FusedMoEMethodBase):
              expert_weights: torch.Tensor) -> torch.Tensor:
        pass

+    def get_fused_moe_quant_config(self, layer: torch.nn.Module):
+        pass
+

 class TestTorchairAscendFusedMoe: