[Bugfix] Follow vLLM Qwen-Moe/VL and KV Connector change to fix broken CI (#2181)

### What this PR does / why we need it? This PR fixes the broken CI: 1. Adapt to vLLM commit ee2eb6ecd8, which fused the gate and up projections in the vision MLP; this improves performance by removing one matrix multiplication. To follow that change, this PR does the following: - Specifies that the two linear layers are fused as `mlp.gate_up_proj` when loading the weights. - Uses a SiluAndMul activation function. 2. Adapt to vLLM commit aefeea0fde: update the ModelRunnerOutput parameters to match its changes. 3. Follow the upstream change in [vllm-commit](https://github.com/vllm-project/vllm/pull/20815/files#diff-3ffb829a39ab2b3e4706aa28f5e476815f36c3a87b98d6a66514ebedc8f3ffb4R354-R356) to fix Qwen MoE. ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.10.0 - vLLM main: fed5849d3f --------- Signed-off-by: wangli <wangli858794774@gmail.com>
2025-08-04 21:37:50 +08:00
parent e38fab011d
commit ad366bf908
8 changed files with 137 additions and 56 deletions
--- a/vllm_ascend/worker/worker_v1.py
+++ b/vllm_ascend/worker/worker_v1.py
@@ -209,12 +209,27 @@ class NPUWorker(WorkerBase):
            if not has_kv_transfer_group():
                return None

-            new_output = EMPTY_MODEL_RUNNER_OUTPUT
-            if output.finished_sending or output.finished_recving:
-                new_output = copy.copy(new_output)
-                new_output.finished_sending = output.finished_sending
-                new_output.finished_recving = output.finished_recving
-            output = new_output
+            is_legacy = vllm_version_is("0.10.0")
+
+            if is_legacy:
+                finished_sending = output.finished_sending
+                finished_recving = output.finished_recving
+            else:
+                kv_connector_output = output.kv_connector_output
+                finished_sending = kv_connector_output.finished_sending
+                finished_recving = kv_connector_output.finished_recving
+
+            if not finished_sending and not finished_recving:
+                return EMPTY_MODEL_RUNNER_OUTPUT
+
+            new_output = copy.copy(EMPTY_MODEL_RUNNER_OUTPUT)
+
+            if is_legacy:
+                new_output.finished_sending = finished_sending
+                new_output.finished_recving = finished_recving
+            else:
+                new_output.kv_connector_output = kv_connector_output
+            return new_output

        assert isinstance(output, ModelRunnerOutput)
        return output