[Misc] Upgrade vllm hash to 12_14 (#5000)

### What this PR does / why we need it? ### Does this PR introduce _any_ user-facing change? 1. fix https://github.com/vllm-project/vllm/pull/27938 2. fix https://github.com/vllm-project/vllm/pull/27145: pooling models now support chunked prefill and prefix caching. 3. fix https://github.com/vllm-project/vllm/pull/30181: define the CPU fields in the field config where they really belong. 4. fix https://github.com/vllm-project/vllm/pull/28168: define the CPU fields in the field config where they really belong. 5. fix https://github.com/vllm-project/vllm/pull/30201: some module renames 6. fix https://github.com/vllm-project/vllm/pull/29067: fusedmoe module refactor 7. fix https://github.com/vllm-project/vllm/pull/29066: fusedmoe module refactor 8. fix https://github.com/vllm-project/vllm/pull/29624 ### How was this patch tested? - vLLM version: v0.12.0 - vLLM main: ad32e3e19c --------- Signed-off-by: wangli <wangli858794774@gmail.com>
2025-12-15 19:54:23 +08:00
parent 3b7eb5179f
commit 8d2998d0e4
17 changed files with 167 additions and 1183 deletions
--- a/tests/ut/compilation/test_acl_graph.py
+++ b/tests/ut/compilation/test_acl_graph.py
@@ -803,7 +803,9 @@ class TestPCPDCPGraphParams(TestBase):
            (q_nope, q_pe, k_nope, k_pe, block_table, seq_lens, num_heads,
             scale, num_kv_heads, out, lse))

-        update_mla_attn_dcp_pcp_params(self.update_stream, forward_context, 4)
+        with patch("torch_npu._C._npu_setStream", return_value=None):
+            update_mla_attn_dcp_pcp_params(self.update_stream, forward_context,
+                                           4)

        _mock_graph_task_end.assert_called_once()

@@ -842,6 +844,7 @@ class TestPCPDCPGraphParams(TestBase):
             block_table, 128, actual_seq_lengths_kv, actual_seq_lengths_q,
             out, lse, 2, 0, 0))

-        update_attn_dcp_pcp_params(self.update_stream, forward_context, 4)
+        with patch("torch_npu._C._npu_setStream", return_value=None):
+            update_attn_dcp_pcp_params(self.update_stream, forward_context, 4)

        _mock_graph_task_end.assert_called_once()