Fix attention crash caused by long-context chunking (#117)
Co-authored-by: root <root@rdtest-node1150.bcc-zwlt.baidu.com>
This commit is contained in:
26
vllm_kunlun/ops/attention/merge_attn_states.py
Normal file
26
vllm_kunlun/ops/attention/merge_attn_states.py
Normal file
@@ -0,0 +1,26 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from typing import Optional

import torch
import xtorch_ops
from vllm.platforms import current_platform
def merge_attn_states(
    output: torch.Tensor,
    prefix_output: torch.Tensor,
    prefix_lse: torch.Tensor,
    suffix_output: torch.Tensor,
    suffix_lse: torch.Tensor,
    output_lse: Optional[torch.Tensor] = None,
) -> None:
    """Merge chunked (prefix/suffix) attention partial results into ``output``.

    Thin wrapper around the Kunlun ``xtorch_ops.attention_merge_stage``
    kernel, mirroring vLLM's ``merge_attn_states`` interface.

    Args:
        output: Destination tensor for the merged attention output.
        prefix_output: Attention output computed over the prefix chunk.
        prefix_lse: Log-sum-exp values for the prefix chunk.
        suffix_output: Attention output computed over the suffix chunk.
        suffix_lse: Log-sum-exp values for the suffix chunk.
        output_lse: Optional destination for the merged log-sum-exp values.

    Returns:
        None. Results are written into ``output`` (and ``output_lse`` when
        provided) by the backend kernel.
    """
    # NOTE(review): the kernel presumably writes its result into ``output``
    # in place (``output`` is passed as an argument) — confirm against the
    # xtorch_ops API. The function is annotated ``-> None`` to match the
    # upstream vLLM contract, so the backend's return value (if any) is
    # deliberately not propagated to callers.
    xtorch_ops.attention_merge_stage(
        prefix_output,
        prefix_lse,
        suffix_output,
        suffix_lse,
        output,
        output_lse,
    )
|
||||
Reference in New Issue
Block a user