Fix attention crash caused by long-context chunking (#117)

Co-authored-by: root <root@rdtest-node1150.bcc-zwlt.baidu.com>
This commit is contained in:
baoqian426
2026-01-17 18:38:23 +08:00
committed by GitHub
parent 71a5a04e0a
commit 2512259944
3 changed files with 37 additions and 6 deletions

View File

@@ -17,7 +17,8 @@ def _custom_import(module_name, globals=None, locals=None, fromlist=(), level=0)
"vllm.v1.sample.ops.topk_topp_sampler": "vllm_kunlun.v1.sample.ops.topk_topp_sampler",
"vllm.model_executor.layers.sampler": "vllm_kunlun.ops.sample.sampler",
"vllm.v1.sample.ops.topk_topp_sampler": "vllm_kunlun.v1.sample.ops.topk_topp_sampler",
"vllm.v1.sample.rejection_sampler": "vllm_kunlun.v1.sample.rejection_sampler"
"vllm.v1.sample.rejection_sampler": "vllm_kunlun.v1.sample.rejection_sampler",
"vllm.attention.ops.merge_attn_states": "vllm_kunlun.ops.attention.merge_attn_states"
}
if module_name in module_mappings: