[1/2] Speed up prefill mla attention (#10156)

This commit is contained in:
fzyzcjy
2025-09-09 00:00:33 +08:00
committed by GitHub
parent 2c2b19b18b
commit 0096798ed6
6 changed files with 130 additions and 0 deletions

View File

@@ -723,3 +723,4 @@ std::vector<int64_t> create_greenctx_stream_by_value(int64_t smA, int64_t smB, i
// Declaration only; the definition is not visible in this hunk.
// Takes five tensors by value and returns nothing.
// NOTE(review): presumably stores `k` and `v` into `k_cache` and `v_cache` at
// the positions given by `out_loc` -- confirm against the definition.
void store_kv_cache(at::Tensor k_cache, at::Tensor v_cache, at::Tensor out_loc, at::Tensor k, at::Tensor v);
// Declaration only; the definition is not visible in this hunk.
// `input` is taken by const reference and `output` by non-const reference;
// nothing is returned.
// NOTE(review): presumably copies `input` into `output` on the GPU, and "no_ce"
// likely means "without the copy engine" -- confirm against the definition.
void copy_to_gpu_no_ce(const at::Tensor& input, at::Tensor& output);
// Declaration only; the definition is not visible in this hunk.
// Takes three tensors by value and returns nothing.
// NOTE(review): presumably `k` is the destination that receives `k_nope` and
// `k_rope` concatenated (MLA key layout) -- confirm shapes, dtypes and the
// concatenation axis against the kernel.
// NOTE(review): spelled `torch::Tensor` here while the neighbouring
// declarations use `at::Tensor`.
void concat_mla_k(torch::Tensor k, torch::Tensor k_nope, torch::Tensor k_rope);