forked from EngineX-Cambricon/enginex-mlu370-vllm
23 lines
964 B
Plaintext
23 lines
964 B
Plaintext
#include "swap_blocks.mluh"
|
|
|
|
namespace tmo {
|
|
KernelStatus invokeSwapBlocksKernel(const cnnlHandle_t handle,
|
|
void *dst,
|
|
const void *src,
|
|
const int64_t &block_size_in_bytes,
|
|
const cnrtMemTransDir_t &memcpy_type,
|
|
const std::map<int64_t, int64_t> &block_mapping) {
|
|
cnrtQueue_t queue;
|
|
cnnlGetQueue(handle, &queue);
|
|
for (const auto &pair : block_mapping) {
|
|
int64_t src_block_number = pair.first;
|
|
int64_t dst_block_number = pair.second;
|
|
int64_t src_offset = src_block_number * block_size_in_bytes;
|
|
int64_t dst_offset = dst_block_number * block_size_in_bytes;
|
|
cnrtMemcpyAsync((int8_t *)dst + dst_offset, (int8_t *)src + src_offset, block_size_in_bytes,
|
|
queue, memcpy_type);
|
|
}
|
|
return KernelStatus::KERNEL_STATUS_SUCCESS;
|
|
}
|
|
} // namespace tmo
|