forked from EngineX-Cambricon/enginex-mlu370-vllm
add ops
This commit is contained in:
34
torch_mlu_ops-v1.3.2/csrc/kernels/preload.mluh
Normal file
34
torch_mlu_ops-v1.3.2/csrc/kernels/preload.mluh
Normal file
@@ -0,0 +1,34 @@
|
||||
/*************************************************************************
|
||||
* Copyright (C) [2023-2024] by Cambricon, Inc.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*************************************************************************/
|
||||
#ifndef CSRC_KERNELS_PRELOAD_MLUH_
|
||||
#define CSRC_KERNELS_PRELOAD_MLUH_
|
||||
|
||||
#include "cnnl.h"
|
||||
#include "kernel_utils.h"
|
||||
namespace tmo {
|
||||
/**
 * @brief Loads the weights of the FFN or self-attention that is about to be
 *        computed into the LLC in advance. Intended for the case where tp is
 *        greater than 1, so that the preload happens while the reduce-sum is
 *        executing.
 *
 *        NOTE(review): "tp" presumably means tensor-parallel degree and "LLC"
 *        presumably means last-level cache -- confirm against the implementation.
 *
 * @param queue: The MLU queue used for this call.
 * @param filter_ptr: Input. Pointer to the MLU memory that stores the weights of
 *                    the FFN or self-attention.
 * @param filter_size: The size of the FFN or self-attention weights.
 *                     NOTE(review): the unit (bytes vs. elements) is not stated
 *                     in this header -- confirm against the implementation.
 * @param preload_size: The size of the portion of the weights to preload.
 *                      NOTE(review): presumably expected to be no larger than
 *                      filter_size and in the same unit -- confirm.
 * @return A KernelStatus value. The type is not declared in this header
 *         (presumably it comes from kernel_utils.h); see its definition for the
 *         possible values.
 * @note The weights of the FFN or self-attention must be stored contiguously
 *       starting at filter_ptr.
 */
|
||||
KernelStatus invokePreload(cnrtQueue_t queue,
|
||||
void *filter_ptr,
|
||||
size_t filter_size,
|
||||
size_t preload_size);
|
||||
} // namespace tmo
|
||||
|
||||
#endif // CSRC_KERNELS_PRELOAD_MLUH_
|
||||
Reference in New Issue
Block a user