enginex-mlu370-vllm/torch_mlu_ops-v1.3.2/csrc/kernels/preload.mluh

/*************************************************************************
 * Copyright (C) [2023-2024] by Cambricon, Inc.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *************************************************************************/
#ifndef CSRC_KERNELS_PRELOAD_MLUH_
#define CSRC_KERNELS_PRELOAD_MLUH_

#include "cnnl.h"
#include "kernel_utils.h"
namespace tmo {
/**
 * @brief Prefetch ffn / self-attention weights into the LLC ahead of their use.
 *
 * Intended for the case where tp is greater than 1: while reducesum is
 * executing, the weights required by the upcoming ffn or self-attention
 * computation are loaded into the LLC in advance.
 *
 * @param queue         The MLU queue.
 * @param filter_ptr    Input. Pointer to the MLU memory holding the ffn or
 *                      self-attention weights.
 * @param filter_size   Size of the ffn or self-attention weights.
 *                      NOTE(review): presumably in bytes -- confirm against the
 *                      implementation.
 * @param preload_size  Size of the portion of the weights to preload.
 *                      NOTE(review): presumably the same unit as filter_size --
 *                      confirm.
 * @return A KernelStatus value. NOTE(review): the status codes are declared
 *         outside this header (presumably in kernel_utils.h) -- confirm.
 * @note The ffn or self-attention weights must be stored contiguously in
 *       filter_ptr.
 */
KernelStatus invokePreload(
    cnrtQueue_t queue, void *filter_ptr, size_t filter_size, size_t preload_size);
}  // namespace tmo

#endif  // CSRC_KERNELS_PRELOAD_MLUH_