### What this PR does / why we need it?
1. Add the implementation of the normal-mode Aclnn operators: MoeCombineNormal,
MoeDispatchNormal, NotifyDispatch, and DispatchLayout.
- MoeCombineNormal: Implements the combine logic within MoE operations.
- MoeDispatchNormal: Implements the dispatch logic within MoE
operations.
- NotifyDispatch: Exchanges topk_idx information among different ranks
to calculate the device memory required for the dispatch stage.
- DispatchLayout: Calculates the device memory layout information needed
by the dispatch stage.
2. Provide PyTorch interfaces for the normal-mode operators
(get_dispatch_layout, dispatch_prefill, and combine_prefill) to be used for
MoE communication during the prefill stage in vLLM; an illustrative
registration sketch follows this list.
- get_dispatch_layout: Calculates the device memory layout information for
the dispatch operator; it is called before dispatch_prefill.
- dispatch_prefill: Initiates the dispatch operation.
- combine_prefill: Initiates the combine operation.
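
Below is a minimal sketch of how these interfaces might be registered as PyTorch custom ops. The namespace `vllm_ascend_moe` and every argument/return signature are assumptions made for illustration; only the three operator names and their call order come from this PR.

```cpp
#include <torch/library.h>

// Hypothetical schemas; the intended call order is get_dispatch_layout,
// then dispatch_prefill, then combine_prefill.
TORCH_LIBRARY(vllm_ascend_moe, ops)
{
    // Computes device memory layout info; called before dispatch_prefill.
    ops.def("get_dispatch_layout(Tensor topk_idx, int num_experts)"
            " -> (Tensor, Tensor)");
    // Initiates the dispatch operation for the prefill stage.
    ops.def("dispatch_prefill(Tensor hidden_states, Tensor topk_idx,"
            " Tensor layout) -> (Tensor, Tensor)");
    // Initiates the combine operation that merges expert outputs back.
    ops.def("combine_prefill(Tensor expert_output, Tensor layout) -> Tensor");
}
```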
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
The functionality has been validated locally with a Qwen model. Test cases
will be added once multi-NPU support in the CI pipeline is finalized.
- vLLM version: v0.12.0
- vLLM main: ad32e3e19c
Signed-off-by: shiro-zzzz <zhangdianhao@huawei.com>
The torch binding utility header included in this PR:

```cpp
#pragma once

#include "kernels/types.h"

#include <c10/core/ScalarType.h>
#include <Python.h>

#include <exception>
#include <string>

#define _CONCAT(A, B) A##B
#define CONCAT(A, B) _CONCAT(A, B)

#define _STRINGIFY(A) #A
#define STRINGIFY(A) _STRINGIFY(A)

// A version of the TORCH_LIBRARY macro that expands the NAME, i.e. so NAME
// could be a macro instead of a literal token.
#define TORCH_LIBRARY_EXPAND(NAME, MODULE) TORCH_LIBRARY(NAME, MODULE)

// A version of the TORCH_LIBRARY_IMPL macro that expands the NAME, i.e. so NAME
// could be a macro instead of a literal token.
#define TORCH_LIBRARY_IMPL_EXPAND(NAME, DEVICE, MODULE) \
    TORCH_LIBRARY_IMPL(NAME, DEVICE, MODULE)

// REGISTER_EXTENSION allows the shared library to be loaded and initialized
// via Python's import statement.
#define REGISTER_EXTENSION(NAME)                                                    \
    PyMODINIT_FUNC CONCAT(PyInit_, NAME)() {                                        \
        static struct PyModuleDef module = {PyModuleDef_HEAD_INIT,                  \
                                            STRINGIFY(NAME), nullptr, 0, nullptr};  \
        return PyModule_Create(&module);                                            \
    }

class TorchBindException : public std::exception
{
private:
    std::string message = {};

public:
    explicit TorchBindException(const char *name, const char *file, const int line, const std::string &error)
    {
        message = std::string("Failed: ") + name + " error " + file + ":" + std::to_string(line) +
                  " error message or error code is '" + error + "'";
    }

    const char *what() const noexcept override
    {
        return message.c_str();
    }
};

#define TORCH_BIND_ASSERT(cond)                                               \
    do {                                                                      \
        if (not(cond)) {                                                      \
            throw TorchBindException("Assertion", __FILE__, __LINE__, #cond); \
        }                                                                     \
    } while (0)
```
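
A short usage sketch of the macros above follows; the extension name `_moe_C`, the header path `torch_binding.h`, and the `check_rank` op are made up for illustration.

```cpp
#include <torch/library.h>

#include "torch_binding.h"  // assumed path of the header above

// The extension name is kept in a macro, which is exactly the case the
// *_EXPAND / CONCAT / STRINGIFY helpers exist for.
#define EXTENSION_NAME _moe_C

// Example op body guarded by TORCH_BIND_ASSERT, which throws
// TorchBindException with file/line context instead of aborting the process.
void check_rank(int64_t rank, int64_t world_size)
{
    TORCH_BIND_ASSERT(rank >= 0 && rank < world_size);
}

TORCH_LIBRARY_EXPAND(EXTENSION_NAME, ops)
{
    ops.def("check_rank(int rank, int world_size) -> ()");
    ops.impl("check_rank", &check_rank);
}

// Expands to PyInit__moe_C, so `import _moe_C` loads this shared library.
REGISTER_EXTENSION(EXTENSION_NAME)
```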