[CustomOp] support TensorList for dispatchFFNCombine (#5665)
### What this PR does / why we need it?
Add TensorList support to dispatch_ffn_combine so that the operator can be adapted to EPLB.
### Does this PR introduce _any_ user-facing change?
N/A
### How was this patch tested?
Single Operator Testing
- vLLM version: v0.13.0
- vLLM main: 2f4e6548ef
---------
Signed-off-by: lhchg <lhao_cheng@163.com>
Co-authored-by: lihaocheng <lihaosheng1@h-partners.com>
This commit is contained in:
@@ -100,6 +100,7 @@ private:
|
||||
int32_t expertPerRank;
|
||||
int32_t maxOutputSize;
|
||||
int32_t EP;
|
||||
int32_t listLen;
|
||||
|
||||
optiling::MoeInitRoutingQuantV2TilingData moeInitRoutingQuantV2TilingData;
|
||||
uint64_t initRoutingQuantTilingKey;
|
||||
@@ -138,6 +139,7 @@ __aicore__ inline void DispatchFFNCombine<TemplateMMA2ACFunc>::Init(GM_ADDR xGM,
|
||||
topK = tilingData.dispatchFFNCombineInfo.topK;
|
||||
expertPerRank = tilingData.dispatchFFNCombineInfo.expertPerRank;
|
||||
maxOutputSize = tilingData.dispatchFFNCombineInfo.maxOutputSize;
|
||||
listLen = tilingData.dispatchFFNCombineInfo.listLen;
|
||||
|
||||
m0 = tilingData.cocTiling.m0;
|
||||
k0 = tilingData.cocTiling.k0;
|
||||
@@ -254,7 +256,7 @@ __aicore__ inline void DispatchFFNCombine<TemplateMMA2ACFunc>::Process()
|
||||
uint32_t epilogueGranularity = expertPerRank - 1;
|
||||
|
||||
typename MatmulKernel::Params params{
|
||||
problemShape, static_cast<uint32_t>(EP), static_cast<uint32_t>(expertPerRank), static_cast<uint32_t>(maxOutputSize),
|
||||
problemShape, static_cast<uint32_t>(EP), static_cast<uint32_t>(listLen), static_cast<uint32_t>(expertPerRank), static_cast<uint32_t>(maxOutputSize),
|
||||
static_cast<uint32_t>(rank), static_cast<uint32_t>(rankSize),
|
||||
static_cast<uint32_t>(topK), initRoutingQuantTilingKey,
|
||||
epilogueCoreNum, epilogueGranularity,
|
||||
|
||||
Reference in New Issue
Block a user