[fix]: fix precision issue in dispatch_ffn_combine_bf16 and remove redundant sync (#7198)
### What this PR does / why we need it?
Fix the precision issue in the dispatch_ffn_combine_bf16 operator.
Remove redundant synchronization operations in the dispatch_ffn_combine
operator.
- vLLM version: v0.16.0
- vLLM main:
4034c3d32e
---------
Signed-off-by: guanguan0308 <1546542263@qq.com>
This commit is contained in:
@@ -391,7 +391,6 @@ private:
|
||||
uint16_t syncgmmIdx = 0;
|
||||
AscendC::CrossCoreWaitFlag<0x2>(syncgmmIdx / CROSS_CORE_FLAG_MAX_SET_COUNT); // Wait for AIV to finish cumsum for matmul
|
||||
syncgmmIdx++;
|
||||
AscendC::PipeBarrier<PIPE_ALL>();
|
||||
|
||||
for (uint32_t groupIdx = 0; groupIdx < params.expertPerRank; ++groupIdx) {
|
||||
uint32_t currentM = cumsumMM((params.EP - 1) * params.expertPerRank + groupIdx);
|
||||
@@ -405,7 +404,6 @@ private:
|
||||
int32_t arrayGroupIdx = params.listLen == 1 ? 0 : groupIdx;
|
||||
gmB1.SetGlobalBuffer(reinterpret_cast<__gm__ ElementB *>(GetTensorAddr<int8_t>(arrayGroupIdx, params.ptrB1)));
|
||||
gmS.SetGlobalBuffer(reinterpret_cast<__gm__ ElementScale *>(GetTensorAddr<int64_t>(arrayGroupIdx, params.ptrScale1)));
|
||||
AscendC::PipeBarrier<PIPE_ALL>();
|
||||
if (currentM <= L1TileShape::M) {
|
||||
gmB1.SetL2CacheHint(AscendC::CacheMode::CACHE_MODE_DISABLE);
|
||||
}
|
||||
@@ -493,8 +491,6 @@ private:
|
||||
|
||||
uint32_t startCoreIdx = 0;
|
||||
|
||||
AscendC::PipeBarrier<PIPE_ALL>();
|
||||
|
||||
int64_t preCurrentmSum = 0;
|
||||
int32_t syncLoopIdx = -1;
|
||||
uint32_t lastDequantExpertNum = params.expertPerRank;
|
||||
@@ -503,8 +499,6 @@ private:
|
||||
lastDequantExpertNum = params.expertPerRank - params.epilogueGranularity;
|
||||
}
|
||||
|
||||
AscendC::PipeBarrier<PIPE_ALL>();
|
||||
|
||||
for (uint32_t groupIdx = 0; groupIdx < params.expertPerRank; ++groupIdx) {
|
||||
uint32_t currentM = cumsumMM((params.EP - 1) * params.expertPerRank + groupIdx);
|
||||
if (preCurrentmSum >= params.maxOutputSize) {
|
||||
|
||||
Reference in New Issue
Block a user