[Build] Add support for Ascend950 chip (#7151)
### What this PR does / why we need it?
This PR adds support for the Ascend950 chip. This includes:
- Updating build scripts (`CMakeLists.txt` and `setup.py`) to recognize
the Ascend950 chip and set appropriate compilation flags.
- Disabling a set of custom operators that are not yet supported on the
Ascend950 hardware target.
- Performing a codebase-wide refactoring that replaces legacy `pipe_barrier()`
calls with the namespaced `AscendC::PipeBarrier<>()` API, for improved code
consistency and adherence to the latest Ascend C API standards.
Ascend950DT end-to-end tests passed (Qwen3-32B-MXFP8) and CI passed.
- vLLM version: v0.16.0
- vLLM main:
4034c3d32e
---------
Signed-off-by: linfeng-yuan <1102311262@qq.com>
This commit is contained in:
@@ -187,7 +187,7 @@ private:
|
||||
}
|
||||
} else {
|
||||
Cast(xDup, xLocal, AscendC::RoundMode::CAST_NONE, maxLoRARank_);
|
||||
pipe_barrier(PIPE_V);
|
||||
AscendC::PipeBarrier<PIPE_V>();
|
||||
|
||||
for (int32_t i = maxLoRARank_; i < NUM_ELEMENTS_PER_REPEAT; i += maxLoRARank_) {
|
||||
for (int32_t j = 0; j < maxLoRARank_; j++) {
|
||||
@@ -219,15 +219,15 @@ private:
|
||||
AscendC::LocalTensor<Y_T> yInLocal = inQueueY_.DeQue<Y_T>();
|
||||
AscendC::LocalTensor<float> yInLocalFP32 = inBufferY_.Get<float>();
|
||||
Cast(yInLocalFP32, yInLocal, AscendC::RoundMode::CAST_NONE, numElements);
|
||||
pipe_barrier(PIPE_V);
|
||||
AscendC::PipeBarrier<PIPE_V>();
|
||||
inQueueY_.FreeTensor(yInLocal);
|
||||
|
||||
Add(yLocal, yLocal, yInLocalFP32, numElements);
|
||||
pipe_barrier(PIPE_V);
|
||||
AscendC::PipeBarrier<PIPE_V>();
|
||||
|
||||
AscendC::LocalTensor<Y_T> yOutLocal = outQueueY_.AllocTensor<Y_T>();
|
||||
Cast(yOutLocal, yLocal, AscendC::RoundMode::CAST_RINT, numElements);
|
||||
pipe_barrier(PIPE_V);
|
||||
AscendC::PipeBarrier<PIPE_V>();
|
||||
|
||||
outQueueY_.EnQue<Y_T>(yOutLocal);
|
||||
}
|
||||
@@ -243,40 +243,40 @@ private:
|
||||
AscendC::LocalTensor<float> wTmpTensor = tmpBufferW_.Get<float>();
|
||||
|
||||
Cast(wTmpTensor, wLocal, AscendC::RoundMode::CAST_NONE, MASK_COUNT, blockReduceRepeatCount, castParams_);
|
||||
pipe_barrier(PIPE_V);
|
||||
AscendC::PipeBarrier<PIPE_V>();
|
||||
inQueueW_.FreeTensor(wLocal);
|
||||
|
||||
Mul(wTmpTensor, xDup, wTmpTensor, MASK_COUNT, blockReduceRepeatCount, dotProductParams_);
|
||||
pipe_barrier(PIPE_V);
|
||||
AscendC::PipeBarrier<PIPE_V>();
|
||||
|
||||
if (maxLoRARank_ == LORA_RANK_8) {
|
||||
BlockReduceSum(yLocal[progress], wTmpTensor, blockReduceRepeatCount, MASK_COUNT,
|
||||
reduceSumParams_.dstRepStride, reduceSumParams_.srcBlkStride, reduceSumParams_.srcRepStride);
|
||||
pipe_barrier(PIPE_V);
|
||||
AscendC::PipeBarrier<PIPE_V>();
|
||||
} else if (maxLoRARank_ == LORA_RANK_16) {
|
||||
BlockReduceSum(wTmpTensor, wTmpTensor, blockReduceRepeatCount, MASK_COUNT,
|
||||
reduceSumParams_.dstRepStride, reduceSumParams_.srcBlkStride, reduceSumParams_.srcRepStride);
|
||||
pipe_barrier(PIPE_V);
|
||||
AscendC::PipeBarrier<PIPE_V>();
|
||||
PairReduceSum(yLocal[progress], wTmpTensor, pairReduceRepeat16, MASK_COUNT,
|
||||
reduceSumParams_.dstRepStride, reduceSumParams_.srcBlkStride, reduceSumParams_.srcRepStride);
|
||||
pipe_barrier(PIPE_V);
|
||||
AscendC::PipeBarrier<PIPE_V>();
|
||||
} else if (maxLoRARank_ == LORA_RANK_32) {
|
||||
BlockReduceSum(wTmpTensor, wTmpTensor, blockReduceRepeatCount, MASK_COUNT,
|
||||
reduceSumParams_.dstRepStride, reduceSumParams_.srcBlkStride, reduceSumParams_.srcRepStride);
|
||||
pipe_barrier(PIPE_V);
|
||||
AscendC::PipeBarrier<PIPE_V>();
|
||||
PairReduceSum(wTmpTensor, wTmpTensor, pairReduceRepeat16, MASK_COUNT,
|
||||
reduceSumParams_.dstRepStride, reduceSumParams_.srcBlkStride, reduceSumParams_.srcRepStride);
|
||||
pipe_barrier(PIPE_V);
|
||||
AscendC::PipeBarrier<PIPE_V>();
|
||||
PairReduceSum(yLocal[progress], wTmpTensor, pairReduceRepeat32, MASK_COUNT,
|
||||
reduceSumParams_.dstRepStride, reduceSumParams_.srcBlkStride, reduceSumParams_.srcRepStride);
|
||||
pipe_barrier(PIPE_V);
|
||||
AscendC::PipeBarrier<PIPE_V>();
|
||||
} else if (maxLoRARank_ == LORA_RANK_64) {
|
||||
BlockReduceSum(wTmpTensor, wTmpTensor, blockReduceRepeatCount, MASK_COUNT,
|
||||
reduceSumParams_.dstRepStride, reduceSumParams_.srcBlkStride, reduceSumParams_.srcRepStride);
|
||||
pipe_barrier(PIPE_V);
|
||||
AscendC::PipeBarrier<PIPE_V>();
|
||||
BlockReduceSum(yLocal[progress], wTmpTensor, pairReduceRepeat16, MASK_COUNT,
|
||||
reduceSumParams_.dstRepStride, reduceSumParams_.srcBlkStride, reduceSumParams_.srcRepStride);
|
||||
pipe_barrier(PIPE_V);
|
||||
AscendC::PipeBarrier<PIPE_V>();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user