[BugFix]Fix precision issue for LoRA feature (#4141)
vLLM version: v0.11.0
vLLM main: vllm-project/vllm
### What this PR does / why we need it?
Fix the precision issue of the LoRA feature in vllm-ascend.
### Does this PR introduce _any_ user-facing change?
### How was this patch tested?
```bash
pytest tests/lora/test_llama_tp.py::test_llama_lora -s
```
<img width="1319" height="879" alt="lora_test"
src="https://github.com/user-attachments/assets/2a0b2325-5b05-4bbc-ac03-a7c9f0ad9d4c"
/>
- vLLM version: v0.12.0
- vLLM main: ad32e3e19c
---------
Signed-off-by: hukongyi <hukongyi@cmbchina.com>
This commit is contained in:
@@ -342,7 +342,7 @@ private:

 // declare all dtype kernel
 BGMV_EXPAND_TYPE_DECLARE(half)
-#if (__CCE_AICORE__ >= 220)
+#if !defined(__CCE_AICORE__) || (__CCE_AICORE__ >= 220)
 BGMV_EXPAND_TYPE_DECLARE(bfloat16_t)
 #endif

@@ -356,8 +356,8 @@ extern void bgmv_expand_impl(AscendType type, void* stream, void* x, void* weigh
         bgmv_expand_half<<<blockDim, nullptr, stream>>>(x, weight, indices, indicesSize, yIn, yOut, batchSize, numTokensPerCore,
                                                         maxLoRARank, outputHiddenDim, sliceOffset, outputFullDim);
     } else if (type == AscendType::BF16) {
-#if (__CCE_AICORE__ >= 220)
+#if !defined(__CCE_AICORE__) || (__CCE_AICORE__ >= 220)
         bgmv_expand_bfloat16_t<<<blockDim, nullptr, stream>>>(x, weight, indices, indicesSize, yIn, yOut, batchSize,
                                                               numTokensPerCore, maxLoRARank, outputHiddenDim,
                                                               sliceOffset, outputFullDim);
 #endif

Reference in New Issue
Block a user