[bugfix] Fix mooncake kvpool accuracy issue (#4976)
### What this PR does / why we need it?
The current KVPool has an accuracy issue; see
https://github.com/vllm-project/vllm-ascend/issues/4412. This PR aims to
fix the precision problem without impacting prefill performance.
Note: Due to a bug in ADXL, calling `current_event.synchronize()` may
occasionally hang. This issue will be fixed in CANN version 8.5.RC1.
Before the 8.5.RC1 release, you can resolve this issue by manually
building the master branch of the project at
https://gitcode.com/cann/hixl.
- vLLM version: v0.12.0
- vLLM main:
ad32e3e19c
---------
Signed-off-by: LCAIZJ <leichao139636@163.com>
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
from dataclasses import dataclass
|
||||
from typing import Iterable, List, Optional, Tuple, Union
|
||||
|
||||
import torch
|
||||
from vllm.distributed.kv_transfer.kv_connector.v1.base import \
|
||||
KVConnectorMetadata
|
||||
from vllm.logger import logger
|
||||
@@ -284,6 +285,8 @@ class ReqMeta:
|
||||
|
||||
is_last_chunk: Optional[bool] = None
|
||||
|
||||
current_event: Optional[torch.npu.Event] = None
|
||||
|
||||
@staticmethod
|
||||
def from_request_tracker(
|
||||
tracker: RequestTracker,
|
||||
@@ -375,3 +378,4 @@ class LasyerMultiBlockReqMeta:
|
||||
block_ids: list[int]
|
||||
layer_id: int
|
||||
is_last_chunk: Optional[bool] = True
|
||||
current_event: Optional[torch.npu.Event] = None
|
||||
Reference in New Issue
Block a user