[Lint] Fix mypy issue to make CI happy (#6272)
### What this PR does / why we need it?
The variables `self.prefiller_heap` and `self.decoder_heap` are used as
`List[tuple[float, int, ServerState]]` but are defined as `List[tuple[int,
int, ServerState]]`, which causes the mypy check to fail; see
https://github.com/vllm-project/vllm-ascend/actions/runs/21351411010/job/61448739554?pr=6265
### Does this PR introduce _any_ user-facing change?
### How was this patch tested?
- vLLM version: v0.14.1
- vLLM main:
d68209402d
Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
@@ -209,8 +209,8 @@ class ProxyState:
|
||||
# Initialize priority queues for efficient server selection
|
||||
# Each entry is (priority_score, server_index, server_reference)
|
||||
# Lower priority score = higher priority (less loaded)
|
||||
self.prefiller_heap = [(0, i, server) for i, server in enumerate(self.prefillers)]
|
||||
self.decoder_heap = [(0, i, server) for i, server in enumerate(self.decoders)]
|
||||
self.prefiller_heap = [(0.0, i, server) for i, server in enumerate(self.prefillers)]
|
||||
self.decoder_heap = [(0.0, i, server) for i, server in enumerate(self.decoders)]
|
||||
heapq.heapify(self.prefiller_heap)
|
||||
heapq.heapify(self.decoder_heap)
|
||||
|
||||
@@ -221,7 +221,7 @@ class ProxyState:
|
||||
priority = server.active_tokens + server.active_kv_cache * 0.3
|
||||
# Remove old entry and add new one
|
||||
self.prefiller_heap = [(p, i, s) for p, i, s in self.prefiller_heap if i != server_idx]
|
||||
heapq.heappush(self.prefiller_heap, (priority, server_idx, server)) # type: ignore
|
||||
heapq.heappush(self.prefiller_heap, (priority, server_idx, server))
|
||||
|
||||
def _update_decoder_priority(self, server_idx: int):
|
||||
"""Update the priority of a decoder server in the heap."""
|
||||
@@ -229,7 +229,7 @@ class ProxyState:
|
||||
priority = server.active_tokens
|
||||
# Remove old entry and add new one
|
||||
self.decoder_heap = [(p, i, s) for p, i, s in self.decoder_heap if i != server_idx]
|
||||
heapq.heappush(self.decoder_heap, (priority, server_idx, server)) # type: ignore
|
||||
heapq.heappush(self.decoder_heap, (priority, server_idx, server))
|
||||
|
||||
def abort_prefiller_request(self, server_idx: int, request_id): # Changed to synchronous
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user