[Misc] Fix main lint to make CI happy (#7204)
### What this PR does / why we need it?
Fix the lint failure on main that was introduced by merging a previous PR.
### Does this PR introduce _any_ user-facing change?
### How was this patch tested?
- vLLM version: v0.16.0
- vLLM main:
4034c3d32e
---------
Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
1
.github/workflows/_pre_commit.yml
vendored
1
.github/workflows/_pre_commit.yml
vendored
@@ -76,7 +76,6 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
PYTHONPATH="$PYTHONPATH:$(pwd)/vllm-empty"
|
PYTHONPATH="$PYTHONPATH:$(pwd)/vllm-empty"
|
||||||
export PYTHONPATH
|
export PYTHONPATH
|
||||||
env
|
|
||||||
git config --global --add safe.directory /__w/vllm-ascend/vllm-ascend
|
git config --global --add safe.directory /__w/vllm-ascend/vllm-ascend
|
||||||
# Run mypy for Python 3.10, 3.11, 3.12 manually
|
# Run mypy for Python 3.10, 3.11, 3.12 manually
|
||||||
# Note: We are now separating mypy from pre-commit hooks for performance reasons.
|
# Note: We are now separating mypy from pre-commit hooks for performance reasons.
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ from contextlib import contextmanager, nullcontext
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
import torch_npu
|
import torch_npu
|
||||||
|
from vllm.config import CUDAGraphMode
|
||||||
from vllm.logger import logger
|
from vllm.logger import logger
|
||||||
from vllm.v1.core.sched.output import SchedulerOutput
|
from vllm.v1.core.sched.output import SchedulerOutput
|
||||||
from vllm.v1.kv_cache_interface import AttentionSpec, KVCacheConfig, MambaSpec
|
from vllm.v1.kv_cache_interface import AttentionSpec, KVCacheConfig, MambaSpec
|
||||||
@@ -103,7 +104,14 @@ class NPUModelRunner310(NPUModelRunner):
|
|||||||
num_encoder_reqs=num_encoder_reqs,
|
num_encoder_reqs=num_encoder_reqs,
|
||||||
)
|
)
|
||||||
|
|
||||||
def _pad_query_start_loc_for_fia(self, num_tokens_padded: int, num_reqs_padded: int, num_reqs: int) -> int:
|
def _pad_query_start_loc_for_fia(
|
||||||
|
self,
|
||||||
|
num_tokens_padded: int,
|
||||||
|
num_reqs_padded: int,
|
||||||
|
num_reqs: int,
|
||||||
|
cudagraph_runtime_mode: CUDAGraphMode | None = None,
|
||||||
|
batch_desc_num_reqs: int | None = None,
|
||||||
|
) -> int:
|
||||||
# Keep this aligned with the dispatcher because batch_desc.num_reqs is
|
# Keep this aligned with the dispatcher because batch_desc.num_reqs is
|
||||||
# generated by dispatcher._create_padded_batch_descriptor().
|
# generated by dispatcher._create_padded_batch_descriptor().
|
||||||
# For 310P ngram we intentionally set dispatcher q_len=1, while runner's
|
# For 310P ngram we intentionally set dispatcher q_len=1, while runner's
|
||||||
|
|||||||
Reference in New Issue
Block a user