[CI] Add pre-commit check for patch logger (#7446)
### What this PR does / why we need it?
See https://github.com/vllm-project/vllm-ascend/pull/7402. This PR adds a
pre-commit hook that forbids `init_logger(__name__)` in vllm_ascend patch modules.
- vLLM version: v0.17.0
- vLLM main: 8a680463fa
---------
Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
@@ -21,7 +21,7 @@ from typing import Any
|
||||
|
||||
import torch
|
||||
from vllm.config import get_current_vllm_config
|
||||
from vllm.logger import init_logger
|
||||
from vllm.logger import logger
|
||||
from vllm.model_executor.layers.fused_moe import FusedMoE
|
||||
from vllm.model_executor.layers.linear import LinearBase
|
||||
from vllm.model_executor.layers.quantization import register_quantization_config
|
||||
@@ -41,8 +41,6 @@ from vllm_ascend.quantization.modelslim_config import (
|
||||
)
|
||||
from vllm_ascend.utils import ASCEND_QUANTIZATION_METHOD
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
def create_scheme_for_layer(
|
||||
quant_description: dict[str, Any],
|
||||
|
||||
@@ -20,14 +20,13 @@ import os
|
||||
|
||||
import torch
|
||||
import torch_npu
|
||||
from vllm.logger import init_logger
|
||||
from vllm.logger import logger
|
||||
from vllm.model_executor.layers.batch_invariant import vllm_is_batch_invariant
|
||||
from vllm.triton_utils import HAS_TRITON
|
||||
|
||||
# in case recursive call in reduce_sum.
|
||||
torch_sum = torch.sum
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
if HAS_TRITON:
|
||||
from vllm_ascend.ops.triton.batch_invariant.matmul import (
|
||||
|
||||
@@ -27,7 +27,7 @@ from vllm.distributed.ec_transfer.ec_connector.base import ECConnectorMetadata
|
||||
from vllm.distributed.kv_events import KVEventBatch
|
||||
from vllm.distributed.kv_transfer.kv_connector.v1.base import KVConnectorMetadata
|
||||
from vllm.distributed.kv_transfer.kv_connector.v1.metrics import KVConnectorStats
|
||||
from vllm.logger import init_logger
|
||||
from vllm.logger import logger
|
||||
from vllm.v1.core.kv_cache_manager import KVCacheBlocks
|
||||
from vllm.v1.core.sched.async_scheduler import AsyncScheduler
|
||||
from vllm.v1.core.sched.output import NewRequestData, SchedulerOutput
|
||||
@@ -42,8 +42,6 @@ from vllm.v1.sample.rejection_sampler import PLACEHOLDER_TOKEN_ID
|
||||
from vllm.v1.spec_decode.metrics import SpecDecodingStats
|
||||
from vllm.v1.utils import ConstantList, record_function_or_nullcontext
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
# `spec_manager_map` in single_type_kv_cache_manager is a module-level dict
|
||||
# whose keys are class objects bound at import time. When the async
|
||||
|
||||
@@ -5,11 +5,8 @@ import torch
|
||||
from ucm.integration.vllm.ucm_connector import UCMConnector
|
||||
from vllm.config import VllmConfig
|
||||
from vllm.distributed.kv_transfer.kv_connector.v1.base import KVConnectorBase_V1, KVConnectorMetadata, KVConnectorRole
|
||||
from vllm.logger import init_logger
|
||||
from vllm.v1.core.sched.output import SchedulerOutput
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
# isort: off
|
||||
if TYPE_CHECKING:
|
||||
from vllm.v1.attention.backend import AttentionMetadata # type: ignore
|
||||
|
||||
@@ -1,13 +1,11 @@
|
||||
import numpy as np
|
||||
import torch
|
||||
from vllm.logger import init_logger
|
||||
from vllm.logger import logger
|
||||
from vllm.utils.platform_utils import is_pin_memory_available
|
||||
from vllm.v1.attention.backend import AttentionBackend # type: ignore
|
||||
from vllm.v1.kv_offload.mediums import CPULoadStoreSpec, GPULoadStoreSpec
|
||||
from vllm.v1.kv_offload.worker.worker import OffloadingHandler, TransferResult, TransferSpec
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
def expand_block_ids(
|
||||
block_ids: np.ndarray,
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
from math import lcm
|
||||
|
||||
import vllm.model_executor.models.config
|
||||
from vllm.logger import init_logger
|
||||
from vllm.logger import logger
|
||||
from vllm.model_executor.models import ModelRegistry
|
||||
from vllm.model_executor.models.config import MambaModelConfig
|
||||
from vllm.utils.math_utils import cdiv
|
||||
@@ -24,7 +24,6 @@ def verify_and_update_config(cls, vllm_config) -> None:
|
||||
Args:
|
||||
vllm_config: vLLM Config
|
||||
"""
|
||||
logger = init_logger(__name__)
|
||||
# Save the user input before it gets modified by MambaModelConfig
|
||||
mamba_block_size = vllm_config.cache_config.mamba_block_size
|
||||
# Enable FULL_AND_PIECEWISE by default
|
||||
|
||||
@@ -21,7 +21,7 @@ from typing import Any, Optional, cast
|
||||
|
||||
import torch
|
||||
from compressed_tensors.quantization import QuantizationArgs, QuantizationStrategy, QuantizationType
|
||||
from vllm.logger import init_logger
|
||||
from vllm.logger import logger
|
||||
from vllm.model_executor.layers.fused_moe import FusedMoE
|
||||
from vllm.model_executor.layers.linear import LinearBase, UnquantizedLinearMethod
|
||||
from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS, register_quantization_config
|
||||
@@ -37,8 +37,6 @@ from vllm_ascend.utils import COMPRESSED_TENSORS_METHOD
|
||||
|
||||
from .methods import AscendLinearScheme, AscendMoEScheme
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
# Remove the original compressed_tensors method to replace with our implementation
|
||||
def _remove_quantization_method():
|
||||
|
||||
@@ -31,7 +31,7 @@ from typing import Any, Optional
|
||||
|
||||
import torch
|
||||
from vllm.config import get_current_vllm_config
|
||||
from vllm.logger import init_logger
|
||||
from vllm.logger import logger
|
||||
from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase
|
||||
from vllm.model_executor.layers.fused_moe import FusedMoE
|
||||
from vllm.model_executor.layers.linear import LinearBase
|
||||
@@ -47,8 +47,6 @@ from .methods import get_scheme_class
|
||||
# The config filename that ModelSlim generates after quantizing a model.
|
||||
MODELSLIM_CONFIG_FILENAME = "quant_model_description.json"
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
# key: model_type
|
||||
# value: vLLM prefix -> HF prefix mapping (used to convert vLLM layer names to HF format
|
||||
# for looking up keys in quant_model_description.json)
|
||||
|
||||
@@ -19,12 +19,10 @@ import json
|
||||
from pathlib import Path
|
||||
|
||||
from vllm import envs
|
||||
from vllm.logger import init_logger
|
||||
from vllm.logger import logger
|
||||
|
||||
from vllm_ascend.utils import ASCEND_QUANTIZATION_METHOD, COMPRESSED_TENSORS_METHOD
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
def get_model_file(
|
||||
model: str | Path,
|
||||
|
||||
@@ -2,14 +2,11 @@ import torch
|
||||
import torch.nn as nn
|
||||
from typing_extensions import override
|
||||
from vllm.config import VllmConfig
|
||||
from vllm.logger import init_logger
|
||||
from vllm.model_executor.model_loader import get_model
|
||||
from vllm.v1.spec_decode.utils import create_vllm_config_for_draft_model
|
||||
|
||||
from vllm_ascend.spec_decode.eagle_proposer import SpecDecodeBaseProposer
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
class AscendDraftModelProposer(SpecDecodeBaseProposer):
|
||||
def __init__(
|
||||
|
||||
@@ -1,14 +1,11 @@
|
||||
import torch
|
||||
from vllm.config import CUDAGraphMode
|
||||
from vllm.logger import init_logger
|
||||
from vllm.v1.sample.metadata import SamplingMetadata
|
||||
from vllm.v1.spec_decode.medusa import MedusaProposer
|
||||
from vllm.v1.spec_decode.metadata import SpecDecodeMetadata
|
||||
|
||||
from vllm_ascend.ascend_forward_context import set_ascend_forward_context
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
class AscendMedusaProposer(MedusaProposer):
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user