[CI] Add pre-commit check for patch logger (#7446)
### What this PR does / why we need it?
See https://github.com/vllm-project/vllm-ascend/pull/7402, pre-commit
hook will forbid init_logger(__name__) in vllm_ascend patch modules
- vLLM version: v0.17.0
- vLLM main:
8a680463fa
---------
Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
@@ -91,6 +91,12 @@ repos:
|
||||
language: python
|
||||
types: [python]
|
||||
pass_filenames: false
|
||||
- id: check-logger
|
||||
name: Forbid init_logger(__name__) in vllm_ascend modules
|
||||
entry: tools/check_logger.sh
|
||||
language: script
|
||||
types: [python]
|
||||
pass_filenames: false
|
||||
# Keep `suggestion` last
|
||||
- id: suggestion
|
||||
name: Suggestion
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
@@ -135,16 +134,16 @@ class TestMaybeAutoDetectQuantization(TestBase):
|
||||
vllm_config = self._make_vllm_config(
|
||||
model_path="/fake/quant_model", quantization=None)
|
||||
|
||||
with self.assertLogs("vllm_ascend.quantization.utils",
|
||||
level=logging.INFO) as cm:
|
||||
with patch("vllm_ascend.quantization.utils.logger") as mock_logger:
|
||||
maybe_auto_detect_quantization(vllm_config)
|
||||
|
||||
self.assertEqual(vllm_config.model_config.quantization,
|
||||
ASCEND_QUANTIZATION_METHOD)
|
||||
log_output = "\n".join(cm.output)
|
||||
self.assertIn("Auto-detected quantization method", log_output)
|
||||
self.assertIn(ASCEND_QUANTIZATION_METHOD, log_output)
|
||||
self.assertIn("/fake/quant_model", log_output)
|
||||
mock_logger.info.assert_called_once()
|
||||
call_args = mock_logger.info.call_args[0]
|
||||
self.assertIn("Auto-detected quantization method", call_args[0])
|
||||
self.assertIn(ASCEND_QUANTIZATION_METHOD, call_args)
|
||||
self.assertIn("/fake/quant_model", call_args)
|
||||
|
||||
@patch("vllm_ascend.quantization.utils.detect_quantization_method",
|
||||
return_value=ASCEND_QUANTIZATION_METHOD)
|
||||
@@ -155,29 +154,28 @@ class TestMaybeAutoDetectQuantization(TestBase):
|
||||
model_path="/fake/quant_model",
|
||||
quantization=COMPRESSED_TENSORS_METHOD)
|
||||
|
||||
with self.assertLogs("vllm_ascend.quantization.utils",
|
||||
level=logging.WARNING) as cm:
|
||||
with patch("vllm_ascend.quantization.utils.logger") as mock_logger:
|
||||
maybe_auto_detect_quantization(vllm_config)
|
||||
|
||||
self.assertEqual(vllm_config.model_config.quantization,
|
||||
COMPRESSED_TENSORS_METHOD)
|
||||
log_output = "\n".join(cm.output)
|
||||
self.assertIn("Auto-detected quantization method", log_output)
|
||||
self.assertIn(ASCEND_QUANTIZATION_METHOD, log_output)
|
||||
self.assertIn(COMPRESSED_TENSORS_METHOD, log_output)
|
||||
mock_logger.warning.assert_called_once()
|
||||
call_args = mock_logger.warning.call_args[0]
|
||||
self.assertIn("Auto-detected quantization method", call_args[0])
|
||||
self.assertIn(ASCEND_QUANTIZATION_METHOD, call_args)
|
||||
self.assertIn(COMPRESSED_TENSORS_METHOD, call_args)
|
||||
|
||||
@patch("vllm_ascend.quantization.utils.detect_quantization_method",
|
||||
return_value=None)
|
||||
def test_no_detection_emits_no_log(self, mock_detect):
|
||||
"""When no quantization is detected, no log should be emitted."""
|
||||
vllm_config = self._make_vllm_config(quantization=None)
|
||||
logger_name = "vllm_ascend.quantization.utils"
|
||||
|
||||
with self.assertRaises(AssertionError):
|
||||
# assertLogs raises AssertionError when no logs are emitted
|
||||
with self.assertLogs(logger_name, level=logging.DEBUG):
|
||||
maybe_auto_detect_quantization(vllm_config)
|
||||
with patch("vllm_ascend.quantization.utils.logger") as mock_logger:
|
||||
maybe_auto_detect_quantization(vllm_config)
|
||||
|
||||
mock_logger.info.assert_not_called()
|
||||
mock_logger.warning.assert_not_called()
|
||||
self.assertIsNone(vllm_config.model_config.quantization)
|
||||
|
||||
@patch("vllm.config.VllmConfig._get_quantization_config",
|
||||
|
||||
69
tools/check_logger.sh
Executable file
69
tools/check_logger.sh
Executable file
@@ -0,0 +1,69 @@
|
||||
#!/bin/bash
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
#
# Check that vllm_ascend modules do not use init_logger(__name__).
#
# vllm's logging config registers a handler only for the "vllm" logger
# namespace. Any logger created via init_logger(__name__) inside a
# vllm_ascend module ends up in the "vllm_ascend.*" namespace, which has
# no handler, so every log call is silently dropped.
#
# The correct pattern is:
#   from vllm.logger import logger
#
# Exit status: 0 when no violation is found, 1 otherwise (pre-commit
# treats a non-zero exit as a failed hook).

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
PATCH_DIR="$REPO_ROOT/vllm_ascend/"

VIOLATIONS=0

# Iterate NUL-delimited find output instead of `for FILE in $(find ...)`:
# the unquoted command substitution word-splits and glob-expands paths
# containing whitespace or special characters (ShellCheck SC2044).
# The `done < <(...)` form keeps the loop in the current shell so the
# VIOLATIONS counter updates are visible after the loop.
while IFS= read -r -d '' FILE; do
    [[ -f "$FILE" ]] || continue

    # Find lines that call init_logger(__name__)
    while IFS= read -r MATCH; do
        # grep -n output is "<lineno>:<content>"; split with parameter
        # expansion rather than `echo | cut` — echo can mangle content
        # beginning with "-" or containing backslashes.
        LINENUM="${MATCH%%:*}"
        LINE="${MATCH#*:}"
        if [[ $VIOLATIONS -eq 0 ]]; then
            # Blank line once, before the first reported violation.
            echo ""
        fi
        echo "  $FILE:$LINENUM: $LINE"
        VIOLATIONS=$(( VIOLATIONS + 1 ))
    done < <(grep -n 'init_logger[[:space:]]*([[:space:]]*__name__[[:space:]]*)' "$FILE" 2>/dev/null || true)
done < <(find "$PATCH_DIR" -type f -name "*.py" -print0 2>/dev/null)

if [[ $VIOLATIONS -gt 0 ]]; then
    echo ""
    echo "Found $VIOLATIONS violation(s): init_logger(__name__) must not be used in vllm_ascend modules."
    echo ""
    echo "vllm's logging handler is registered only for the 'vllm' namespace."
    echo "Loggers created with init_logger(__name__) inside vllm_ascend end up"
    echo "in the 'vllm_ascend.*' namespace, which has no handler — all log"
    echo "messages are silently dropped."
    echo ""
    echo "Fix: replace"
    echo "  from vllm.logger import init_logger"
    echo "  logger = init_logger(__name__)"
    echo "with"
    echo "  from vllm.logger import logger"
    exit 1
fi

exit 0
|
||||
@@ -21,7 +21,7 @@ from typing import Any
|
||||
|
||||
import torch
|
||||
from vllm.config import get_current_vllm_config
|
||||
from vllm.logger import init_logger
|
||||
from vllm.logger import logger
|
||||
from vllm.model_executor.layers.fused_moe import FusedMoE
|
||||
from vllm.model_executor.layers.linear import LinearBase
|
||||
from vllm.model_executor.layers.quantization import register_quantization_config
|
||||
@@ -41,8 +41,6 @@ from vllm_ascend.quantization.modelslim_config import (
|
||||
)
|
||||
from vllm_ascend.utils import ASCEND_QUANTIZATION_METHOD
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
def create_scheme_for_layer(
|
||||
quant_description: dict[str, Any],
|
||||
|
||||
@@ -20,14 +20,13 @@ import os
|
||||
|
||||
import torch
|
||||
import torch_npu
|
||||
from vllm.logger import init_logger
|
||||
from vllm.logger import logger
|
||||
from vllm.model_executor.layers.batch_invariant import vllm_is_batch_invariant
|
||||
from vllm.triton_utils import HAS_TRITON
|
||||
|
||||
# in case recursive call in reduce_sum.
|
||||
torch_sum = torch.sum
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
if HAS_TRITON:
|
||||
from vllm_ascend.ops.triton.batch_invariant.matmul import (
|
||||
|
||||
@@ -27,7 +27,7 @@ from vllm.distributed.ec_transfer.ec_connector.base import ECConnectorMetadata
|
||||
from vllm.distributed.kv_events import KVEventBatch
|
||||
from vllm.distributed.kv_transfer.kv_connector.v1.base import KVConnectorMetadata
|
||||
from vllm.distributed.kv_transfer.kv_connector.v1.metrics import KVConnectorStats
|
||||
from vllm.logger import init_logger
|
||||
from vllm.logger import logger
|
||||
from vllm.v1.core.kv_cache_manager import KVCacheBlocks
|
||||
from vllm.v1.core.sched.async_scheduler import AsyncScheduler
|
||||
from vllm.v1.core.sched.output import NewRequestData, SchedulerOutput
|
||||
@@ -42,8 +42,6 @@ from vllm.v1.sample.rejection_sampler import PLACEHOLDER_TOKEN_ID
|
||||
from vllm.v1.spec_decode.metrics import SpecDecodingStats
|
||||
from vllm.v1.utils import ConstantList, record_function_or_nullcontext
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
# `spec_manager_map` in single_type_kv_cache_manager is a module-level dict
|
||||
# whose keys are class objects bound at import time. When the async
|
||||
|
||||
@@ -5,11 +5,8 @@ import torch
|
||||
from ucm.integration.vllm.ucm_connector import UCMConnector
|
||||
from vllm.config import VllmConfig
|
||||
from vllm.distributed.kv_transfer.kv_connector.v1.base import KVConnectorBase_V1, KVConnectorMetadata, KVConnectorRole
|
||||
from vllm.logger import init_logger
|
||||
from vllm.v1.core.sched.output import SchedulerOutput
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
# isort: off
|
||||
if TYPE_CHECKING:
|
||||
from vllm.v1.attention.backend import AttentionMetadata # type: ignore
|
||||
|
||||
@@ -1,13 +1,11 @@
|
||||
import numpy as np
|
||||
import torch
|
||||
from vllm.logger import init_logger
|
||||
from vllm.logger import logger
|
||||
from vllm.utils.platform_utils import is_pin_memory_available
|
||||
from vllm.v1.attention.backend import AttentionBackend # type: ignore
|
||||
from vllm.v1.kv_offload.mediums import CPULoadStoreSpec, GPULoadStoreSpec
|
||||
from vllm.v1.kv_offload.worker.worker import OffloadingHandler, TransferResult, TransferSpec
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
def expand_block_ids(
|
||||
block_ids: np.ndarray,
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
from math import lcm
|
||||
|
||||
import vllm.model_executor.models.config
|
||||
from vllm.logger import init_logger
|
||||
from vllm.logger import logger
|
||||
from vllm.model_executor.models import ModelRegistry
|
||||
from vllm.model_executor.models.config import MambaModelConfig
|
||||
from vllm.utils.math_utils import cdiv
|
||||
@@ -24,7 +24,6 @@ def verify_and_update_config(cls, vllm_config) -> None:
|
||||
Args:
|
||||
vllm_config: vLLM Config
|
||||
"""
|
||||
logger = init_logger(__name__)
|
||||
# Save the user input before it gets modified by MambaModelConfig
|
||||
mamba_block_size = vllm_config.cache_config.mamba_block_size
|
||||
# Enable FULL_AND_PIECEWISE by default
|
||||
|
||||
@@ -21,7 +21,7 @@ from typing import Any, Optional, cast
|
||||
|
||||
import torch
|
||||
from compressed_tensors.quantization import QuantizationArgs, QuantizationStrategy, QuantizationType
|
||||
from vllm.logger import init_logger
|
||||
from vllm.logger import logger
|
||||
from vllm.model_executor.layers.fused_moe import FusedMoE
|
||||
from vllm.model_executor.layers.linear import LinearBase, UnquantizedLinearMethod
|
||||
from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS, register_quantization_config
|
||||
@@ -37,8 +37,6 @@ from vllm_ascend.utils import COMPRESSED_TENSORS_METHOD
|
||||
|
||||
from .methods import AscendLinearScheme, AscendMoEScheme
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
# Remove the original compressed_tensors method to replace with our implementation
|
||||
def _remove_quantization_method():
|
||||
|
||||
@@ -31,7 +31,7 @@ from typing import Any, Optional
|
||||
|
||||
import torch
|
||||
from vllm.config import get_current_vllm_config
|
||||
from vllm.logger import init_logger
|
||||
from vllm.logger import logger
|
||||
from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase
|
||||
from vllm.model_executor.layers.fused_moe import FusedMoE
|
||||
from vllm.model_executor.layers.linear import LinearBase
|
||||
@@ -47,8 +47,6 @@ from .methods import get_scheme_class
|
||||
# The config filename that ModelSlim generates after quantizing a model.
|
||||
MODELSLIM_CONFIG_FILENAME = "quant_model_description.json"
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
# key: model_type
|
||||
# value: vLLM prefix -> HF prefix mapping (used to convert vLLM layer names to HF format
|
||||
# for looking up keys in quant_model_description.json)
|
||||
|
||||
@@ -19,12 +19,10 @@ import json
|
||||
from pathlib import Path
|
||||
|
||||
from vllm import envs
|
||||
from vllm.logger import init_logger
|
||||
from vllm.logger import logger
|
||||
|
||||
from vllm_ascend.utils import ASCEND_QUANTIZATION_METHOD, COMPRESSED_TENSORS_METHOD
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
def get_model_file(
|
||||
model: str | Path,
|
||||
|
||||
@@ -2,14 +2,11 @@ import torch
|
||||
import torch.nn as nn
|
||||
from typing_extensions import override
|
||||
from vllm.config import VllmConfig
|
||||
from vllm.logger import init_logger
|
||||
from vllm.model_executor.model_loader import get_model
|
||||
from vllm.v1.spec_decode.utils import create_vllm_config_for_draft_model
|
||||
|
||||
from vllm_ascend.spec_decode.eagle_proposer import SpecDecodeBaseProposer
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
class AscendDraftModelProposer(SpecDecodeBaseProposer):
|
||||
def __init__(
|
||||
|
||||
@@ -1,14 +1,11 @@
|
||||
import torch
|
||||
from vllm.config import CUDAGraphMode
|
||||
from vllm.logger import init_logger
|
||||
from vllm.v1.sample.metadata import SamplingMetadata
|
||||
from vllm.v1.spec_decode.medusa import MedusaProposer
|
||||
from vllm.v1.spec_decode.metadata import SpecDecodeMetadata
|
||||
|
||||
from vllm_ascend.ascend_forward_context import set_ascend_forward_context
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
class AscendMedusaProposer(MedusaProposer):
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user