diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 9cad2e5f..6470c435 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -91,6 +91,12 @@ repos:
     language: python
     types: [python]
     pass_filenames: false
+  - id: check-logger
+    name: Forbid init_logger(__name__) in vllm_ascend modules
+    entry: tools/check_logger.sh
+    language: script
+    types: [python]
+    pass_filenames: false
   # Keep `suggestion` last
   - id: suggestion
     name: Suggestion
diff --git a/tests/ut/quantization/test_quant_utils.py b/tests/ut/quantization/test_quant_utils.py
index 70c82ec8..551cf4ff 100644
--- a/tests/ut/quantization/test_quant_utils.py
+++ b/tests/ut/quantization/test_quant_utils.py
@@ -1,5 +1,4 @@
 import json
-import logging
 import os
 import tempfile
 from pathlib import Path
@@ -135,16 +134,16 @@ class TestMaybeAutoDetectQuantization(TestBase):
         vllm_config = self._make_vllm_config(
             model_path="/fake/quant_model", quantization=None)
 
-        with self.assertLogs("vllm_ascend.quantization.utils",
-                             level=logging.INFO) as cm:
+        with patch("vllm_ascend.quantization.utils.logger") as mock_logger:
             maybe_auto_detect_quantization(vllm_config)
 
         self.assertEqual(vllm_config.model_config.quantization,
                          ASCEND_QUANTIZATION_METHOD)
-        log_output = "\n".join(cm.output)
-        self.assertIn("Auto-detected quantization method", log_output)
-        self.assertIn(ASCEND_QUANTIZATION_METHOD, log_output)
-        self.assertIn("/fake/quant_model", log_output)
+        mock_logger.info.assert_called_once()
+        call_args = mock_logger.info.call_args[0]
+        self.assertIn("Auto-detected quantization method", call_args[0])
+        self.assertIn(ASCEND_QUANTIZATION_METHOD, call_args)
+        self.assertIn("/fake/quant_model", call_args)
 
     @patch("vllm_ascend.quantization.utils.detect_quantization_method",
            return_value=ASCEND_QUANTIZATION_METHOD)
@@ -155,29 +154,28 @@
             model_path="/fake/quant_model",
             quantization=COMPRESSED_TENSORS_METHOD)
 
-        with self.assertLogs("vllm_ascend.quantization.utils",
-                             level=logging.WARNING) as cm:
+        with patch("vllm_ascend.quantization.utils.logger") as mock_logger:
             maybe_auto_detect_quantization(vllm_config)
 
         self.assertEqual(vllm_config.model_config.quantization,
                          COMPRESSED_TENSORS_METHOD)
-        log_output = "\n".join(cm.output)
-        self.assertIn("Auto-detected quantization method", log_output)
-        self.assertIn(ASCEND_QUANTIZATION_METHOD, log_output)
-        self.assertIn(COMPRESSED_TENSORS_METHOD, log_output)
+        mock_logger.warning.assert_called_once()
+        call_args = mock_logger.warning.call_args[0]
+        self.assertIn("Auto-detected quantization method", call_args[0])
+        self.assertIn(ASCEND_QUANTIZATION_METHOD, call_args)
+        self.assertIn(COMPRESSED_TENSORS_METHOD, call_args)
 
     @patch("vllm_ascend.quantization.utils.detect_quantization_method",
            return_value=None)
     def test_no_detection_emits_no_log(self, mock_detect):
         """When no quantization is detected, no log should be emitted."""
         vllm_config = self._make_vllm_config(quantization=None)
-        logger_name = "vllm_ascend.quantization.utils"
 
-        with self.assertRaises(AssertionError):
-            # assertLogs raises AssertionError when no logs are emitted
-            with self.assertLogs(logger_name, level=logging.DEBUG):
-                maybe_auto_detect_quantization(vllm_config)
+        with patch("vllm_ascend.quantization.utils.logger") as mock_logger:
+            maybe_auto_detect_quantization(vllm_config)
+        mock_logger.info.assert_not_called()
+        mock_logger.warning.assert_not_called()
 
         self.assertIsNone(vllm_config.model_config.quantization)
 
     @patch("vllm.config.VllmConfig._get_quantization_config",
diff --git a/tools/check_logger.sh b/tools/check_logger.sh
new file mode 100755
index 00000000..5971b191
--- /dev/null
+++ b/tools/check_logger.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+#
+# Check that vllm_ascend modules do not use init_logger(__name__).
+#
+# vllm's logging config registers a handler only for the "vllm" logger
+# namespace. Any logger created via init_logger(__name__) inside a
+# vllm_ascend module ends up in the "vllm_ascend.*" namespace, which has
+# no handler, so every log call is silently dropped.
+#
+# The correct pattern is:
+#     from vllm.logger import logger
+#
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
+PATCH_DIR="$REPO_ROOT/vllm_ascend/"
+
+VIOLATIONS=0
+
+for FILE in $(find "$PATCH_DIR" -type f -name "*.py" 2>/dev/null); do
+    [[ -f "$FILE" ]] || continue
+
+    # Find lines that call init_logger(__name__)
+    while IFS= read -r MATCH; do
+        LINENUM=$(echo "$MATCH" | cut -d: -f1)
+        LINE=$(echo "$MATCH" | cut -d: -f2-)
+        if [[ $VIOLATIONS -eq 0 ]]; then
+            echo ""
+        fi
+        echo "  $FILE:$LINENUM: $LINE"
+        VIOLATIONS=$(( VIOLATIONS + 1 ))
+    done < <(grep -n 'init_logger[[:space:]]*([[:space:]]*__name__[[:space:]]*)' "$FILE" 2>/dev/null || true)
+done
+
+if [[ $VIOLATIONS -gt 0 ]]; then
+    echo ""
+    echo "Found $VIOLATIONS violation(s): init_logger(__name__) must not be used in vllm_ascend modules."
+    echo ""
+    echo "vllm's logging handler is registered only for the 'vllm' namespace."
+    echo "Loggers created with init_logger(__name__) inside vllm_ascend end up"
+    echo "in the 'vllm_ascend.*' namespace, which has no handler — all log"
+    echo "messages are silently dropped."
+ echo "" + echo "Fix: replace" + echo " from vllm.logger import init_logger" + echo " logger = init_logger(__name__)" + echo "with" + echo " from vllm.logger import logger" + exit 1 +fi + +exit 0 diff --git a/vllm_ascend/_310p/quantization/modelslim_config.py b/vllm_ascend/_310p/quantization/modelslim_config.py index 5e9b0abf..1c47e2bf 100644 --- a/vllm_ascend/_310p/quantization/modelslim_config.py +++ b/vllm_ascend/_310p/quantization/modelslim_config.py @@ -21,7 +21,7 @@ from typing import Any import torch from vllm.config import get_current_vllm_config -from vllm.logger import init_logger +from vllm.logger import logger from vllm.model_executor.layers.fused_moe import FusedMoE from vllm.model_executor.layers.linear import LinearBase from vllm.model_executor.layers.quantization import register_quantization_config @@ -41,8 +41,6 @@ from vllm_ascend.quantization.modelslim_config import ( ) from vllm_ascend.utils import ASCEND_QUANTIZATION_METHOD -logger = init_logger(__name__) - def create_scheme_for_layer( quant_description: dict[str, Any], diff --git a/vllm_ascend/batch_invariant.py b/vllm_ascend/batch_invariant.py index e7521a80..7f27fd6e 100644 --- a/vllm_ascend/batch_invariant.py +++ b/vllm_ascend/batch_invariant.py @@ -20,14 +20,13 @@ import os import torch import torch_npu -from vllm.logger import init_logger +from vllm.logger import logger from vllm.model_executor.layers.batch_invariant import vllm_is_batch_invariant from vllm.triton_utils import HAS_TRITON # in case recursive call in reduce_sum. torch_sum = torch.sum -logger = init_logger(__name__) if HAS_TRITON: from vllm_ascend.ops.triton.batch_invariant.matmul import ( diff --git a/vllm_ascend/core/recompute_scheduler.py b/vllm_ascend/core/recompute_scheduler.py index 67e3ba0a..d5375e7e 100644 --- a/vllm_ascend/core/recompute_scheduler.py +++ b/vllm_ascend/core/recompute_scheduler.py @@ -27,7 +27,7 @@ from vllm.distributed.ec_transfer.ec_connector.base import ECConnectorMetadata from vllm.distributed.kv_events import KVEventBatch from vllm.distributed.kv_transfer.kv_connector.v1.base import KVConnectorMetadata from vllm.distributed.kv_transfer.kv_connector.v1.metrics import KVConnectorStats -from vllm.logger import init_logger +from vllm.logger import logger from vllm.v1.core.kv_cache_manager import KVCacheBlocks from vllm.v1.core.sched.async_scheduler import AsyncScheduler from vllm.v1.core.sched.output import NewRequestData, SchedulerOutput @@ -42,8 +42,6 @@ from vllm.v1.sample.rejection_sampler import PLACEHOLDER_TOKEN_ID from vllm.v1.spec_decode.metrics import SpecDecodingStats from vllm.v1.utils import ConstantList, record_function_or_nullcontext -logger = init_logger(__name__) - # `spec_manager_map` in single_type_kv_cache_manager is a module-level dict # whose keys are class objects bound at import time. 
When the async diff --git a/vllm_ascend/distributed/kv_transfer/kv_pool/ucm_connector.py b/vllm_ascend/distributed/kv_transfer/kv_pool/ucm_connector.py index 550d98ac..a88a7ce7 100644 --- a/vllm_ascend/distributed/kv_transfer/kv_pool/ucm_connector.py +++ b/vllm_ascend/distributed/kv_transfer/kv_pool/ucm_connector.py @@ -5,11 +5,8 @@ import torch from ucm.integration.vllm.ucm_connector import UCMConnector from vllm.config import VllmConfig from vllm.distributed.kv_transfer.kv_connector.v1.base import KVConnectorBase_V1, KVConnectorMetadata, KVConnectorRole -from vllm.logger import init_logger from vllm.v1.core.sched.output import SchedulerOutput -logger = init_logger(__name__) - # isort: off if TYPE_CHECKING: from vllm.v1.attention.backend import AttentionMetadata # type: ignore diff --git a/vllm_ascend/kv_offload/cpu_npu.py b/vllm_ascend/kv_offload/cpu_npu.py index 6932ac53..1f326a6c 100644 --- a/vllm_ascend/kv_offload/cpu_npu.py +++ b/vllm_ascend/kv_offload/cpu_npu.py @@ -1,13 +1,11 @@ import numpy as np import torch -from vllm.logger import init_logger +from vllm.logger import logger from vllm.utils.platform_utils import is_pin_memory_available from vllm.v1.attention.backend import AttentionBackend # type: ignore from vllm.v1.kv_offload.mediums import CPULoadStoreSpec, GPULoadStoreSpec from vllm.v1.kv_offload.worker.worker import OffloadingHandler, TransferResult, TransferSpec -logger = init_logger(__name__) - def expand_block_ids( block_ids: np.ndarray, diff --git a/vllm_ascend/patch/platform/patch_mamba_config_310.py b/vllm_ascend/patch/platform/patch_mamba_config_310.py index db9775f3..121ac27e 100644 --- a/vllm_ascend/patch/platform/patch_mamba_config_310.py +++ b/vllm_ascend/patch/platform/patch_mamba_config_310.py @@ -4,7 +4,7 @@ from math import lcm import vllm.model_executor.models.config -from vllm.logger import init_logger +from vllm.logger import logger from vllm.model_executor.models import ModelRegistry from vllm.model_executor.models.config import MambaModelConfig from vllm.utils.math_utils import cdiv @@ -24,7 +24,6 @@ def verify_and_update_config(cls, vllm_config) -> None: Args: vllm_config: vLLM Config """ - logger = init_logger(__name__) # Save the user input before it gets modified by MambaModelConfig mamba_block_size = vllm_config.cache_config.mamba_block_size # Enable FULL_AND_PIECEWISE by default diff --git a/vllm_ascend/quantization/compressed_tensors_config.py b/vllm_ascend/quantization/compressed_tensors_config.py index ea138110..271f4d04 100644 --- a/vllm_ascend/quantization/compressed_tensors_config.py +++ b/vllm_ascend/quantization/compressed_tensors_config.py @@ -21,7 +21,7 @@ from typing import Any, Optional, cast import torch from compressed_tensors.quantization import QuantizationArgs, QuantizationStrategy, QuantizationType -from vllm.logger import init_logger +from vllm.logger import logger from vllm.model_executor.layers.fused_moe import FusedMoE from vllm.model_executor.layers.linear import LinearBase, UnquantizedLinearMethod from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS, register_quantization_config @@ -37,8 +37,6 @@ from vllm_ascend.utils import COMPRESSED_TENSORS_METHOD from .methods import AscendLinearScheme, AscendMoEScheme -logger = init_logger(__name__) - # Remove the original compressed_tensors method to replace with our implementation def _remove_quantization_method(): diff --git a/vllm_ascend/quantization/modelslim_config.py b/vllm_ascend/quantization/modelslim_config.py index 555b05cd..151109c5 100644 --- 
a/vllm_ascend/quantization/modelslim_config.py +++ b/vllm_ascend/quantization/modelslim_config.py @@ -31,7 +31,7 @@ from typing import Any, Optional import torch from vllm.config import get_current_vllm_config -from vllm.logger import init_logger +from vllm.logger import logger from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase from vllm.model_executor.layers.fused_moe import FusedMoE from vllm.model_executor.layers.linear import LinearBase @@ -47,8 +47,6 @@ from .methods import get_scheme_class # The config filename that ModelSlim generates after quantizing a model. MODELSLIM_CONFIG_FILENAME = "quant_model_description.json" -logger = init_logger(__name__) - # key: model_type # value: vLLM prefix -> HF prefix mapping (used to convert vLLM layer names to HF format # for looking up keys in quant_model_description.json) diff --git a/vllm_ascend/quantization/utils.py b/vllm_ascend/quantization/utils.py index c1042761..0ec87437 100644 --- a/vllm_ascend/quantization/utils.py +++ b/vllm_ascend/quantization/utils.py @@ -19,12 +19,10 @@ import json from pathlib import Path from vllm import envs -from vllm.logger import init_logger +from vllm.logger import logger from vllm_ascend.utils import ASCEND_QUANTIZATION_METHOD, COMPRESSED_TENSORS_METHOD -logger = init_logger(__name__) - def get_model_file( model: str | Path, diff --git a/vllm_ascend/spec_decode/draft_proposer.py b/vllm_ascend/spec_decode/draft_proposer.py index 65348b20..a35d9656 100644 --- a/vllm_ascend/spec_decode/draft_proposer.py +++ b/vllm_ascend/spec_decode/draft_proposer.py @@ -2,14 +2,11 @@ import torch import torch.nn as nn from typing_extensions import override from vllm.config import VllmConfig -from vllm.logger import init_logger from vllm.model_executor.model_loader import get_model from vllm.v1.spec_decode.utils import create_vllm_config_for_draft_model from vllm_ascend.spec_decode.eagle_proposer import SpecDecodeBaseProposer -logger = init_logger(__name__) - class AscendDraftModelProposer(SpecDecodeBaseProposer): def __init__( diff --git a/vllm_ascend/spec_decode/medusa_proposer.py b/vllm_ascend/spec_decode/medusa_proposer.py index de62cfd2..7032a16e 100644 --- a/vllm_ascend/spec_decode/medusa_proposer.py +++ b/vllm_ascend/spec_decode/medusa_proposer.py @@ -1,14 +1,11 @@ import torch from vllm.config import CUDAGraphMode -from vllm.logger import init_logger from vllm.v1.sample.metadata import SamplingMetadata from vllm.v1.spec_decode.medusa import MedusaProposer from vllm.v1.spec_decode.metadata import SpecDecodeMetadata from vllm_ascend.ascend_forward_context import set_ascend_forward_context -logger = init_logger(__name__) - class AscendMedusaProposer(MedusaProposer): """
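--
For reference, a minimal sketch (not part of the patch) of the logging pattern
the new hook enforces. The module body and the `load_weights` helper are
hypothetical illustrations; only the two import forms come from vllm itself:

    # Forbidden: init_logger(__name__) creates a "vllm_ascend.*" logger
    # that has no handler attached, so its messages are silently dropped.
    #
    #     from vllm.logger import init_logger
    #     logger = init_logger(__name__)

    # Required: reuse the handler-backed "vllm" logger.
    from vllm.logger import logger


    def load_weights(path: str) -> None:
        # Routed through the "vllm" namespace, so it actually reaches the
        # configured handler instead of being dropped.
        logger.info("Loading weights from %s", path)

The hook can be exercised locally with `pre-commit run check-logger --all-files`,
matching the id registered in .pre-commit-config.yaml above.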