[CI] Add pre-commit check for patch logger (#7446)
### What this PR does / why we need it?
See https://github.com/vllm-project/vllm-ascend/pull/7402. This PR adds a pre-commit
hook that forbids init_logger(__name__) in vllm_ascend modules (including patch modules).
- vLLM version: v0.17.0
- vLLM main:
8a680463fa
---------
Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
@@ -91,6 +91,12 @@ repos:
|
|||||||
language: python
|
language: python
|
||||||
types: [python]
|
types: [python]
|
||||||
pass_filenames: false
|
pass_filenames: false
|
||||||
|
- id: check-logger
|
||||||
|
name: Forbid init_logger(__name__) in vllm_ascend modules
|
||||||
|
entry: tools/check_logger.sh
|
||||||
|
language: script
|
||||||
|
types: [python]
|
||||||
|
pass_filenames: false
|
||||||
# Keep `suggestion` last
|
# Keep `suggestion` last
|
||||||
- id: suggestion
|
- id: suggestion
|
||||||
name: Suggestion
|
name: Suggestion
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
import json
|
import json
|
||||||
import logging
|
|
||||||
import os
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@@ -135,16 +134,16 @@ class TestMaybeAutoDetectQuantization(TestBase):
|
|||||||
vllm_config = self._make_vllm_config(
|
vllm_config = self._make_vllm_config(
|
||||||
model_path="/fake/quant_model", quantization=None)
|
model_path="/fake/quant_model", quantization=None)
|
||||||
|
|
||||||
with self.assertLogs("vllm_ascend.quantization.utils",
|
with patch("vllm_ascend.quantization.utils.logger") as mock_logger:
|
||||||
level=logging.INFO) as cm:
|
|
||||||
maybe_auto_detect_quantization(vllm_config)
|
maybe_auto_detect_quantization(vllm_config)
|
||||||
|
|
||||||
self.assertEqual(vllm_config.model_config.quantization,
|
self.assertEqual(vllm_config.model_config.quantization,
|
||||||
ASCEND_QUANTIZATION_METHOD)
|
ASCEND_QUANTIZATION_METHOD)
|
||||||
log_output = "\n".join(cm.output)
|
mock_logger.info.assert_called_once()
|
||||||
self.assertIn("Auto-detected quantization method", log_output)
|
call_args = mock_logger.info.call_args[0]
|
||||||
self.assertIn(ASCEND_QUANTIZATION_METHOD, log_output)
|
self.assertIn("Auto-detected quantization method", call_args[0])
|
||||||
self.assertIn("/fake/quant_model", log_output)
|
self.assertIn(ASCEND_QUANTIZATION_METHOD, call_args)
|
||||||
|
self.assertIn("/fake/quant_model", call_args)
|
||||||
|
|
||||||
@patch("vllm_ascend.quantization.utils.detect_quantization_method",
|
@patch("vllm_ascend.quantization.utils.detect_quantization_method",
|
||||||
return_value=ASCEND_QUANTIZATION_METHOD)
|
return_value=ASCEND_QUANTIZATION_METHOD)
|
||||||
@@ -155,29 +154,28 @@ class TestMaybeAutoDetectQuantization(TestBase):
|
|||||||
model_path="/fake/quant_model",
|
model_path="/fake/quant_model",
|
||||||
quantization=COMPRESSED_TENSORS_METHOD)
|
quantization=COMPRESSED_TENSORS_METHOD)
|
||||||
|
|
||||||
with self.assertLogs("vllm_ascend.quantization.utils",
|
with patch("vllm_ascend.quantization.utils.logger") as mock_logger:
|
||||||
level=logging.WARNING) as cm:
|
|
||||||
maybe_auto_detect_quantization(vllm_config)
|
maybe_auto_detect_quantization(vllm_config)
|
||||||
|
|
||||||
self.assertEqual(vllm_config.model_config.quantization,
|
self.assertEqual(vllm_config.model_config.quantization,
|
||||||
COMPRESSED_TENSORS_METHOD)
|
COMPRESSED_TENSORS_METHOD)
|
||||||
log_output = "\n".join(cm.output)
|
mock_logger.warning.assert_called_once()
|
||||||
self.assertIn("Auto-detected quantization method", log_output)
|
call_args = mock_logger.warning.call_args[0]
|
||||||
self.assertIn(ASCEND_QUANTIZATION_METHOD, log_output)
|
self.assertIn("Auto-detected quantization method", call_args[0])
|
||||||
self.assertIn(COMPRESSED_TENSORS_METHOD, log_output)
|
self.assertIn(ASCEND_QUANTIZATION_METHOD, call_args)
|
||||||
|
self.assertIn(COMPRESSED_TENSORS_METHOD, call_args)
|
||||||
|
|
||||||
@patch("vllm_ascend.quantization.utils.detect_quantization_method",
|
@patch("vllm_ascend.quantization.utils.detect_quantization_method",
|
||||||
return_value=None)
|
return_value=None)
|
||||||
def test_no_detection_emits_no_log(self, mock_detect):
|
def test_no_detection_emits_no_log(self, mock_detect):
|
||||||
"""When no quantization is detected, no log should be emitted."""
|
"""When no quantization is detected, no log should be emitted."""
|
||||||
vllm_config = self._make_vllm_config(quantization=None)
|
vllm_config = self._make_vllm_config(quantization=None)
|
||||||
logger_name = "vllm_ascend.quantization.utils"
|
|
||||||
|
|
||||||
with self.assertRaises(AssertionError):
|
with patch("vllm_ascend.quantization.utils.logger") as mock_logger:
|
||||||
# assertLogs raises AssertionError when no logs are emitted
|
maybe_auto_detect_quantization(vllm_config)
|
||||||
with self.assertLogs(logger_name, level=logging.DEBUG):
|
|
||||||
maybe_auto_detect_quantization(vllm_config)
|
|
||||||
|
|
||||||
|
mock_logger.info.assert_not_called()
|
||||||
|
mock_logger.warning.assert_not_called()
|
||||||
self.assertIsNone(vllm_config.model_config.quantization)
|
self.assertIsNone(vllm_config.model_config.quantization)
|
||||||
|
|
||||||
@patch("vllm.config.VllmConfig._get_quantization_config",
|
@patch("vllm.config.VllmConfig._get_quantization_config",
|
||||||
|
|||||||
69
tools/check_logger.sh
Executable file
69
tools/check_logger.sh
Executable file
@@ -0,0 +1,69 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
#
|
||||||
|
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# This file is a part of the vllm-ascend project.
|
||||||
|
#
|
||||||
|
# Check that vllm_ascend modules do not use init_logger(__name__).
|
||||||
|
#
|
||||||
|
# vllm's logging config registers a handler only for the "vllm" logger
|
||||||
|
# namespace. Any logger created via init_logger(__name__) inside a
|
||||||
|
# vllm_ascend module ends up in the "vllm_ascend.*" namespace, which has
|
||||||
|
# no handler, so every log call is silently dropped.
|
||||||
|
#
|
||||||
|
# The correct pattern is:
|
||||||
|
# from vllm.logger import logger
|
||||||
|
#
|
||||||
|
|
||||||
|
# Scan every Python file under vllm_ascend/ for `init_logger(__name__)` and
# fail (exit 1) with an explanatory message if any occurrence is found.
# Exits 0 when the tree is clean.
set -euo pipefail

# Resolve the repository root from this script's own location so the check
# behaves the same regardless of the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
PATCH_DIR="$REPO_ROOT/vllm_ascend/"

# A missing scan directory would otherwise make the check pass vacuously,
# because find's error output is suppressed below.
if [[ ! -d "$PATCH_DIR" ]]; then
    echo "check_logger: directory not found: $PATCH_DIR" >&2
    exit 1
fi

VIOLATIONS=0

# Read NUL-delimited paths from find instead of word-splitting `$(find ...)`,
# so file names containing whitespace or glob characters are handled intact.
while IFS= read -r -d '' FILE; do
    # grep -n prints "LINENUM:LINE" for each hit. `|| true` keeps grep's
    # "no match" status (1) from aborting the script under `set -e`.
    while IFS= read -r MATCH; do
        # Split on the first colon only; the source line may contain colons.
        LINENUM="${MATCH%%:*}"
        LINE="${MATCH#*:}"
        if [[ $VIOLATIONS -eq 0 ]]; then
            # Blank separator line before the first reported violation.
            echo ""
        fi
        echo " $FILE:$LINENUM: $LINE"
        VIOLATIONS=$(( VIOLATIONS + 1 ))
    done < <(grep -n 'init_logger[[:space:]]*([[:space:]]*__name__[[:space:]]*)' "$FILE" 2>/dev/null || true)
done < <(find "$PATCH_DIR" -type f -name "*.py" -print0 2>/dev/null)

if [[ $VIOLATIONS -gt 0 ]]; then
    echo ""
    echo "Found $VIOLATIONS violation(s): init_logger(__name__) must not be used in vllm_ascend modules."
    echo ""
    echo "vllm's logging handler is registered only for the 'vllm' namespace."
    echo "Loggers created with init_logger(__name__) inside vllm_ascend end up"
    echo "in the 'vllm_ascend.*' namespace, which has no handler — all log"
    echo "messages are silently dropped."
    echo ""
    echo "Fix: replace"
    echo " from vllm.logger import init_logger"
    echo " logger = init_logger(__name__)"
    echo "with"
    echo " from vllm.logger import logger"
    exit 1
fi

exit 0
|
||||||
@@ -21,7 +21,7 @@ from typing import Any
|
|||||||
|
|
||||||
import torch
|
import torch
|
||||||
from vllm.config import get_current_vllm_config
|
from vllm.config import get_current_vllm_config
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import logger
|
||||||
from vllm.model_executor.layers.fused_moe import FusedMoE
|
from vllm.model_executor.layers.fused_moe import FusedMoE
|
||||||
from vllm.model_executor.layers.linear import LinearBase
|
from vllm.model_executor.layers.linear import LinearBase
|
||||||
from vllm.model_executor.layers.quantization import register_quantization_config
|
from vllm.model_executor.layers.quantization import register_quantization_config
|
||||||
@@ -41,8 +41,6 @@ from vllm_ascend.quantization.modelslim_config import (
|
|||||||
)
|
)
|
||||||
from vllm_ascend.utils import ASCEND_QUANTIZATION_METHOD
|
from vllm_ascend.utils import ASCEND_QUANTIZATION_METHOD
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def create_scheme_for_layer(
|
def create_scheme_for_layer(
|
||||||
quant_description: dict[str, Any],
|
quant_description: dict[str, Any],
|
||||||
|
|||||||
@@ -20,14 +20,13 @@ import os
|
|||||||
|
|
||||||
import torch
|
import torch
|
||||||
import torch_npu
|
import torch_npu
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import logger
|
||||||
from vllm.model_executor.layers.batch_invariant import vllm_is_batch_invariant
|
from vllm.model_executor.layers.batch_invariant import vllm_is_batch_invariant
|
||||||
from vllm.triton_utils import HAS_TRITON
|
from vllm.triton_utils import HAS_TRITON
|
||||||
|
|
||||||
# in case recursive call in reduce_sum.
|
# in case recursive call in reduce_sum.
|
||||||
torch_sum = torch.sum
|
torch_sum = torch.sum
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
|
||||||
|
|
||||||
if HAS_TRITON:
|
if HAS_TRITON:
|
||||||
from vllm_ascend.ops.triton.batch_invariant.matmul import (
|
from vllm_ascend.ops.triton.batch_invariant.matmul import (
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ from vllm.distributed.ec_transfer.ec_connector.base import ECConnectorMetadata
|
|||||||
from vllm.distributed.kv_events import KVEventBatch
|
from vllm.distributed.kv_events import KVEventBatch
|
||||||
from vllm.distributed.kv_transfer.kv_connector.v1.base import KVConnectorMetadata
|
from vllm.distributed.kv_transfer.kv_connector.v1.base import KVConnectorMetadata
|
||||||
from vllm.distributed.kv_transfer.kv_connector.v1.metrics import KVConnectorStats
|
from vllm.distributed.kv_transfer.kv_connector.v1.metrics import KVConnectorStats
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import logger
|
||||||
from vllm.v1.core.kv_cache_manager import KVCacheBlocks
|
from vllm.v1.core.kv_cache_manager import KVCacheBlocks
|
||||||
from vllm.v1.core.sched.async_scheduler import AsyncScheduler
|
from vllm.v1.core.sched.async_scheduler import AsyncScheduler
|
||||||
from vllm.v1.core.sched.output import NewRequestData, SchedulerOutput
|
from vllm.v1.core.sched.output import NewRequestData, SchedulerOutput
|
||||||
@@ -42,8 +42,6 @@ from vllm.v1.sample.rejection_sampler import PLACEHOLDER_TOKEN_ID
|
|||||||
from vllm.v1.spec_decode.metrics import SpecDecodingStats
|
from vllm.v1.spec_decode.metrics import SpecDecodingStats
|
||||||
from vllm.v1.utils import ConstantList, record_function_or_nullcontext
|
from vllm.v1.utils import ConstantList, record_function_or_nullcontext
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
# `spec_manager_map` in single_type_kv_cache_manager is a module-level dict
|
# `spec_manager_map` in single_type_kv_cache_manager is a module-level dict
|
||||||
# whose keys are class objects bound at import time. When the async
|
# whose keys are class objects bound at import time. When the async
|
||||||
|
|||||||
@@ -5,11 +5,8 @@ import torch
|
|||||||
from ucm.integration.vllm.ucm_connector import UCMConnector
|
from ucm.integration.vllm.ucm_connector import UCMConnector
|
||||||
from vllm.config import VllmConfig
|
from vllm.config import VllmConfig
|
||||||
from vllm.distributed.kv_transfer.kv_connector.v1.base import KVConnectorBase_V1, KVConnectorMetadata, KVConnectorRole
|
from vllm.distributed.kv_transfer.kv_connector.v1.base import KVConnectorBase_V1, KVConnectorMetadata, KVConnectorRole
|
||||||
from vllm.logger import init_logger
|
|
||||||
from vllm.v1.core.sched.output import SchedulerOutput
|
from vllm.v1.core.sched.output import SchedulerOutput
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
|
||||||
|
|
||||||
# isort: off
|
# isort: off
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from vllm.v1.attention.backend import AttentionMetadata # type: ignore
|
from vllm.v1.attention.backend import AttentionMetadata # type: ignore
|
||||||
|
|||||||
@@ -1,13 +1,11 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import logger
|
||||||
from vllm.utils.platform_utils import is_pin_memory_available
|
from vllm.utils.platform_utils import is_pin_memory_available
|
||||||
from vllm.v1.attention.backend import AttentionBackend # type: ignore
|
from vllm.v1.attention.backend import AttentionBackend # type: ignore
|
||||||
from vllm.v1.kv_offload.mediums import CPULoadStoreSpec, GPULoadStoreSpec
|
from vllm.v1.kv_offload.mediums import CPULoadStoreSpec, GPULoadStoreSpec
|
||||||
from vllm.v1.kv_offload.worker.worker import OffloadingHandler, TransferResult, TransferSpec
|
from vllm.v1.kv_offload.worker.worker import OffloadingHandler, TransferResult, TransferSpec
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def expand_block_ids(
|
def expand_block_ids(
|
||||||
block_ids: np.ndarray,
|
block_ids: np.ndarray,
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
from math import lcm
|
from math import lcm
|
||||||
|
|
||||||
import vllm.model_executor.models.config
|
import vllm.model_executor.models.config
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import logger
|
||||||
from vllm.model_executor.models import ModelRegistry
|
from vllm.model_executor.models import ModelRegistry
|
||||||
from vllm.model_executor.models.config import MambaModelConfig
|
from vllm.model_executor.models.config import MambaModelConfig
|
||||||
from vllm.utils.math_utils import cdiv
|
from vllm.utils.math_utils import cdiv
|
||||||
@@ -24,7 +24,6 @@ def verify_and_update_config(cls, vllm_config) -> None:
|
|||||||
Args:
|
Args:
|
||||||
vllm_config: vLLM Config
|
vllm_config: vLLM Config
|
||||||
"""
|
"""
|
||||||
logger = init_logger(__name__)
|
|
||||||
# Save the user input before it gets modified by MambaModelConfig
|
# Save the user input before it gets modified by MambaModelConfig
|
||||||
mamba_block_size = vllm_config.cache_config.mamba_block_size
|
mamba_block_size = vllm_config.cache_config.mamba_block_size
|
||||||
# Enable FULL_AND_PIECEWISE by default
|
# Enable FULL_AND_PIECEWISE by default
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ from typing import Any, Optional, cast
|
|||||||
|
|
||||||
import torch
|
import torch
|
||||||
from compressed_tensors.quantization import QuantizationArgs, QuantizationStrategy, QuantizationType
|
from compressed_tensors.quantization import QuantizationArgs, QuantizationStrategy, QuantizationType
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import logger
|
||||||
from vllm.model_executor.layers.fused_moe import FusedMoE
|
from vllm.model_executor.layers.fused_moe import FusedMoE
|
||||||
from vllm.model_executor.layers.linear import LinearBase, UnquantizedLinearMethod
|
from vllm.model_executor.layers.linear import LinearBase, UnquantizedLinearMethod
|
||||||
from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS, register_quantization_config
|
from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS, register_quantization_config
|
||||||
@@ -37,8 +37,6 @@ from vllm_ascend.utils import COMPRESSED_TENSORS_METHOD
|
|||||||
|
|
||||||
from .methods import AscendLinearScheme, AscendMoEScheme
|
from .methods import AscendLinearScheme, AscendMoEScheme
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
# Remove the original compressed_tensors method to replace with our implementation
|
# Remove the original compressed_tensors method to replace with our implementation
|
||||||
def _remove_quantization_method():
|
def _remove_quantization_method():
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ from typing import Any, Optional
|
|||||||
|
|
||||||
import torch
|
import torch
|
||||||
from vllm.config import get_current_vllm_config
|
from vllm.config import get_current_vllm_config
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import logger
|
||||||
from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase
|
from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase
|
||||||
from vllm.model_executor.layers.fused_moe import FusedMoE
|
from vllm.model_executor.layers.fused_moe import FusedMoE
|
||||||
from vllm.model_executor.layers.linear import LinearBase
|
from vllm.model_executor.layers.linear import LinearBase
|
||||||
@@ -47,8 +47,6 @@ from .methods import get_scheme_class
|
|||||||
# The config filename that ModelSlim generates after quantizing a model.
|
# The config filename that ModelSlim generates after quantizing a model.
|
||||||
MODELSLIM_CONFIG_FILENAME = "quant_model_description.json"
|
MODELSLIM_CONFIG_FILENAME = "quant_model_description.json"
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
|
||||||
|
|
||||||
# key: model_type
|
# key: model_type
|
||||||
# value: vLLM prefix -> HF prefix mapping (used to convert vLLM layer names to HF format
|
# value: vLLM prefix -> HF prefix mapping (used to convert vLLM layer names to HF format
|
||||||
# for looking up keys in quant_model_description.json)
|
# for looking up keys in quant_model_description.json)
|
||||||
|
|||||||
@@ -19,12 +19,10 @@ import json
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from vllm import envs
|
from vllm import envs
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import logger
|
||||||
|
|
||||||
from vllm_ascend.utils import ASCEND_QUANTIZATION_METHOD, COMPRESSED_TENSORS_METHOD
|
from vllm_ascend.utils import ASCEND_QUANTIZATION_METHOD, COMPRESSED_TENSORS_METHOD
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def get_model_file(
|
def get_model_file(
|
||||||
model: str | Path,
|
model: str | Path,
|
||||||
|
|||||||
@@ -2,14 +2,11 @@ import torch
|
|||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
from typing_extensions import override
|
from typing_extensions import override
|
||||||
from vllm.config import VllmConfig
|
from vllm.config import VllmConfig
|
||||||
from vllm.logger import init_logger
|
|
||||||
from vllm.model_executor.model_loader import get_model
|
from vllm.model_executor.model_loader import get_model
|
||||||
from vllm.v1.spec_decode.utils import create_vllm_config_for_draft_model
|
from vllm.v1.spec_decode.utils import create_vllm_config_for_draft_model
|
||||||
|
|
||||||
from vllm_ascend.spec_decode.eagle_proposer import SpecDecodeBaseProposer
|
from vllm_ascend.spec_decode.eagle_proposer import SpecDecodeBaseProposer
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
class AscendDraftModelProposer(SpecDecodeBaseProposer):
|
class AscendDraftModelProposer(SpecDecodeBaseProposer):
|
||||||
def __init__(
|
def __init__(
|
||||||
|
|||||||
@@ -1,14 +1,11 @@
|
|||||||
import torch
|
import torch
|
||||||
from vllm.config import CUDAGraphMode
|
from vllm.config import CUDAGraphMode
|
||||||
from vllm.logger import init_logger
|
|
||||||
from vllm.v1.sample.metadata import SamplingMetadata
|
from vllm.v1.sample.metadata import SamplingMetadata
|
||||||
from vllm.v1.spec_decode.medusa import MedusaProposer
|
from vllm.v1.spec_decode.medusa import MedusaProposer
|
||||||
from vllm.v1.spec_decode.metadata import SpecDecodeMetadata
|
from vllm.v1.spec_decode.metadata import SpecDecodeMetadata
|
||||||
|
|
||||||
from vllm_ascend.ascend_forward_context import set_ascend_forward_context
|
from vllm_ascend.ascend_forward_context import set_ascend_forward_context
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
class AscendMedusaProposer(MedusaProposer):
|
class AscendMedusaProposer(MedusaProposer):
|
||||||
"""
|
"""
|
||||||
|
|||||||
Reference in New Issue
Block a user