From 8fac6062e437c5c3c96bb3cecca8ccab81f0e447 Mon Sep 17 00:00:00 2001
From: LZiBee <2736864745@qq.com>
Date: Mon, 20 Apr 2026 14:00:44 +0800
Subject: [PATCH] Avoid eager benchmark imports in CLI startup

---
 README.md                              |  2 ++
 vllm/entrypoints/cli/__init__.py       | 25 +++++++++--------------
 vllm/entrypoints/cli/benchmark/main.py | 28 ++++++++++++++++++++++++++
 3 files changed, 39 insertions(+), 16 deletions(-)

diff --git a/README.md b/README.md
index 5331776..8689007 100644
--- a/README.md
+++ b/README.md
@@ -21,6 +21,8 @@
   不再直接判定为非 CUDA 平台
 - 改为回退到 `torch.cuda.is_available()` 和 `torch.cuda.device_count()`
   继续判断 CUDA 是否可用
+- 调整 CLI 初始化逻辑,避免 benchmark 可选依赖缺失时阻塞
+  `vllm serve ...` 启动

 这个修复用于解决如下启动失败:

diff --git a/vllm/entrypoints/cli/__init__.py b/vllm/entrypoints/cli/__init__.py
index 704d94d..ff5a928 100644
--- a/vllm/entrypoints/cli/__init__.py
+++ b/vllm/entrypoints/cli/__init__.py
@@ -1,19 +1,12 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from vllm.entrypoints.cli.benchmark.latency import BenchmarkLatencySubcommand
-from vllm.entrypoints.cli.benchmark.mm_processor import (
-    BenchmarkMMProcessorSubcommand,
-)
-from vllm.entrypoints.cli.benchmark.serve import BenchmarkServingSubcommand
-from vllm.entrypoints.cli.benchmark.startup import BenchmarkStartupSubcommand
-from vllm.entrypoints.cli.benchmark.sweep import BenchmarkSweepSubcommand
-from vllm.entrypoints.cli.benchmark.throughput import BenchmarkThroughputSubcommand

-__all__: list[str] = [
-    "BenchmarkLatencySubcommand",
-    "BenchmarkMMProcessorSubcommand",
-    "BenchmarkServingSubcommand",
-    "BenchmarkStartupSubcommand",
-    "BenchmarkSweepSubcommand",
-    "BenchmarkThroughputSubcommand",
-]
+# Keep this package init import-free.
+#
+# The `vllm` console script imports `vllm.entrypoints.cli.main`, which causes
+# Python to import this package before loading the `main` submodule.
+# Eagerly importing benchmark subcommands here makes every `vllm serve ...`
+# startup depend on optional benchmark-only modules.
+#
+# Benchmark subcommands are loaded on demand in
+# `vllm.entrypoints.cli.benchmark.main`.
diff --git a/vllm/entrypoints/cli/benchmark/main.py b/vllm/entrypoints/cli/benchmark/main.py
index 48f34fc..ae490ba 100644
--- a/vllm/entrypoints/cli/benchmark/main.py
+++ b/vllm/entrypoints/cli/benchmark/main.py
@@ -2,6 +2,8 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

 import argparse
+import importlib
+import logging
 import typing

 from vllm.entrypoints.cli.benchmark.base import BenchmarkSubcommandBase
@@ -13,6 +15,41 @@ if typing.TYPE_CHECKING:
 else:
     FlexibleArgumentParser = argparse.ArgumentParser

+logger = logging.getLogger(__name__)
+
+
+def _load_benchmark_subcommands() -> None:
+    """Import the benchmark subcommand modules on demand.
+
+    Importing each module defines its subcommand class, which the caller
+    then discovers through `BenchmarkSubcommandBase.__subclasses__()`.
+    A module that cannot be imported is skipped with a warning so that a
+    missing or broken optional benchmark dependency does not abort CLI
+    startup for unrelated commands such as `vllm serve`.
+    """
+    modules = [
+        "vllm.entrypoints.cli.benchmark.latency",
+        "vllm.entrypoints.cli.benchmark.mm_processor",
+        "vllm.entrypoints.cli.benchmark.serve",
+        "vllm.entrypoints.cli.benchmark.startup",
+        "vllm.entrypoints.cli.benchmark.sweep",
+        "vllm.entrypoints.cli.benchmark.throughput",
+    ]
+
+    for module_name in modules:
+        # Catch `ImportError`, not only its subclass `ModuleNotFoundError`:
+        # an optional dependency that is installed but fails to import
+        # (e.g. `from pkg import missing_name`) must be skipped as well.
+        try:
+            importlib.import_module(module_name)
+        except ImportError as e:
+            logger.warning(
+                "Skipping benchmark subcommand module %s because an optional "
+                "dependency could not be imported: %r",
+                module_name,
+                e,
+            )
+

 class BenchmarkSubcommand(CLISubcommand):
     """The `bench` subcommand for the vLLM CLI."""
@@ -38,6 +75,8 @@ class BenchmarkSubcommand(CLISubcommand):
         )
         bench_subparsers = bench_parser.add_subparsers(required=True, dest="bench_type")

+        _load_benchmark_subcommands()
+
         for cmd_cls in BenchmarkSubcommandBase.__subclasses__():
             cmd_subparser = bench_subparsers.add_parser(
                 cmd_cls.name,