Avoid eager benchmark imports in CLI startup

2026-04-20 14:00:44 +08:00
parent 91a15dfef1
commit 8fac6062e4
3 changed files with 39 additions and 16 deletions
--- a/README.md
+++ b/README.md
@@ -21,6 +21,8 @@
  不再直接判定为非 CUDA 平台
 - 改为回退到 `torch.cuda.is_available()` 和
  `torch.cuda.device_count()` 继续判断 CUDA 是否可用
 - 调整 CLI 初始化逻辑，避免 benchmark 可选依赖缺失时阻塞
  `vllm serve ...` 启动
 这个修复用于解决如下启动失败：
--- a/vllm/entrypoints/cli/init.py
+++ b/vllm/entrypoints/cli/init.py
@@ -1,19 +1,12 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 from vllm.entrypoints.cli.benchmark.latency import BenchmarkLatencySubcommand
 from vllm.entrypoints.cli.benchmark.mm_processor import (
    BenchmarkMMProcessorSubcommand,
 )
 from vllm.entrypoints.cli.benchmark.serve import BenchmarkServingSubcommand
 from vllm.entrypoints.cli.benchmark.startup import BenchmarkStartupSubcommand
 from vllm.entrypoints.cli.benchmark.sweep import BenchmarkSweepSubcommand
 from vllm.entrypoints.cli.benchmark.throughput import BenchmarkThroughputSubcommand
-__all__: list[str] = [
+# Keep this package init import-free.
-    "BenchmarkLatencySubcommand",
+#
-    "BenchmarkMMProcessorSubcommand",
+# The `vllm` console script imports `vllm.entrypoints.cli.main`, which causes
-    "BenchmarkServingSubcommand",
+# Python to import this package before loading the `main` submodule.
-    "BenchmarkStartupSubcommand",
+# Eagerly importing benchmark subcommands here makes every `vllm serve ...`
-    "BenchmarkSweepSubcommand",
+# startup depend on optional benchmark-only modules.
-    "BenchmarkThroughputSubcommand",
+#
-]
+# Benchmark subcommands are loaded on demand in
 # `vllm.entrypoints.cli.benchmark.main`.
--- a/vllm/entrypoints/cli/benchmark/main.py
+++ b/vllm/entrypoints/cli/benchmark/main.py
@@ -2,6 +2,8 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import argparse
 import importlib
 import logging
 import typing
 from vllm.entrypoints.cli.benchmark.base import BenchmarkSubcommandBase
@@ -13,6 +15,30 @@ if typing.TYPE_CHECKING:
 else:
    FlexibleArgumentParser = argparse.ArgumentParser
 # Module-level logger; used below to warn when a benchmark subcommand
 # module is skipped during loading.
 logger = logging.getLogger(__name__)
 def _load_benchmark_subcommands() -> None:
    """Import each benchmark subcommand module, skipping unavailable ones.

    The module objects returned by the import are discarded; the modules are
    loaded right before the caller enumerates
    ``BenchmarkSubcommandBase.__subclasses__()`` (presumably so that each
    module's subcommand class is defined by then).

    A module whose import raises ``ModuleNotFoundError`` is reported with a
    warning and skipped, and the remaining modules are still attempted. Any
    other exception propagates to the caller.
    """
    subcommand_modules = (
        "vllm.entrypoints.cli.benchmark.latency",
        "vllm.entrypoints.cli.benchmark.mm_processor",
        "vllm.entrypoints.cli.benchmark.serve",
        "vllm.entrypoints.cli.benchmark.startup",
        "vllm.entrypoints.cli.benchmark.sweep",
        "vllm.entrypoints.cli.benchmark.throughput",
    )
    skip_message = (
        "Skipping benchmark subcommand module %s because an optional "
        "dependency could not be imported: %r"
    )

    for dotted_path in subcommand_modules:
        try:
            importlib.import_module(dotted_path)
        except ModuleNotFoundError as exc:
            # Best effort: one unavailable module must not prevent the
            # others from being loaded.
            logger.warning(skip_message, dotted_path, exc)
 class BenchmarkSubcommand(CLISubcommand):
    """The `bench` subcommand for the vLLM CLI."""
@@ -38,6 +64,8 @@ class BenchmarkSubcommand(CLISubcommand):
        )
        bench_subparsers = bench_parser.add_subparsers(required=True, dest="bench_type")
        _load_benchmark_subcommands()
        for cmd_cls in BenchmarkSubcommandBase.__subclasses__():
            cmd_subparser = bench_subparsers.add_parser(
                cmd_cls.name,