From 8fac6062e437c5c3c96bb3cecca8ccab81f0e447 Mon Sep 17 00:00:00 2001
From: LZiBee <2736864745@qq.com>
Date: Mon, 20 Apr 2026 14:00:44 +0800
Subject: [PATCH] Avoid eager benchmark imports in CLI startup

---
 README.md                              |  2 ++
 vllm/entrypoints/cli/__init__.py       | 25 +++++++++--------------
 vllm/entrypoints/cli/benchmark/main.py | 28 ++++++++++++++++++++++++++
 3 files changed, 39 insertions(+), 16 deletions(-)

diff --git a/README.md b/README.md
index 5331776..8689007 100644
--- a/README.md
+++ b/README.md
@@ -21,6 +21,8 @@
   不再直接判定为非 CUDA 平台
 - 改为回退到 `torch.cuda.is_available()` 和
   `torch.cuda.device_count()` 继续判断 CUDA 是否可用
+- 调整 CLI 初始化逻辑，避免 benchmark 可选依赖缺失时阻塞
+  `vllm serve ...` 启动
 
 这个修复用于解决如下启动失败：
 
diff --git a/vllm/entrypoints/cli/__init__.py b/vllm/entrypoints/cli/__init__.py
index 704d94d..ff5a928 100644
--- a/vllm/entrypoints/cli/__init__.py
+++ b/vllm/entrypoints/cli/__init__.py
@@ -1,19 +1,12 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from vllm.entrypoints.cli.benchmark.latency import BenchmarkLatencySubcommand
-from vllm.entrypoints.cli.benchmark.mm_processor import (
-    BenchmarkMMProcessorSubcommand,
-)
-from vllm.entrypoints.cli.benchmark.serve import BenchmarkServingSubcommand
-from vllm.entrypoints.cli.benchmark.startup import BenchmarkStartupSubcommand
-from vllm.entrypoints.cli.benchmark.sweep import BenchmarkSweepSubcommand
-from vllm.entrypoints.cli.benchmark.throughput import BenchmarkThroughputSubcommand
 
-__all__: list[str] = [
-    "BenchmarkLatencySubcommand",
-    "BenchmarkMMProcessorSubcommand",
-    "BenchmarkServingSubcommand",
-    "BenchmarkStartupSubcommand",
-    "BenchmarkSweepSubcommand",
-    "BenchmarkThroughputSubcommand",
-]
+# Keep this package init import-free.
+#
+# The `vllm` console script imports `vllm.entrypoints.cli.main`, which causes
+# Python to import this package before loading the `main` submodule.
+# Eagerly importing benchmark subcommands here makes every `vllm serve ...`
+# startup depend on optional benchmark-only modules.
+#
+# Benchmark subcommands are loaded on demand in
+# `vllm.entrypoints.cli.benchmark.main`.
diff --git a/vllm/entrypoints/cli/benchmark/main.py b/vllm/entrypoints/cli/benchmark/main.py
index 48f34fc..ae490ba 100644
--- a/vllm/entrypoints/cli/benchmark/main.py
+++ b/vllm/entrypoints/cli/benchmark/main.py
@@ -2,6 +2,8 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import argparse
+import importlib
+import logging
 import typing
 
 from vllm.entrypoints.cli.benchmark.base import BenchmarkSubcommandBase
@@ -13,6 +15,30 @@ if typing.TYPE_CHECKING:
 else:
     FlexibleArgumentParser = argparse.ArgumentParser
 
+logger = logging.getLogger(__name__)
+
+
+def _load_benchmark_subcommands() -> None:  # imports run for side effects
+    modules: list[str] = [
+        "vllm.entrypoints.cli.benchmark.latency",
+        "vllm.entrypoints.cli.benchmark.mm_processor",
+        "vllm.entrypoints.cli.benchmark.serve",
+        "vllm.entrypoints.cli.benchmark.startup",
+        "vllm.entrypoints.cli.benchmark.sweep",
+        "vllm.entrypoints.cli.benchmark.throughput",
+    ]
+
+    for module_name in modules:  # a failed import skips only that module
+        try:
+            importlib.import_module(module_name)
+        except ModuleNotFoundError as e:  # other import errors propagate
+            logger.warning(
+                "Skipping benchmark subcommand module %s because an optional "
+                "dependency could not be imported: %r",
+                module_name,
+                e,
+            )
+
 
 class BenchmarkSubcommand(CLISubcommand):
     """The `bench` subcommand for the vLLM CLI."""
@@ -38,6 +64,8 @@ class BenchmarkSubcommand(CLISubcommand):
         )
         bench_subparsers = bench_parser.add_subparsers(required=True, dest="bench_type")
 
+        _load_benchmark_subcommands()
+
         for cmd_cls in BenchmarkSubcommandBase.__subclasses__():
             cmd_subparser = bench_subparsers.add_parser(
                 cmd_cls.name,