Avoid eager benchmark imports in CLI startup

This commit is contained in:
2026-04-20 14:00:44 +08:00
parent 91a15dfef1
commit 8fac6062e4
3 changed files with 39 additions and 16 deletions

View File

@@ -21,6 +21,8 @@
不再直接判定为非 CUDA 平台
- 改为回退到 `torch.cuda.is_available()` 和
`torch.cuda.device_count()` 继续判断 CUDA 是否可用
- 调整 CLI 初始化逻辑,避免 benchmark 可选依赖缺失时阻塞
`vllm serve ...` 启动
这个修复用于解决如下启动失败:

View File

@@ -1,19 +1,12 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from vllm.entrypoints.cli.benchmark.latency import BenchmarkLatencySubcommand
from vllm.entrypoints.cli.benchmark.mm_processor import (
BenchmarkMMProcessorSubcommand,
)
from vllm.entrypoints.cli.benchmark.serve import BenchmarkServingSubcommand
from vllm.entrypoints.cli.benchmark.startup import BenchmarkStartupSubcommand
from vllm.entrypoints.cli.benchmark.sweep import BenchmarkSweepSubcommand
from vllm.entrypoints.cli.benchmark.throughput import BenchmarkThroughputSubcommand
__all__: list[str] = [
"BenchmarkLatencySubcommand",
"BenchmarkMMProcessorSubcommand",
"BenchmarkServingSubcommand",
"BenchmarkStartupSubcommand",
"BenchmarkSweepSubcommand",
"BenchmarkThroughputSubcommand",
]
# Keep this package init import-free.
#
# The `vllm` console script imports `vllm.entrypoints.cli.main`, which causes
# Python to import this package before loading the `main` submodule.
# Eagerly importing benchmark subcommands here makes every `vllm serve ...`
# startup depend on optional benchmark-only modules.
#
# Benchmark subcommands are loaded on demand in
# `vllm.entrypoints.cli.benchmark.main`.

View File

@@ -2,6 +2,8 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import argparse
import importlib
import logging
import typing
from vllm.entrypoints.cli.benchmark.base import BenchmarkSubcommandBase
@@ -13,6 +15,30 @@ if typing.TYPE_CHECKING:
else:
FlexibleArgumentParser = argparse.ArgumentParser
logger = logging.getLogger(__name__)
def _load_benchmark_subcommands() -> None:
modules = [
"vllm.entrypoints.cli.benchmark.latency",
"vllm.entrypoints.cli.benchmark.mm_processor",
"vllm.entrypoints.cli.benchmark.serve",
"vllm.entrypoints.cli.benchmark.startup",
"vllm.entrypoints.cli.benchmark.sweep",
"vllm.entrypoints.cli.benchmark.throughput",
]
for module_name in modules:
try:
importlib.import_module(module_name)
except ModuleNotFoundError as e:
logger.warning(
"Skipping benchmark subcommand module %s because an optional "
"dependency could not be imported: %r",
module_name,
e,
)
class BenchmarkSubcommand(CLISubcommand):
"""The `bench` subcommand for the vLLM CLI."""
@@ -38,6 +64,8 @@ class BenchmarkSubcommand(CLISubcommand):
)
bench_subparsers = bench_parser.add_subparsers(required=True, dest="bench_type")
_load_benchmark_subcommands()
for cmd_cls in BenchmarkSubcommandBase.__subclasses__():
cmd_subparser = bench_subparsers.add_parser(
cmd_cls.name,