[gpt-oss] Add gpt-oss bf16 support

2025-08-13 21:25:57 +08:00
parent 5d2e7edf78
commit 17ea2ec6aa
1232 changed files with 777 additions and 36 deletions
--- a/vllm/entrypoints/cli/benchmark/__init__.py
+++ b/vllm/entrypoints/cli/benchmark/__init__.py
--- a/vllm/entrypoints/cli/benchmark/base.py
+++ b/vllm/entrypoints/cli/benchmark/base.py
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import argparse
+
+from vllm.entrypoints.cli.types import CLISubcommand
+from vllm.utils import FlexibleArgumentParser
+
+
+class BenchmarkSubcommandBase(CLISubcommand):
+    """Common scaffolding shared by the `vllm bench` subcommands.
+
+    `help`, `add_cli_args` and `cmd` only raise `NotImplementedError` here
+    and are meant to be overridden. `subparser_init` reads `self.name`,
+    which this class does not assign itself.
+    """
+
+    @property
+    def help(self) -> str:
+        """Help text of the subcommand; must be overridden."""
+        raise NotImplementedError
+
+    def add_cli_args(self, parser: argparse.ArgumentParser) -> None:
+        """Add the subcommand's options to `parser`; must be overridden."""
+        raise NotImplementedError
+
+    @staticmethod
+    def cmd(args: argparse.Namespace) -> None:
+        """Execute the benchmark; must be overridden.
+
+        Args:
+            args: The arguments to the command.
+        """
+        raise NotImplementedError
+
+    def subparser_init(
+            self,
+            subparsers: argparse._SubParsersAction) -> FlexibleArgumentParser:
+        """Create this subcommand's parser under `subparsers`.
+
+        The parser is named `self.name`, uses `self.help` as both its help
+        and its description, and is populated through `add_cli_args`.
+
+        Returns:
+            The newly added parser.
+        """
+        sub_name = self.name
+        new_parser = subparsers.add_parser(
+            sub_name,
+            help=self.help,
+            description=self.help,
+            usage=f"vllm bench {sub_name} [options]")
+        self.add_cli_args(new_parser)
+        return new_parser
--- a/vllm/entrypoints/cli/benchmark/latency.py
+++ b/vllm/entrypoints/cli/benchmark/latency.py
@@ -0,0 +1,35 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import argparse
+
+from vllm.benchmarks.latency import add_cli_args, main
+from vllm.entrypoints.cli.benchmark.base import BenchmarkSubcommandBase
+from vllm.entrypoints.cli.types import CLISubcommand
+
+
+class BenchmarkLatencySubcommand(BenchmarkSubcommandBase):
+    """CLI entry for `vllm bench latency`."""
+
+    def __init__(self):
+        self.name = "latency"
+        super().__init__()
+
+    @property
+    def help(self) -> str:
+        """Help text for this subcommand."""
+        return "Benchmark the latency of a single batch of requests."
+
+    def add_cli_args(self, parser: argparse.ArgumentParser) -> None:
+        """Forward to `vllm.benchmarks.latency.add_cli_args`."""
+        add_cli_args(parser)
+
+    @staticmethod
+    def cmd(args: argparse.Namespace) -> None:
+        """Forward the parsed `args` to `vllm.benchmarks.latency.main`."""
+        main(args)
+
+
+def cmd_init() -> list[CLISubcommand]:
+    """Return the subcommand instances contributed by this module."""
+    subcommand = BenchmarkLatencySubcommand()
+    return [subcommand]
--- a/vllm/entrypoints/cli/benchmark/main.py
+++ b/vllm/entrypoints/cli/benchmark/main.py
@@ -0,0 +1,74 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import argparse
+
+import vllm.entrypoints.cli.benchmark.latency
+import vllm.entrypoints.cli.benchmark.serve
+import vllm.entrypoints.cli.benchmark.throughput
+from vllm.entrypoints.cli.types import CLISubcommand
+from vllm.utils import FlexibleArgumentParser
+
+# Modules whose `cmd_init()` supplies the `vllm bench` subcommands.
+BENCHMARK_CMD_MODULES = [
+    vllm.entrypoints.cli.benchmark.latency,
+    vllm.entrypoints.cli.benchmark.serve,
+    vllm.entrypoints.cli.benchmark.throughput,
+]
+
+
+class BenchmarkSubcommand(CLISubcommand):
+    """The `bench` subcommand for the vLLM CLI.
+
+    Collects the subcommands returned by every module listed in
+    `BENCHMARK_CMD_MODULES` and exposes them as `vllm bench <bench_type>`.
+    """
+
+    def __init__(self):
+        self.name = "bench"
+        # Registered bench subcommands keyed by name. Created here so that
+        # `validate` is safe to call even before `subparser_init` has run.
+        self.cmds: dict[str, CLISubcommand] = {}
+        super().__init__()
+
+    @staticmethod
+    def cmd(args: argparse.Namespace) -> None:
+        """Dispatch to the handler chosen by the bench subparser."""
+        args.dispatch_function(args)
+
+    def validate(self, args: argparse.Namespace) -> None:
+        """Delegate validation to the subcommand named by `args.bench_type`.
+
+        Unknown bench types are ignored.
+        """
+        selected = self.cmds.get(args.bench_type)
+        if selected is not None:
+            selected.validate(args)
+
+    def subparser_init(
+            self,
+            subparsers: argparse._SubParsersAction) -> FlexibleArgumentParser:
+        """Add the `bench` parser and one nested subparser per bench command.
+
+        Returns:
+            The `bench` parser.
+        """
+        bench_parser = subparsers.add_parser(
+            "bench",
+            help="vLLM bench subcommand.",
+            description="vLLM bench subcommand.",
+            usage="vllm bench <bench_type> [options]")
+        bench_subparsers = bench_parser.add_subparsers(required=True,
+                                                       dest="bench_type")
+        # Rebuild the registry so repeated initialization starts clean.
+        self.cmds = {}
+        for cmd_module in BENCHMARK_CMD_MODULES:
+            for cmd in cmd_module.cmd_init():
+                cmd.subparser_init(bench_subparsers).set_defaults(
+                    dispatch_function=cmd.cmd)
+                self.cmds[cmd.name] = cmd
+        return bench_parser
+
+
+def cmd_init() -> list[CLISubcommand]:
+    """Return the subcommand instances contributed by this module."""
+    return [BenchmarkSubcommand()]
--- a/vllm/entrypoints/cli/benchmark/serve.py
+++ b/vllm/entrypoints/cli/benchmark/serve.py
@@ -0,0 +1,35 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import argparse
+
+from vllm.benchmarks.serve import add_cli_args, main
+from vllm.entrypoints.cli.benchmark.base import BenchmarkSubcommandBase
+from vllm.entrypoints.cli.types import CLISubcommand
+
+
+class BenchmarkServingSubcommand(BenchmarkSubcommandBase):
+    """CLI entry for `vllm bench serve`."""
+
+    def __init__(self):
+        self.name = "serve"
+        super().__init__()
+
+    @property
+    def help(self) -> str:
+        """Help text for this subcommand."""
+        return "Benchmark the online serving throughput."
+
+    def add_cli_args(self, parser: argparse.ArgumentParser) -> None:
+        """Forward to `vllm.benchmarks.serve.add_cli_args`."""
+        add_cli_args(parser)
+
+    @staticmethod
+    def cmd(args: argparse.Namespace) -> None:
+        """Forward the parsed `args` to `vllm.benchmarks.serve.main`."""
+        main(args)
+
+
+def cmd_init() -> list[CLISubcommand]:
+    """Return the subcommand instances contributed by this module."""
+    subcommand = BenchmarkServingSubcommand()
+    return [subcommand]
--- a/vllm/entrypoints/cli/benchmark/throughput.py
+++ b/vllm/entrypoints/cli/benchmark/throughput.py
@@ -0,0 +1,35 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import argparse
+
+from vllm.benchmarks.throughput import add_cli_args, main
+from vllm.entrypoints.cli.benchmark.base import BenchmarkSubcommandBase
+from vllm.entrypoints.cli.types import CLISubcommand
+
+
+class BenchmarkThroughputSubcommand(BenchmarkSubcommandBase):
+    """CLI entry for `vllm bench throughput`."""
+
+    def __init__(self):
+        self.name = "throughput"
+        super().__init__()
+
+    @property
+    def help(self) -> str:
+        """Help text for this subcommand."""
+        return "Benchmark offline inference throughput."
+
+    def add_cli_args(self, parser: argparse.ArgumentParser) -> None:
+        """Forward to `vllm.benchmarks.throughput.add_cli_args`."""
+        add_cli_args(parser)
+
+    @staticmethod
+    def cmd(args: argparse.Namespace) -> None:
+        """Forward the parsed `args` to `vllm.benchmarks.throughput.main`."""
+        main(args)
+
+
+def cmd_init() -> list[CLISubcommand]:
+    """Return the subcommand instances contributed by this module."""
+    subcommand = BenchmarkThroughputSubcommand()
+    return [subcommand]