Iluvatar-mrv100 SDK 4.3.0

commit 8af8290b1d (parent 9efe891f99)
Date: 2025-09-15 14:58:11 +08:00
1052 changed files with 294967 additions and 1 deletion

View File: vllm/entrypoints/cli/benchmark/base.py

@@ -0,0 +1,37 @@
# SPDX-License-Identifier: Apache-2.0
import argparse

from vllm.entrypoints.cli.types import CLISubcommand
from vllm.utils import FlexibleArgumentParser


class BenchmarkSubcommandBase(CLISubcommand):
    """ The base class of subcommands for vllm bench. """

    @property
    def help(self) -> str:
        """The help message of the subcommand."""
        raise NotImplementedError

    def add_cli_args(self, parser: argparse.ArgumentParser) -> None:
        """Add the CLI arguments to the parser."""
        raise NotImplementedError

    @staticmethod
    def cmd(args: argparse.Namespace) -> None:
        """Run the benchmark.

        Args:
            args: The arguments to the command.
        """
        raise NotImplementedError

    def subparser_init(
            self,
            subparsers: argparse._SubParsersAction) -> FlexibleArgumentParser:
        parser = subparsers.add_parser(
            self.name,
            help=self.help,
            usage=f"vllm bench {self.name} [options]")
        self.add_cli_args(parser)
        return parser
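
As a sketch of how this base class is meant to be extended (the `latency` name and its flag are hypothetical; only `serve` exists in this commit), a concrete subcommand fills in `help`, `add_cli_args`, and `cmd`:

import argparse

from vllm.entrypoints.cli.benchmark.base import BenchmarkSubcommandBase


class BenchmarkLatencySubcommand(BenchmarkSubcommandBase):
    """Hypothetical `latency` subcommand for `vllm bench`."""

    def __init__(self):
        self.name = "latency"
        super().__init__()

    @property
    def help(self) -> str:
        return "Benchmark a single batch of requests."

    def add_cli_args(self, parser: argparse.ArgumentParser) -> None:
        # Benchmark-specific flags go here; subparser_init wires the rest.
        parser.add_argument("--num-iters", type=int, default=10)

    @staticmethod
    def cmd(args: argparse.Namespace) -> None:
        print(f"running {args.num_iters} iterations")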

View File: vllm/entrypoints/cli/benchmark/main.py

@@ -0,0 +1,50 @@
# SPDX-License-Identifier: Apache-2.0
import argparse

import vllm.entrypoints.cli.benchmark.serve
from vllm.entrypoints.cli.types import CLISubcommand
from vllm.utils import FlexibleArgumentParser

# TODO: Add the rest of the benchmark subcommands here,
# e.g., throughput, latency, etc.
BENCHMARK_CMD_MODULES = [
    vllm.entrypoints.cli.benchmark.serve,
]


class BenchmarkSubcommand(CLISubcommand):
    """ The `bench` subcommand for the vLLM CLI. """

    def __init__(self):
        self.name = "bench"
        super().__init__()

    @staticmethod
    def cmd(args: argparse.Namespace) -> None:
        args.dispatch_function(args)

    def validate(self, args: argparse.Namespace) -> None:
        if args.bench_type in self.cmds:
            self.cmds[args.bench_type].validate(args)

    def subparser_init(
            self,
            subparsers: argparse._SubParsersAction) -> FlexibleArgumentParser:
        bench_parser = subparsers.add_parser(
            "bench",
            help="vLLM bench subcommand.",
            usage="vllm bench <bench_type> [options]")
        bench_subparsers = bench_parser.add_subparsers(required=True,
                                                       dest="bench_type")
        self.cmds = {}
        for cmd_module in BENCHMARK_CMD_MODULES:
            new_cmds = cmd_module.cmd_init()
            for cmd in new_cmds:
                cmd.subparser_init(bench_subparsers).set_defaults(
                    dispatch_function=cmd.cmd)
                self.cmds[cmd.name] = cmd
        return bench_parser


def cmd_init() -> list[CLISubcommand]:
    return [BenchmarkSubcommand()]
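
The loop above creates a second level of subparsers under `bench`, each leaf attaching its handler via `set_defaults`, so `vllm bench serve ...` resolves to the serve handler. A self-contained sketch of the same pattern with plain argparse (names are illustrative):

import argparse

root = argparse.ArgumentParser(prog="vllm")
root_sub = root.add_subparsers(dest="subparser")

bench = root_sub.add_parser("bench", usage="vllm bench <bench_type> [options]")
bench_sub = bench.add_subparsers(required=True, dest="bench_type")

serve = bench_sub.add_parser("serve")
# set_defaults stores the handler on the parsed Namespace, so the caller
# can dispatch without knowing which subcommand was chosen.
serve.set_defaults(dispatch_function=lambda a: print("benching", a.bench_type))

args = root.parse_args(["bench", "serve"])
args.dispatch_function(args)  # prints: benching serve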

View File: vllm/entrypoints/cli/benchmark/serve.py

@@ -0,0 +1,29 @@
# SPDX-License-Identifier: Apache-2.0
import argparse

from vllm.benchmarks.serve import add_cli_args, main
from vllm.entrypoints.cli.benchmark.base import BenchmarkSubcommandBase
from vllm.entrypoints.cli.types import CLISubcommand


class BenchmarkServingSubcommand(BenchmarkSubcommandBase):
    """ The `serve` subcommand for vllm bench. """

    def __init__(self):
        self.name = "serve"
        super().__init__()

    @property
    def help(self) -> str:
        return "Benchmark the online serving throughput."

    def add_cli_args(self, parser: argparse.ArgumentParser) -> None:
        add_cli_args(parser)

    @staticmethod
    def cmd(args: argparse.Namespace) -> None:
        main(args)


def cmd_init() -> list[CLISubcommand]:
    return [BenchmarkServingSubcommand()]
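
Since `add_cli_args` simply delegates to vllm.benchmarks.serve, the subcommand can also be driven programmatically. A hedged sketch (the `--model` flag is assumed from the underlying benchmark's argument set, and cmd() really runs the benchmark, so a server must already be up):

import argparse

from vllm.entrypoints.cli.benchmark.serve import BenchmarkServingSubcommand

sub = BenchmarkServingSubcommand()
parser = argparse.ArgumentParser(prog="vllm bench serve")
sub.add_cli_args(parser)  # pulls in the benchmark's own flags

args = parser.parse_args(["--model", "facebook/opt-125m"])
sub.cmd(args)  # equivalent to: vllm bench serve --model facebook/opt-125m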

View File: vllm/entrypoints/cli/main.py

@@ -0,0 +1,57 @@
# SPDX-License-Identifier: Apache-2.0
# The CLI entrypoint to vLLM.
import signal
import sys

import vllm.entrypoints.cli.benchmark.main
import vllm.entrypoints.cli.openai
import vllm.entrypoints.cli.serve
import vllm.version
from vllm.entrypoints.utils import cli_env_setup
from vllm.utils import FlexibleArgumentParser

CMD_MODULES = [
    vllm.entrypoints.cli.openai,
    vllm.entrypoints.cli.serve,
    vllm.entrypoints.cli.benchmark.main,
]


def register_signal_handlers():

    def signal_handler(sig, frame):
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTSTP, signal_handler)


def main():
    cli_env_setup()
    parser = FlexibleArgumentParser(description="vLLM CLI")
    parser.add_argument('-v',
                        '--version',
                        action='version',
                        version=vllm.version.__version__)
    subparsers = parser.add_subparsers(required=False, dest="subparser")
    cmds = {}
    for cmd_module in CMD_MODULES:
        new_cmds = cmd_module.cmd_init()
        for cmd in new_cmds:
            cmd.subparser_init(subparsers).set_defaults(
                dispatch_function=cmd.cmd)
            cmds[cmd.name] = cmd
    args = parser.parse_args()
    if args.subparser in cmds:
        cmds[args.subparser].validate(args)
    if hasattr(args, "dispatch_function"):
        args.dispatch_function(args)
    else:
        parser.print_help()


if __name__ == "__main__":
    main()
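
main() assumes only that each entry in CMD_MODULES exposes a module-level cmd_init() returning subcommand objects, so a new top-level command is added by appending one module to the list. A self-contained sketch of that contract (the fake module and `hello` command are illustrative):

import argparse
import types


class HelloCommand:
    """Illustrative stand-in for a CLISubcommand implementation."""
    name = "hello"

    @staticmethod
    def cmd(args):
        print(f"hello {args.who}")

    def validate(self, args):
        pass  # no validation, mirroring the CLISubcommand default

    def subparser_init(self, subparsers):
        parser = subparsers.add_parser("hello")
        parser.add_argument("who")
        return parser


fake_module = types.SimpleNamespace(cmd_init=lambda: [HelloCommand()])

parser = argparse.ArgumentParser(description="mini vLLM-style CLI")
subparsers = parser.add_subparsers(required=False, dest="subparser")
cmds = {}
for cmd_module in [fake_module]:
    for cmd in cmd_module.cmd_init():
        cmd.subparser_init(subparsers).set_defaults(dispatch_function=cmd.cmd)
        cmds[cmd.name] = cmd

args = parser.parse_args(["hello", "world"])
if args.subparser in cmds:
    cmds[args.subparser].validate(args)
args.dispatch_function(args)  # prints: hello world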

View File: vllm/entrypoints/cli/openai.py

@@ -0,0 +1,172 @@
# SPDX-License-Identifier: Apache-2.0
# Commands that act as an interactive OpenAI API client
import argparse
import os
import signal
import sys
from typing import Optional

from openai import OpenAI
from openai.types.chat import ChatCompletionMessageParam

from vllm.entrypoints.cli.types import CLISubcommand
from vllm.utils import FlexibleArgumentParser


def _register_signal_handlers():

    def signal_handler(sig, frame):
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTSTP, signal_handler)


def _interactive_cli(args: argparse.Namespace) -> tuple[str, OpenAI]:
    _register_signal_handlers()

    base_url = args.url
    api_key = args.api_key or os.environ.get("OPENAI_API_KEY", "EMPTY")
    openai_client = OpenAI(api_key=api_key, base_url=base_url)

    if args.model_name:
        model_name = args.model_name
    else:
        available_models = openai_client.models.list()
        model_name = available_models.data[0].id

    print(f"Using model: {model_name}")

    return model_name, openai_client


def chat(system_prompt: Optional[str], model_name: str,
         client: OpenAI) -> None:
    conversation: list[ChatCompletionMessageParam] = []
    if system_prompt is not None:
        conversation.append({"role": "system", "content": system_prompt})

    print("Please enter a message for the chat model:")
    while True:
        try:
            input_message = input("> ")
        except EOFError:
            return
        conversation.append({"role": "user", "content": input_message})

        chat_completion = client.chat.completions.create(
            model=model_name, messages=conversation)

        response_message = chat_completion.choices[0].message
        output = response_message.content

        conversation.append(response_message)  # type: ignore
        print(output)


def _add_query_options(
        parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
    parser.add_argument(
        "--url",
        type=str,
        default="http://localhost:8000/v1",
        help="url of the running OpenAI-Compatible RESTful API server")
    parser.add_argument(
        "--model-name",
        type=str,
        default=None,
        help=("The model name used in prompt completion, default to "
              "the first model in list models API call."))
    parser.add_argument(
        "--api-key",
        type=str,
        default=None,
        help=(
            "API key for OpenAI services. If provided, this api key "
            "will overwrite the api key obtained through environment variables."
        ))
    return parser


class ChatCommand(CLISubcommand):
    """The `chat` subcommand for the vLLM CLI. """

    def __init__(self):
        self.name = "chat"
        super().__init__()

    @staticmethod
    def cmd(args: argparse.Namespace) -> None:
        model_name, client = _interactive_cli(args)
        system_prompt = args.system_prompt
        conversation: list[ChatCompletionMessageParam] = []
        if system_prompt is not None:
            conversation.append({"role": "system", "content": system_prompt})

        print("Please enter a message for the chat model:")
        while True:
            try:
                input_message = input("> ")
            except EOFError:
                return
            conversation.append({"role": "user", "content": input_message})

            chat_completion = client.chat.completions.create(
                model=model_name, messages=conversation)

            response_message = chat_completion.choices[0].message
            output = response_message.content

            conversation.append(response_message)  # type: ignore
            print(output)

    def subparser_init(
            self,
            subparsers: argparse._SubParsersAction) -> FlexibleArgumentParser:
        chat_parser = subparsers.add_parser(
            "chat",
            help="Generate chat completions via the running API server",
            usage="vllm chat [options]")
        _add_query_options(chat_parser)
        chat_parser.add_argument(
            "--system-prompt",
            type=str,
            default=None,
            help=("The system prompt to be added to the chat template, "
                  "used for models that support system prompts."))
        return chat_parser


class CompleteCommand(CLISubcommand):
    """The `complete` subcommand for the vLLM CLI. """

    def __init__(self):
        self.name = "complete"
        super().__init__()

    @staticmethod
    def cmd(args: argparse.Namespace) -> None:
        model_name, client = _interactive_cli(args)

        print("Please enter prompt to complete:")
        while True:
            input_prompt = input("> ")
            completion = client.completions.create(model=model_name,
                                                   prompt=input_prompt)
            output = completion.choices[0].text
            print(output)

    def subparser_init(
            self,
            subparsers: argparse._SubParsersAction) -> FlexibleArgumentParser:
        complete_parser = subparsers.add_parser(
            "complete",
            help=("Generate text completions based on the given prompt "
                  "via the running API server"),
            usage="vllm complete [options]")
        _add_query_options(complete_parser)
        return complete_parser


def cmd_init() -> list[CLISubcommand]:
    return [ChatCommand(), CompleteCommand()]
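
Per turn, ChatCommand.cmd reduces to a single chat.completions call carrying the running conversation. A minimal non-interactive equivalent, assuming a vLLM server is already listening at the default --url:

from openai import OpenAI

# Same defaults `vllm chat` uses when no flags are given.
client = OpenAI(api_key="EMPTY", base_url="http://localhost:8000/v1")
model_name = client.models.list().data[0].id  # first served model

chat_completion = client.chat.completions.create(
    model=model_name,
    messages=[{"role": "user", "content": "Hello!"}],
)
print(chat_completion.choices[0].message.content)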

View File: vllm/entrypoints/cli/serve.py

@@ -0,0 +1,58 @@
# SPDX-License-Identifier: Apache-2.0
import argparse

import uvloop

from vllm.entrypoints.cli.types import CLISubcommand
from vllm.entrypoints.openai.api_server import run_server
from vllm.entrypoints.openai.cli_args import (make_arg_parser,
                                              validate_parsed_serve_args)
from vllm.utils import FlexibleArgumentParser


class ServeSubcommand(CLISubcommand):
    """The `serve` subcommand for the vLLM CLI. """

    def __init__(self):
        self.name = "serve"
        super().__init__()

    @staticmethod
    def cmd(args: argparse.Namespace) -> None:
        # If model is specified in CLI (as positional arg), it takes precedence
        if hasattr(args, 'model_tag') and args.model_tag is not None:
            args.model = args.model_tag

        uvloop.run(run_server(args))

    def validate(self, args: argparse.Namespace) -> None:
        validate_parsed_serve_args(args)

    def subparser_init(
            self,
            subparsers: argparse._SubParsersAction) -> FlexibleArgumentParser:
        serve_parser = subparsers.add_parser(
            "serve",
            help="Start the vLLM OpenAI Compatible API server",
            usage="vllm serve [model_tag] [options]")
        serve_parser.add_argument("model_tag",
                                  type=str,
                                  nargs='?',
                                  help="The model tag to serve "
                                  "(optional if specified in config)")
        serve_parser.add_argument(
            "--config",
            type=str,
            default='',
            required=False,
            help="Read CLI options from a config file. "
            "Must be a YAML with the following options: "
            "https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html#cli-reference"
        )
        return make_arg_parser(serve_parser)


def cmd_init() -> list[CLISubcommand]:
    return [ServeSubcommand()]
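
The precedence rule in cmd() is easy to miss: a positional model_tag silently overrides whatever model value the config file supplied. A minimal reproduction (values are illustrative):

import argparse

# Namespace as it might look after `vllm serve my-model --config cfg.yaml`,
# where cfg.yaml also set a model.
args = argparse.Namespace(model="model-from-config", model_tag="my-model")

if hasattr(args, 'model_tag') and args.model_tag is not None:
    args.model = args.model_tag

print(args.model)  # -> my-model: the positional tag wins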

View File: vllm/entrypoints/cli/types.py

@@ -0,0 +1,24 @@
# SPDX-License-Identifier: Apache-2.0
import argparse

from vllm.utils import FlexibleArgumentParser


class CLISubcommand:
    """Base class for CLI argument handlers."""

    name: str

    @staticmethod
    def cmd(args: argparse.Namespace) -> None:
        raise NotImplementedError("Subclasses should implement this method")

    def validate(self, args: argparse.Namespace) -> None:
        # No validation by default
        pass

    def subparser_init(
            self,
            subparsers: argparse._SubParsersAction) -> FlexibleArgumentParser:
        raise NotImplementedError("Subclasses should implement this method")
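
validate() is the only hook with a default (do-nothing) body; the CLI entrypoint calls it before dispatch, so a subcommand can reject bad argument combinations early. A sketch (FooCommand and its --n flag are hypothetical):

import argparse

from vllm.entrypoints.cli.types import CLISubcommand


class FooCommand(CLISubcommand):
    """Hypothetical subcommand that rejects a negative --n before dispatch."""
    name = "foo"

    @staticmethod
    def cmd(args: argparse.Namespace) -> None:
        print("n =", args.n)

    def validate(self, args: argparse.Namespace) -> None:
        if args.n < 0:
            raise ValueError("--n must be non-negative")

    def subparser_init(self, subparsers):
        parser = subparsers.add_parser("foo")
        parser.add_argument("--n", type=int, default=0)
        return parser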