Add minimal vLLM 0.16.1 build repo for BI-V150

This commit is contained in:
2026-04-18 10:56:22 +08:00
commit d69657327e
1895 changed files with 615301 additions and 0 deletions

View File

@@ -0,0 +1,2 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

View File

@@ -0,0 +1,187 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import json
import os
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any
from openai.types.responses.response_function_tool_call_output_item import (
ResponseFunctionToolCallOutputItem,
)
from openai_harmony import Author, Message, Role, TextContent
from vllm.logger import init_logger
from vllm.utils import random_uuid
if TYPE_CHECKING:
# Avoid circular import.
from vllm.entrypoints.openai.responses.context import ConversationContext
logger = init_logger(__name__)
MIN_GPT_OSS_VERSION = "0.0.7"
def validate_gpt_oss_install():
"""
Check if the gpt-oss is installed and its version is at least 0.0.7.
If not, raise an ImportError.
"""
from importlib.metadata import PackageNotFoundError, version
from packaging.version import InvalidVersion, Version
try:
pkg_version_str = version("gpt_oss")
pkg_version = Version(pkg_version_str)
except PackageNotFoundError:
raise ImportError("Package 'gpt_oss' is not installed.") from None
except InvalidVersion as e:
raise ImportError(f"Invalid version string for 'gpt_oss': {e}") from None
if pkg_version < Version(MIN_GPT_OSS_VERSION):
raise ImportError(
f"gpt_oss >= {MIN_GPT_OSS_VERSION} is required, "
f"but {pkg_version} is installed."
) from None
class Tool(ABC):
@abstractmethod
async def get_result(self, context: "ConversationContext") -> Any:
pass
@abstractmethod
async def get_result_parsable_context(self, context: "ConversationContext") -> Any:
pass
class HarmonyBrowserTool(Tool):
def __init__(self):
self.enabled = True
exa_api_key = os.getenv("EXA_API_KEY")
if not exa_api_key:
self.enabled = False
logger.warning_once("EXA_API_KEY is not set, browsing is disabled")
return
try:
validate_gpt_oss_install()
from gpt_oss.tools.simple_browser import SimpleBrowserTool
from gpt_oss.tools.simple_browser.backend import ExaBackend
except ImportError as e:
self.enabled = False
logger.warning_once(
"gpt_oss is not installed properly (%s), browsing is disabled", e
)
return
browser_backend = ExaBackend(source="web", api_key=exa_api_key)
self.browser_tool = SimpleBrowserTool(backend=browser_backend)
logger.info_once("Browser tool initialized")
async def get_result(self, context: "ConversationContext") -> Any:
from vllm.entrypoints.openai.responses.context import HarmonyContext
assert isinstance(context, HarmonyContext)
last_msg = context.messages[-1]
tool_output_msgs = []
async for msg in self.browser_tool.process(last_msg):
tool_output_msgs.append(msg)
return tool_output_msgs
async def get_result_parsable_context(self, context: "ConversationContext") -> Any:
raise NotImplementedError("Not implemented yet")
@property
def tool_config(self) -> Any:
return self.browser_tool.tool_config
class HarmonyPythonTool(Tool):
def __init__(self):
self.enabled = True
try:
validate_gpt_oss_install()
from gpt_oss.tools.python_docker.docker_tool import PythonTool
except ImportError as e:
self.enabled = False
logger.warning_once(
"gpt_oss is not installed properly (%s), code interpreter is disabled",
e,
)
return
self.python_tool = PythonTool()
async def validate(self):
if not self.enabled:
return
try:
message = Message(
author=Author(role=Role.ASSISTANT),
content=[TextContent(text="print('Hello, world!')")],
channel="analysis",
recipient="python",
content_type="code",
)
msgs = []
async for msg in self.python_tool.process(message):
msgs.append(msg)
assert msgs[0].content[0].text == "Hello, world!\n"
except Exception as e:
self.enabled = False
logger.warning_once(
"Code interpreter tool failed to initialize (%s), code "
"interpreter is disabled",
e,
)
return
logger.info_once("Code interpreter tool initialized")
async def get_result(self, context: "ConversationContext") -> Any:
from vllm.entrypoints.openai.responses.context import HarmonyContext
assert isinstance(context, HarmonyContext)
last_msg = context.messages[-1]
tool_output_msgs = []
async for msg in self.python_tool.process(last_msg):
tool_output_msgs.append(msg)
return tool_output_msgs
async def get_result_parsable_context(self, context: "ConversationContext") -> Any:
"""
This function converts parsable context types to harmony and
back so we can use GPTOSS demo python tool
"""
from vllm.entrypoints.openai.responses.context import ParsableContext
assert isinstance(context, ParsableContext)
last_msg = context.parser.response_messages[-1]
args = json.loads(last_msg.arguments)
last_msg_harmony = Message(
author=Author(role="assistant", name=None),
content=[TextContent(text=args["code"])],
channel="analysis",
recipient="python",
content_type="code",
)
tool_output_msgs = []
async for msg in self.python_tool.process(last_msg_harmony):
processed = ResponseFunctionToolCallOutputItem(
id=f"fco_{random_uuid()}",
type="function_call_output",
call_id=f"call_{random_uuid()}",
output=msg.content[0].text,
status="completed",
)
tool_output_msgs.append(processed)
return tool_output_msgs
@property
def tool_config(self) -> Any:
return self.python_tool.tool_config

View File

@@ -0,0 +1,234 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from abc import ABC, abstractmethod
from contextlib import AbstractAsyncContextManager, asynccontextmanager
from typing import TYPE_CHECKING, Any
from openai_harmony import ToolDescription, ToolNamespaceConfig
from vllm.entrypoints.mcp.tool import HarmonyBrowserTool, HarmonyPythonTool, Tool
from vllm.logger import init_logger
logger = init_logger(__name__)
if TYPE_CHECKING:
from mcp.types import ListToolsResult
async def list_server_and_tools(server_url: str):
from mcp import ClientSession
from mcp.client.sse import sse_client
async with (
sse_client(url=server_url) as streams,
ClientSession(*streams) as session,
):
initialize_response = await session.initialize()
list_tools_response = await session.list_tools()
return initialize_response, list_tools_response
def trim_schema(schema: dict) -> dict:
# Turn JSON Schema from MCP generated into Harmony's variant.
if "title" in schema:
del schema["title"]
if "default" in schema and schema["default"] is None:
del schema["default"]
if "anyOf" in schema:
# Turn "anyOf": [{"type": "type-1"}, {"type": "type-2"}]
# into "type": ["type-1", "type-2"]
# if there's more than 1 types, also remove "null" type as Harmony will
# just ignore it
types = [
type_dict["type"]
for type_dict in schema["anyOf"]
if type_dict["type"] != "null"
]
schema["type"] = types
del schema["anyOf"]
if "properties" in schema:
schema["properties"] = {
k: trim_schema(v) for k, v in schema["properties"].items()
}
return schema
def post_process_tools_description(
list_tools_result: "ListToolsResult",
) -> "ListToolsResult":
# Adapt the MCP tool result for Harmony
for tool in list_tools_result.tools:
tool.inputSchema = trim_schema(tool.inputSchema)
# Some tools schema don't need to be part of the prompt (e.g. simple text
# in text out for Python)
list_tools_result.tools = [
tool
for tool in list_tools_result.tools
if getattr(tool.annotations, "include_in_prompt", True)
]
return list_tools_result
class ToolServer(ABC):
@abstractmethod
def has_tool(self, tool_name: str) -> bool:
"""
Return True if the tool is supported, False otherwise.
"""
pass
@abstractmethod
def get_tool_description(
self, tool_name: str, allowed_tools: list[str] | None = None
) -> ToolNamespaceConfig | None:
"""
Return the tool description for the given tool name.
If the tool is not supported, return None.
"""
pass
@abstractmethod
def new_session(
self, tool_name: str, session_id: str, headers: dict[str, str] | None = None
) -> AbstractAsyncContextManager[Any]:
"""
Create a session for the tool.
"""
...
class MCPToolServer(ToolServer):
def __init__(self):
try:
import mcp # noqa: F401
except ImportError:
raise ImportError(
"mcp is not installed. Please run `pip install mcp` to use "
"MCPToolServer."
) from None
self.harmony_tool_descriptions = {}
async def add_tool_server(self, server_url: str):
tool_urls = server_url.split(",")
self.harmony_tool_descriptions = {}
self.urls: dict[str, str] = {}
for url in tool_urls:
url = f"http://{url}/sse"
initialize_response, list_tools_response = await list_server_and_tools(url)
list_tools_response = post_process_tools_description(list_tools_response)
tool_from_mcp = ToolNamespaceConfig(
name=initialize_response.serverInfo.name,
description=initialize_response.instructions,
tools=[
ToolDescription.new(
name=tool.name,
description=tool.description,
parameters=tool.inputSchema,
)
for tool in list_tools_response.tools
],
)
self.harmony_tool_descriptions[tool_from_mcp.name] = tool_from_mcp
if tool_from_mcp.name not in self.urls:
self.urls[tool_from_mcp.name] = url
else:
logger.warning(
"Tool %s already exists. Ignoring duplicate tool server %s",
tool_from_mcp.name,
url,
)
logger.info(
"MCPToolServer initialized with tools: %s",
list(self.harmony_tool_descriptions.keys()),
)
def has_tool(self, tool_name: str):
return tool_name in self.harmony_tool_descriptions
def get_tool_description(
self,
server_label: str,
allowed_tools: list[str] | None = None,
) -> ToolNamespaceConfig | None:
cfg = self.harmony_tool_descriptions.get(server_label)
if cfg is None:
return None
# No restrictions: all tools from this MCP server
if allowed_tools is None:
return cfg
filtered = [t for t in cfg.tools if t.name in allowed_tools]
if not filtered:
return None
return ToolNamespaceConfig(
name=cfg.name,
description=cfg.description,
tools=filtered,
)
@asynccontextmanager
async def new_session(
self, tool_name: str, session_id: str, headers: dict[str, str] | None = None
):
from mcp import ClientSession
from mcp.client.sse import sse_client
url = self.urls.get(tool_name)
request_headers = {"x-session-id": session_id}
if headers is not None:
request_headers.update(headers)
if not url:
raise KeyError(f"Tool '{tool_name}' is not supported")
async with (
sse_client(url=url, headers=request_headers) as streams,
ClientSession(*streams) as session,
):
await session.initialize()
yield session
class DemoToolServer(ToolServer):
def __init__(self):
self.tools: dict[str, Tool] = {}
async def init_and_validate(self):
browser_tool = HarmonyBrowserTool()
python_tool = HarmonyPythonTool()
await python_tool.validate()
if browser_tool.enabled:
self.tools["browser"] = browser_tool
if python_tool.enabled:
self.tools["python"] = python_tool
logger.info(
"DemoToolServer initialized with tools: %s", list(self.tools.keys())
)
def has_tool(self, tool_name: str) -> bool:
return tool_name in self.tools
def get_tool_description(
self, tool_name: str, allowed_tools: list[str] | None = None
) -> ToolNamespaceConfig | None:
if tool_name not in self.tools:
return None
if tool_name == "browser":
return ToolNamespaceConfig.browser()
elif tool_name == "python":
return ToolNamespaceConfig.python()
else:
raise ValueError(f"Unknown tool {tool_name}")
@asynccontextmanager
async def new_session(
self, tool_name: str, session_id: str, headers: dict[str, str] | None = None
):
if tool_name not in self.tools:
raise KeyError(f"Tool '{tool_name}' is not supported")
yield self.tools[tool_name]