Sync from v0.13

This commit is contained in:
2026-01-19 10:38:50 +08:00
parent b2ef04d792
commit 5aef6c175a
3714 changed files with 854317 additions and 89342 deletions

View File

@@ -0,0 +1,243 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import importlib.metadata
import importlib.util
import logging
import sys
import traceback
from argparse import SUPPRESS, Action, HelpFormatter
from collections.abc import Iterable
from importlib.machinery import ModuleSpec
from pathlib import Path
from typing import TYPE_CHECKING, Literal
from unittest.mock import MagicMock, patch
from pydantic_core import core_schema
# MkDocs hooks log through the "mkdocs" logger so messages show in the build.
logger = logging.getLogger("mkdocs")
ROOT_DIR = Path(__file__).parent.parent.parent.parent  # repository root
ARGPARSE_DOC_DIR = ROOT_DIR / "docs/argparse"  # output dir for generated snippets
# Make the vllm package importable from this hook.
sys.path.insert(0, str(ROOT_DIR))
def mock_if_no_torch(mock_module: str, mock: MagicMock):
    """Install `mock` under `mock_module` in `sys.modules` when torch is absent.

    No-op when torch can be found, so real modules are used where possible.
    """
    torch_available = importlib.util.find_spec("torch")
    if not torch_available:
        sys.modules[mock_module] = mock
# Mock custom op code
class MockCustomOp:
    """Stand-in for vLLM's CustomOp used when torch is unavailable.

    `register` becomes a decorator factory that returns the class unchanged.
    """

    @staticmethod
    def register(name):
        # The real implementation registers `cls` under `name`; for doc
        # generation the decorated class only needs to pass through intact.
        def passthrough(cls):
            return cls

        return passthrough
# Replace torch-backed vLLM modules with mocks so the doc build can import
# vLLM code on a machine without torch / compiled extensions.
mock_if_no_torch("vllm._C", MagicMock())
mock_if_no_torch("vllm.model_executor.custom_op", MagicMock(CustomOp=MockCustomOp))
mock_if_no_torch(
    "vllm.utils.torch_utils", MagicMock(direct_register_custom_op=lambda *a, **k: None)
)
# Mock any version checks by reading from compiled CI requirements
with open(ROOT_DIR / "requirements/test.txt") as f:
    VERSIONS = dict(line.strip().split("==") for line in f if "==" in line)
# Packages not pinned in test.txt report "0.0.0" so version gates still resolve.
importlib.metadata.version = lambda name: VERSIONS.get(name) or "0.0.0"
# Make torch.nn.Parameter safe to inherit from
mock_if_no_torch("torch.nn", MagicMock(Parameter=object))
class PydanticMagicMock(MagicMock):
    """`MagicMock` that's able to generate pydantic-core schemas."""

    def __init__(self, *args, **kwargs):
        # Pop `name` before it reaches MagicMock.__init__ and reuse it as the
        # module spec name, so this mock can stand in for a missing module.
        name = kwargs.pop("name", None)
        super().__init__(*args, **kwargs)
        self.__spec__ = ModuleSpec(name, None)

    def __get_pydantic_core_schema__(self, source_type, handler):
        # Mocked types carry no real schema; accept anything.
        return core_schema.any_schema()
def auto_mock(module_name: str, attr: str, max_mocks: int = 100):
    """Import `attr` from `module_name`, mocking missing modules on the way.

    Each `ModuleNotFoundError` is resolved by installing a `PydanticMagicMock`
    for the missing module and retrying, up to `max_mocks` times.

    Args:
        module_name: Dotted module path to import from.
        attr: Attribute of `module_name` (or submodule name) to return.
        max_mocks: Maximum number of missing modules to mock before giving up.

    Returns:
        The requested attribute, or the submodule if no such attribute exists.

    Raises:
        ImportError: If the import still fails after `max_mocks` attempts.
    """
    logger.info("Importing %s from %s", attr, module_name)
    for _ in range(max_mocks):
        try:
            module = importlib.import_module(module_name)
            # First treat attr as an attr, then as a submodule
            if hasattr(module, attr):
                return getattr(module, attr)
            return importlib.import_module(f"{module_name}.{attr}")
        except ModuleNotFoundError as e:
            # ModuleNotFoundError always carries the missing module's name.
            assert e.name is not None
            logger.info("Mocking %s for argparse doc generation", e.name)
            sys.modules[e.name] = PydanticMagicMock(name=e.name)
        except Exception:
            # BUG FIX: the format string previously ended with an extra ": %s"
            # that had no matching argument, which broke log formatting.
            # logger.exception already appends the traceback.
            logger.exception("Failed to import %s.%s", module_name, attr)
    raise ImportError(
        f"Failed to import {module_name}.{attr} after mocking {max_mocks} imports"
    )
# Import (auto-mocking missing dependencies) every object whose CLI arguments
# are documented below.
bench_latency = auto_mock("vllm.benchmarks", "latency")
bench_serve = auto_mock("vllm.benchmarks", "serve")
bench_sweep_plot = auto_mock("vllm.benchmarks.sweep.plot", "SweepPlotArgs")
bench_sweep_plot_pareto = auto_mock(
    "vllm.benchmarks.sweep.plot_pareto", "SweepPlotParetoArgs"
)
bench_sweep_serve = auto_mock("vllm.benchmarks.sweep.serve", "SweepServeArgs")
bench_sweep_serve_sla = auto_mock(
    "vllm.benchmarks.sweep.serve_sla", "SweepServeSLAArgs"
)
bench_throughput = auto_mock("vllm.benchmarks", "throughput")
AsyncEngineArgs = auto_mock("vllm.engine.arg_utils", "AsyncEngineArgs")
EngineArgs = auto_mock("vllm.engine.arg_utils", "EngineArgs")
ChatCommand = auto_mock("vllm.entrypoints.cli.openai", "ChatCommand")
CompleteCommand = auto_mock("vllm.entrypoints.cli.openai", "CompleteCommand")
openai_cli_args = auto_mock("vllm.entrypoints.openai", "cli_args")
openai_run_batch = auto_mock("vllm.entrypoints.openai", "run_batch")
if TYPE_CHECKING:
    from vllm.utils.argparse_utils import FlexibleArgumentParser
else:
    # At runtime the real class may be unimportable; auto-mock it instead.
    FlexibleArgumentParser = auto_mock(
        "vllm.utils.argparse_utils", "FlexibleArgumentParser"
    )
class MarkdownFormatter(HelpFormatter):
    """Help formatter that renders argparse groups and options as Markdown."""

    def __init__(self, prog: str, starting_heading_level: int = 3):
        # Unbounded width/help position so argparse never wraps the text.
        super().__init__(prog, max_help_position=sys.maxsize, width=sys.maxsize)
        self._section_heading_prefix = "#" * starting_heading_level
        self._argument_heading_prefix = "#" * (starting_heading_level + 1)
        self._markdown_output = []

    def start_section(self, heading: str):
        # The default argparse group names add no value in the docs.
        if heading in {"positional arguments", "options"}:
            return
        self._markdown_output.append(
            f"\n{self._section_heading_prefix} {heading}\n\n"
        )

    def end_section(self):
        pass

    def add_text(self, text: str):
        if text:
            self._markdown_output.append(f"{text.strip()}\n\n")

    def add_usage(self, usage, actions, groups, prefix=None):
        pass

    def add_arguments(self, actions: Iterable[Action]):
        for action in actions:
            # Skip positionals and the implicit --help flag.
            if not action.option_strings or "--help" in action.option_strings:
                continue

            flags = f"`{'`, `'.join(action.option_strings)}`"
            self._markdown_output.append(
                f"{self._argument_heading_prefix} {flags}\n\n"
            )

            # Prefer explicit choices; fall back to a list/tuple metavar.
            choices = action.choices
            metavar = action.metavar
            if choices:
                rendered = f"`{'`, `'.join(str(c) for c in choices)}`"
                self._markdown_output.append(f"Possible choices: {rendered}\n\n")
            elif metavar and isinstance(metavar, (list, tuple)):
                rendered = f"`{'`, `'.join(str(m) for m in metavar)}`"
                self._markdown_output.append(f"Possible choices: {rendered}\n\n")

            if action.help:
                self._markdown_output.append(f"{action.help}\n\n")

            default = action.default
            if default != SUPPRESS:
                # Make empty string defaults visible
                if default == "":
                    default = '""'
                self._markdown_output.append(f"Default: `{default}`\n\n")

    def format_help(self):
        """Return the formatted help as markdown."""
        return "".join(self._markdown_output)
def create_parser(add_cli_args, **kwargs) -> FlexibleArgumentParser:
    """Create a parser for the given class with markdown formatting.

    Args:
        add_cli_args: Function that registers CLI arguments on a parser
            (e.g. `EngineArgs.add_cli_args`).
        **kwargs: Additional keyword arguments to pass to `add_cli_args`.

    Returns:
        FlexibleArgumentParser: A parser with markdown formatting for the class.
    """
    try:
        parser = FlexibleArgumentParser(add_json_tip=False)
        parser.formatter_class = MarkdownFormatter
        # NOTE(review): DeviceConfig.__post_init__ is patched out, presumably
        # because it inspects the local device at import time — confirm.
        with patch("vllm.config.DeviceConfig.__post_init__"):
            _parser = add_cli_args(parser, **kwargs)
    except ModuleNotFoundError as e:
        # Auto-mock runtime imports
        if tb_list := traceback.extract_tb(e.__traceback__):
            # Mock the module whose import failed (last traceback frame),
            # then retry from scratch.
            path = Path(tb_list[-1].filename).relative_to(ROOT_DIR)
            auto_mock(module_name=".".join(path.parent.parts), attr=path.stem)
            return create_parser(add_cli_args, **kwargs)
        else:
            raise e
    # add_cli_args might be in-place so return parser if _parser is None
    return _parser or parser
def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool):
    """MkDocs hook: write one Markdown include per documented CLI parser."""
    logger.info("Generating argparse documentation")
    logger.debug("Root directory: %s", ROOT_DIR.resolve())
    logger.debug("Output directory: %s", ARGPARSE_DOC_DIR.resolve())

    # Create the ARGPARSE_DOC_DIR if it doesn't exist
    if not ARGPARSE_DOC_DIR.exists():
        ARGPARSE_DOC_DIR.mkdir(parents=True)

    # Create parsers to document
    parsers = {
        # Engine args
        "engine_args": create_parser(EngineArgs.add_cli_args),
        "async_engine_args": create_parser(
            AsyncEngineArgs.add_cli_args, async_args_only=True
        ),
        # CLI
        "serve": create_parser(openai_cli_args.make_arg_parser),
        "chat": create_parser(ChatCommand.add_cli_args),
        "complete": create_parser(CompleteCommand.add_cli_args),
        "run-batch": create_parser(openai_run_batch.make_arg_parser),
        # Benchmark CLI
        "bench_latency": create_parser(bench_latency.add_cli_args),
        "bench_serve": create_parser(bench_serve.add_cli_args),
        "bench_sweep_plot": create_parser(bench_sweep_plot.add_cli_args),
        "bench_sweep_plot_pareto": create_parser(bench_sweep_plot_pareto.add_cli_args),
        "bench_sweep_serve": create_parser(bench_sweep_serve.add_cli_args),
        "bench_sweep_serve_sla": create_parser(bench_sweep_serve_sla.add_cli_args),
        "bench_throughput": create_parser(bench_throughput.add_cli_args),
    }

    # Generate documentation for each parser
    for stem, parser in parsers.items():
        doc_path = ARGPARSE_DOC_DIR / f"{stem}.inc.md"
        # Specify encoding for building on Windows
        with open(doc_path, "w", encoding="utf-8") as f:
            # Call the superclass format_help so any override on the parser's
            # own class is skipped and MarkdownFormatter output is used as-is.
            f.write(super(type(parser), parser).format_help())
        logger.info("Argparse generated: %s", doc_path.relative_to(ROOT_DIR))


if __name__ == "__main__":
    on_startup("build", False)

View File

@@ -0,0 +1,233 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import itertools
import logging
from dataclasses import dataclass
from functools import cached_property
from pathlib import Path
from typing import Literal
import regex as re
logger = logging.getLogger("mkdocs")
ROOT_DIR = Path(__file__).parent.parent.parent.parent  # repository root
ROOT_DIR_RELATIVE = "../../../../.."  # repo root relative to the generated docs
EXAMPLE_DIR = ROOT_DIR / "examples"  # examples to document
EXAMPLE_DOC_DIR = ROOT_DIR / "docs/examples"  # output directory
def title(text: str) -> str:
    """Turn a file/directory stem into a human-readable document title."""
    # Default title case
    result = text.replace("_", " ").replace("/", " - ").title()

    # Whole-word, case-insensitive substitutions for known acronyms/brands.
    substitutions = {
        "io": "IO",
        "api": "API",
        "cli": "CLI",
        "cpu": "CPU",
        "llm": "LLM",
        "mae": "MAE",
        "ner": "NER",
        "tpu": "TPU",
        "gguf": "GGUF",
        "lora": "LoRA",
        "rlhf": "RLHF",
        "vllm": "vLLM",
        "openai": "OpenAI",
        "lmcache": "LMCache",
        "multilora": "MultiLoRA",
        "mlpspeculator": "MLPSpeculator",
        r"fp\d+": lambda m: m.group(0).upper(),  # e.g. fp16, fp32
        r"int\d+": lambda m: m.group(0).upper(),  # e.g. int8, int16
    }
    for pattern, replacement in substitutions.items():
        result = re.sub(rf"\b{pattern}\b", replacement, result, flags=re.IGNORECASE)
    return result
@dataclass
class Example:
    """
    Example class for generating documentation content from a given path.

    Attributes:
        path (Path): The path to the main directory or file.
        category (str): The category of the document.

    Properties:
        main_file() -> Path | None: Determines the main file in the given path.
        other_files() -> list[Path]: Determines other files in the directory excluding
            the main file.
        title() -> str: Determines the title of the document.

    Methods:
        generate() -> str: Generates the documentation content.
    """

    path: Path
    category: str

    @cached_property
    def main_file(self) -> Path | None:
        """Determines the main file in the given path.

        If path is a file, it returns the path itself. If path is a directory, it
        searches for Markdown files (*.md) in the directory and returns the first one
        found. If no Markdown files are found, it returns None."""
        # Single file example
        if self.path.is_file():
            return self.path
        # Multi file example with a README
        if md_paths := list(self.path.glob("*.md")):
            return md_paths[0]
        # Multi file example without a README
        return None

    @cached_property
    def other_files(self) -> list[Path]:
        """Determine other files in the directory excluding the main file.

        If path is a file, it returns an empty list. Otherwise, it returns every file
        in the directory except the main file in a list."""
        # Single file example
        if self.path.is_file():
            return []
        # Multi file example
        is_other_file = lambda file: file.is_file() and file != self.main_file
        return sorted(file for file in self.path.rglob("*") if is_other_file(file))

    @cached_property
    def is_code(self) -> bool:
        # A non-Markdown main file (e.g. *.py, *.sh) is rendered as a listing.
        return self.main_file is not None and self.main_file.suffix != ".md"

    @cached_property
    def title(self) -> str:
        """Title of the generated document (H1 of the README, or the stem)."""
        # Generate title from filename if no main md file found
        if self.main_file is None or self.is_code:
            return title(self.path.stem)
        # Specify encoding for building on Windows
        with open(self.main_file, encoding="utf-8") as f:
            first_line = f.readline().strip()
        match = re.match(r"^#\s+(?P<title>.+)$", first_line)
        if match:
            return match.group("title")
        raise ValueError(f"Title not found in {self.main_file}")

    def fix_relative_links(self, content: str) -> str:
        """
        Fix relative links in markdown content by converting them to gh-file
        format.

        Args:
            content (str): The markdown content to process

        Returns:
            str: Content with relative links converted to gh-file format
        """
        # Regex to match markdown links [text](relative_path)
        # This matches links that don't start with http, https, ftp, or #
        link_pattern = r"\[([^\]]*)\]\((?!(?:https?|ftp)://|#)([^)]+)\)"

        def replace_link(match):
            link_text = match.group(1)
            relative_path = match.group(2)
            # Make relative to repo root
            gh_file = (self.main_file.parent / relative_path).resolve()
            gh_file = gh_file.relative_to(ROOT_DIR)
            # Make GitHub URL
            url = "https://github.com/vllm-project/vllm/"
            # NOTE(review): the URL scheme follows the example's kind, not the
            # link target's — a file link inside a directory example gets
            # tree/main. Confirm this is intended.
            url += "tree/main" if self.path.is_dir() else "blob/main"
            gh_url = f"{url}/{gh_file}"
            return f"[{link_text}]({gh_url})"

        return re.sub(link_pattern, replace_link, content)

    def generate(self) -> str:
        """Generates the documentation content."""
        content = f"# {self.title}\n\n"
        url = "https://github.com/vllm-project/vllm/"
        url += "tree/main" if self.path.is_dir() else "blob/main"
        content += f"Source <{url}/{self.path.relative_to(ROOT_DIR)}>.\n\n"
        # Use long code fence to avoid issues with
        # included files containing code fences too
        code_fence = "``````"
        if self.main_file is not None:
            # Single file example or multi file example with a README
            if self.is_code:
                # Snippet-include the code file inside a fenced block.
                content += (
                    f"{code_fence}{self.main_file.suffix[1:]}\n"
                    f'--8<-- "{self.main_file}"\n'
                    f"{code_fence}\n"
                )
            else:
                with open(self.main_file, encoding="utf-8") as f:
                    # Skip the title from md snippets as it's been included above
                    main_content = f.readlines()[1:]
                content += self.fix_relative_links("".join(main_content))
            content += "\n"
        else:
            # Multi file example without a README
            for file in self.other_files:
                file_title = title(str(file.relative_to(self.path).with_suffix("")))
                content += f"## {file_title}\n\n"
                content += (
                    f'{code_fence}{file.suffix[1:]}\n--8<-- "{file}"\n{code_fence}\n\n'
                )
            return content
        if not self.other_files:
            return content
        # README-based examples list their supporting files in collapsible
        # admonitions; non-Markdown files are additionally fenced.
        content += "## Example materials\n\n"
        for file in self.other_files:
            content += f'??? abstract "{file.relative_to(self.path)}"\n'
            if file.suffix != ".md":
                content += f"    {code_fence}{file.suffix[1:]}\n"
            content += f'    --8<-- "{file}"\n'
            if file.suffix != ".md":
                content += f"    {code_fence}\n"
        return content
def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool):
    """MkDocs hook: generate one documentation page per example."""
    logger.info("Generating example documentation")
    logger.debug("Root directory: %s", ROOT_DIR.resolve())
    logger.debug("Example directory: %s", EXAMPLE_DIR.resolve())
    logger.debug("Example document directory: %s", EXAMPLE_DOC_DIR.resolve())

    # Create the EXAMPLE_DOC_DIR if it doesn't exist
    if not EXAMPLE_DOC_DIR.exists():
        EXAMPLE_DOC_DIR.mkdir(parents=True)

    # Each top-level directory under examples/ is a category.
    categories = sorted(p for p in EXAMPLE_DIR.iterdir() if p.is_dir())
    examples = []
    glob_patterns = ["*.py", "*.md", "*.sh"]
    # Find categorised examples
    for category in categories:
        logger.info("Processing category: %s", category.stem)
        # Top-level files are single-file examples.
        globs = [category.glob(pattern) for pattern in glob_patterns]
        for path in itertools.chain(*globs):
            examples.append(Example(path, category.stem))
        # Find examples in subdirectories
        # NOTE(review): a subdirectory matching several patterns is appended
        # once per match; the generated page is just overwritten — confirm OK.
        globs = [category.glob(f"*/{pattern}") for pattern in glob_patterns]
        for path in itertools.chain(*globs):
            examples.append(Example(path.parent, category.stem))

    # Generate the example documentation
    for example in sorted(examples, key=lambda e: e.path.stem):
        example_name = f"{example.path.stem}.md"
        doc_path = EXAMPLE_DOC_DIR / example.category / example_name
        if not doc_path.parent.exists():
            doc_path.parent.mkdir(parents=True)
        # Specify encoding for building on Windows
        with open(doc_path, "w+", encoding="utf-8") as f:
            f.write(example.generate())
        logger.debug("Example generated: %s", doc_path.relative_to(ROOT_DIR))
    logger.info("Total examples generated: %d", len(examples))

View File

@@ -0,0 +1,149 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import ast
import logging
from pathlib import Path
from typing import Literal
logger = logging.getLogger("mkdocs")
ROOT_DIR = Path(__file__).parent.parent.parent.parent  # repository root
DOCS_DIR = ROOT_DIR / "docs"
GENERATED_METRICS_DIR = DOCS_DIR / "generated" / "metrics"  # output directory
# Files to scan for metric definitions - each will generate a separate table
METRIC_SOURCE_FILES = [
    {"path": "vllm/v1/metrics/loggers.py", "output": "general.md"},
    {
        "path": "vllm/v1/spec_decode/metrics.py",
        "output": "spec_decode.md",
    },
    {
        "path": "vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py",
        "output": "nixl_connector.md",
    },
]
class MetricExtractor(ast.NodeVisitor):
"""AST visitor to extract metric definitions."""
def __init__(self):
self.metrics: list[dict[str, str]] = []
def visit_Call(self, node: ast.Call) -> None:
"""Visit function calls to find metric class instantiations."""
metric_type = self._get_metric_type(node)
if metric_type:
name = self._extract_kwarg(node, "name")
documentation = self._extract_kwarg(node, "documentation")
if name:
self.metrics.append(
{
"name": name,
"type": metric_type,
"documentation": documentation or "",
}
)
self.generic_visit(node)
def _get_metric_type(self, node: ast.Call) -> str | None:
"""Determine if this call creates a metric and return its type."""
metric_type_map = {
"_gauge_cls": "gauge",
"_counter_cls": "counter",
"_histogram_cls": "histogram",
}
if isinstance(node.func, ast.Attribute):
return metric_type_map.get(node.func.attr)
return None
def _extract_kwarg(self, node: ast.Call, key: str) -> str | None:
"""Extract a keyword argument value from a function call."""
for keyword in node.keywords:
if keyword.arg == key:
return self._get_string_value(keyword.value)
return None
def _get_string_value(self, node: ast.AST) -> str | None:
"""Extract string value from an AST node."""
if isinstance(node, ast.Constant):
return str(node.value) if node.value is not None else None
return None
def extract_metrics_from_file(filepath: Path) -> list[dict[str, str]]:
    """Parse a Python file and return every metric definition found in it.

    Raises:
        RuntimeError: If the file cannot be read or parsed.
    """
    try:
        source = filepath.read_text(encoding="utf-8")
        tree = ast.parse(source, filename=str(filepath))
        extractor = MetricExtractor()
        extractor.visit(tree)
        return extractor.metrics
    except Exception as e:
        raise RuntimeError(f"Failed to parse {filepath}: {e}") from e
def generate_markdown_table(metrics: list[dict[str, str]]) -> str:
    """Render the extracted metrics as a Markdown table (or a placeholder)."""
    if not metrics:
        return "No metrics found.\n"

    # Sort by type, then by name
    ordered = sorted(metrics, key=lambda m: (m["type"], m["name"]))

    rows = [
        "| Metric Name | Type | Description |",
        "|-------------|------|-------------|",
    ]
    for metric in ordered:
        # Flatten multi-line docs so they fit in a single table cell.
        description = metric["documentation"].replace("\n", " ").strip()
        kind = metric["type"].capitalize()
        rows.append(f"| `{metric['name']}` | {kind} | {description} |")
    return "\n".join(rows) + "\n"
def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool):
    """Generate metrics documentation tables from source files."""
    logger.info("Generating metrics documentation")

    # Create generated directory if it doesn't exist
    GENERATED_METRICS_DIR.mkdir(parents=True, exist_ok=True)

    total_metrics = 0
    for source_config in METRIC_SOURCE_FILES:
        source_path = source_config["path"]
        output_file = source_config["output"]
        filepath = ROOT_DIR / source_path
        # Fail the build loudly rather than silently emitting an empty table.
        if not filepath.exists():
            raise FileNotFoundError(f"Metrics source file not found: {filepath}")
        logger.debug("Extracting metrics from: %s", source_path)
        metrics = extract_metrics_from_file(filepath)
        logger.debug("Found %d metrics in %s", len(metrics), source_path)
        # Generate and write the markdown table for this source
        table_content = generate_markdown_table(metrics)
        output_path = GENERATED_METRICS_DIR / output_file
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(table_content)
        total_metrics += len(metrics)
        logger.info(
            "Generated metrics table: %s (%d metrics)",
            output_path.relative_to(ROOT_DIR),
            len(metrics),
        )
    logger.info(
        "Total metrics generated: %d across %d files",
        total_metrics,
        len(METRIC_SOURCE_FILES),
    )

View File

@@ -0,0 +1,17 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import os
from pathlib import Path
from typing import Literal
def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool):
    """Delete the dev-preview announcement banner for tagged-release builds."""
    # see https://docs.readthedocs.io/en/stable/reference/environment-variables.html # noqa
    if os.getenv("READTHEDOCS_VERSION_TYPE") != "tag":
        return
    # remove the warning banner if the version is a tagged release
    announcement_path = Path(__file__).parent.parent / "overrides/main.html"
    # The file might be removed already if the build is triggered multiple
    # times (readthedocs build both HTML and PDF versions separately)
    if announcement_path.exists():
        announcement_path.unlink()

View File

@@ -0,0 +1,95 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
MkDocs hook to enable the following links to render correctly:
- Relative file links outside of the `docs/` directory, e.g.:
- [Text](../some_file.py)
- [Directory](../../some_directory/)
- GitHub URLs for issues, pull requests, and projects, e.g.:
- Adds GitHub icon before links
- Replaces raw links with descriptive text,
e.g. <...pull/123> -> [Pull Request #123](.../pull/123)
- Works for external repos too by including the `owner/repo` in the link title
The goal is to simplify cross-referencing common GitHub resources
in project docs.
"""
from pathlib import Path
import regex as re
from mkdocs.config.defaults import MkDocsConfig
from mkdocs.structure.files import Files
from mkdocs.structure.pages import Page
ROOT_DIR = Path(__file__).parent.parent.parent.parent.resolve()  # repo root
DOC_DIR = ROOT_DIR / "docs"
# Material-for-MkDocs icon shorthand placed before rewritten GitHub links.
gh_icon = ":octicons-mark-github-16:"
# Regex pieces
TITLE = r"(?P<title>[^\[\]<>]+?)"
REPO = r"(?P<repo>.+?/.+?)"
TYPE = r"(?P<type>issues|pull|projects)"
NUMBER = r"(?P<number>\d+)"
FRAGMENT = r"(?P<fragment>#[^\s]+)?"
URL = f"https://github.com/{REPO}/{TYPE}/{NUMBER}{FRAGMENT}"
# Anything that isn't an absolute URL or an in-page anchor.
RELATIVE = r"(?!(https?|ftp)://|#)(?P<path>[^\s]+?)"
# Common titles to use for GitHub links when none is provided in the link.
TITLES = {"issues": "Issue ", "pull": "Pull Request ", "projects": "Project "}
# Regex to match GitHub issue, PR, and project links with optional titles.
github_link = re.compile(rf"(\[{TITLE}\]\(|<){URL}(\)|>)")
# Regex to match relative file links with optional titles.
relative_link = re.compile(rf"\[{TITLE}\]\({RELATIVE}\)")
def on_page_markdown(
    markdown: str, *, page: Page, config: MkDocsConfig, files: Files
) -> str:
    """MkDocs hook: rewrite relative file links and GitHub links on a page.

    Relative links that resolve outside `docs/` become GitHub URLs, and
    GitHub issue/PR/project links gain an icon plus a descriptive title.

    Args:
        markdown: The page's Markdown source.
        page: The page being rendered.
        config: The MkDocs config (required by the hook signature).
        files: The site file collection (required by the hook signature).

    Returns:
        The transformed Markdown.
    """

    def replace_relative_link(match: re.Match) -> str:
        """Replace relative file links with URLs if they point outside the docs dir."""
        title = match.group("title")
        path = match.group("path")
        path = (Path(page.file.abs_src_path).parent / path).resolve()
        # Check if the path exists and is outside the docs dir
        if not path.exists() or path.is_relative_to(DOC_DIR):
            return match.group(0)
        # Files and directories have different URL schemes on GitHub
        slug = "tree/main" if path.is_dir() else "blob/main"
        path = path.relative_to(ROOT_DIR)
        url = f"https://github.com/vllm-project/vllm/{slug}/{path}"
        return f"[{gh_icon} {title}]({url})"

    def replace_github_link(match: re.Match) -> str:
        """Replace GitHub issue, PR, and project links with enhanced Markdown links."""
        repo = match.group("repo")
        type = match.group("type")
        number = match.group("number")
        # Title and fragment could be None
        title = match.group("title") or ""
        fragment = match.group("fragment") or ""
        # Use default titles for raw links
        if not title:
            title = TITLES[type]
            # Only external repos need owner/repo spelled out in the title.
            if "vllm-project" not in repo:
                title += repo
            title += f"#{number}"
        url = f"https://github.com/{repo}/{type}/{number}{fragment}"
        return f"[{gh_icon} {title}]({url})"

    markdown = relative_link.sub(replace_relative_link, markdown)
    markdown = github_link.sub(replace_github_link, markdown)
    # BUG FIX: removed leftover debug code that printed the full Markdown of
    # every page whose source path contains "interface" to stdout.
    return markdown

View File

@@ -0,0 +1,47 @@
/**
* edit_and_feedback.js
*
* Enhances MkDocs Material docs pages by:
*
* 1. Adding a "Question? Give us feedback" link
* below the "Edit" button.
*
* - The link opens a GitHub issue with a template,
* auto-filled with the current page URL and path.
*
* 2. Ensuring the edit button opens in a new tab
* with target="_blank" and rel="noopener".
*/
document.addEventListener("DOMContentLoaded", function () {
  // Full URL of the current page, embedded in the issue body for context.
  const url = window.location.href;
  // Page path used in the issue title; data-md-url is set by Material theme.
  const page = document.body.dataset.mdUrl || location.pathname;

  // Build the "Give us feedback" link: opens a pre-filled GitHub issue
  // using the documentation issue template.
  const feedbackLink = document.createElement("a");
  feedbackLink.href = `https://github.com/vllm-project/vllm/issues/new?template=100-documentation.yml&title=${encodeURIComponent(
    `[Docs] Feedback for \`${page}\``
  )}&body=${encodeURIComponent(`📄 **Reference:**\n${url}\n\n📝 **Feedback:**\n_Your response_`)}`;
  feedbackLink.target = "_blank";
  feedbackLink.rel = "noopener";
  feedbackLink.title = "Provide feedback";
  feedbackLink.className = "md-content__button";
  // Inline SVG icon (Material Symbols "feedback"-style glyph).
  feedbackLink.innerHTML = `
    <svg
      xmlns="http://www.w3.org/2000/svg"
      height="24px"
      viewBox="0 -960 960 960"
      width="24px"
      fill="currentColor"
    >
      <path d="M280-280h280v-80H280v80Zm0-160h400v-80H280v80Zm0-160h400v-80H280v80Zm-80 480q-33 0-56.5-23.5T120-200v-560q0-33 23.5-56.5T200-840h560q33 0 56.5 23.5T840-760v560q0 33-23.5 56.5T760-120H200Zm0-80h560v-560H200v560Zm0-560v560-560Z"/>
    </svg>
  `;

  // Insert the feedback link before the theme's edit button, and make the
  // edit button open in a new tab.
  const editButton = document.querySelector('.md-content__button[href*="edit"]');
  if (editButton && editButton.parentNode) {
    editButton.insertAdjacentElement("beforebegin", feedbackLink);
    editButton.setAttribute("target", "_blank");
    editButton.setAttribute("rel", "noopener");
  }
});

View File

@@ -0,0 +1,20 @@
// Enables MathJax rendering
window.MathJax = {
  tex: {
    // Use LaTeX-style \( \) and \[ \] delimiters instead of $ ... $.
    inlineMath: [["\\(", "\\)"]],
    displayMath: [["\\[", "\\]"]],
    processEscapes: true,
    processEnvironments: true
  },
  options: {
    // Only typeset elements explicitly marked with the arithmatex class.
    ignoreHtmlClass: ".*|",
    processHtmlClass: "arithmatex"
  }
};

// Re-typeset on every page change (Material's instant navigation swaps the
// page content without a full reload, so MathJax must be re-run manually).
document$.subscribe(() => {
  MathJax.startup.output.clearCache()
  MathJax.typesetClear()
  MathJax.texReset()
  MathJax.typesetPromise()
})

View File

@@ -0,0 +1,19 @@
// Add RunLLM widget
document.addEventListener("DOMContentLoaded", function () {
  // Inject the RunLLM chat widget by appending its loader script to <head>.
  var script = document.createElement("script");
  script.type = "module";
  script.id = "runllm-widget-script"
  script.src = "https://widget.runllm.com";
  script.setAttribute("version", "stable");
  script.setAttribute("runllm-keyboard-shortcut", "Mod+j"); // cmd-j or ctrl-j to open the widget.
  script.setAttribute("runllm-name", "vLLM");
  script.setAttribute("runllm-position", "BOTTOM_RIGHT");
  script.setAttribute("runllm-position-y", "120px");
  script.setAttribute("runllm-position-x", "20px");
  script.setAttribute("runllm-assistant-id", "207");
  script.async = true;
  document.head.appendChild(script);
});

View File

@@ -0,0 +1,56 @@
/**
* slack_and_forum.js
*
* Adds a custom Slack and Forum button to the MkDocs Material header.
*
*/
window.addEventListener('DOMContentLoaded', () => {
  const headerInner = document.querySelector('.md-header__inner');
  if (headerInner) {
    // Slack button: opens the community Slack in a new tab.
    const slackButton = document.createElement('button');
    slackButton.className = 'slack-button';
    slackButton.title = 'Join us on Slack';
    slackButton.style.border = 'none';
    slackButton.style.background = 'transparent';
    slackButton.style.cursor = 'pointer';
    slackButton.innerHTML = `
      <img src="https://a.slack-edge.com/80588/marketing/img/icons/icon_slack_hash_colored.png"
           style="height: 1.1rem;"
           alt="Slack">
    `;
    slackButton.addEventListener('click', () => {
      window.open('https://slack.vllm.ai', '_blank', 'noopener');
    });

    // Forum button: opens the Discourse forum in a new tab.
    const forumButton = document.createElement('button');
    forumButton.className = 'forum-button';
    forumButton.title = 'Join the Forum';
    forumButton.style.border = 'none';
    forumButton.style.background = 'transparent';
    forumButton.style.cursor = 'pointer';
    forumButton.innerHTML = `
      <svg
        xmlns="http://www.w3.org/2000/svg"
        viewBox="0 -960 960 960"
        fill="currentColor"
      >
        <path d="M817.85-198.15 698.46-317.54H320q-24.48 0-41.47-16.99T261.54-376v-11.69h424.61q25.39 0 43.47-18.08 18.07-18.08 18.07-43.46v-268.92h11.69q24.48 0 41.47 16.99 17 16.99 17 41.47v461.54ZM179.08-434.69l66.84-66.85h363.31q10.77 0 17.69-6.92 6.93-6.92 6.93-17.69v-246.77q0-10.77-6.93-17.7-6.92-6.92-17.69-6.92H203.69q-10.77 0-17.69 6.92-6.92 6.93-6.92 17.7v338.23Zm-36.93 89.46v-427.69q0-25.39 18.08-43.46 18.08-18.08 43.46-18.08h405.54q25.39 0 43.46 18.08 18.08 18.07 18.08 43.46v246.77q0 25.38-18.08 43.46-18.07 18.07-43.46 18.07H261.54L142.15-345.23Zm36.93-180.92V-797.54v271.39Z"/>
      </svg>
    `;
    forumButton.addEventListener('click', () => {
      window.open('https://discuss.vllm.ai/', '_blank', 'noopener');
    });

    // Insert both buttons right after the GitHub source widget in the header.
    const githubSource = document.querySelector('.md-header__source');
    if (githubSource) {
      githubSource.parentNode.insertBefore(slackButton, githubSource.nextSibling);
      githubSource.parentNode.insertBefore(forumButton, slackButton.nextSibling);
    }
  }
});

View File

@@ -0,0 +1,5 @@
{% extends "base.html" %}
{#- Dev-preview warning banner; the tagged-release build deletes this file
    (see the announcement-removal hook) so the banner only shows on "latest". -#}
{% block announce %}
<p>You are viewing the latest developer preview docs. <a href="https://docs.vllm.ai/en/stable/">Click here</a> to view docs for the latest stable release.</p>
{% endblock %}

View File

@@ -0,0 +1,21 @@
<!-- Enables the use of toc_depth in document frontmatter https://github.com/squidfunk/mkdocs-material/issues/4827#issuecomment-1869812019 -->
<li class="md-nav__item">
  <a href="{{ toc_item.url }}" class="md-nav__link">
    <span class="md-ellipsis">
      {{ toc_item.title }}
    </span>
  </a>
  <!-- Table of contents list -->
  {% if toc_item.children %}
    <nav class="md-nav" aria-label="{{ toc_item.title | striptags }}">
      <ul class="md-nav__list">
        {#- Recurse into children, pruning entries deeper than the page's
            optional `toc_depth` frontmatter value. -#}
        {% for toc_item in toc_item.children %}
          {% if not page.meta.toc_depth or toc_item.level <= page.meta.toc_depth %}
            {% include "partials/toc-item.html" %}
          {% endif %}
        {% endfor %}
      </ul>
    </nav>
  {% endif %}
</li>

View File

@@ -0,0 +1,192 @@
/* Colour the top banner with the theme's warning palette so the
   "latest docs" notice stands out. */
.md-banner {
  color: var(--md-warning-fg-color);
  background-color: var(--md-warning-bg-color);
}
/* https://christianoliff.com/blog/styling-external-links-with-an-icon-in-css/ */
/* Append a small "external link" arrow after external anchors. Uses native
   CSS nesting (&). The outer selector excludes anchors that already contain
   an SVG, Material's own icon links (.md-icon), and mkdocstrings
   cross-references (.autorefs-external). */
a:not(:has(svg)):not(.md-icon):not(.autorefs-external) {
  align-items: center;
  /* Protocol-relative or absolute http(s) URLs are treated as external. */
  &[href^="//"]::after,
  &[href^="http://"]::after,
  &[href^="https://"]::after {
    content: "";
    width: 12px;
    height: 12px;
    margin-left: 4px;
    /* Inline box-arrow-up-right SVG as a data URI, scaled to the 12px box. */
    background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='16' height='16' stroke='gray' viewBox='0 0 16 16'%3E%3Cpath fill-rule='evenodd' d='M8.636 3.5a.5.5 0 0 0-.5-.5H1.5A1.5 1.5 0 0 0 0 4.5v10A1.5 1.5 0 0 0 1.5 16h10a1.5 1.5 0 0 0 1.5-1.5V7.864a.5.5 0 0 0-1 0V14.5a.5.5 0 0 1-.5.5h-10a.5.5 0 0 1-.5-.5v-10a.5.5 0 0 1 .5-.5h6.636a.5.5 0 0 0 .5-.5z'/%3E%3Cpath fill-rule='evenodd' d='M16 .5a.5.5 0 0 0-.5-.5h-5a.5.5 0 0 0 0 1h3.793L6.146 9.146a.5.5 0 1 0 .708.708L15 1.707V5.5a.5.5 0 0 0 1 0v-5z'/%3E%3C/svg%3E");
    background-position: center;
    background-repeat: no-repeat;
    background-size: contain;
    display: inline-block;
  }
}
/* Never mark links to vLLM's own docs (or local / ReadTheDocs preview
   builds) as external. */
a[href*="localhost"]::after,
a[href*="127.0.0.1"]::after,
a[href*="org.readthedocs.build"]::after,
a[href*="docs.vllm.ai"]::after {
  display: none !important;
}
/* Sidebar section titles: bold everywhere; near-black in light mode and
   light grey in dark (slate) mode for contrast against each background. */
body[data-md-color-scheme="default"] .md-nav__item--section > label.md-nav__link .md-ellipsis {
  font-weight: 700;
  color: rgba(0, 0, 0, 0.7) !important;
}
body[data-md-color-scheme="slate"] .md-nav__item--section > label.md-nav__link .md-ellipsis {
  font-weight: 700;
  color: rgba(255, 255, 255, 0.75) !important;
}
/* Custom admonitions */
/* Icon masks for the four custom admonition types styled below
   (announcement, important, code, console). Each value is an inline SVG
   encoded as a data URI; it is applied via mask-image so the icon takes the
   background-color set on the ::before pseudo-element. */
:root {
  --md-admonition-icon--announcement: url('data:image/svg+xml;charset=utf-8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" width="16" height="16"><path d="M3.25 9a.75.75 0 0 1 .75.75c0 2.142.456 3.828.733 4.653a.122.122 0 0 0 .05.064.212.212 0 0 0 .117.033h1.31c.085 0 .18-.042.258-.152a.45.45 0 0 0 .075-.366A16.743 16.743 0 0 1 6 9.75a.75.75 0 0 1 1.5 0c0 1.588.25 2.926.494 3.85.293 1.113-.504 2.4-1.783 2.4H4.9c-.686 0-1.35-.41-1.589-1.12A16.4 16.4 0 0 1 2.5 9.75.75.75 0 0 1 3.25 9Z"></path><path d="M0 6a4 4 0 0 1 4-4h2.75a.75.75 0 0 1 .75.75v6.5a.75.75 0 0 1-.75.75H4a4 4 0 0 1-4-4Zm4-2.5a2.5 2.5 0 1 0 0 5h2v-5Z"></path><path d="M15.59.082A.75.75 0 0 1 16 .75v10.5a.75.75 0 0 1-1.189.608l-.002-.001h.001l-.014-.01a5.775 5.775 0 0 0-.422-.25 10.63 10.63 0 0 0-1.469-.64C11.576 10.484 9.536 10 6.75 10a.75.75 0 0 1 0-1.5c2.964 0 5.174.516 6.658 1.043.423.151.787.302 1.092.443V2.014c-.305.14-.669.292-1.092.443C11.924 2.984 9.713 3.5 6.75 3.5a.75.75 0 0 1 0-1.5c2.786 0 4.826-.484 6.155-.957.665-.236 1.154-.47 1.47-.64.144-.077.284-.161.421-.25l.014-.01a.75.75 0 0 1 .78-.061Z"></path></svg>');
  --md-admonition-icon--important: url('data:image/svg+xml;charset=utf-8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" width="16" height="16"><path d="M4.47.22A.749.749 0 0 1 5 0h6c.199 0 .389.079.53.22l4.25 4.25c.141.14.22.331.22.53v6a.749.749 0 0 1-.22.53l-4.25 4.25A.749.749 0 0 1 11 16H5a.749.749 0 0 1-.53-.22L.22 11.53A.749.749 0 0 1 0 11V5c0-.199.079-.389.22-.53Zm.84 1.28L1.5 5.31v5.38l3.81 3.81h5.38l3.81-3.81V5.31L10.69 1.5ZM8 4a.75.75 0 0 1 .75.75v3.5a.75.75 0 0 1-1.5 0v-3.5A.75.75 0 0 1 8 4Zm0 8a1 1 0 1 1 0-2 1 1 0 0 1 0 2Z"></path></svg>');
  --md-admonition-icon--code: url('data:image/svg+xml;charset=utf-8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><path d="m11.28 3.22 4.25 4.25a.75.75 0 0 1 0 1.06l-4.25 4.25a.749.749 0 0 1-1.275-.326.75.75 0 0 1 .215-.734L13.94 8l-3.72-3.72a.749.749 0 0 1 .326-1.275.75.75 0 0 1 .734.215m-6.56 0a.75.75 0 0 1 1.042.018.75.75 0 0 1 .018 1.042L2.06 8l3.72 3.72a.749.749 0 0 1-.326 1.275.75.75 0 0 1-.734-.215L.47 8.53a.75.75 0 0 1 0-1.06Z"/></svg>');
  --md-admonition-icon--console: url('data:image/svg+xml;charset=utf-8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><path d="M0 2.75C0 1.784.784 1 1.75 1h12.5c.966 0 1.75.784 1.75 1.75v10.5A1.75 1.75 0 0 1 14.25 15H1.75A1.75 1.75 0 0 1 0 13.25Zm1.75-.25a.25.25 0 0 0-.25.25v10.5c0 .138.112.25.25.25h12.5a.25.25 0 0 0 .25-.25V2.75a.25.25 0 0 0-.25-.25ZM7.25 8a.75.75 0 0 1-.22.53l-2.25 2.25a.749.749 0 0 1-1.275-.326.75.75 0 0 1 .215-.734L5.44 8 3.72 6.28a.749.749 0 0 1 .326-1.275.75.75 0 0 1 .734.215l2.25 2.25c.141.14.22.331.22.53m1.5 1.5h3a.75.75 0 0 1 0 1.5h-3a.75.75 0 0 1 0-1.5"/></svg>');
}
/* Border colours for the custom admonition types. */
.md-typeset .admonition.announcement,
.md-typeset details.announcement {
  border-color: rgb(255, 110, 66);
}
.md-typeset .admonition.important,
.md-typeset details.important {
  border-color: rgb(239, 85, 82);
}
/* `code` and `console` share the same green accent; the two previously
   duplicated rules are merged into one selector list, and the missing
   trailing semicolons are added. */
.md-typeset .admonition.code,
.md-typeset details.code,
.md-typeset .admonition.console,
.md-typeset details.console {
  border-color: #64dd17;
}
/* Tinted title bars for the custom admonitions: each uses its border colour
   at ~10% alpha. `rgba()` replaces the 4-argument comma `rgb()` form — the
   rendered colour is identical, but `rgba()` is also valid for parsers that
   predate CSS Color 4's rgb/rgba aliasing. */
.md-typeset .announcement > .admonition-title,
.md-typeset .announcement > summary {
  background-color: rgba(255, 110, 66, 0.1);
}
.md-typeset .important > .admonition-title,
.md-typeset .important > summary {
  background-color: rgba(239, 85, 82, 0.1);
}
/* `code` and `console` share the same tint (#64dd17 at alpha 0x1a ≈ 10%);
   the two duplicated rules are merged. */
.md-typeset .code > .admonition-title,
.md-typeset .code > summary,
.md-typeset .console > .admonition-title,
.md-typeset .console > summary {
  background-color: #64dd171a;
}
/* Icon colour and mask for each custom admonition's title. */
.md-typeset .announcement > .admonition-title::before,
.md-typeset .announcement > summary::before {
  /* FIX: was rgb(239, 85, 82) — the `important` red, apparently a
     copy-paste slip — which clashed with the announcement border (L. above:
     rgb(255, 110, 66)) and its matching title tint. Use the announcement
     orange so border, tint, and icon agree. */
  background-color: rgb(255, 110, 66);
  -webkit-mask-image: var(--md-admonition-icon--announcement);
  mask-image: var(--md-admonition-icon--announcement);
}
.md-typeset .important > .admonition-title::before,
.md-typeset .important > summary::before {
  background-color: rgb(239, 85, 82);
  -webkit-mask-image: var(--md-admonition-icon--important);
  mask-image: var(--md-admonition-icon--important);
}
.md-typeset .code > .admonition-title::before,
.md-typeset .code > summary::before {
  background-color: #64dd17;
  -webkit-mask-image: var(--md-admonition-icon--code);
  mask-image: var(--md-admonition-icon--code);
}
.md-typeset .console > .admonition-title::before,
.md-typeset .console > summary::before {
  background-color: #64dd17;
  -webkit-mask-image: var(--md-admonition-icon--console);
  mask-image: var(--md-admonition-icon--console);
}
/* "Edit this page" button: show its label at full opacity while hovered. */
.md-content__button[href*="edit"]:hover::after {
  opacity: 1;
}

/* Generated pages under docs/examples/ have no hand-written source to edit,
   so hide the edit button there on desktop layouts. */
@media (min-width: 960px) {
  .md-content__button[href*="docs/examples/"] {
    display: none !important;
  }
}

/* Pin the row of page-action buttons to the top-right of the content area. */
.md-content__button-wrapper {
  position: absolute;
  top: 0.6rem;
  right: 0.8rem;
  z-index: 1;
  display: flex;
  flex-direction: row;
  align-items: center;
  gap: 0.4rem;
}

/* Each action is a 24px square icon link, centred, in the default
   foreground colour… */
.md-content__button-wrapper a {
  display: inline-flex;
  align-items: center;
  justify-content: center;
  width: 24px;
  height: 24px;
  color: var(--md-default-fg-color);
  text-decoration: none;
}

/* …switching to the accent colour on hover. */
.md-content__button-wrapper a:hover {
  color: var(--md-accent-fg-color);
}
/* Header community buttons (Slack + forum): 24px-high flex boxes spaced
   from their neighbour. */
.forum-button,
.slack-button {
  display: inline-flex;
  align-items: center;
  justify-content: center;
  height: 24px;
  margin-left: 0.4rem;
}
/* Keep the Slack logo's original colours (no theme filter). */
.slack-button img {
  height: 18px;
  filter: none !important;
}
/* The forum icon is an inline SVG; nudge it down to align with the logo. */
.forum-button svg {
  height: 28px;
  opacity: 0.9;
  transform: translateY(2px);
}
/* Dim either button slightly on hover. */
.forum-button:hover,
.slack-button:hover {
  opacity: 0.7;
}
/* Theme-dependent logo: each colour scheme hides the opposite variant, so
   only the matching logo image is ever visible. */
[data-md-color-scheme="default"] .logo-dark,
[data-md-color-scheme="slate"] .logo-light {
  display: none;
}

/* Draw a thin rounded outline around content-tab groups and pad the
   tab body slightly. */
.md-typeset .tabbed-set {
  border: 0.075rem solid var(--md-default-fg-color);
  border-radius: 0.2rem;
}
.md-typeset .tabbed-content {
  padding: 0 0.6em;
}