docs: init readthedocs support (#783)

2024-07-28 16:50:31 +10:00
parent 68e5262699
commit 948625799e
16 changed files with 246 additions and 6 deletions
--- a/README.md
+++ b/README.md
@@ -96,7 +96,7 @@ curl http://localhost:30000/generate \
    }
  }'
 ```
-Learn more about the argument format [here](docs/sampling_params.md).
+Learn more about the argument format [here](docs/en/sampling_params.md).
 ### OpenAI Compatible API
 In addition, the server supports OpenAI-compatible APIs.
@@ -143,7 +143,7 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct
 ```
 python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --port 30000 --mem-fraction-static 0.7
 ```
-- See [hyperparameter_tuning.md](docs/hyperparameter_tuning.md) on tuning hyperparameters for better performance.
+- See [hyperparameter_tuning.md](docs/en/hyperparameter_tuning.md) on tuning hyperparameters for better performance.
 - Add `--nnodes 2` to run tensor parallelism on multiple nodes. If you have two nodes with two GPUs on each node and want to run TP=4, let `sgl-dev-0` be the hostname of the first node and `50000` be an available port.
 ```
 # Node 0
@@ -152,7 +152,7 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct
 # Node 1
 python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --tp 4 --nccl-init sgl-dev-0:50000 --nnodes 2 --node-rank 1
 ```
-- If the model does not have a template in the Hugging Face tokenizer, you can specify a [custom chat template](docs/custom_chat_template.md).
+- If the model does not have a template in the Hugging Face tokenizer, you can specify a [custom chat template](docs/en/custom_chat_template.md).
 - To enable fp8 quantization, you can add `--quantization fp8` on a fp16 checkpoint or directly load a fp8 checkpoint without specifying any arguments.
 - To enable experimental torch.compile support, you can add `--enable-torch-compile`. It accelerates small models on small batch sizes.
@@ -195,7 +195,7 @@ GLOO_SOCKET_IFNAME=eth0 python3 -m sglang.launch_server --model-path meta-llama/
 - InternLM 2
 - Mistral NeMo
-Instructions for supporting a new model are [here](https://github.com/sgl-project/sglang/blob/main/docs/model_support.md).
+Instructions for supporting a new model are [here](https://github.com/sgl-project/sglang/blob/main/docs/en/model_support.md).
 ### Benchmark Performance
--- a/docs/en/.readthedocs.yaml
+++ b/docs/en/.readthedocs.yaml
@@ -0,0 +1,17 @@
 # Read the Docs build configuration (config schema version 2).
 version: 2
 # Request every available output format.
 formats: all
 # Build environment: operating system image and Python toolchain version.
 build:
  os: "ubuntu-22.04"
  tools:
    python: "3.12"
 # Sphinx is driven by the conf.py of the English documentation tree.
 sphinx:
  configuration: docs/en/conf.py
 # Python packages to install for the documentation build.
 python:
  install:
    - requirements: docs/requirements.txt
--- a/docs/en/Makefile
+++ b/docs/en/Makefile
@@ -0,0 +1,12 @@
 # Minimal makefile for Sphinx documentation.
 #
 # SPHINXOPTS and SPHINXBUILD can be set on the command line and, because they
 # are assigned with `?=`, are also picked up from the environment.
 # $(O) is left undefined here on purpose: it is a command-line shortcut for
 # passing extra options to sphinx-build (e.g. `make html O=-W`).
 SPHINXOPTS    ?=
 SPHINXBUILD   ?= sphinx-build
 SOURCEDIR     = .
 BUILDDIR      = _build
 # Keep this rule first so that a bare "make" behaves like "make help".
 help:
 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
 # "Makefile" is listed so the catch-all pattern below never tries to remake
 # this file itself.
 .PHONY: help Makefile
 # Catch-all target: forward every other goal to sphinx-build's "make mode"
 # (-M), with $@ carrying the requested target name.
 %: Makefile
 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--- a/docs/en/_static/css/readthedocs.css
+++ b/docs/en/_static/css/readthedocs.css
@@ -0,0 +1,9 @@
 /* Each cell of an autosummary table takes half of the table width. */
 table.autosummary td {
  width: 50%;
 }
 /* Centre images that carry the align-center class: block display plus
    automatic left/right margins. */
 img.align-center {
  display: block;
  margin-left: auto;
  margin-right: auto;
 }
--- a/docs/en/benchmark_results.md
+++ b/docs/en/benchmark_results.md
--- a/docs/en/conf.py
+++ b/docs/en/conf.py
@@ -0,0 +1,125 @@
 # Sphinx configuration: import path setup and project metadata.
 import os
 import sys
 # Make the sources importable for autodoc. The package lives under python/
 # (see version_file below), so that directory must be on sys.path for
 # `import sglang` to resolve; the repository root is kept as before.
 sys.path.insert(0, os.path.abspath("../.."))
 sys.path.insert(0, os.path.abspath("../../python"))
 # Read the version straight from the source tree rather than importing the
 # package. The file is executed in a private namespace so that nothing other
 # than __version__ ends up among the configuration values.
 version_file = "../../python/sglang/version.py"
 _version_namespace = {}
 with open(version_file, "r", encoding="utf-8") as f:
    exec(compile(f.read(), version_file, "exec"), _version_namespace)
 __version__ = _version_namespace["__version__"]
 # Project information shown in the generated documentation.
 project = "SGLang"
 copyright = "2023-2024, SGLang"
 author = "SGLang Team"
 # Short version and full release string are both the package version.
 version = __version__
 release = __version__
 # Sphinx extensions: built-in ones first, then third-party packages.
 extensions = [
    "sphinx.ext.autodoc",
    "sphinx.ext.autosummary",
    "sphinx.ext.napoleon",
    "sphinx.ext.viewcode",
    "sphinx.ext.autosectionlabel",
    "sphinx.ext.intersphinx",
    "sphinx_tabs.tabs",
    "myst_parser",
    "sphinx_copybutton",
    "sphinxcontrib.mermaid",
 ]
 # Prefix auto-generated section labels with the document name.
 autosectionlabel_prefix_document = True
 templates_path = ["_templates"]
 # Both reStructuredText and Markdown sources are accepted.
 source_suffix = {
    ".rst": "restructuredtext",
    ".md": "markdown",
 }
 master_doc = "index"
 language = "en"
 # Paths ignored when collecting source files.
 exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
 pygments_style = "sphinx"
 # -- HTML output -----------------------------------------------------------
 html_theme = "sphinx_book_theme"
 html_logo = "_static/image/logo.png"
 html_title = project
 html_copy_source = True
 html_last_updated_fmt = ""
 html_theme_options = {
    "path_to_docs": "docs/en",
    "repository_url": "https://github.com/sgl-project/sglang",
    "repository_branch": "main",
    "show_navbar_depth": 3,
    "max_navbar_depth": 4,
    "collapse_navbar": True,
    "use_edit_page_button": True,
    "use_source_button": True,
    "use_issues_button": True,
    "use_repository_button": True,
    "use_download_button": True,
    "use_sidenotes": True,
    "show_toc_level": 2,
    # Theme option, so it belongs in this dict; as a bare top-level name in
    # conf.py it is not a recognised setting and has no effect.
    "navigation_with_keys": False,
 }
 html_static_path = ["_static"]
 # Path is relative to html_static_path.
 html_css_files = ["css/readthedocs.css"]
 # -- MyST (Markdown) parser ------------------------------------------------
 myst_enable_extensions = [
    "dollarmath",
    "amsmath",
    "deflist",
    "colon_fence",
 ]
 myst_heading_anchors = 5
 # -- Other output formats --------------------------------------------------
 htmlhelp_basename = "sglangdoc"
 latex_elements = {}
 latex_documents = [
    (master_doc, "sglang.tex", "sglang Documentation", "SGLang Team", "manual"),
 ]
 man_pages = [(master_doc, "sglang", "sglang Documentation", [author], 1)]
 texinfo_documents = [
    (
        master_doc,
        "sglang",
        "sglang Documentation",
        author,
        "sglang",
        # Replaces the sphinx-quickstart placeholder description.
        "A fast serving framework for large language models and vision language models.",
        "Miscellaneous",
    ),
 ]
 epub_title = project
 epub_exclude_files = ["search.html"]
 # -- Extension settings ----------------------------------------------------
 # The prompt text is a regular expression matching ">>> " and "... ".
 copybutton_prompt_text = r">>> |\.\.\. "
 copybutton_prompt_is_regexp = True
 autodoc_preserve_defaults = True
 # Modules that autodoc mocks instead of importing.
 autodoc_mock_imports = [
    "torch",
    "transformers",
    "triton",
 ]
 # External documentation sets available for cross-references.
 intersphinx_mapping = {
    "python": ("https://docs.python.org/3.12", None),
    "typing_extensions": ("https://typing-extensions.readthedocs.io/en/latest", None),
    "pillow": ("https://pillow.readthedocs.io/en/stable", None),
    "numpy": ("https://numpy.org/doc/stable", None),
    "torch": ("https://pytorch.org/docs/stable", None),
 }
--- a/docs/en/custom_chat_template.md
+++ b/docs/en/custom_chat_template.md
--- a/docs/en/hyperparameter_tuning.md
+++ b/docs/en/hyperparameter_tuning.md
--- a/docs/en/index.rst
+++ b/docs/en/index.rst
@@ -0,0 +1,65 @@
 Welcome to SGLang's tutorials!
 ====================================
 .. figure:: ./_static/image/logo.png
  :width: 50%
  :align: center
  :alt: SGLang
  :class: no-scaled-link
 .. raw:: html
   <p style="text-align:center">
   <strong>SGLang is yet another fast serving framework for large language models and vision language models.
   </strong>
   </p>
   <p style="text-align:center">
   <script async defer src="https://buttons.github.io/buttons.js"></script>
   <a class="github-button" href="https://github.com/sgl-project/sglang" data-show-count="true" data-size="large" aria-label="Star">Star</a>
   <a class="github-button" href="https://github.com/sgl-project/sglang/subscription" data-icon="octicon-eye" data-size="large" aria-label="Watch">Watch</a>
   <a class="github-button" href="https://github.com/sgl-project/sglang/fork" data-icon="octicon-repo-forked" data-size="large" aria-label="Fork">Fork</a>
   </p>
 SGLang has the following core features:
 * **Fast Backend Runtime**: Efficient serving with RadixAttention for prefix caching, jump-forward constrained decoding, continuous batching, token attention (paged attention), tensor parallelism, flashinfer kernels, and quantization (AWQ/FP8/GPTQ/Marlin).
 * **Flexible Frontend Language**: Enables easy programming of LLM applications with chained generation calls, advanced prompting, control flow, multiple modalities, parallelism, and external interactions.
 Documentation
 -------------
 .. _hyperparameter_tuning:
 .. toctree::
   :maxdepth: 1
   :caption: Hyperparameter Tuning
   hyperparameter_tuning.md
 .. _custom_chat_template:
 .. toctree::
   :maxdepth: 1
   :caption: Custom Chat Template
   custom_chat_template.md
 .. _model_support:
 .. toctree::
   :maxdepth: 1
   :caption: Model Support
   model_support.md
 .. _sampling_params:
 .. toctree::
   :maxdepth: 1
   :caption: Sampling Params
   sampling_params.md
 Indices and tables
 ==================
 * :ref:`genindex`
 * :ref:`search`
--- a/docs/en/model_support.md
+++ b/docs/en/model_support.md
--- a/docs/en/release_process.md
+++ b/docs/en/release_process.md
--- a/docs/en/sampling_params.md
+++ b/docs/en/sampling_params.md
--- a/docs/en/test_process.md
+++ b/docs/en/test_process.md
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -0,0 +1,12 @@
 # Python dependencies for building the documentation; referenced by the
 # `python.install.requirements` entry of docs/en/.readthedocs.yaml.
 markdown>=3.4.0
 # Sphinx plus the theme and the third-party extensions enabled in docs/en/conf.py.
 myst-parser
 sphinx
 sphinx-book-theme
 sphinx-copybutton
 sphinx-tabs
 sphinxcontrib-mermaid
 # NOTE(review): conf.py lists torch and transformers in autodoc_mock_imports,
 # so installing them here may be unnecessary - confirm before trimming.
 pillow
 pydantic
 torch
 transformers
 urllib3<2.0.0
--- a/python/sglang/api.py
+++ b/python/sglang/api.py
@@ -75,7 +75,7 @@ def gen(
    choices: Optional[List[str]] = None,
    regex: Optional[str] = None,
 ):
-    """Call the model to generate. See the meaning of the arguments in docs/sampling_params.md"""
+    """Call the model to generate. See the meaning of the arguments in docs/en/sampling_params.md"""
    if choices:
        return SglSelect(name, choices, 0.0 if temperature is None else temperature)
--- a/python/sglang/lang/ir.py
+++ b/python/sglang/lang/ir.py
@@ -410,7 +410,7 @@ class SglGen(SglExpr):
        dtype: Optional[type] = None,
        regex: Optional[str] = None,
    ):
-        """Call the model to generate. See the meaning of the arguments in docs/sampling_params.md"""
+        """Call the model to generate. See the meaning of the arguments in docs/en/sampling_params.md"""
        super().__init__()
        self.name = name
        self.sampling_params = SglSamplingParams(