docs: init readthedocs support (#783)

2024-07-28 16:50:31 +10:00
parent 68e5262699
commit 948625799e
16 changed files with 246 additions and 6 deletions
--- a/README.md
+++ b/README.md
@@ -96,7 +96,7 @@ curl http://localhost:30000/generate \
    }
  }'
 ```
-Learn more about the argument format [here](docs/sampling_params.md).
+Learn more about the argument format [here](docs/en/sampling_params.md).

 ### OpenAI Compatible API
 In addition, the server supports OpenAI-compatible APIs.
@@ -143,7 +143,7 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct
 ```
 python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --port 30000 --mem-fraction-static 0.7
 ```
- See [hyperparameter_tuning.md](docs/hyperparameter_tuning.md) on tuning hyperparameters for better performance.
+- See [hyperparameter_tuning.md](docs/en/hyperparameter_tuning.md) on tuning hyperparameters for better performance.
 - Add `--nnodes 2` to run tensor parallelism on multiple nodes. If you have two nodes with two GPUs on each node and want to run TP=4, let `sgl-dev-0` be the hostname of the first node and `50000` be an available port.
 ```
 # Node 0
@@ -152,7 +152,7 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct
 # Node 1
 python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --tp 4 --nccl-init sgl-dev-0:50000 --nnodes 2 --node-rank 1
 ```
- If the model does not have a template in the Hugging Face tokenizer, you can specify a [custom chat template](docs/custom_chat_template.md).
+- If the model does not have a template in the Hugging Face tokenizer, you can specify a [custom chat template](docs/en/custom_chat_template.md).
 - To enable fp8 quantization, you can add `--quantization fp8` on a fp16 checkpoint or directly load a fp8 checkpoint without specifying any arguments.
 - To enable experimental torch.compile support, you can add `--enable-torch-compile`. It accelerates small models on small batch sizes.

@@ -195,7 +195,7 @@ GLOO_SOCKET_IFNAME=eth0 python3 -m sglang.launch_server --model-path meta-llama/
 - InternLM 2
 - Mistral NeMo

-Instructions for supporting a new model are [here](https://github.com/sgl-project/sglang/blob/main/docs/model_support.md).
+Instructions for supporting a new model are [here](https://github.com/sgl-project/sglang/blob/main/docs/en/model_support.md).

 ### Benchmark Performance

--- a/docs/en/.readthedocs.yaml
+++ b/docs/en/.readthedocs.yaml
@@ -0,0 +1,17 @@
+version: 2  # Read the Docs configuration file format version
+
+formats: all  # also build every additional downloadable output format
+
+build:
+  os: "ubuntu-22.04"
+  tools:
+    python: "3.12"  # quoted so YAML keeps it a string rather than a float
+
+
+sphinx:
+  configuration: docs/en/conf.py  # written from the repository root, not from this file's directory
+
+
+python:
+  install:
+    - requirements: docs/requirements.txt  # pip requirements file for the docs build
--- a/docs/en/Makefile
+++ b/docs/en/Makefile
@@ -0,0 +1,12 @@
+SPHINXOPTS    =
+SPHINXBUILD   = sphinx-build
+SOURCEDIR     = .
+BUILDDIR      = _build
+
+help:  # first target in the file, so a bare `make` runs it
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile  # these names are commands, not files to be built
+
+%: Makefile  # catch-all: pass any other target name to sphinx-build -M
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--- a/docs/en/_static/css/readthedocs.css
+++ b/docs/en/_static/css/readthedocs.css
@@ -0,0 +1,9 @@
+table.autosummary td { /* each autosummary table cell takes half the table width */
+  width: 50%;
+}
+
+img.align-center { /* block display plus auto side margins centers the image */
+  display: block;
+  margin-left: auto;
+  margin-right: auto;
+}
--- a/docs/en/benchmark_results.md
+++ b/docs/en/benchmark_results.md
--- a/docs/en/conf.py
+++ b/docs/en/conf.py
@@ -0,0 +1,125 @@
+import os
+import sys
+
+sys.path.insert(0, os.path.abspath("../.."))
+
+version_file = "../../python/sglang/version.py"
+with open(version_file, "r") as f:
+    exec(compile(f.read(), version_file, "exec"))
+__version__ = locals()["__version__"]
+
+project = "SGLang"
+copyright = "2023-2024, SGLang"
+author = "SGLang Team"
+
+version = __version__
+release = __version__
+
+extensions = [
+    "sphinx.ext.autodoc",
+    "sphinx.ext.autosummary",
+    "sphinx.ext.napoleon",
+    "sphinx.ext.viewcode",
+    "sphinx.ext.autosectionlabel",
+    "sphinx.ext.intersphinx",
+    "sphinx_tabs.tabs",
+    "myst_parser",
+    "sphinx_copybutton",
+    "sphinxcontrib.mermaid",
+]
+
+autosectionlabel_prefix_document = True
+
+templates_path = ["_templates"]
+
+source_suffix = {
+    ".rst": "restructuredtext",
+    ".md": "markdown",
+}
+
+master_doc = "index"
+
+language = "en"
+
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
+
+pygments_style = "sphinx"
+
+html_theme = "sphinx_book_theme"
+html_logo = "_static/image/logo.png"
+html_title = project
+html_copy_source = True
+html_last_updated_fmt = ""
+
+html_theme_options = {
+    "path_to_docs": "docs/en",
+    "repository_url": "https://github.com/sgl-project/sglang",
+    "repository_branch": "main",
+    "show_navbar_depth": 3,
+    "max_navbar_depth": 4,
+    "collapse_navbar": True,
+    "use_edit_page_button": True,
+    "use_source_button": True,
+    "use_issues_button": True,
+    "use_repository_button": True,
+    "use_download_button": True,
+    "use_sidenotes": True,
+    "show_toc_level": 2,
+}
+
+html_static_path = ["_static"]
+html_css_files = ["css/readthedocs.css"]
+
+myst_enable_extensions = [
+    "dollarmath",
+    "amsmath",
+    "deflist",
+    "colon_fence",
+]
+myst_heading_anchors = 5
+
+htmlhelp_basename = "sglangdoc"
+
+latex_elements = {}
+
+latex_documents = [
+    (master_doc, "sglang.tex", "sglang Documentation", "SGLang Team", "manual"),
+]
+
+man_pages = [(master_doc, "sglang", "sglang Documentation", [author], 1)]
+
+texinfo_documents = [
+    (
+        master_doc,
+        "sglang",
+        "sglang Documentation",
+        author,
+        "sglang",
+        "One line description of project.",
+        "Miscellaneous",
+    ),
+]
+
+epub_title = project
+
+epub_exclude_files = ["search.html"]
+
+copybutton_prompt_text = r">>> |\.\.\. "
+copybutton_prompt_is_regexp = True
+
+autodoc_preserve_defaults = True
+navigation_with_keys = False
+
+autodoc_mock_imports = [
+    "torch",
+    "transformers",
+    "triton",
+]
+
+intersphinx_mapping = {
+    "python": ("https://docs.python.org/3.12", None),
+    "typing_extensions": ("https://typing-extensions.readthedocs.io/en/latest", None),
+    "pillow": ("https://pillow.readthedocs.io/en/stable", None),
+    "numpy": ("https://numpy.org/doc/stable", None),
+    "torch": ("https://pytorch.org/docs/stable", None),
+}
--- a/docs/en/custom_chat_template.md
+++ b/docs/en/custom_chat_template.md
--- a/docs/en/hyperparameter_tuning.md
+++ b/docs/en/hyperparameter_tuning.md
--- a/docs/en/index.rst
+++ b/docs/en/index.rst
@@ -0,0 +1,65 @@
+Welcome to SGLang's tutorials!
+====================================
+
+.. figure:: ./_static/image/logo.png
+  :width: 50%
+  :align: center
+  :alt: SGLang
+  :class: no-scaled-link
+
+.. raw:: html
+
+   <p style="text-align:center">
+   <strong>SGLang is yet another fast serving framework for large language models and vision language models.
+   </strong>
+   </p>
+
+   <p style="text-align:center">
+   <script async defer src="https://buttons.github.io/buttons.js"></script>
+   <a class="github-button" href="https://github.com/sgl-project/sglang" data-show-count="true" data-size="large" aria-label="Star">Star</a>
+   <a class="github-button" href="https://github.com/sgl-project/sglang/subscription" data-icon="octicon-eye" data-size="large" aria-label="Watch">Watch</a>
+   <a class="github-button" href="https://github.com/sgl-project/sglang/fork" data-icon="octicon-repo-forked" data-size="large" aria-label="Fork">Fork</a>
+   </p>
+
+SGLang has the following core features:
+
+* **Fast Backend Runtime**: Efficient serving with RadixAttention for prefix caching, jump-forward constrained decoding, continuous batching, token attention (paged attention), tensor parallelism, flashinfer kernels, and quantization (AWQ/FP8/GPTQ/Marlin).
+
+* **Flexible Frontend Language**: Enables easy programming of LLM applications with chained generation calls, advanced prompting, control flow, multiple modalities, parallelism, and external interactions.
+
+Documentation
+-------------
+
+.. _hyperparameter_tuning:
+.. toctree::
+   :maxdepth: 1
+   :caption: Hyperparameter Tuning
+
+   hyperparameter_tuning.md
+
+.. _custom_chat_template:
+.. toctree::
+   :maxdepth: 1
+   :caption: Custom Chat Template
+
+   custom_chat_template.md
+
+.. _model_support:
+.. toctree::
+   :maxdepth: 1
+   :caption: Model Support
+
+   model_support.md
+
+.. _sampling_params:
+.. toctree::
+   :maxdepth: 1
+   :caption: Sampling Params
+
+   sampling_params.md
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`search`
--- a/docs/en/model_support.md
+++ b/docs/en/model_support.md
--- a/docs/en/release_process.md
+++ b/docs/en/release_process.md
--- a/docs/en/sampling_params.md
+++ b/docs/en/sampling_params.md
--- a/docs/en/test_process.md
+++ b/docs/en/test_process.md
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -0,0 +1,12 @@
+markdown>=3.4.0
+myst-parser
+sphinx
+sphinx-book-theme
+sphinx-copybutton
+sphinx-tabs
+sphinxcontrib-mermaid
+pillow
+pydantic
+torch
+transformers
+urllib3<2.0.0
--- a/python/sglang/api.py
+++ b/python/sglang/api.py
@@ -75,7 +75,7 @@ def gen(
    choices: Optional[List[str]] = None,
    regex: Optional[str] = None,
 ):
-    """Call the model to generate. See the meaning of the arguments in docs/sampling_params.md"""
+    """Call the model to generate. See the meaning of the arguments in docs/en/sampling_params.md"""

    if choices:
        return SglSelect(name, choices, 0.0 if temperature is None else temperature)
--- a/python/sglang/lang/ir.py
+++ b/python/sglang/lang/ir.py
@@ -410,7 +410,7 @@ class SglGen(SglExpr):
        dtype: Optional[type] = None,
        regex: Optional[str] = None,
    ):
-        """Call the model to generate. See the meaning of the arguments in docs/sampling_params.md"""
+        """Call the model to generate. See the meaning of the arguments in docs/en/sampling_params.md"""
        super().__init__()
        self.name = name
        self.sampling_params = SglSamplingParams(