docs: init readthedocs support (#783)
This commit is contained in:
@@ -96,7 +96,7 @@ curl http://localhost:30000/generate \
|
||||
}
|
||||
}'
|
||||
```
|
||||
Learn more about the argument format [here](docs/sampling_params.md).
|
||||
Learn more about the argument format [here](docs/en/sampling_params.md).
|
||||
|
||||
### OpenAI Compatible API
|
||||
In addition, the server supports OpenAI-compatible APIs.
|
||||
@@ -143,7 +143,7 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct
|
||||
```
|
||||
python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --port 30000 --mem-fraction-static 0.7
|
||||
```
|
||||
- See [hyperparameter_tuning.md](docs/hyperparameter_tuning.md) on tuning hyperparameters for better performance.
|
||||
- See [hyperparameter_tuning.md](docs/en/hyperparameter_tuning.md) on tuning hyperparameters for better performance.
|
||||
- Add `--nnodes 2` to run tensor parallelism on multiple nodes. If you have two nodes with two GPUs on each node and want to run TP=4, let `sgl-dev-0` be the hostname of the first node and `50000` be an available port.
|
||||
```
|
||||
# Node 0
|
||||
@@ -152,7 +152,7 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct
|
||||
# Node 1
|
||||
python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --tp 4 --nccl-init sgl-dev-0:50000 --nnodes 2 --node-rank 1
|
||||
```
|
||||
- If the model does not have a template in the Hugging Face tokenizer, you can specify a [custom chat template](docs/custom_chat_template.md).
|
||||
- If the model does not have a template in the Hugging Face tokenizer, you can specify a [custom chat template](docs/en/custom_chat_template.md).
|
||||
- To enable fp8 quantization, you can add `--quantization fp8` on a fp16 checkpoint or directly load a fp8 checkpoint without specifying any arguments.
|
||||
- To enable experimental torch.compile support, you can add `--enable-torch-compile`. It accelerates small models on small batch sizes.
|
||||
|
||||
@@ -195,7 +195,7 @@ GLOO_SOCKET_IFNAME=eth0 python3 -m sglang.launch_server --model-path meta-llama/
|
||||
- InternLM 2
|
||||
- Mistral NeMo
|
||||
|
||||
Instructions for supporting a new model are [here](https://github.com/sgl-project/sglang/blob/main/docs/model_support.md).
|
||||
Instructions for supporting a new model are [here](https://github.com/sgl-project/sglang/blob/main/docs/en/model_support.md).
|
||||
|
||||
### Benchmark Performance
|
||||
|
||||
|
||||
17
docs/en/.readthedocs.yaml
Normal file
17
docs/en/.readthedocs.yaml
Normal file
@@ -0,0 +1,17 @@
|
||||
version: 2
|
||||
|
||||
formats: all
|
||||
|
||||
build:
|
||||
os: "ubuntu-22.04"
|
||||
tools:
|
||||
python: "3.12"
|
||||
|
||||
|
||||
sphinx:
|
||||
configuration: docs/en/conf.py
|
||||
|
||||
|
||||
python:
|
||||
install:
|
||||
- requirements: docs/requirements.txt
|
||||
12
docs/en/Makefile
Normal file
12
docs/en/Makefile
Normal file
@@ -0,0 +1,12 @@
|
||||
SPHINXOPTS =
|
||||
SPHINXBUILD = sphinx-build
|
||||
SOURCEDIR = .
|
||||
BUILDDIR = _build
|
||||
|
||||
help:
|
||||
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
|
||||
.PHONY: help Makefile
|
||||
|
||||
%: Makefile
|
||||
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
9
docs/en/_static/css/readthedocs.css
Normal file
9
docs/en/_static/css/readthedocs.css
Normal file
@@ -0,0 +1,9 @@
|
||||
table.autosummary td {
|
||||
width: 50%
|
||||
}
|
||||
|
||||
img.align-center {
|
||||
display: block;
|
||||
margin-left: auto;
|
||||
margin-right: auto;
|
||||
}
|
||||
125
docs/en/conf.py
Normal file
125
docs/en/conf.py
Normal file
@@ -0,0 +1,125 @@
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.abspath("../.."))
|
||||
|
||||
version_file = "../../python/sglang/version.py"
|
||||
with open(version_file, "r") as f:
|
||||
exec(compile(f.read(), version_file, "exec"))
|
||||
__version__ = locals()["__version__"]
|
||||
|
||||
project = "SGLang"
|
||||
copyright = "2023-2024, SGLang"
|
||||
author = "SGLang Team"
|
||||
|
||||
version = __version__
|
||||
release = __version__
|
||||
|
||||
extensions = [
|
||||
"sphinx.ext.autodoc",
|
||||
"sphinx.ext.autosummary",
|
||||
"sphinx.ext.napoleon",
|
||||
"sphinx.ext.viewcode",
|
||||
"sphinx.ext.autosectionlabel",
|
||||
"sphinx.ext.intersphinx",
|
||||
"sphinx_tabs.tabs",
|
||||
"myst_parser",
|
||||
"sphinx_copybutton",
|
||||
"sphinxcontrib.mermaid",
|
||||
]
|
||||
|
||||
autosectionlabel_prefix_document = True
|
||||
|
||||
templates_path = ["_templates"]
|
||||
|
||||
source_suffix = {
|
||||
".rst": "restructuredtext",
|
||||
".md": "markdown",
|
||||
}
|
||||
|
||||
master_doc = "index"
|
||||
|
||||
language = "en"
|
||||
|
||||
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
|
||||
|
||||
pygments_style = "sphinx"
|
||||
|
||||
html_theme = "sphinx_book_theme"
|
||||
html_logo = "_static/image/logo.png"
|
||||
html_title = project
|
||||
html_copy_source = True
|
||||
html_last_updated_fmt = ""
|
||||
|
||||
html_theme_options = {
|
||||
"path_to_docs": "docs/en",
|
||||
"repository_url": "https://github.com/sgl-project/sglang",
|
||||
"repository_branch": "main",
|
||||
"show_navbar_depth": 3,
|
||||
"max_navbar_depth": 4,
|
||||
"collapse_navbar": True,
|
||||
"use_edit_page_button": True,
|
||||
"use_source_button": True,
|
||||
"use_issues_button": True,
|
||||
"use_repository_button": True,
|
||||
"use_download_button": True,
|
||||
"use_sidenotes": True,
|
||||
"show_toc_level": 2,
|
||||
}
|
||||
|
||||
html_static_path = ["_static"]
|
||||
html_css_files = ["css/readthedocs.css"]
|
||||
|
||||
myst_enable_extensions = [
|
||||
"dollarmath",
|
||||
"amsmath",
|
||||
"deflist",
|
||||
"colon_fence",
|
||||
]
|
||||
myst_heading_anchors = 5
|
||||
|
||||
htmlhelp_basename = "sglangdoc"
|
||||
|
||||
latex_elements = {}
|
||||
|
||||
latex_documents = [
|
||||
(master_doc, "sglang.tex", "sglang Documentation", "SGLang Team", "manual"),
|
||||
]
|
||||
|
||||
man_pages = [(master_doc, "sglang", "sglang Documentation", [author], 1)]
|
||||
|
||||
texinfo_documents = [
|
||||
(
|
||||
master_doc,
|
||||
"sglang",
|
||||
"sglang Documentation",
|
||||
author,
|
||||
"sglang",
|
||||
"One line description of project.",
|
||||
"Miscellaneous",
|
||||
),
|
||||
]
|
||||
|
||||
epub_title = project
|
||||
|
||||
epub_exclude_files = ["search.html"]
|
||||
|
||||
copybutton_prompt_text = r">>> |\.\.\. "
|
||||
copybutton_prompt_is_regexp = True
|
||||
|
||||
autodoc_preserve_defaults = True
|
||||
navigation_with_keys = False
|
||||
|
||||
autodoc_mock_imports = [
|
||||
"torch",
|
||||
"transformers",
|
||||
"triton",
|
||||
]
|
||||
|
||||
intersphinx_mapping = {
|
||||
"python": ("https://docs.python.org/3.12", None),
|
||||
"typing_extensions": ("https://typing-extensions.readthedocs.io/en/latest", None),
|
||||
"pillow": ("https://pillow.readthedocs.io/en/stable", None),
|
||||
"numpy": ("https://numpy.org/doc/stable", None),
|
||||
"torch": ("https://pytorch.org/docs/stable", None),
|
||||
}
|
||||
65
docs/en/index.rst
Normal file
65
docs/en/index.rst
Normal file
@@ -0,0 +1,65 @@
|
||||
Welcome to SGLang's tutorials!
|
||||
====================================
|
||||
|
||||
.. figure:: ./_static/image/logo.png
|
||||
:width: 50%
|
||||
:align: center
|
||||
:alt: SGLang
|
||||
:class: no-scaled-link
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<p style="text-align:center">
|
||||
<strong>SGLang is yet another fast serving framework for large language models and vision language models.
|
||||
</strong>
|
||||
</p>
|
||||
|
||||
<p style="text-align:center">
|
||||
<script async defer src="https://buttons.github.io/buttons.js"></script>
|
||||
<a class="github-button" href="https://github.com/sgl-project/sglang" data-show-count="true" data-size="large" aria-label="Star">Star</a>
|
||||
<a class="github-button" href="https://github.com/sgl-project/sglang/subscription" data-icon="octicon-eye" data-size="large" aria-label="Watch">Watch</a>
|
||||
<a class="github-button" href="https://github.com/sgl-project/sglang/fork" data-icon="octicon-repo-forked" data-size="large" aria-label="Fork">Fork</a>
|
||||
</p>
|
||||
|
||||
SGLang has the following core features:
|
||||
|
||||
* **Fast Backend Runtime**: Efficient serving with RadixAttention for prefix caching, jump-forward constrained decoding, continuous batching, token attention (paged attention), tensor parallelism, flashinfer kernels, and quantization (AWQ/FP8/GPTQ/Marlin).
|
||||
|
||||
* **Flexible Frontend Language**: Enables easy programming of LLM applications with chained generation calls, advanced prompting, control flow, multiple modalities, parallelism, and external interactions.
|
||||
|
||||
Documentation
|
||||
-------------
|
||||
|
||||
.. _hyperparameter_tuning:
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:caption: Hyperparameter Tuning
|
||||
|
||||
hyperparameter_tuning.md
|
||||
|
||||
.. _custom_chat_template:
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:caption: Custom Chat Template
|
||||
|
||||
custom_chat_template.md
|
||||
|
||||
.. _model_support:
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:caption: Model Support
|
||||
|
||||
model_support.md
|
||||
|
||||
.. _sampling_params:
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:caption: Sampling Params
|
||||
|
||||
sampling_params.md
|
||||
|
||||
Indices and tables
|
||||
==================
|
||||
|
||||
* :ref:`genindex`
|
||||
* :ref:`search`
|
||||
12
docs/requirements.txt
Normal file
12
docs/requirements.txt
Normal file
@@ -0,0 +1,12 @@
|
||||
markdown>=3.4.0
|
||||
myst-parser
|
||||
sphinx
|
||||
sphinx-book-theme
|
||||
sphinx-copybutton
|
||||
sphinx-tabs
|
||||
sphinxcontrib-mermaid
|
||||
pillow
|
||||
pydantic
|
||||
torch
|
||||
transformers
|
||||
urllib3<2.0.0
|
||||
@@ -75,7 +75,7 @@ def gen(
|
||||
choices: Optional[List[str]] = None,
|
||||
regex: Optional[str] = None,
|
||||
):
|
||||
"""Call the model to generate. See the meaning of the arguments in docs/sampling_params.md"""
|
||||
"""Call the model to generate. See the meaning of the arguments in docs/en/sampling_params.md"""
|
||||
|
||||
if choices:
|
||||
return SglSelect(name, choices, 0.0 if temperature is None else temperature)
|
||||
|
||||
@@ -410,7 +410,7 @@ class SglGen(SglExpr):
|
||||
dtype: Optional[type] = None,
|
||||
regex: Optional[str] = None,
|
||||
):
|
||||
"""Call the model to generate. See the meaning of the arguments in docs/sampling_params.md"""
|
||||
"""Call the model to generate. See the meaning of the arguments in docs/en/sampling_params.md"""
|
||||
super().__init__()
|
||||
self.name = name
|
||||
self.sampling_params = SglSamplingParams(
|
||||
|
||||
Reference in New Issue
Block a user