diff --git a/README.md b/README.md
index 4715852fe..477392350 100644
--- a/README.md
+++ b/README.md
@@ -96,7 +96,7 @@ curl http://localhost:30000/generate \
     }
   }'
 ```
-Learn more about the argument format [here](docs/sampling_params.md).
+Learn more about the argument format [here](docs/en/sampling_params.md).
 
 ### OpenAI Compatible API
 In addition, the server supports OpenAI-compatible APIs.
@@ -143,7 +143,7 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct
 ```
 python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --port 30000 --mem-fraction-static 0.7
 ```
-- See [hyperparameter_tuning.md](docs/hyperparameter_tuning.md) on tuning hyperparameters for better performance.
+- See [hyperparameter_tuning.md](docs/en/hyperparameter_tuning.md) on tuning hyperparameters for better performance.
 - Add `--nnodes 2` to run tensor parallelism on multiple nodes. If you have two nodes with two GPUs on each node and want to run TP=4, let `sgl-dev-0` be the hostname of the first node and `50000` be an available port.
 ```
 # Node 0
@@ -152,7 +152,7 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct
 # Node 1
 python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --tp 4 --nccl-init sgl-dev-0:50000 --nnodes 2 --node-rank 1
 ```
-- If the model does not have a template in the Hugging Face tokenizer, you can specify a [custom chat template](docs/custom_chat_template.md).
+- If the model does not have a template in the Hugging Face tokenizer, you can specify a [custom chat template](docs/en/custom_chat_template.md).
 - To enable fp8 quantization, you can add `--quantization fp8` on a fp16 checkpoint or directly load a fp8 checkpoint without specifying any arguments.
 - To enable experimental torch.compile support, you can add `--enable-torch-compile`. It accelerates small models on small batch sizes.
 
@@ -195,7 +195,7 @@ GLOO_SOCKET_IFNAME=eth0 python3 -m sglang.launch_server --model-path meta-llama/
 - InternLM 2
 - Mistral NeMo
 
-Instructions for supporting a new model are [here](https://github.com/sgl-project/sglang/blob/main/docs/model_support.md).
+Instructions for supporting a new model are [here](https://github.com/sgl-project/sglang/blob/main/docs/en/model_support.md).
 
 ### Benchmark Performance
 
diff --git a/docs/en/.readthedocs.yaml b/docs/en/.readthedocs.yaml
new file mode 100644
index 000000000..94f52e9a0
--- /dev/null
+++ b/docs/en/.readthedocs.yaml
@@ -0,0 +1,17 @@
+version: 2
+
+formats: all
+
+build:
+  os: "ubuntu-22.04"
+  tools:
+    python: "3.12"
+
+
+sphinx:
+  configuration: docs/en/conf.py
+
+
+python:
+  install:
+    - requirements: docs/requirements.txt
diff --git a/docs/en/Makefile b/docs/en/Makefile
new file mode 100644
index 000000000..9ad4b38e0
--- /dev/null
+++ b/docs/en/Makefile
@@ -0,0 +1,12 @@
+SPHINXOPTS    =
+SPHINXBUILD   = sphinx-build
+SOURCEDIR     = .
+BUILDDIR      = _build
+
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/en/_static/css/readthedocs.css b/docs/en/_static/css/readthedocs.css
new file mode 100644
index 000000000..aca6649b4
--- /dev/null
+++ b/docs/en/_static/css/readthedocs.css
@@ -0,0 +1,9 @@
+table.autosummary td {
+  width: 50%
+}
+
+img.align-center {
+  display: block;
+  margin-left: auto;
+  margin-right: auto;
+}
diff --git a/docs/benchmark_results.md b/docs/en/benchmark_results.md
similarity index 100%
rename from docs/benchmark_results.md
rename to docs/en/benchmark_results.md
diff --git a/docs/en/conf.py b/docs/en/conf.py
new file mode 100644
index 000000000..5a7ed2dbf
--- /dev/null
+++ b/docs/en/conf.py
@@ -0,0 +1,125 @@
+import os
+import sys
+
+sys.path.insert(0, os.path.abspath("../.."))
+
+version_file = "../../python/sglang/version.py"
+with open(version_file, "r") as f:
+    exec(compile(f.read(), version_file, "exec"))
+__version__ = locals()["__version__"]
+
+project = "SGLang"
+copyright = "2023-2024, SGLang"
+author = "SGLang Team"
+
+version = __version__
+release = __version__
+
+extensions = [
+    "sphinx.ext.autodoc",
+    "sphinx.ext.autosummary",
+    "sphinx.ext.napoleon",
+    "sphinx.ext.viewcode",
+    "sphinx.ext.autosectionlabel",
+    "sphinx.ext.intersphinx",
+    "sphinx_tabs.tabs",
+    "myst_parser",
+    "sphinx_copybutton",
+    "sphinxcontrib.mermaid",
+]
+
+autosectionlabel_prefix_document = True
+
+templates_path = ["_templates"]
+
+source_suffix = {
+    ".rst": "restructuredtext",
+    ".md": "markdown",
+}
+
+master_doc = "index"
+
+language = "en"
+
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
+
+pygments_style = "sphinx"
+
+html_theme = "sphinx_book_theme"
+html_logo = "_static/image/logo.png"
+html_title = project
+html_copy_source = True
+html_last_updated_fmt = ""
+
+html_theme_options = {
+    "path_to_docs": "docs/en",
+    "repository_url": "https://github.com/sgl-project/sglang",
+    "repository_branch": "main",
+    "show_navbar_depth": 3,
+    "max_navbar_depth": 4,
+    "collapse_navbar": True,
+    "use_edit_page_button": True,
+    "use_source_button": True,
+    "use_issues_button": True,
+    "use_repository_button": True,
+    "use_download_button": True,
+    "use_sidenotes": True,
+    "show_toc_level": 2,
+}
+
+html_static_path = ["_static"]
+html_css_files = ["css/readthedocs.css"]
+
+myst_enable_extensions = [
+    "dollarmath",
+    "amsmath",
+    "deflist",
+    "colon_fence",
+]
+myst_heading_anchors = 5
+
+htmlhelp_basename = "sglangdoc"
+
+latex_elements = {}
+
+latex_documents = [
+    (master_doc, "sglang.tex", "sglang Documentation", "SGLang Team", "manual"),
+]
+
+man_pages = [(master_doc, "sglang", "sglang Documentation", [author], 1)]
+
+texinfo_documents = [
+    (
+        master_doc,
+        "sglang",
+        "sglang Documentation",
+        author,
+        "sglang",
+        "A fast serving framework for large language models and vision language models.",
+        "Miscellaneous",
+    ),
+]
+
+epub_title = project
+
+epub_exclude_files = ["search.html"]
+
+copybutton_prompt_text = r">>> |\.\.\. "
+copybutton_prompt_is_regexp = True
+
+autodoc_preserve_defaults = True
+navigation_with_keys = False
+
+autodoc_mock_imports = [
+    "torch",
+    "transformers",
+    "triton",
+]
+
+intersphinx_mapping = {
+    "python": ("https://docs.python.org/3.12", None),
+    "typing_extensions": ("https://typing-extensions.readthedocs.io/en/latest", None),
+    "pillow": ("https://pillow.readthedocs.io/en/stable", None),
+    "numpy": ("https://numpy.org/doc/stable", None),
+    "torch": ("https://pytorch.org/docs/stable", None),
+}
diff --git a/docs/custom_chat_template.md b/docs/en/custom_chat_template.md
similarity index 100%
rename from docs/custom_chat_template.md
rename to docs/en/custom_chat_template.md
diff --git a/docs/hyperparameter_tuning.md b/docs/en/hyperparameter_tuning.md
similarity index 100%
rename from docs/hyperparameter_tuning.md
rename to docs/en/hyperparameter_tuning.md
diff --git a/docs/en/index.rst b/docs/en/index.rst
new file mode 100644
index 000000000..4621b838b
--- /dev/null
+++ b/docs/en/index.rst
@@ -0,0 +1,65 @@
+Welcome to SGLang's tutorials!
+====================================
+
+.. figure:: ./_static/image/logo.png
+  :width: 50%
+  :align: center
+  :alt: SGLang
+  :class: no-scaled-link
+
+.. raw:: html
+
+   <p style="text-align:center">
+   <strong>SGLang is yet another fast serving framework for large language models and vision language models.
+   </strong>
+   </p>
+
+   <p style="text-align:center">
+   <script async defer src="https://buttons.github.io/buttons.js"></script>
+   <a class="github-button" href="https://github.com/sgl-project/sglang" data-show-count="true" data-size="large" aria-label="Star">Star</a>
+   <a class="github-button" href="https://github.com/sgl-project/sglang/subscription" data-icon="octicon-eye" data-size="large" aria-label="Watch">Watch</a>
+   <a class="github-button" href="https://github.com/sgl-project/sglang/fork" data-icon="octicon-repo-forked" data-size="large" aria-label="Fork">Fork</a>
+   </p>
+
+SGLang has the following core features:
+
+* **Fast Backend Runtime**: Efficient serving with RadixAttention for prefix caching, jump-forward constrained decoding, continuous batching, token attention (paged attention), tensor parallelism, flashinfer kernels, and quantization (AWQ/FP8/GPTQ/Marlin).
+
+* **Flexible Frontend Language**: Enables easy programming of LLM applications with chained generation calls, advanced prompting, control flow, multiple modalities, parallelism, and external interactions.
+
+Documentation
+-------------
+
+.. _hyperparameter_tuning:
+.. toctree::
+   :maxdepth: 1
+   :caption: Hyperparameter Tuning
+
+   hyperparameter_tuning.md
+
+.. _custom_chat_template:
+.. toctree::
+   :maxdepth: 1
+   :caption: Custom Chat Template
+
+   custom_chat_template.md
+
+.. _model_support:
+.. toctree::
+   :maxdepth: 1
+   :caption: Model Support
+
+   model_support.md
+
+.. _sampling_params:
+.. toctree::
+   :maxdepth: 1
+   :caption: Sampling Params
+
+   sampling_params.md
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`search`
diff --git a/docs/model_support.md b/docs/en/model_support.md
similarity index 100%
rename from docs/model_support.md
rename to docs/en/model_support.md
diff --git a/docs/release_process.md b/docs/en/release_process.md
similarity index 100%
rename from docs/release_process.md
rename to docs/en/release_process.md
diff --git a/docs/sampling_params.md b/docs/en/sampling_params.md
similarity index 100%
rename from docs/sampling_params.md
rename to docs/en/sampling_params.md
diff --git a/docs/test_process.md b/docs/en/test_process.md
similarity index 100%
rename from docs/test_process.md
rename to docs/en/test_process.md
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 000000000..826a34bc1
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,12 @@
+markdown>=3.4.0
+myst-parser
+sphinx
+sphinx-book-theme
+sphinx-copybutton
+sphinx-tabs
+sphinxcontrib-mermaid
+pillow
+pydantic
+torch
+transformers
+urllib3<2.0.0
diff --git a/python/sglang/api.py b/python/sglang/api.py
index c32943963..70f992b14 100644
--- a/python/sglang/api.py
+++ b/python/sglang/api.py
@@ -75,7 +75,7 @@ def gen(
     choices: Optional[List[str]] = None,
     regex: Optional[str] = None,
 ):
-    """Call the model to generate. See the meaning of the arguments in docs/sampling_params.md"""
+    """Call the model to generate. See the meaning of the arguments in docs/en/sampling_params.md"""
 
     if choices:
         return SglSelect(name, choices, 0.0 if temperature is None else temperature)
diff --git a/python/sglang/lang/ir.py b/python/sglang/lang/ir.py
index e5d5e837a..23537f350 100644
--- a/python/sglang/lang/ir.py
+++ b/python/sglang/lang/ir.py
@@ -410,7 +410,7 @@ class SglGen(SglExpr):
         dtype: Optional[type] = None,
         regex: Optional[str] = None,
     ):
-        """Call the model to generate. See the meaning of the arguments in docs/sampling_params.md"""
+        """Call the model to generate. See the meaning of the arguments in docs/en/sampling_params.md"""
         super().__init__()
         self.name = name
         self.sampling_params = SglSamplingParams(