diff --git a/docs/Makefile b/docs/Makefile
index 36ad781d0..7c1888f2f 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -39,4 +39,6 @@ compile:
 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
 
 clean:
-	rm -rf $(BUILDDIR)/* logs/timing.log
+	find . -name "*.ipynb" -exec nbstripout {} \;
+	rm -rf $(BUILDDIR)
+	rm -rf logs
diff --git a/docs/README.md b/docs/README.md
index 9cd59bda5..2e4fb53e4 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -20,19 +20,16 @@ Update your Jupyter notebooks in the appropriate subdirectories under `docs/`. I
 # 1) Compile all Jupyter notebooks
 make compile
 
-# 2) Generate static HTML
-make html
-
-# 3) Preview documentation locally
+# 2) Compile and preview documentation locally
 # Open your browser at the displayed port to view the docs
 bash serve.sh
 
-# 4) Clean notebook outputs
+# 3) Clean notebook outputs
 # nbstripout removes notebook outputs so your PR stays clean
 pip install nbstripout
 find . -name '*.ipynb' -exec nbstripout {} \;
 
-# 5) Pre-commit checks and create a PR
+# 4) Pre-commit checks and create a PR
 # After these checks pass, push your changes and open a PR on your branch
 pre-commit run --all-files
 ```
diff --git a/docs/references/custom_chat_template.md b/docs/backend/custom_chat_template.md
similarity index 97%
rename from docs/references/custom_chat_template.md
rename to docs/backend/custom_chat_template.md
index a4418533a..43befa951 100644
--- a/docs/references/custom_chat_template.md
+++ b/docs/backend/custom_chat_template.md
@@ -1,4 +1,4 @@
-# Custom Chat Template in SGLang Runtime
+# Custom Chat Template
 
 **NOTE**: There are two chat template systems in SGLang project. This document is about setting a custom chat template for the OpenAI-compatible API server (defined at [conversation.py](https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/conversation.py)).
 It is NOT related to the chat template used in the SGLang language frontend (defined at [chat_template.py](https://github.com/sgl-project/sglang/blob/main/python/sglang/lang/chat_template.py)).
diff --git a/docs/references/hyperparameter_tuning.md b/docs/backend/hyperparameter_tuning.md
similarity index 98%
rename from docs/references/hyperparameter_tuning.md
rename to docs/backend/hyperparameter_tuning.md
index 92830f644..4b4d7a490 100644
--- a/docs/references/hyperparameter_tuning.md
+++ b/docs/backend/hyperparameter_tuning.md
@@ -1,4 +1,4 @@
-# Guide on Hyperparameter Tuning
+# Hyperparameter Tuning
 
 ## Achieving Peak Throughput
 Achieving a large batch size is the most important thing for attaining high throughput.
diff --git a/docs/backend/native_api.ipynb b/docs/backend/native_api.ipynb
index 7f3a67c80..4e0daa0df 100644
--- a/docs/backend/native_api.ipynb
+++ b/docs/backend/native_api.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Native APIs\n",
+    "# SGLang Native APIs\n",
     "\n",
     "Apart from the OpenAI compatible APIs, the SGLang Runtime also provides its native server APIs. We introduce these following APIs:\n",
     "\n",
diff --git a/docs/references/quantization.md b/docs/backend/quantization.md
similarity index 100%
rename from docs/references/quantization.md
rename to docs/backend/quantization.md
diff --git a/docs/references/sampling_params.md b/docs/backend/sampling_params.md
similarity index 99%
rename from docs/references/sampling_params.md
rename to docs/backend/sampling_params.md
index 279171eb8..91df324f4 100644
--- a/docs/references/sampling_params.md
+++ b/docs/backend/sampling_params.md
@@ -1,4 +1,4 @@
-# Sampling Parameters in SGLang Runtime
+# Sampling Parameters
 
 This doc describes the sampling parameters of the SGLang Runtime.
 It is the low-level endpoint of the runtime.
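As an aside for reviewers: the sampling_params.md page moved above documents the runtime's low-level `/generate` endpoint. A minimal sketch of the kind of request body that page describes — the specific field values here are made up for illustration, and the field names (`text`, `sampling_params`, `temperature`, `top_p`, `max_new_tokens`) should be checked against the renamed page:

```python
import json

# Hypothetical /generate request body; with a server running, this would be
# POSTed to e.g. http://localhost:30000/generate via requests.post(url, json=payload).
payload = {
    "text": "The capital of France is",
    "sampling_params": {
        "temperature": 0.7,    # > 0 enables stochastic sampling
        "top_p": 0.95,         # nucleus-sampling cutoff
        "max_new_tokens": 32,  # cap on generated length
    },
}

body = json.dumps(payload)
print(body)
```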
diff --git a/docs/backend/send_request.ipynb b/docs/backend/send_request.ipynb
index 610538760..6b51b42bf 100644
--- a/docs/backend/send_request.ipynb
+++ b/docs/backend/send_request.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Quick Start: Sending Requests\n",
+    "# Sending Requests\n",
     "This notebook provides a quick-start guide to use SGLang in chat completions after installation.\n",
     "\n",
     "- For Vision Language Models, see [OpenAI APIs - Vision](../backend/openai_api_vision.ipynb).\n",
@@ -16,16 +16,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Launch A Server\n",
-    "\n",
-    "This code block is equivalent to executing \n",
-    "\n",
-    "```bash\n",
-    "python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct \\\n",
-    "    --host 0.0.0.0\n",
-    "```\n",
-    "\n",
-    "in your terminal and wait for the server to be ready. Once the server is running, you can send test requests using curl or requests. The server implements the [OpenAI-compatible APIs](https://platform.openai.com/docs/api-reference/chat)."
+    "## Launch A Server"
   ]
  },
 {
@@ -42,6 +33,9 @@
    "else:\n",
    "    from sglang.utils import launch_server_cmd\n",
    "\n",
+    "# This is equivalent to running the following command in your terminal\n",
+    "\n",
+    "# python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct --host 0.0.0.0\n",
     "\n",
     "server_process, port = launch_server_cmd(\n",
     "    \"\"\"\n",
diff --git a/docs/frontend/frontend.md b/docs/frontend/frontend.md
index 8b56fa487..1a02d6adb 100644
--- a/docs/frontend/frontend.md
+++ b/docs/frontend/frontend.md
@@ -1,4 +1,4 @@
-# Frontend: Structured Generation Language (SGLang)
+# Structured Generation Language
 
 The frontend language can be used with local models or API models. It is an alternative to the OpenAI API. You may find it easier to use for complex prompting workflow.
 ## Quick Start
diff --git a/docs/index.rst b/docs/index.rst
index 62c7383da..a3fbd3c10 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -12,7 +12,7 @@ The core features include:
 
 .. toctree::
    :maxdepth: 1
-   :caption: Getting Started
+   :caption: Installation
 
    start/install.md
 
@@ -26,10 +26,20 @@ The core features include:
    backend/openai_api_embeddings.ipynb
    backend/native_api.ipynb
    backend/offline_engine_api.ipynb
-   backend/structured_outputs.ipynb
-   backend/speculative_decoding.ipynb
-   backend/function_calling.ipynb
    backend/server_arguments.md
+   backend/sampling_params.md
+   backend/hyperparameter_tuning.md
+
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Advanced Features
+
+   backend/speculative_decoding.ipynb
+   backend/structured_outputs.ipynb
+   backend/function_calling.ipynb
+   backend/custom_chat_template.md
+   backend/quantization.md
 
 .. toctree::
    :maxdepth: 1
@@ -44,48 +54,11 @@ The core features include:
    router/router.md
 
-
-References
-==========
-
-General
----------------------
 .. toctree::
-   :maxdepth: 1
+   :maxdepth: 1
+   :caption: References
 
-   references/supported_models.md
-   references/contribution_guide.md
-   references/troubleshooting.md
-   references/faq.md
-   references/learn_more.md
-
-Hardware
---------------------------
-.. toctree::
-   :maxdepth: 1
-
-   references/AMD.md
-   references/amd_configure.md
-   references/nvidia_jetson.md
-
-Advanced Models & Deployment
-------------------------------
-.. toctree::
-   :maxdepth: 1
-
-   references/deepseek.md
-   references/multi_node.md
-   references/multi_node_inference_k8s_lws.md
-   references/modelscope.md
-
-Performance & Tuning
---------------------
-.. toctree::
-   :maxdepth: 1
-
-   references/sampling_params.md
-   references/hyperparameter_tuning.md
-   references/benchmark_and_profiling.md
-   references/accuracy_evaluation.md
-   references/custom_chat_template.md
-   references/quantization.md
+   references/general
+   references/hardware
+   references/advanced_deploy
+   references/performance_tuning
diff --git a/docs/references/advanced_deploy.rst b/docs/references/advanced_deploy.rst
new file mode 100644
index 000000000..24f46c4aa
--- /dev/null
+++ b/docs/references/advanced_deploy.rst
@@ -0,0 +1,8 @@
+Multi-Node Deployment
+==========================
+.. toctree::
+   :maxdepth: 1
+
+   deepseek.md
+   multi_node.md
+   k8s.md
diff --git a/docs/references/general.rst b/docs/references/general.rst
new file mode 100644
index 000000000..fedb2be76
--- /dev/null
+++ b/docs/references/general.rst
@@ -0,0 +1,13 @@
+
+General Guidance
+================
+
+.. toctree::
+   :maxdepth: 1
+
+   supported_models.md
+   contribution_guide.md
+   troubleshooting.md
+   faq.md
+   learn_more.md
+   modelscope.md
diff --git a/docs/references/hardware.rst b/docs/references/hardware.rst
new file mode 100644
index 000000000..0500e5435
--- /dev/null
+++ b/docs/references/hardware.rst
@@ -0,0 +1,7 @@
+Hardware Support
+================
+.. toctree::
+   :maxdepth: 1
+
+   amd.md
+   nvidia_jetson.md
diff --git a/docs/references/multi_node_inference_k8s_lws.md b/docs/references/k8s.md
similarity index 99%
rename from docs/references/multi_node_inference_k8s_lws.md
rename to docs/references/k8s.md
index 56fa68113..07bc5341b 100644
--- a/docs/references/multi_node_inference_k8s_lws.md
+++ b/docs/references/k8s.md
@@ -1,4 +1,6 @@
-# Deploying a RoCE Network-Based SGLANG Two-Node Inference Service on a Kubernetes (K8S) Cluster
+# Kubernetes
+
+This doc describes deploying a RoCE network-based SGLang two-node inference service on a Kubernetes (K8s) cluster.
 
 LeaderWorkerSet (LWS) is a Kubernetes API that aims to address common deployment patterns of AI/ML inference workloads.
 A major use case is for multi-host/multi-node distributed inference.
diff --git a/docs/references/multi_node.md b/docs/references/multi_node.md
index 8d5f3d4d3..fbf803f23 100644
--- a/docs/references/multi_node.md
+++ b/docs/references/multi_node.md
@@ -1,4 +1,4 @@
-# Run Multi-Node Inference
+# Multi-Node Deployment
 
 ## Llama 3.1 405B
 
diff --git a/docs/references/performance_tuning.rst b/docs/references/performance_tuning.rst
new file mode 100644
index 000000000..6cc20e061
--- /dev/null
+++ b/docs/references/performance_tuning.rst
@@ -0,0 +1,7 @@
+Performance Tuning
+====================
+.. toctree::
+   :maxdepth: 1
+
+   benchmark_and_profiling.md
+   accuracy_evaluation.md
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 948d5427c..59c475775 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -13,6 +13,7 @@ sphinx
 sphinx-book-theme
 sphinx-copybutton
 sphinx-tabs
+nbstripout
 sphinxcontrib-mermaid
 urllib3<2.0.0
 gguf>=0.10.0
diff --git a/docs/serve.sh b/docs/serve.sh
index 42c639678..5a12e9093 100644
--- a/docs/serve.sh
+++ b/docs/serve.sh
@@ -1 +1,3 @@
+make clean
+make html
 python3 -m http.server --d _build/html
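A note on the new `make clean` target above: it strips outputs from every notebook via `find . -name "*.ipynb" -exec nbstripout {} \;`, i.e. it recurses over all notebooks under `docs/`. A minimal, self-contained sketch of that file discovery (it only shows which files the `find` would hand to `nbstripout`, using throwaway temp files rather than the real docs tree, and does not strip anything itself):

```python
from pathlib import Path
import tempfile

# Build a tiny stand-in for the docs/ tree in a temp directory.
root = Path(tempfile.mkdtemp())
(root / "backend").mkdir()
(root / "backend" / "native_api.ipynb").write_text("{}")
(root / "index.rst").write_text("")

# Equivalent of `find . -name "*.ipynb"`: every notebook, recursively.
notebooks = sorted(root.rglob("*.ipynb"))
# Each path in `notebooks` is what the Makefile passes to nbstripout.
print([p.name for p in notebooks])
```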