diff --git a/.github/workflows/deploy-docs.yml b/.github/workflows/deploy-docs.yml index 6aa908c0c..e229eae42 100644 --- a/.github/workflows/deploy-docs.yml +++ b/.github/workflows/deploy-docs.yml @@ -1,20 +1,15 @@ name: Build Documentation on: - push: - branches: [ main ] - pull_request: - branches: [ main ] workflow_dispatch: jobs: execute-notebooks: runs-on: 1-gpu-runner - if: github.event_name == 'pull_request' || github.event_name == 'workflow_dispatch' + if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 0 + - name: Checkout code + uses: actions/checkout@v3 - name: Set up Python uses: actions/setup-python@v4 @@ -23,22 +18,14 @@ jobs: - name: Install dependencies run: | - pip install --upgrade pip - pip install -e "python[all]" + bash scripts/ci_install_dependency.sh pip install -r docs/requirements.txt - pip install nbconvert jupyter_client ipykernel ipywidgets matplotlib - pip install transformers==4.45.2 - pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall - name: Setup Jupyter Kernel run: | python -m ipykernel install --user --name python3 --display-name "Python 3" - name: Execute notebooks - env: - HF_HOME: /hf_home - SGLANG_IS_IN_CI: true - CUDA_VISIBLE_DEVICES: 0 run: | cd docs/en for nb in *.ipynb; do @@ -54,34 +41,18 @@ jobs: if: github.event_name == 'push' && github.ref == 'refs/heads/main' runs-on: 1-gpu-runner steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 0 + - name: Checkout code + uses: actions/checkout@v3 - name: Set up Python uses: actions/setup-python@v4 with: python-version: '3.9' - - name: Cache Python dependencies - uses: actions/cache@v3 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-pip- - - name: Install dependencies run: | - pip install --upgrade pip - pip install -e "python[all]" + bash 
scripts/ci_install_dependency.sh pip install -r docs/requirements.txt - pip install nbconvert jupyter_client ipykernel ipywidgets matplotlib - pip install transformers==4.45.2 - pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall - - - name: Install Pandoc - run: | apt-get update apt-get install -y pandoc diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 4857f844f..2ca571f48 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -8,10 +8,10 @@ jobs: steps: - uses: actions/checkout@v2 - - name: Set up Python 3.9 - uses: actions/setup-python@v2 + - name: Set up Python + uses: actions/setup-python@v4 with: - python-version: 3.9 + python-version: '3.9' - name: Install pre-commit hook run: | diff --git a/.github/workflows/nightly-eval.yml b/.github/workflows/nightly-eval.yml index 4ac911c9a..afc29beee 100644 --- a/.github/workflows/nightly-eval.yml +++ b/.github/workflows/nightly-eval.yml @@ -24,9 +24,7 @@ jobs: - name: Install dependencies run: | - pip install --upgrade pip - pip install -e "python[all]" - pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + bash scripts/ci_install_dependency.sh - name: Nightly gsm8k Accuracy timeout-minutes: 60 diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml index a9ec66794..c1bf8da5b 100644 --- a/.github/workflows/pr-test.yml +++ b/.github/workflows/pr-test.yml @@ -27,10 +27,7 @@ jobs: - name: Install dependencies run: | - pip install --upgrade pip - pip install -e "python[dev]" - pip install transformers==4.45.2 - pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + bash scripts/ci_install_dependency.sh - name: Run test timeout-minutes: 10 @@ -47,10 +44,7 @@ jobs: - name: Install dependencies run: | - pip install --upgrade pip - pip install -e "python[dev]" - pip install transformers==4.45.2 - pip install flashinfer -i 
https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + bash scripts/ci_install_dependency.sh - name: Run test timeout-minutes: 20 @@ -67,10 +61,7 @@ jobs: - name: Install dependencies run: | - pip install --upgrade pip - pip install -e "python[dev]" - pip install transformers==4.45.2 - pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + bash scripts/ci_install_dependency.sh - name: Run test timeout-minutes: 20 @@ -87,10 +78,7 @@ jobs: - name: Install dependencies run: | - pip install --upgrade pip - pip install -e "python[dev]" - pip install transformers==4.45.2 - pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + bash scripts/ci_install_dependency.sh - name: Run test timeout-minutes: 20 @@ -107,10 +95,7 @@ jobs: - name: Install dependencies run: | - pip install --upgrade pip - pip install -e "python[dev]" - pip install transformers==4.45.2 - pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + bash scripts/ci_install_dependency.sh - name: Run test timeout-minutes: 20 @@ -127,10 +112,7 @@ jobs: - name: Install dependencies run: | - pip install --upgrade pip - pip install -e "python[all]" - pip install transformers==4.45.2 - pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + bash scripts/ci_install_dependency.sh - name: Benchmark Single Latency timeout-minutes: 10 @@ -165,10 +147,7 @@ jobs: - name: Install dependencies run: | - pip install --upgrade pip - pip install -e "python[all]" - pip install transformers==4.45.2 - pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + bash scripts/ci_install_dependency.sh - name: Benchmark Offline Throughput (w/o RadixAttention) timeout-minutes: 10 @@ -197,10 +176,7 @@ jobs: - name: Install dependencies run: | - pip install --upgrade pip - pip install -e "python[all]" - pip install transformers==4.45.2 - pip install flashinfer -i 
https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + bash scripts/ci_install_dependency.sh - name: Benchmark Offline Throughput (TP=2) timeout-minutes: 10 @@ -229,10 +205,7 @@ jobs: - name: Install dependencies run: | - pip install --upgrade pip - pip install -e "python[all]" - pip install transformers==4.45.2 - pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + bash scripts/ci_install_dependency.sh git clone https://github.com/merrymercy/human-eval.git cd human-eval @@ -253,10 +226,7 @@ jobs: - name: Install dependencies run: | - pip install --upgrade pip - pip install -e "python[all]" - pip install transformers==4.45.2 - pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + bash scripts/ci_install_dependency.sh git clone https://github.com/merrymercy/human-eval.git cd human-eval diff --git a/.github/workflows/release-github.yml b/.github/workflows/release-github.yml deleted file mode 100644 index 12a2309a6..000000000 --- a/.github/workflows/release-github.yml +++ /dev/null @@ -1,25 +0,0 @@ -name: Release GitHub -on: - workflow_dispatch: -jobs: - publish: - if: github.repository == 'sgl-project/sglang' - runs-on: ubuntu-latest - environment: 'prod' - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - - name: Get version - id: get_version - run: | - version=$(cat python/sglang/version.py | cut -d'"' -f2) - echo "TAG=v$version" >> $GITHUB_OUTPUT - - - name: Release - uses: softprops/action-gh-release@v1 - env: - GITHUB_TOKEN: ${{ secrets.REPO_TOKEN }} - with: - name: Release ${{ steps.get_version.outputs.TAG }} - tag_name: ${{ steps.get_version.outputs.TAG }} diff --git a/.github/workflows/release-pypi.yml b/.github/workflows/release-pypi.yml index c79e46cb7..cd44696d4 100644 --- a/.github/workflows/release-pypi.yml +++ b/.github/workflows/release-pypi.yml @@ -13,12 +13,14 @@ jobs: runs-on: ubuntu-latest environment: 'prod' steps: - - name: Set up python3.8 + - name: 
Set up Python uses: actions/setup-python@v4 with: - python-version: '3.8' + python-version: '3.9' + - name: Checkout repository uses: actions/checkout@v3 + - name: Upload to pypi run: | cd python diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 000000000..3e4a1b5e5 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,23 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +clean: + rm -rf $(BUILDDIR)/* diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 000000000..052acbcb0 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,33 @@ +# SGLang Documentation + +## Build the documentation website + +### Dependency +``` +pip install -r requirements.txt +``` + +### Build +``` +make html +``` + +### Clean +To remove all generated files: +``` +make clean +``` + +### Serve (preview) +Run an HTTP server and visit http://localhost:8000 in your browser. +``` +python3 -m http.server -d _build/html +``` + +### Deploy +Clone [sgl-project.github.io](https://github.com/sgl-project/sgl-project.github.io) and make sure you have write access.
+ +```bash +export DOC_SITE_PATH=../../sgl-project.github.io # update this with your path +python3 deploy.py +``` \ No newline at end of file diff --git a/docs/en/_static/css/readthedocs.css b/docs/_static/css/readthedocs.css similarity index 100% rename from docs/en/_static/css/readthedocs.css rename to docs/_static/css/readthedocs.css diff --git a/docs/en/_static/image/logo.png b/docs/_static/image/logo.png similarity index 100% rename from docs/en/_static/image/logo.png rename to docs/_static/image/logo.png diff --git a/docs/en/backend.md b/docs/backend.md similarity index 95% rename from docs/en/backend.md rename to docs/backend.md index 0b1103511..311884091 100644 --- a/docs/en/backend.md +++ b/docs/backend.md @@ -20,7 +20,7 @@ curl http://localhost:30000/generate \ }' ``` -Learn more about the argument specification, streaming, and multi-modal support [here](https://sglang.readthedocs.io/en/latest/sampling_params.html). +Learn more about the argument specification, streaming, and multi-modal support [here](https://sgl-project.github.io/sampling_params.html). ### OpenAI Compatible API In addition, the server supports OpenAI-compatible APIs. @@ -74,7 +74,7 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct ``` python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --mem-fraction-static 0.7 ``` -- See [hyperparameter tuning](https://sglang.readthedocs.io/en/latest/hyperparameter_tuning.html) on tuning hyperparameters for better performance. +- See [hyperparameter tuning](https://sgl-project.github.io/hyperparameter_tuning.html) on tuning hyperparameters for better performance. - If you see out-of-memory errors during prefill for long prompts, try to set a smaller chunked prefill size. 
``` python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --chunked-prefill-size 4096 @@ -83,7 +83,7 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct - To enable torchao quantization, add `--torchao-config int4wo-128`. It supports various quantization strategies. - To enable fp8 weight quantization, add `--quantization fp8` on a fp16 checkpoint or directly load a fp8 checkpoint without specifying any arguments. - To enable fp8 kv cache quantization, add `--kv-cache-dtype fp8_e5m2`. -- If the model does not have a chat template in the Hugging Face tokenizer, you can specify a [custom chat template](https://sglang.readthedocs.io/en/latest/custom_chat_template.html). +- If the model does not have a chat template in the Hugging Face tokenizer, you can specify a [custom chat template](https://sgl-project.github.io/custom_chat_template.html). - To run tensor parallelism on multiple nodes, add `--nnodes 2`. If you have two nodes with two GPUs on each node and want to run TP=4, let `sgl-dev-0` be the hostname of the first node and `50000` be an available port, you can use the following commands. If you meet deadlock, please try to add `--disable-cuda-graph` ``` # Node 0 @@ -158,7 +158,7 @@ You can view the full example [here](https://github.com/sgl-project/sglang/tree/ - gte-Qwen2 - `python -m sglang.launch_server --model-path Alibaba-NLP/gte-Qwen2-7B-instruct --is-embedding` -Instructions for supporting a new model are [here](https://sglang.readthedocs.io/en/latest/model_support.html). +Instructions for supporting a new model are [here](https://sgl-project.github.io/model_support.html). #### Use Models From ModelScope
diff --git a/docs/en/benchmark_and_profiling.md b/docs/benchmark_and_profiling.md similarity index 100% rename from docs/en/benchmark_and_profiling.md rename to docs/benchmark_and_profiling.md diff --git a/docs/en/choices_methods.md b/docs/choices_methods.md similarity index 100% rename from docs/en/choices_methods.md rename to docs/choices_methods.md diff --git a/docs/en/conf.py b/docs/conf.py similarity index 98% rename from docs/en/conf.py rename to docs/conf.py index 5e01613ec..86b467fad 100644 --- a/docs/en/conf.py +++ b/docs/conf.py @@ -3,7 +3,7 @@ import sys sys.path.insert(0, os.path.abspath("../..")) -version_file = "../../python/sglang/version.py" +version_file = "../python/sglang/version.py" with open(version_file, "r") as f: exec(compile(f.read(), version_file, "exec")) __version__ = locals()["__version__"] diff --git a/docs/en/contributor_guide.md b/docs/contributor_guide.md similarity index 56% rename from docs/en/contributor_guide.md rename to docs/contributor_guide.md index 1ebdd0379..a9b25163d 100644 --- a/docs/en/contributor_guide.md +++ b/docs/contributor_guide.md @@ -11,4 +11,4 @@ pre-commit run --all-files ``` ## Add Unit Tests -Add unit tests under [sglang/test](../../test). You can learn how to add and run tests from the README.md in that folder. +Add unit tests under [sglang/test](https://github.com/sgl-project/sglang/tree/main/test). You can learn how to add and run tests from the README.md in that folder. diff --git a/docs/en/custom_chat_template.md b/docs/custom_chat_template.md similarity index 76% rename from docs/en/custom_chat_template.md rename to docs/custom_chat_template.md index 3760bbc6a..64b33a0a4 100644 --- a/docs/en/custom_chat_template.md +++ b/docs/custom_chat_template.md @@ -1,6 +1,6 @@ # Custom Chat Template in SGLang Runtime -**NOTE**: There are two chat template systems in SGLang project. 
This document is about setting a custom chat template for the OpenAI-compatible API server (defined at [conversation.py](../../python/sglang/srt/conversation.py)). It is NOT related to the chat template used in the SGLang language frontend (defined at [chat_template.py](../../python/sglang/lang/chat_template.py)). +**NOTE**: There are two chat template systems in SGLang project. This document is about setting a custom chat template for the OpenAI-compatible API server (defined at [conversation.py](https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/conversation.py)). It is NOT related to the chat template used in the SGLang language frontend (defined at [chat_template.py](https://github.com/sgl-project/sglang/blob/main/python/sglang/lang/chat_template.py)). By default, the server uses the chat template specified in the model tokenizer from Hugging Face. It should just work for most official models such as Llama-2/Llama-3. diff --git a/docs/deploy.py b/docs/deploy.py new file mode 100644 index 000000000..1f1ec087c --- /dev/null +++ b/docs/deploy.py @@ -0,0 +1,22 @@ +#!/usr/bin/python3 + +import os +from datetime import datetime + + +def run_cmd(cmd): + print(cmd) + os.system(cmd) + + +run_cmd("cd $DOC_SITE_PATH; git pull") + +# (Optional) Remove old files +# run_cmd("rm -rf $ALPA_SITE_PATH/*") + +run_cmd("cp -r _build/html/* $DOC_SITE_PATH") + +cmd_message = f"Update {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" +run_cmd( + f"cd $DOC_SITE_PATH; git add .; git commit -m '{cmd_message}'; git push origin main" +) diff --git a/docs/deploy_docs.sh b/docs/deploy_docs.sh new file mode 100644 index 000000000..e69de29bb diff --git a/docs/en/embedding_model.ipynb b/docs/embedding_model.ipynb similarity index 100% rename from docs/en/embedding_model.ipynb rename to docs/embedding_model.ipynb diff --git a/docs/en/Makefile b/docs/en/Makefile deleted file mode 100644 index 9ad4b38e0..000000000 --- a/docs/en/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -SPHINXOPTS = 
-SPHINXBUILD = sphinx-build -SOURCEDIR = . -BUILDDIR = _build - -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/en/frontend.md b/docs/frontend.md similarity index 100% rename from docs/en/frontend.md rename to docs/frontend.md diff --git a/docs/en/hyperparameter_tuning.md b/docs/hyperparameter_tuning.md similarity index 100% rename from docs/en/hyperparameter_tuning.md rename to docs/hyperparameter_tuning.md diff --git a/docs/en/index.rst b/docs/index.rst similarity index 97% rename from docs/en/index.rst rename to docs/index.rst index 2f28ad87c..b21236e3d 100644 --- a/docs/en/index.rst +++ b/docs/index.rst @@ -10,24 +10,28 @@ The core features include: - **Extensive Model Support**: Supports a wide range of generative models (Llama 3, Gemma 2, Mistral, QWen, DeepSeek, LLaVA, etc.) and embedding models (e5-mistral), with easy extensibility for integrating new models. - **Active Community**: SGLang is open-source and backed by an active community with industry adoption. + .. toctree:: :maxdepth: 1 :caption: Getting Started install.md - send_request.ipynb + .. toctree:: :maxdepth: 1 :caption: Backend Tutorial + backend.md .. toctree:: :maxdepth: 1 :caption: Frontend Tutorial + frontend.md + .. toctree:: :maxdepth: 1 :caption: References @@ -39,4 +43,3 @@ The core features include: choices_methods.md benchmark_and_profiling.md troubleshooting.md - embedding_model.ipynb \ No newline at end of file diff --git a/docs/en/install.md b/docs/install.md similarity index 93% rename from docs/en/install.md rename to docs/install.md index fd7f24bbc..5a17d6bd6 100644 --- a/docs/en/install.md +++ b/docs/install.md @@ -48,9 +48,9 @@ docker run --gpus all \ More > This method is recommended if you plan to serve it as a service. -> A better approach is to use the [k8s-sglang-service.yaml](./docker/k8s-sglang-service.yaml). 
+> A better approach is to use the [k8s-sglang-service.yaml](https://github.com/sgl-project/sglang/blob/main/docker/k8s-sglang-service.yaml). -1. Copy the [compose.yml](./docker/compose.yaml) to your local machine +1. Copy the [compose.yml](https://github.com/sgl-project/sglang/blob/main/docker/compose.yaml) to your local machine 2. Execute the command `docker compose up -d` in your terminal.
diff --git a/docs/en/model_support.md b/docs/model_support.md similarity index 100% rename from docs/en/model_support.md rename to docs/model_support.md diff --git a/docs/en/release_process.md b/docs/release_process.md similarity index 100% rename from docs/en/release_process.md rename to docs/release_process.md diff --git a/docs/requirements.txt b/docs/requirements.txt index 03510959b..ad722fd21 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,12 +1,17 @@ +ipykernel +ipywidgets +jupyter_client markdown>=3.4.0 +matplotlib myst-parser +nbconvert +nbsphinx +pandoc +pillow +pydantic sphinx sphinx-book-theme sphinx-copybutton sphinx-tabs sphinxcontrib-mermaid -pillow -pydantic -urllib3<2.0.0 -nbsphinx -pandoc \ No newline at end of file +urllib3<2.0.0 \ No newline at end of file diff --git a/docs/en/sampling_params.md b/docs/sampling_params.md similarity index 99% rename from docs/en/sampling_params.md rename to docs/sampling_params.md index 690b206d3..78d5193c2 100644 --- a/docs/en/sampling_params.md +++ b/docs/sampling_params.md @@ -194,7 +194,7 @@ Since we compute penalty algorithms through CUDA, the logic stores relevant para You can run your own benchmark with desired parameters on your own hardware to make sure it's not OOMing before using. -Tuning `--mem-fraction-static` and/or `--max-running-requests` will help. See [here](hyperparameter_tuning.md#minor-tune---max-prefill-tokens---mem-fraction-static---max-running-requests) for more information. +Tuning `--mem-fraction-static` and/or `--max-running-requests` will help. 
### Benchmarks diff --git a/docs/en/send_request.ipynb b/docs/send_request.ipynb similarity index 100% rename from docs/en/send_request.ipynb rename to docs/send_request.ipynb diff --git a/docs/serve.sh b/docs/serve.sh new file mode 100644 index 000000000..42c639678 --- /dev/null +++ b/docs/serve.sh @@ -0,0 +1 @@ +python3 -m http.server -d _build/html diff --git a/docs/en/setup_github_runner.md b/docs/setup_github_runner.md similarity index 100% rename from docs/en/setup_github_runner.md rename to docs/setup_github_runner.md diff --git a/docs/en/troubleshooting.md b/docs/troubleshooting.md similarity index 74% rename from docs/en/troubleshooting.md rename to docs/troubleshooting.md index c6c016fd1..02793c959 100644 --- a/docs/en/troubleshooting.md +++ b/docs/troubleshooting.md @@ -5,9 +5,9 @@ This page lists some common errors and tips for fixing them. ## CUDA error: an illegal memory access was encountered This error may be due to kernel errors or out-of-memory issues. - If it is a kernel error, it is not easy to fix. -- If it is out-of-memory, sometimes it will report this error instead of "Out-of-memory." In this case, try setting a smaller value for `--mem-fraction-static`. The default value of `--mem-fraction-static` is around 0.8 - 0.9. https://github.com/sgl-project/sglang/blob/1edd4e07d6ad52f4f63e7f6beaa5987c1e1cf621/python/sglang/srt/server_args.py#L92-L102 +- If it is out-of-memory, sometimes it will report this error instead of "Out-of-memory." In this case, try setting a smaller value for `--mem-fraction-static`. The default value of `--mem-fraction-static` is around 0.8 - 0.9. ## The server hangs If the server hangs, try disabling some optimizations when launching the server. - Add `--disable-cuda-graph`. -- Add `--disable-flashinfer-sampling`. +- Add `--sampling-backend pytorch`.
diff --git a/scripts/ci_install_dependency.sh b/scripts/ci_install_dependency.sh new file mode 100644 index 000000000..019609bbb --- /dev/null +++ b/scripts/ci_install_dependency.sh @@ -0,0 +1,5 @@ +set -ex +pip install --upgrade pip +pip install -e "python[all]" +pip install transformers==4.45.2 +pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall diff --git a/test/killall_sglang.sh b/scripts/killall_sglang.sh similarity index 100% rename from test/killall_sglang.sh rename to scripts/killall_sglang.sh