Update links (#1805)
This commit is contained in:
4
.github/pull_request_template.md
vendored
4
.github/pull_request_template.md
vendored
@@ -10,6 +10,6 @@
|
|||||||
|
|
||||||
## Checklist
|
## Checklist
|
||||||
|
|
||||||
- [ ] Format your code according to the [Contributor Guide](https://github.com/sgl-project/sglang/blob/main/docs/en/contributor_guide.md).
|
- [ ] Format your code according to the [Contributor Guide](https://github.com/sgl-project/sglang/blob/main/docs/contributor_guide.md).
|
||||||
- [ ] Add unit tests as outlined in the [Contributor Guide](https://github.com/sgl-project/sglang/blob/main/docs/en/contributor_guide.md).
|
- [ ] Add unit tests as outlined in the [Contributor Guide](https://github.com/sgl-project/sglang/blob/main/docs/contributor_guide.md).
|
||||||
- [ ] Update documentation as needed, including docstrings or example tutorials.
|
- [ ] Update documentation as needed, including docstrings or example tutorials.
|
||||||
10
.github/workflows/deploy-docs.yml
vendored
10
.github/workflows/deploy-docs.yml
vendored
@@ -27,7 +27,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Execute notebooks
|
- name: Execute notebooks
|
||||||
run: |
|
run: |
|
||||||
cd docs/en
|
cd docs
|
||||||
for nb in *.ipynb; do
|
for nb in *.ipynb; do
|
||||||
if [ -f "$nb" ]; then
|
if [ -f "$nb" ]; then
|
||||||
echo "Executing $nb"
|
echo "Executing $nb"
|
||||||
@@ -38,7 +38,7 @@ jobs:
|
|||||||
done
|
done
|
||||||
|
|
||||||
build-and-deploy:
|
build-and-deploy:
|
||||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
if: github.repository == 'sgl-project/sglang'
|
||||||
runs-on: 1-gpu-runner
|
runs-on: 1-gpu-runner
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
@@ -58,21 +58,21 @@ jobs:
|
|||||||
|
|
||||||
- name: Build documentation
|
- name: Build documentation
|
||||||
run: |
|
run: |
|
||||||
cd docs/en
|
cd docs
|
||||||
make html
|
make html
|
||||||
|
|
||||||
- name: Push to sgl-project.github.io
|
- name: Push to sgl-project.github.io
|
||||||
env:
|
env:
|
||||||
GITHUB_TOKEN: ${{ secrets.PAT_TOKEN }}
|
GITHUB_TOKEN: ${{ secrets.PAT_TOKEN }}
|
||||||
run: |
|
run: |
|
||||||
cd docs/en/_build/html
|
cd docs/_build/html
|
||||||
git clone https://$GITHUB_TOKEN@github.com/sgl-project/sgl-project.github.io.git ../sgl-project.github.io
|
git clone https://$GITHUB_TOKEN@github.com/sgl-project/sgl-project.github.io.git ../sgl-project.github.io
|
||||||
cp -r * ../sgl-project.github.io
|
cp -r * ../sgl-project.github.io
|
||||||
cd ../sgl-project.github.io
|
cd ../sgl-project.github.io
|
||||||
git config user.name "zhaochenyang20"
|
git config user.name "zhaochenyang20"
|
||||||
git config user.email "zhaochenyang20@gmail.com"
|
git config user.email "zhaochenyang20@gmail.com"
|
||||||
git add .
|
git add .
|
||||||
git commit -m "$(date +'%Y-%m-%d %H:%M:%S') - Update documentation"
|
git commit -m "Update $(date +'%Y-%m-%d %H:%M:%S')"
|
||||||
git push https://$GITHUB_TOKEN@github.com/sgl-project/sgl-project.github.io.git main
|
git push https://$GITHUB_TOKEN@github.com/sgl-project/sgl-project.github.io.git main
|
||||||
cd ..
|
cd ..
|
||||||
rm -rf sgl-project.github.io
|
rm -rf sgl-project.github.io
|
||||||
|
|||||||
7
.gitignore
vendored
7
.gitignore
vendored
@@ -167,7 +167,7 @@ cython_debug/
|
|||||||
*.swp
|
*.swp
|
||||||
|
|
||||||
# Documentation
|
# Documentation
|
||||||
docs/en/_build
|
docs/_build
|
||||||
|
|
||||||
# SGL
|
# SGL
|
||||||
benchmark/mmlu/data
|
benchmark/mmlu/data
|
||||||
@@ -185,7 +185,4 @@ tmp*.txt
|
|||||||
work_dirs/
|
work_dirs/
|
||||||
*.csv
|
*.csv
|
||||||
|
|
||||||
!logo.png
|
!logo.png
|
||||||
|
|
||||||
# docs
|
|
||||||
/docs/en/_build
|
|
||||||
@@ -1,17 +0,0 @@
|
|||||||
version: 2
|
|
||||||
|
|
||||||
formats: all
|
|
||||||
|
|
||||||
build:
|
|
||||||
os: "ubuntu-22.04"
|
|
||||||
tools:
|
|
||||||
python: "3.12"
|
|
||||||
|
|
||||||
|
|
||||||
sphinx:
|
|
||||||
configuration: docs/en/conf.py
|
|
||||||
|
|
||||||
|
|
||||||
python:
|
|
||||||
install:
|
|
||||||
- requirements: docs/requirements.txt
|
|
||||||
@@ -171,7 +171,7 @@ curl http://localhost:30000/generate \
|
|||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
|
|
||||||
Learn more about the argument specification, streaming, and multi-modal support [here](docs/en/sampling_params.md).
|
Learn more about the argument specification, streaming, and multi-modal support [here](docs/sampling_params.md).
|
||||||
|
|
||||||
### OpenAI Compatible API
|
### OpenAI Compatible API
|
||||||
In addition, the server supports OpenAI-compatible APIs.
|
In addition, the server supports OpenAI-compatible APIs.
|
||||||
@@ -225,7 +225,7 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct
|
|||||||
```
|
```
|
||||||
python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --mem-fraction-static 0.7
|
python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --mem-fraction-static 0.7
|
||||||
```
|
```
|
||||||
- See [hyperparameter_tuning.md](docs/en/hyperparameter_tuning.md) on tuning hyperparameters for better performance.
|
- See [hyperparameter_tuning.md](docs/hyperparameter_tuning.md) on tuning hyperparameters for better performance.
|
||||||
- If you see out-of-memory errors during prefill for long prompts, try to set a smaller chunked prefill size.
|
- If you see out-of-memory errors during prefill for long prompts, try to set a smaller chunked prefill size.
|
||||||
```
|
```
|
||||||
python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --chunked-prefill-size 4096
|
python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --chunked-prefill-size 4096
|
||||||
@@ -235,7 +235,7 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct
|
|||||||
- To enable torchao quantization, add `--torchao-config int4wo-128`. It supports various quantization strategies.
|
- To enable torchao quantization, add `--torchao-config int4wo-128`. It supports various quantization strategies.
|
||||||
- To enable fp8 weight quantization, add `--quantization fp8` on a fp16 checkpoint or directly load a fp8 checkpoint without specifying any arguments.
|
- To enable fp8 weight quantization, add `--quantization fp8` on a fp16 checkpoint or directly load a fp8 checkpoint without specifying any arguments.
|
||||||
- To enable fp8 kv cache quantization, add `--kv-cache-dtype fp8_e5m2`.
|
- To enable fp8 kv cache quantization, add `--kv-cache-dtype fp8_e5m2`.
|
||||||
- If the model does not have a chat template in the Hugging Face tokenizer, you can specify a [custom chat template](docs/en/custom_chat_template.md).
|
- If the model does not have a chat template in the Hugging Face tokenizer, you can specify a [custom chat template](docs/custom_chat_template.md).
|
||||||
- To run tensor parallelism on multiple nodes, add `--nnodes 2`. If you have two nodes with two GPUs on each node and want to run TP=4, let `sgl-dev-0` be the hostname of the first node and `50000` be an available port, you can use the following commands. If you meet deadlock, please try to add `--disable-cuda-graph`
|
- To run tensor parallelism on multiple nodes, add `--nnodes 2`. If you have two nodes with two GPUs on each node and want to run TP=4, let `sgl-dev-0` be the hostname of the first node and `50000` be an available port, you can use the following commands. If you meet deadlock, please try to add `--disable-cuda-graph`
|
||||||
```
|
```
|
||||||
# Node 0
|
# Node 0
|
||||||
@@ -311,7 +311,7 @@ You can view the full example [here](https://github.com/sgl-project/sglang/tree/
|
|||||||
- gte-Qwen2
|
- gte-Qwen2
|
||||||
- `python -m sglang.launch_server --model-path Alibaba-NLP/gte-Qwen2-7B-instruct --is-embedding`
|
- `python -m sglang.launch_server --model-path Alibaba-NLP/gte-Qwen2-7B-instruct --is-embedding`
|
||||||
|
|
||||||
Instructions for supporting a new model are [here](docs/en/model_support.md).
|
Instructions for supporting a new model are [here](docs/model_support.md).
|
||||||
|
|
||||||
#### Use Models From ModelScope
|
#### Use Models From ModelScope
|
||||||
<details>
|
<details>
|
||||||
|
|||||||
@@ -55,7 +55,6 @@ html_copy_source = True
|
|||||||
html_last_updated_fmt = ""
|
html_last_updated_fmt = ""
|
||||||
|
|
||||||
html_theme_options = {
|
html_theme_options = {
|
||||||
"path_to_docs": "docs/en",
|
|
||||||
"repository_url": "https://github.com/sgl-project/sglang",
|
"repository_url": "https://github.com/sgl-project/sglang",
|
||||||
"repository_branch": "main",
|
"repository_branch": "main",
|
||||||
"show_navbar_depth": 3,
|
"show_navbar_depth": 3,
|
||||||
|
|||||||
@@ -99,7 +99,7 @@ def gen(
|
|||||||
regex: Optional[str] = None,
|
regex: Optional[str] = None,
|
||||||
json_schema: Optional[str] = None,
|
json_schema: Optional[str] = None,
|
||||||
):
|
):
|
||||||
"""Call the model to generate. See the meaning of the arguments in docs/en/sampling_params.md"""
|
"""Call the model to generate. See the meaning of the arguments in docs/sampling_params.md"""
|
||||||
|
|
||||||
if choices:
|
if choices:
|
||||||
return SglSelect(
|
return SglSelect(
|
||||||
|
|||||||
@@ -445,7 +445,7 @@ class SglGen(SglExpr):
|
|||||||
regex: Optional[str] = None,
|
regex: Optional[str] = None,
|
||||||
json_schema: Optional[str] = None,
|
json_schema: Optional[str] = None,
|
||||||
):
|
):
|
||||||
"""Call the model to generate. See the meaning of the arguments in docs/en/sampling_params.md"""
|
"""Call the model to generate. See the meaning of the arguments in docs/sampling_params.md"""
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.name = name
|
self.name = name
|
||||||
self.sampling_params = SglSamplingParams(
|
self.sampling_params = SglSamplingParams(
|
||||||
|
|||||||
Reference in New Issue
Block a user