minor: Add basic editorconfig and pre-commit hooks to enforce style for whitespaces (#1926)
This commit is contained in:
@@ -31,4 +31,4 @@ Clone [sgl-project.github.io](https://github.com/sgl-project/sgl-project.github.
|
||||
```bash
|
||||
export DOC_SITE_PATH=../../sgl-project.github.io # update this with your path
|
||||
python3 deploy.py
|
||||
```
|
||||
```
|
||||
|
||||
6
docs/_static/css/custom_log.css
vendored
6
docs/_static/css/custom_log.css
vendored
@@ -5,13 +5,13 @@
|
||||
table.autosummary td {
|
||||
width: 50%
|
||||
}
|
||||
|
||||
|
||||
img.align-center {
|
||||
display: block;
|
||||
margin-left: auto;
|
||||
margin-right: auto;
|
||||
}
|
||||
|
||||
|
||||
.output_area.stderr {
|
||||
color: #d3d3d3 !important;
|
||||
}
|
||||
@@ -26,4 +26,4 @@ div.output_area.stderr {
|
||||
|
||||
div.output_area.stdout {
|
||||
color: #d3d3d3 !important;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -147,7 +147,7 @@ docker run --gpus all \
|
||||
lmsysorg/sglang:latest \
|
||||
python3 -m sglang.launch_server --model-path Qwen/Qwen2.5-7B-Instruct --host 0.0.0.0 --port 30000
|
||||
```
|
||||
|
||||
|
||||
</details>
|
||||
|
||||
## Example: Run Llama 3.1 405B
|
||||
|
||||
@@ -198,4 +198,4 @@ nbsphinx_prolog = """
|
||||
color: #d3d3d3 !important; /* light gray */
|
||||
}
|
||||
</style>
|
||||
"""
|
||||
"""
|
||||
|
||||
@@ -1,22 +1,22 @@
|
||||
# Deploy the documents
|
||||
# Deploy the documents
|
||||
|
||||
import os
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
def run_cmd(cmd):
|
||||
print(cmd)
|
||||
os.system(cmd)
|
||||
def run_cmd(cmd):
|
||||
print(cmd)
|
||||
os.system(cmd)
|
||||
|
||||
|
||||
run_cmd("cd $DOC_SITE_PATH; git pull")
|
||||
run_cmd("cd $DOC_SITE_PATH; git pull")
|
||||
|
||||
# (Optional) Remove old files
|
||||
# run_cmd("rm -rf $ALPA_SITE_PATH/*")
|
||||
# (Optional) Remove old files
|
||||
# run_cmd("rm -rf $ALPA_SITE_PATH/*")
|
||||
|
||||
run_cmd("cp -r _build/html/* $DOC_SITE_PATH")
|
||||
run_cmd("cp -r _build/html/* $DOC_SITE_PATH")
|
||||
|
||||
cmd_message = f"Update {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
|
||||
run_cmd(
|
||||
f"cd $DOC_SITE_PATH; git add .; git commit -m '{cmd_message}'; git push origin main"
|
||||
)
|
||||
cmd_message = f"Update {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
|
||||
run_cmd(
|
||||
f"cd $DOC_SITE_PATH; git add .; git commit -m '{cmd_message}'; git push origin main"
|
||||
)
|
||||
|
||||
@@ -74,4 +74,4 @@ def example(s):
|
||||
choices_method=sgl.unconditional_likelihood_normalized,
|
||||
)
|
||||
)
|
||||
```
|
||||
```
|
||||
|
||||
@@ -37,4 +37,4 @@ You can also use the Jinja template format, defined by Hugging Face transformers
|
||||
|
||||
```
|
||||
python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000 --chat-template ./my_model_template.jinja
|
||||
```
|
||||
```
|
||||
|
||||
@@ -25,9 +25,9 @@ If you see `decode out of memory happened` occasionally but not frequently, it i
|
||||
Data parallelism is better for throughput. When there is enough GPU memory, always favor data parallelism for throughput.
|
||||
|
||||
### Avoid out-of-memory by Tuning `--chunked-prefill-size`, `--mem-fraction-static`, `--max-running-requests`
|
||||
If you see out of memory (OOM) errors, you can try to tune the following parameters.
|
||||
If OOM happens during prefill, try to decrease `--chunked-prefill-size` to `4096` or `2048`.
|
||||
If OOM happens during decoding, try to decrease `--max-running-requests`.
|
||||
If you see out of memory (OOM) errors, you can try to tune the following parameters.
|
||||
If OOM happens during prefill, try to decrease `--chunked-prefill-size` to `4096` or `2048`.
|
||||
If OOM happens during decoding, try to decrease `--max-running-requests`.
|
||||
You can also try to decrease `--mem-fraction-static`, which reduces the memory usage of the KV cache memory pool and helps both prefill and decoding.
|
||||
|
||||
### Try Advanced Options
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
# Learn more
|
||||
|
||||
You can find more blogs, slides, and videos about SGLang at [https://github.com/sgl-project/sgl-learning-materials](https://github.com/sgl-project/sgl-learning-materials).
|
||||
You can find more blogs, slides, and videos about SGLang at [https://github.com/sgl-project/sgl-learning-materials](https://github.com/sgl-project/sgl-learning-materials).
|
||||
|
||||
@@ -223,4 +223,4 @@ response = requests.post(
|
||||
},
|
||||
)
|
||||
print(response.json())
|
||||
```
|
||||
```
|
||||
|
||||
@@ -3,9 +3,9 @@
|
||||
This page lists some common errors and tips for fixing them.
|
||||
|
||||
## CUDA out of memory
|
||||
If you see out of memory (OOM) errors, you can try to tune the following parameters.
|
||||
If OOM happens during prefill, try to decrease `--chunked-prefill-size` to `4096` or `2048`.
|
||||
If OOM happens during decoding, try to decrease `--max-running-requests`.
|
||||
If you see out of memory (OOM) errors, you can try to tune the following parameters.
|
||||
If OOM happens during prefill, try to decrease `--chunked-prefill-size` to `4096` or `2048`.
|
||||
If OOM happens during decoding, try to decrease `--max-running-requests`.
|
||||
You can also try to decrease `--mem-fraction-static`, which reduces the memory usage of the KV cache memory pool and helps both prefill and decoding.
|
||||
|
||||
## CUDA error: an illegal memory access was encountered
|
||||
|
||||
@@ -14,4 +14,4 @@ sphinx-book-theme
|
||||
sphinx-copybutton
|
||||
sphinx-tabs
|
||||
sphinxcontrib-mermaid
|
||||
urllib3<2.0.0
|
||||
urllib3<2.0.0
|
||||
|
||||
Reference in New Issue
Block a user