sglangv0.5.2 & support Qwen3-Next-80B-A3B-Instruct
This commit is contained in:
10
examples/frontend_language/usage/triton/Dockerfile
Normal file
10
examples/frontend_language/usage/triton/Dockerfile
Normal file
@@ -0,0 +1,10 @@
|
||||
# Triton Inference Server image with SGLang installed from a source checkout.
FROM nvcr.io/nvidia/tritonserver:24.01-py3

# Branch or tag of the SGLang repository to check out. The default keeps the
# previous behaviour of building the main branch; pass
# `--build-arg SGLANG_REF=<tag>` for a reproducible build.
ARG SGLANG_REF=main

WORKDIR /opt

RUN git clone --branch "${SGLANG_REF}" https://github.com/sgl-project/sglang.git

WORKDIR /opt/sglang
# Editable install of the package under ./python with its "all" extras, plus
# the `datasets` package. --no-cache-dir keeps pip's download cache out of the
# image layer.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -e "python[all]" && \
    pip install --no-cache-dir datasets
|
||||
35
examples/frontend_language/usage/triton/README.md
Normal file
35
examples/frontend_language/usage/triton/README.md
Normal file
@@ -0,0 +1,35 @@
|
||||
# sglang_triton
|
||||
|
||||
Build the docker image:
|
||||
```
|
||||
docker build -t sglang-triton .
|
||||
```
|
||||
|
||||
Then start the container:
|
||||
```
|
||||
docker run -ti --gpus=all --network=host --name sglang-triton -v ./models:/mnt/models sglang-triton
|
||||
```
|
||||
|
||||
Inside the Docker container, launch the SGLang server:
|
||||
```
|
||||
cd /opt/sglang
|
||||
python3 -m sglang.launch_server --model-path mistralai/Mistral-7B-Instruct-v0.2 --port 30000 --mem-fraction-static 0.9
|
||||
```
|
||||
|
||||
From another shell on the host, open a shell inside the running container and start Triton:
|
||||
```
|
||||
docker exec -ti sglang-triton /bin/bash
|
||||
cd /mnt
|
||||
tritonserver --model-repository=/mnt/models
|
||||
```
|
||||
|
||||
|
||||
Send a request to the server:
|
||||
```
|
||||
curl -X POST http://localhost:8000/v2/models/character_generation/generate \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"INPUT_TEXT": ["harry"]
|
||||
}'
|
||||
|
||||
```
|
||||
@@ -0,0 +1,23 @@
|
||||
# Triton model configuration for the "character_generation" model.
# The model name is the one used in the request URL
# (/v2/models/character_generation/generate).
name: "character_generation"
# Served through Triton's Python backend.
backend: "python"
# One string input; -1 marks a variable-length dimension.
input [
  {
    name: "INPUT_TEXT"
    data_type: TYPE_STRING
    dims: [ -1 ]
  }
]
# One string output, also variable-length.
output [
  {
    name: "OUTPUT_TEXT"
    data_type: TYPE_STRING
    dims: [ -1 ]
  }
]
# A single model instance placed on GPU 0.
instance_group [
  {
    count: 1
    kind: KIND_GPU
    gpus: [ 0 ]
  }
]
|
||||
Reference in New Issue
Block a user