sglangv0.5.2 & support Qwen3-Next-80B-A3B-Instruct

This commit is contained in:
maxiao1
2025-09-13 17:00:20 +08:00
commit 118f1fc726
2037 changed files with 515371 additions and 0 deletions

View File

@@ -0,0 +1,10 @@
FROM nvcr.io/nvidia/tritonserver:24.01-py3
WORKDIR /opt
RUN git clone https://github.com/sgl-project/sglang.git
WORKDIR /opt/sglang
RUN pip install --upgrade pip && \
pip install -e "python[all]" && \
pip install datasets

View File

@@ -0,0 +1,35 @@
# sglang_triton
Build the docker image:
```
docker build -t sglang-triton .
```
Then do:
```
docker run -ti --gpus=all --network=host --name sglang-triton -v ./models:/mnt/models sglang-triton
```
inside the docker container:
```
cd sglang
python3 -m sglang.launch_server --model-path mistralai/Mistral-7B-Instruct-v0.2 --port 30000 --mem-fraction-static 0.9
```
with another shell, inside the docker container:
```
docker exec -ti sglang-triton /bin/bash
cd /mnt
tritonserver --model-repository=/mnt/models
```
Send request to the server:
```
curl -X POST http://localhost:8000/v2/models/character_generation/generate \
-H "Content-Type: application/json" \
-d '{
"INPUT_TEXT": ["harry"]
}'
```

View File

@@ -0,0 +1,23 @@
name: "character_generation"
backend: "python"
input [
{
name: "INPUT_TEXT"
data_type: TYPE_STRING
dims: [ -1 ]
}
]
output [
{
name: "OUTPUT_TEXT"
data_type: TYPE_STRING
dims: [ -1 ]
}
]
instance_group [
{
count: 1
kind: KIND_GPU
gpus: [ 0 ]
}
]