adding the triton docker build minimal example (#242)
This commit is contained in:
10
examples/usage/triton/Dockerfile
Normal file
10
examples/usage/triton/Dockerfile
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
# Base image: NVIDIA Triton Inference Server, release 24.01.
FROM nvcr.io/nvidia/tritonserver:24.01-py3

WORKDIR /opt

# Shallow clone: only the current tree is needed for the install below, so
# keep the full git history out of the image layer.
RUN git clone --depth 1 https://github.com/sgl-project/sglang.git

WORKDIR /opt/sglang

# Install SGLang (editable, with all extras) plus `datasets`.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -e "python[all]" && \
    pip install --no-cache-dir datasets
|
||||||
41
examples/usage/triton/README.md
Normal file
41
examples/usage/triton/README.md
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
# sglang_triton
|
||||||
|
|
||||||
|
Build the docker image:
|
||||||
|
```
|
||||||
|
docker build -t sglang-triton .
|
||||||
|
```
|
||||||
|
|
||||||
|
Then start the container:
|
||||||
|
```
|
||||||
|
docker run -ti --gpus=all --network=host --name sglang-triton -v ./models:/mnt/models sglang-triton
|
||||||
|
```
|
||||||
|
|
||||||
|
Inside the Docker container, launch the SGLang server:
|
||||||
|
```
|
||||||
|
cd /opt/sglang
|
||||||
|
python3 -m sglang.launch_server --model-path mistralai/Mistral-7B-Instruct-v0.2 --port 30000 --mem-fraction-static 0.9
|
||||||
|
```
|
||||||
|
|
||||||
|
From another shell on the host, open a second session inside the running container and start Triton:
|
||||||
|
```
|
||||||
|
docker exec -ti sglang-triton /bin/bash
|
||||||
|
cd /mnt
|
||||||
|
tritonserver --model-repository=/mnt/models
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
Send a request to the server:
|
||||||
|
```
|
||||||
|
curl -X POST http://localhost:8000/v2/models/character_generation/generate \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"inputs": [
|
||||||
|
{
|
||||||
|
"name": "INPUT_TEXT",
|
||||||
|
"datatype": "STRING",
|
||||||
|
"shape": [1],
|
||||||
|
"data": ["Name1"]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}'
|
||||||
|
```
|
||||||
45
examples/usage/triton/models/character_generation/1/model.py
Normal file
45
examples/usage/triton/models/character_generation/1/model.py
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
import triton_python_backend_utils as pb_utils
|
||||||
|
import numpy
|
||||||
|
import sglang as sgl
|
||||||
|
from sglang import function, set_default_backend
|
||||||
|
from sglang.srt.constrained import build_regex_from_object
|
||||||
|
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
# Point every SGLang program defined in this module at the SGLang server's
# HTTP endpoint. This runs once, at import time.
set_default_backend(sgl.RuntimeEndpoint("http://localhost:30000"))
|
||||||
|
|
||||||
|
# Schema of the JSON object generated for one character. character_gen passes
# this model to build_regex_from_object and hands the resulting regex to
# sgl.gen.
# NOTE(review): field order presumably determines the key order in the
# generated JSON -- confirm before reordering.
class Character(BaseModel):
    name: str
    eye_color: str
    house: str
|
||||||
|
|
||||||
|
@function
def character_gen(s, name):
    """Prompt for a Harry Potter character and generate its JSON description.

    Appends a one-sentence instruction mentioning ``name`` to the program
    state ``s``, then requests up to 256 tokens stored under the key
    ``"json_output"``, constrained by the regex built from ``Character``.
    """
    prompt_suffix = " is a character in Harry Potter. Please fill in the following information about this character.\n"
    s += name + prompt_suffix

    character_regex = build_regex_from_object(Character)
    s += sgl.gen("json_output", max_tokens=256, regex=character_regex)
|
||||||
|
|
||||||
|
|
||||||
|
class TritonPythonModel:
    """Triton Python-backend model that generates character descriptions.

    Each request carries an ``INPUT_TEXT`` string tensor of character names.
    The names are run through ``character_gen`` as one batch and the resulting
    texts are returned in an ``OUTPUT_TEXT`` string tensor.
    """

    def initialize(self, args):
        """Called once by Triton when the model is loaded; only logs a message."""
        print("Initialized.")

    def execute(self, requests):
        """Run character generation for every request in the batch.

        Args:
            requests: the inference requests handed over by Triton.

        Returns:
            A list with exactly one ``pb_utils.InferenceResponse`` per request,
            in the same order. A request without an ``INPUT_TEXT`` tensor gets
            an error response; the remaining requests are still processed.
        """
        responses = []
        for request in requests:
            tensor_in = pb_utils.get_input_tensor_by_name(request, "INPUT_TEXT")
            if tensor_in is None:
                # Report the problem for this request only. Returning early
                # here would hand Triton a bare response instead of a list
                # and drop every other request in the batch.
                error = pb_utils.TritonError(
                    "Missing required input tensor 'INPUT_TEXT'."
                )
                responses.append(
                    pb_utils.InferenceResponse(output_tensors=[], error=error)
                )
                continue

            # String tensor elements may arrive as bytes; decode those to str.
            names = [
                item.decode("utf-8") if isinstance(item, bytes) else item
                for item in tensor_in.as_numpy().tolist()
            ]

            # One argument dict per name, all executed as a single batch.
            states = character_gen.run_batch([{"name": name} for name in names])
            character_strs = [state.text() for state in states]

            tensor_out = pb_utils.Tensor(
                "OUTPUT_TEXT", numpy.array(character_strs, dtype=object)
            )
            responses.append(pb_utils.InferenceResponse(output_tensors=[tensor_out]))
        return responses
|
||||||
@@ -0,0 +1,23 @@
|
|||||||
|
# Triton model configuration for the character_generation example.
# The README's curl example addresses this model as
# /v2/models/character_generation/generate.
name: "character_generation"
backend: "python"

# Character names; read in model.py's execute() via the name "INPUT_TEXT".
input [
  {
    name: "INPUT_TEXT"
    data_type: TYPE_STRING
    dims: [ -1 ]
  }
]

# Generated text per input name; model.py emits it as "OUTPUT_TEXT".
output [
  {
    name: "OUTPUT_TEXT"
    data_type: TYPE_STRING
    dims: [ -1 ]
  }
]

# One model instance, assigned to GPU 0.
# NOTE(review): the visible model.py only forwards requests to the SGLang
# HTTP endpoint, so KIND_CPU may be sufficient -- confirm.
instance_group [
  {
    count: 1
    kind: KIND_GPU
    gpus: [ 0 ]
  }
]
|
||||||
Reference in New Issue
Block a user