From eb4308c4c9e3efbd58e86ec63e4f88dc36c363a8 Mon Sep 17 00:00:00 2001
From: Arsalan <41029759+amirarsalan90@users.noreply.github.com>
Date: Tue, 12 Mar 2024 03:16:06 -0400
Subject: [PATCH] adding the triton docker build minimal example (#242)

---
 examples/usage/triton/Dockerfile              | 10 +++
 examples/usage/triton/README.md               | 41 +++++++++++
 .../models/character_generation/1/model.py    | 69 +++++++++++++++++++
 .../models/character_generation/config.pbtxt  | 23 +++++++
 4 files changed, 143 insertions(+)
 create mode 100644 examples/usage/triton/Dockerfile
 create mode 100644 examples/usage/triton/README.md
 create mode 100644 examples/usage/triton/models/character_generation/1/model.py
 create mode 100644 examples/usage/triton/models/character_generation/config.pbtxt

diff --git a/examples/usage/triton/Dockerfile b/examples/usage/triton/Dockerfile
new file mode 100644
index 000000000..d97342e26
--- /dev/null
+++ b/examples/usage/triton/Dockerfile
@@ -0,0 +1,10 @@
+FROM nvcr.io/nvidia/tritonserver:24.01-py3
+
+WORKDIR /opt
+
+RUN git clone https://github.com/sgl-project/sglang.git
+
+WORKDIR /opt/sglang
+RUN pip install --upgrade pip && \
+    pip install -e "python[all]" && \
+    pip install datasets
\ No newline at end of file
diff --git a/examples/usage/triton/README.md b/examples/usage/triton/README.md
new file mode 100644
index 000000000..48a9c8354
--- /dev/null
+++ b/examples/usage/triton/README.md
@@ -0,0 +1,41 @@
+# sglang_triton
+
+Build the docker image:
+```
+docker build -t sglang-triton .
+```
+
+Then do:
+```
+docker run -ti --gpus=all --network=host --name sglang-triton -v ./models:/mnt/models sglang-triton
+```
+
+inside the docker container:
+```
+cd sglang
+python3 -m sglang.launch_server --model-path mistralai/Mistral-7B-Instruct-v0.2 --port 30000 --mem-fraction-static 0.9
+```
+
+with another shell, inside the docker container:
+```
+docker exec -ti sglang-triton /bin/bash
+cd /mnt
+tritonserver --model-repository=/mnt/models
+```
+
+
+Send request to the server:
+```
+curl -X POST http://localhost:8000/v2/models/character_generation/generate \
+     -H "Content-Type: application/json" \
+     -d '{
+           "inputs": [
+             {
+               "name": "INPUT_TEXT",
+               "datatype": "STRING",
+               "shape": [1],
+               "data": ["Name1"]
+             }
+           ]
+         }'
+```
\ No newline at end of file
diff --git a/examples/usage/triton/models/character_generation/1/model.py b/examples/usage/triton/models/character_generation/1/model.py
new file mode 100644
index 000000000..e76992f95
--- /dev/null
+++ b/examples/usage/triton/models/character_generation/1/model.py
@@ -0,0 +1,69 @@
+import triton_python_backend_utils as pb_utils
+import numpy
+import sglang as sgl
+from sglang import function, set_default_backend
+from sglang.srt.constrained import build_regex_from_object
+
+from pydantic import BaseModel
+
+# All generation calls in this model go to the SGLang server that the README
+# starts on port 30000 inside the same container.
+sgl.set_default_backend(sgl.RuntimeEndpoint("http://localhost:30000"))
+
+
+class Character(BaseModel):
+    """Schema used to build the regex that constrains the generated JSON."""
+
+    name: str
+    eye_color: str
+    house: str
+
+
+@function
+def character_gen(s, name):
+    """Prompt for one character and generate output constrained to ``Character``."""
+    s += (
+        name
+        + " is a character in Harry Potter. Please fill in the following information about this character.\n"
+    )
+    s += sgl.gen("json_output", max_tokens=256, regex=build_regex_from_object(Character))
+
+
+class TritonPythonModel:
+    """Triton Python-backend model that forwards character names to SGLang."""
+
+    def initialize(self, args):
+        """Log that the model was loaded; no state is set up here."""
+        print("Initialized.")
+
+    def execute(self, requests):
+        """Generate one character description per input name.
+
+        Returns a list holding exactly one ``InferenceResponse`` per request,
+        in request order, as the Python backend requires.
+        """
+        responses = []
+        for request in requests:
+            tensor_in = pb_utils.get_input_tensor_by_name(request, "INPUT_TEXT")
+            if tensor_in is None:
+                # Answer only this request with an error and keep going:
+                # returning a bare response here would break the
+                # one-response-per-request contract for the whole batch.
+                responses.append(
+                    pb_utils.InferenceResponse(
+                        output_tensors=[],
+                        error=pb_utils.TritonError("Missing required input INPUT_TEXT"),
+                    )
+                )
+                continue
+
+            # String elements may arrive as bytes; decode those to str.
+            input_list_names = [i.decode('utf-8') if isinstance(i, bytes) else i for i in tensor_in.as_numpy().tolist()]
+            input_list_dicts = [{"name": i} for i in input_list_names]
+
+            states = character_gen.run_batch(input_list_dicts)
+            character_strs = [state.text() for state in states]
+
+            tensor_out = pb_utils.Tensor("OUTPUT_TEXT", numpy.array(character_strs, dtype=object))
+            responses.append(pb_utils.InferenceResponse(output_tensors=[tensor_out]))
+        return responses
\ No newline at end of file
diff --git a/examples/usage/triton/models/character_generation/config.pbtxt b/examples/usage/triton/models/character_generation/config.pbtxt
new file mode 100644
index 000000000..7546f993a
--- /dev/null
+++ b/examples/usage/triton/models/character_generation/config.pbtxt
@@ -0,0 +1,23 @@
+name: "character_generation"
+backend: "python"
+input [
+    {
+        name: "INPUT_TEXT"
+        data_type: TYPE_STRING
+        dims: [ -1 ]
+    }
+]
+output [
+    {
+        name: "OUTPUT_TEXT"
+        data_type: TYPE_STRING
+        dims: [ -1 ]
+    }
+]
+instance_group [
+    {
+        count: 1
+        kind: KIND_GPU
+        gpus: [ 0 ]
+    }
+]