adapt to sglang v0.5.2rc1 on dcu
This commit is contained in:
@@ -0,0 +1,55 @@
|
||||
import numpy
|
||||
import triton_python_backend_utils as pb_utils
|
||||
from pydantic import BaseModel
|
||||
|
||||
import sglang as sgl
|
||||
from sglang import function
|
||||
from sglang.srt.constrained.outlines_backend import build_regex_from_object
|
||||
|
||||
# Make the runtime endpoint at this URL the default backend for sglang.
# This runs at module import time.
# NOTE(review): presumably an sglang server must already be listening on
# localhost:30000 when this module is loaded — confirm against the deployment.
sgl.set_default_backend(sgl.RuntimeEndpoint("http://localhost:30000"))
|
||||
|
||||
|
||||
class Character(BaseModel):
    """Fields of a generated character profile.

    ``character_gen`` passes this model to ``build_regex_from_object`` to
    obtain the regex that constrains its generation step.
    """

    # All three fields are plain strings; their order is kept as declared.
    name: str
    eye_color: str
    house: str
|
||||
|
||||
|
||||
@function
def character_gen(s, name):
    """Append a character prompt to ``s`` and generate a constrained answer.

    Args:
        s: sglang program state that the prompt and the generation step are
            appended to.
        name: Character name; it is concatenated in front of the fixed
            prompt text.
    """
    prompt = (
        name
        + " is a character in Harry Potter. Please fill in the following information about this character.\n"
    )
    s += prompt

    # The generation named "json_output" must match the regex built from the
    # Character model.
    character_regex = build_regex_from_object(Character)
    s += sgl.gen("json_output", max_tokens=256, regex=character_regex)
|
||||
|
||||
|
||||
class TritonPythonModel:
    """Triton Python-backend model that generates character profiles.

    Each request is expected to carry an ``INPUT_TEXT`` tensor of character
    names. The names are run through ``character_gen`` as one batch and the
    generated texts are returned in an ``OUTPUT_TEXT`` tensor.
    """

    def initialize(self, args):
        """Report that the model instance has been loaded.

        Args:
            args: Initialization arguments supplied by Triton; unused here.
        """
        print("Initialized.")

    def execute(self, requests):
        """Run character generation for every request.

        Args:
            requests: The inference requests handed over by Triton.

        Returns:
            A list holding exactly one ``pb_utils.InferenceResponse`` per
            request, in request order. A request without an ``INPUT_TEXT``
            tensor gets an error response instead of aborting the whole call.
        """
        responses = []
        for request in requests:
            tensor_in = pb_utils.get_input_tensor_by_name(request, "INPUT_TEXT")
            if tensor_in is None:
                # Returning a bare response from inside the loop would break
                # the one-response-per-request list Triton expects and would
                # drop every other request, so record an error for this
                # request only and move on.
                responses.append(
                    pb_utils.InferenceResponse(
                        output_tensors=[],
                        error=pb_utils.TritonError(
                            "Missing required input tensor 'INPUT_TEXT'"
                        ),
                    )
                )
                continue

            # Input elements may be bytes or str; decode bytes as UTF-8.
            names = [
                raw.decode("utf-8") if isinstance(raw, bytes) else raw
                for raw in tensor_in.as_numpy().tolist()
            ]

            # One keyword-argument dict per character for the batched run.
            batch_args = [{"name": character_name} for character_name in names]
            states = character_gen.run_batch(batch_args)
            character_strs = [state.text() for state in states]

            # dtype=object keeps the generated texts as Python strings.
            tensor_out = pb_utils.Tensor(
                "OUTPUT_TEXT", numpy.array(character_strs, dtype=object)
            )
            responses.append(pb_utils.InferenceResponse(output_tensors=[tensor_out]))
        return responses
|
||||
@@ -0,0 +1,23 @@
|
||||
# Triton model configuration for the character-generation model.
name: "character_generation"
# The model is served through Triton's Python backend.
backend: "python"

# Single input: a variable-length tensor of strings.
input [
  {
    name: "INPUT_TEXT"
    data_type: TYPE_STRING
    dims: [ -1 ]
  }
]

# Single output: a variable-length tensor of strings.
output [
  {
    name: "OUTPUT_TEXT"
    data_type: TYPE_STRING
    dims: [ -1 ]
  }
]

# One model instance, placed on GPU 0.
instance_group [
  {
    count: 1
    kind: KIND_GPU
    gpus: [ 0 ]
  }
]
|
||||
Reference in New Issue
Block a user