Improve structured outputs: fix race condition, server crash, metrics and style (#6188)
This commit is contained in:
@@ -82,7 +82,7 @@ class TestJSONConstrainedOutlinesBackend(CustomTestCase):
|
||||
print(json.dumps(ret))
|
||||
print("=" * 100)
|
||||
|
||||
if not json_schema:
|
||||
if not json_schema or json_schema == "INVALID":
|
||||
return
|
||||
|
||||
# Make sure the json output is valid
|
||||
@@ -97,6 +97,9 @@ class TestJSONConstrainedOutlinesBackend(CustomTestCase):
|
||||
def test_json_generate(self):
|
||||
self.run_decode(json_schema=self.json_schema)
|
||||
|
||||
def test_json_invalid(self):
|
||||
self.run_decode(json_schema="INVALID")
|
||||
|
||||
def test_json_openai(self):
|
||||
client = openai.Client(api_key="EMPTY", base_url=f"{self.base_url}/v1")
|
||||
|
||||
@@ -104,7 +107,10 @@ class TestJSONConstrainedOutlinesBackend(CustomTestCase):
|
||||
model=self.model,
|
||||
messages=[
|
||||
{"role": "system", "content": "You are a helpful AI assistant"},
|
||||
{"role": "user", "content": "Introduce the capital of France."},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Introduce the capital of France. Return in a JSON format.",
|
||||
},
|
||||
],
|
||||
temperature=0,
|
||||
max_tokens=128,
|
||||
|
||||
@@ -56,6 +56,7 @@ class TestEnableMetrics(CustomTestCase):
|
||||
"sglang:token_usage",
|
||||
"sglang:gen_throughput",
|
||||
"sglang:num_queue_reqs",
|
||||
"sglang:num_grammar_queue_reqs",
|
||||
"sglang:cache_hit_rate",
|
||||
"sglang:spec_accept_length",
|
||||
"sglang:prompt_tokens_total",
|
||||
|
||||
Reference in New Issue
Block a user