Refine OpenAI serving entrypoint to remove batch requests (#7372)

Signed-off-by: Xinyuan Tong <justinning0323@outlook.com>
Co-authored-by: Chang Su <csu272@usc.edu>
This commit is contained in:
Xinyuan Tong
2025-06-20 14:33:43 -07:00
committed by GitHub
parent 794be55af2
commit 0998808009
8 changed files with 488 additions and 645 deletions

View File

@@ -104,52 +104,50 @@ class ServingChatTestCase(unittest.TestCase):
None,
)
adapted, processed = self.chat._convert_to_internal_request(
[self.basic_req], ["rid"]
)
adapted, processed = self.chat._convert_to_internal_request(self.basic_req)
self.assertIsInstance(adapted, GenerateReqInput)
self.assertFalse(adapted.stream)
self.assertEqual(processed, self.basic_req)
# ------------- tool-call branch -------------
# NOTE(review): diff residue — the active test below still calls the removed
# batch API (`_convert_to_internal_request([req], ["rid"])`), while its
# commented-out replacement uses a per-request form (`(req, "rid")`) that
# itself disagrees with the one-argument call used earlier in this file
# (`_convert_to_internal_request(self.basic_req)`). Confirm the current
# signature of `_convert_to_internal_request` before re-enabling either.
def test_tool_call_request_conversion(self):
# Minimal chat request carrying a single function tool so the tool-call
# conversion branch is exercised; tool_choice="auto" lets the backend
# decide whether to invoke the tool.
req = ChatCompletionRequest(
model="x",
messages=[{"role": "user", "content": "Weather?"}],
tools=[
{
"type": "function",
"function": {
"name": "get_weather",
"parameters": {"type": "object", "properties": {}},
},
}
],
tool_choice="auto",
)
# # ------------- tool-call branch -------------
# def test_tool_call_request_conversion(self):
# req = ChatCompletionRequest(
# model="x",
# messages=[{"role": "user", "content": "Weather?"}],
# tools=[
# {
# "type": "function",
# "function": {
# "name": "get_weather",
# "parameters": {"type": "object", "properties": {}},
# },
# }
# ],
# tool_choice="auto",
# )
# Stub out message preprocessing so only the request-conversion logic is
# under test; the 7-tuple mirrors _process_messages' return shape —
# presumably (prompt, token_ids, ?, ?, image_data, stop, ?) — TODO confirm.
with patch.object(
self.chat,
"_process_messages",
return_value=("Prompt", [1, 2, 3], None, None, [], ["</s>"], None),
):
adapted, _ = self.chat._convert_to_internal_request([req], ["rid"])
self.assertEqual(adapted.rid, "rid")
# with patch.object(
#     self.chat,
#     "_process_messages",
#     return_value=("Prompt", [1, 2, 3], None, None, [], ["</s>"], None),
# ):
#     adapted, _ = self.chat._convert_to_internal_request(req, "rid")
#     self.assertEqual(adapted.rid, "rid")
# NOTE(review): diff residue — the active test below uses the removed batch
# API (`_convert_to_internal_request([req], ["rid"])`); its commented-out
# replacement passes `(req, "rid")` instead. Verify the current signature
# before re-enabling either version.
def test_tool_choice_none(self):
# Request declares a tool but opts out of tool calling via
# tool_choice="none", covering the no-tool-call branch of conversion.
req = ChatCompletionRequest(
model="x",
messages=[{"role": "user", "content": "Hi"}],
tools=[{"type": "function", "function": {"name": "noop"}}],
tool_choice="none",
)
# Stub _process_messages so only request conversion runs; the stubbed
# 7-tuple matches the shape used by the sibling tool-call test.
with patch.object(
self.chat,
"_process_messages",
return_value=("Prompt", [1, 2, 3], None, None, [], ["</s>"], None),
):
adapted, _ = self.chat._convert_to_internal_request([req], ["rid"])
self.assertEqual(adapted.rid, "rid")
# def test_tool_choice_none(self):
#     req = ChatCompletionRequest(
#         model="x",
#         messages=[{"role": "user", "content": "Hi"}],
#         tools=[{"type": "function", "function": {"name": "noop"}}],
#         tool_choice="none",
#     )
#     with patch.object(
#         self.chat,
#         "_process_messages",
#         return_value=("Prompt", [1, 2, 3], None, None, [], ["</s>"], None),
#     ):
#         adapted, _ = self.chat._convert_to_internal_request(req, "rid")
#         self.assertEqual(adapted.rid, "rid")
# ------------- multimodal branch -------------
def test_multimodal_request_with_images(self):