Support v1/responses and use harmony in serving_chat (#8837)

Signed-off-by: Xinyuan Tong <justinning0323@outlook.com>
Signed-off-by: Xinyuan Tong <xinyuantong.cs@gmail.com>
Co-authored-by: Xinyuan Tong <justinning0323@outlook.com>
Co-authored-by: Xinyuan Tong <xinyuantong.cs@gmail.com>
This commit is contained in:
Chang Su
2025-08-06 16:20:34 -07:00
committed by GitHub
parent cbbd685a46
commit 92cc32d9fc
16 changed files with 2878 additions and 43 deletions

View File

@@ -29,6 +29,7 @@ runtime_common = [
"modelscope",
"msgspec",
"ninja",
+"openai-harmony==0.0.3",
"orjson",
"outlines==0.1.11",
"packaging",
@@ -96,7 +97,7 @@ srt_cpu = ["sglang[runtime_common]", "einops"]
# https://vllm-ascend.readthedocs.io/en/latest/installation.html
srt_npu = ["sglang[runtime_common]"]
-openai = ["openai>=1.0", "tiktoken"]
+openai = ["openai>=1.99.1", "tiktoken"]
anthropic = ["anthropic>=0.20.0"]
litellm = ["litellm>=1.0.0"]
torch_memory_saver = ["torch_memory_saver>=0.0.8"]