Support v1/responses and use harmony in serving_chat (#8837)

Signed-off-by: Xinyuan Tong <justinning0323@outlook.com>
Signed-off-by: Xinyuan Tong <xinyuantong.cs@gmail.com>
Co-authored-by: Xinyuan Tong <justinning0323@outlook.com>
Co-authored-by: Xinyuan Tong <xinyuantong.cs@gmail.com>
This commit is contained in:
Chang Su
2025-08-06 16:20:34 -07:00
committed by GitHub
parent cbbd685a46
commit 92cc32d9fc
16 changed files with 2878 additions and 43 deletions

View File

@@ -41,6 +41,7 @@ import tempfile
import threading
import time
import traceback
import uuid
import warnings
from collections import OrderedDict, defaultdict
from contextlib import contextmanager
@@ -233,6 +234,10 @@ def is_flashinfer_available():
return importlib.util.find_spec("flashinfer") is not None and is_cuda()
def random_uuid() -> str:
    """Return a random 32-character lowercase hex string (a UUID4 without dashes).

    Used as a cheap unique identifier (e.g. request IDs).
    """
    # uuid.uuid4().hex is already a str; the previous str(...) wrapper was redundant.
    return uuid.uuid4().hex
# Whether to run under torch inference mode, read once at import time from the
# SGLANG_ENABLE_TORCH_INFERENCE_MODE env var (defaults to "false").
# NOTE(review): get_bool_env_var is defined elsewhere in this module — presumably
# it parses truthy strings like "true"/"1"; confirm its exact accepted values.
_ENABLE_TORCH_INFERENCE_MODE = get_bool_env_var(
    "SGLANG_ENABLE_TORCH_INFERENCE_MODE", "false"
)