Support v1/responses and use harmony in serving_chat (#8837)
Signed-off-by: Xinyuan Tong <justinning0323@outlook.com> Signed-off-by: Xinyuan Tong <xinyuantong.cs@gmail.com> Co-authored-by: Xinyuan Tong <justinning0323@outlook.com> Co-authored-by: Xinyuan Tong <xinyuantong.cs@gmail.com>
This commit is contained in:
@@ -41,6 +41,7 @@ import tempfile
|
||||
import threading
|
||||
import time
|
||||
import traceback
|
||||
import uuid
|
||||
import warnings
|
||||
from collections import OrderedDict, defaultdict
|
||||
from contextlib import contextmanager
|
||||
@@ -233,6 +234,10 @@ def is_flashinfer_available():
|
||||
return importlib.util.find_spec("flashinfer") is not None and is_cuda()
|
||||
|
||||
|
||||
def random_uuid() -> str:
    """Return a freshly generated random UUID (version 4) as a hex string.

    The value is the ``hex`` form of ``uuid.uuid4()``: 32 lowercase
    hexadecimal characters with no dashes.
    """
    # ``UUID.hex`` is already a ``str``; no extra conversion is needed.
    return uuid.uuid4().hex
|
||||
|
||||
|
||||
# Flag obtained by passing the SGLANG_ENABLE_TORCH_INFERENCE_MODE name to
# get_bool_env_var.
# NOTE(review): "false" is presumably the fallback used when the variable is
# unset — confirm against get_bool_env_var's definition.
_ENABLE_TORCH_INFERENCE_MODE = get_bool_env_var(
    "SGLANG_ENABLE_TORCH_INFERENCE_MODE", "false"
)
|
||||
|
||||
Reference in New Issue
Block a user