Support v1/responses and use harmony in serving_chat (#8837)

Signed-off-by: Xinyuan Tong <justinning0323@outlook.com>
Signed-off-by: Xinyuan Tong <xinyuantong.cs@gmail.com>
Co-authored-by: Xinyuan Tong <justinning0323@outlook.com>
Co-authored-by: Xinyuan Tong <xinyuantong.cs@gmail.com>
This commit is contained in:
Chang Su
2025-08-06 16:20:34 -07:00
committed by GitHub
parent cbbd685a46
commit 92cc32d9fc
16 changed files with 2878 additions and 43 deletions

View File

@@ -274,6 +274,9 @@ class ServerArgs:
enable_pdmux: bool = False
sm_group_num: int = 3
# For tool server: 'demo' or a comma-separated list of tool server URLs; None means no tool server is used
tool_server: Optional[str] = None
# Deprecated arguments
enable_ep_moe: bool = False
enable_deepep_moe: bool = False
@@ -1916,6 +1919,14 @@ class ServerArgs:
help="Disable mmap while loading weight using safetensors.",
)
# For tool server
parser.add_argument(
"--tool-server",
type=str,
default=None,
help="Either 'demo' or a comma-separated list of tool server urls to use for the model. If not specified, no tool server will be used.",
)
# Deprecated arguments
parser.add_argument(
"--enable-ep-moe",