Clean up server_args.py to have a dedicated function for model-specific adjustments (#8983)

This commit is contained in:
Lianmin Zheng
2025-08-08 19:56:50 -07:00
committed by GitHub
parent 23f2afb2ce
commit 706bd69cc5
24 changed files with 201 additions and 340 deletions

View File

@@ -21,6 +21,7 @@ runtime_common = [
"build",
"compressed-tensors",
"datasets",
"einops",
"fastapi",
"hf_transfer",
"huggingface_hub",
@@ -29,6 +30,7 @@ runtime_common = [
"modelscope",
"msgspec",
"ninja",
"openai==1.99.1",
"openai-harmony==0.0.3",
"orjson",
"outlines==0.1.11",
@@ -48,6 +50,7 @@ runtime_common = [
"torchao==0.9.0",
"transformers==4.55.0",
"timm==1.0.16",
"tiktoken",
"uvicorn",
"uvloop",
"xgrammar==0.1.22",
@@ -60,7 +63,6 @@ srt = [
"torchaudio==2.8.0",
"torchvision",
"cuda-python",
"einops",
"flashinfer_python==0.2.10",
]
@@ -71,10 +73,7 @@ blackwell = [
"torchaudio==2.8.0",
"torchvision",
"cuda-python",
"einops",
"flashinfer_python==0.2.10",
"tiktoken",
"openai==1.99.1",
]
# HIP (Heterogeneous-computing Interface for Portability) for AMD
@@ -101,7 +100,7 @@ srt_npu = ["sglang[runtime_common]"]
openai = ["openai==1.99.1", "tiktoken"]
anthropic = ["anthropic>=0.20.0"]
litellm = ["litellm>=1.0.0"]
torch_memory_saver = ["torch_memory_saver>=0.0.8"]
torch_memory_saver = ["torch_memory_saver==0.0.8"]
decord = ["decord"]
test = [
"accelerate",