Clean up server_args.py to have a dedicated function for model specific adjustments (#8983)

2025-08-08 19:56:50 -07:00
parent 23f2afb2ce
commit 706bd69cc5
24 changed files with 201 additions and 340 deletions
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -21,6 +21,7 @@ runtime_common = [
    "build",
    "compressed-tensors",
    "datasets",
+    "einops",
    "fastapi",
    "hf_transfer",
    "huggingface_hub",
@@ -29,6 +30,7 @@ runtime_common = [
    "modelscope",
    "msgspec",
    "ninja",
+    "openai==1.99.1",
    "openai-harmony==0.0.3",
    "orjson",
    "outlines==0.1.11",
@@ -48,6 +50,7 @@ runtime_common = [
    "torchao==0.9.0",
    "transformers==4.55.0",
    "timm==1.0.16",
+    "tiktoken",
    "uvicorn",
    "uvloop",
    "xgrammar==0.1.22",
@@ -60,7 +63,6 @@ srt = [
    "torchaudio==2.8.0",
    "torchvision",
    "cuda-python",
-    "einops",
    "flashinfer_python==0.2.10",
 ]

@@ -71,10 +73,7 @@ blackwell = [
    "torchaudio==2.8.0",
    "torchvision",
    "cuda-python",
-    "einops",
    "flashinfer_python==0.2.10",
-    "tiktoken",
-    "openai==1.99.1",
 ]

 # HIP (Heterogeneous-computing Interface for Portability) for AMD
@@ -101,7 +100,7 @@ srt_npu = ["sglang[runtime_common]"]
 openai = ["openai==1.99.1", "tiktoken"]
 anthropic = ["anthropic>=0.20.0"]
 litellm = ["litellm>=1.0.0"]
-torch_memory_saver = ["torch_memory_saver>=0.0.8"]
+torch_memory_saver = ["torch_memory_saver==0.0.8"]
 decord = ["decord"]
 test = [
    "accelerate",