Clean up server_args.py to have a dedicated function for model specific adjustments (#8983)

This commit is contained in:
Lianmin Zheng
2025-08-08 19:56:50 -07:00
committed by GitHub
parent 23f2afb2ce
commit 706bd69cc5
24 changed files with 201 additions and 340 deletions

View File

@@ -38,7 +38,6 @@ suites = {
TestFile("openai_server/basic/test_serving_embedding.py", 10),
TestFile("openai_server/basic/test_openai_embedding.py", 141),
TestFile("openai_server/basic/test_openai_server.py", 149),
TestFile("openai_server/features/test_cache_report.py", 100),
TestFile("openai_server/features/test_enable_thinking.py", 70),
TestFile("openai_server/features/test_json_constrained.py", 98),
TestFile("openai_server/features/test_json_mode.py", 90),
@@ -103,7 +102,6 @@ suites = {
TestFile("test_update_weights_from_disk.py", 114),
TestFile("test_update_weights_from_tensor.py", 48),
TestFile("test_utils_update_weights.py", 48),
TestFile("test_vertex_endpoint.py", 31),
TestFile("test_vision_chunked_prefill.py", 175),
TestFile("test_vlm_input_format.py", 300),
TestFile("test_vision_openai_server_a.py", 584),
@@ -167,7 +165,6 @@ suites = {
TestFile("models/lora/test_lora_tp.py", 116),
TestFile("test_data_parallelism.py", 73),
TestFile("test_dp_attention.py", 277),
TestFile("test_mla_tp.py", 170),
TestFile("test_patch_torch.py", 19),
TestFile("test_update_weights_from_distributed.py", 103),
TestFile("test_release_memory_occupation.py", 127),
@@ -175,7 +172,6 @@ suites = {
"per-commit-2-gpu-amd": [
TestFile("models/lora/test_lora_tp.py", 116),
TestFile("test_data_parallelism.py", 73),
TestFile("test_mla_tp.py", 170),
TestFile("test_patch_torch.py", 19),
TestFile("test_update_weights_from_distributed.py", 103),
],