Clean up server_args.py to have a dedicated function for model specific adjustments (#8983)
This commit is contained in:
@@ -38,7 +38,6 @@ suites = {
|
||||
TestFile("openai_server/basic/test_serving_embedding.py", 10),
|
||||
TestFile("openai_server/basic/test_openai_embedding.py", 141),
|
||||
TestFile("openai_server/basic/test_openai_server.py", 149),
|
||||
TestFile("openai_server/features/test_cache_report.py", 100),
|
||||
TestFile("openai_server/features/test_enable_thinking.py", 70),
|
||||
TestFile("openai_server/features/test_json_constrained.py", 98),
|
||||
TestFile("openai_server/features/test_json_mode.py", 90),
|
||||
@@ -103,7 +102,6 @@ suites = {
|
||||
TestFile("test_update_weights_from_disk.py", 114),
|
||||
TestFile("test_update_weights_from_tensor.py", 48),
|
||||
TestFile("test_utils_update_weights.py", 48),
|
||||
TestFile("test_vertex_endpoint.py", 31),
|
||||
TestFile("test_vision_chunked_prefill.py", 175),
|
||||
TestFile("test_vlm_input_format.py", 300),
|
||||
TestFile("test_vision_openai_server_a.py", 584),
|
||||
@@ -167,7 +165,6 @@ suites = {
|
||||
TestFile("models/lora/test_lora_tp.py", 116),
|
||||
TestFile("test_data_parallelism.py", 73),
|
||||
TestFile("test_dp_attention.py", 277),
|
||||
TestFile("test_mla_tp.py", 170),
|
||||
TestFile("test_patch_torch.py", 19),
|
||||
TestFile("test_update_weights_from_distributed.py", 103),
|
||||
TestFile("test_release_memory_occupation.py", 127),
|
||||
@@ -175,7 +172,6 @@ suites = {
|
||||
"per-commit-2-gpu-amd": [
|
||||
TestFile("models/lora/test_lora_tp.py", 116),
|
||||
TestFile("test_data_parallelism.py", 73),
|
||||
TestFile("test_mla_tp.py", 170),
|
||||
TestFile("test_patch_torch.py", 19),
|
||||
TestFile("test_update_weights_from_distributed.py", 103),
|
||||
],
|
||||
|
||||
Reference in New Issue
Block a user