Move mem_fraction_static adjustment for multimodal models to server_args.py & Fix session control & Other cleanups (#7748)

This commit is contained in:
Lianmin Zheng
2025-07-04 16:33:33 -07:00
committed by GitHub
parent 975a5ec69c
commit 14229ccf8f
16 changed files with 339 additions and 137 deletions

View File

@@ -11,12 +11,14 @@ class TestPrepareServerArgs(CustomTestCase):
 server_args = prepare_server_args(
 [
 "--model-path",
-"model_path",
+"meta-llama/Meta-Llama-3.1-8B-Instruct",
 "--json-model-override-args",
 '{"rope_scaling": {"factor": 2.0, "rope_type": "linear"}}',
 ]
 )
-self.assertEqual(server_args.model_path, "model_path")
+self.assertEqual(
+server_args.model_path, "meta-llama/Meta-Llama-3.1-8B-Instruct"
+)
 self.assertEqual(
 json.loads(server_args.json_model_override_args),
 {"rope_scaling": {"factor": 2.0, "rope_type": "linear"}},