Fix --mem-fraction-static for nightly tests (#11076)

2025-09-29 12:57:41 -07:00
parent 4eeaff74a0
commit dda34c2f93
8 changed files with 24 additions and 22 deletions
--- a/test/srt/test_multi_instance_release_memory_occupation.py
+++ b/test/srt/test_multi_instance_release_memory_occupation.py
@@ -216,7 +216,7 @@ def _run_sglang_subprocess(
        del hf_model
        hf_model = None
        torch.cuda.empty_cache()
-        time.sleep(5)
+        time.sleep(3)
        torch.cuda.empty_cache()
        _curr_usage = get_gpu_memory_gb(rank)
        assert (
--- a/test/srt/test_nightly_text_models_gsm8k_eval.py
+++ b/test/srt/test_nightly_text_models_gsm8k_eval.py
@@ -63,10 +63,15 @@ class TestNightlyGsm8KEval(unittest.TestCase):
            for model in model_group:
                model_count += 1
                with self.subTest(model=model):
+                    other_args = ["--tp", "2"] if is_tp2 else []
+
+                    if model == "meta-llama/Llama-3.1-70B-Instruct":
+                        other_args.extend(["--mem-fraction-static", "0.9"])
+
                    process = popen_launch_server(
                        model=model,
+                        other_args=other_args,
                        base_url=self.base_url,
-                        other_args=["--tp", "2"] if is_tp2 else [],
                        timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
                    )