Fix mem fraction static for nightly tests (#11076)
This commit is contained in:
@@ -216,7 +216,7 @@ def _run_sglang_subprocess(
     del hf_model
     hf_model = None
     torch.cuda.empty_cache()
-    time.sleep(5)
+    time.sleep(3)
     torch.cuda.empty_cache()
     _curr_usage = get_gpu_memory_gb(rank)
     assert (
@@ -63,10 +63,15 @@ class TestNightlyGsm8KEval(unittest.TestCase):
 for model in model_group:
     model_count += 1
     with self.subTest(model=model):
+        other_args = ["--tp", "2"] if is_tp2 else []
+
+        if model == "meta-llama/Llama-3.1-70B-Instruct":
+            other_args.extend(["--mem-fraction-static", "0.9"])
+
         process = popen_launch_server(
             model=model,
+            other_args=other_args,
             base_url=self.base_url,
-            other_args=["--tp", "2"] if is_tp2 else [],
             timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
         )
 
Reference in New Issue
Block a user