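Fix CI test OOM (out-of-memory) issue by setting an explicit static memory fraction (MEM_FRACTION_STATIC = 0.8) for the LoRA update engine and server test sessions. (#7799)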

This commit is contained in:
Lifu Huang
2025-07-05 15:11:02 -07:00
committed by GitHub
parent 199d621845
commit 01f9873048

View File

@@ -38,6 +38,8 @@ PROMPTS = [
"What are the main components of a computer?", "What are the main components of a computer?",
] ]
MEM_FRACTION_STATIC = 0.8
class OperationType(Enum): class OperationType(Enum):
LOAD = "load" LOAD = "load"
@@ -339,6 +341,7 @@ class LoRAUpdateEngineTestSession(LoRAUpdateTestSessionBase):
lora_paths=self.lora_paths, lora_paths=self.lora_paths,
lora_backend=self.lora_backend, lora_backend=self.lora_backend,
torch_dtype=torch.float16, torch_dtype=torch.float16,
mem_fraction_static=MEM_FRACTION_STATIC,
max_loras_per_batch=self.max_loras_per_batch, max_loras_per_batch=self.max_loras_per_batch,
disable_cuda_graph=self.disable_cuda_graph, disable_cuda_graph=self.disable_cuda_graph,
cuda_graph_max_bs=self.cuda_graph_max_bs, cuda_graph_max_bs=self.cuda_graph_max_bs,
@@ -440,6 +443,8 @@ class LoRAUpdateServerTestSession(LoRAUpdateTestSessionBase):
"42", "42",
"--max-running-request", "--max-running-request",
"1", "1",
"--mem-fraction-static",
str(MEM_FRACTION_STATIC),
] ]
if self.disable_cuda_graph: if self.disable_cuda_graph:
other_args.append("--disable-cuda-graph") other_args.append("--disable-cuda-graph")