Fix CI test OOM issue. (#7799)
This commit is contained in:
@@ -38,6 +38,8 @@ PROMPTS = [
|
|||||||
"What are the main components of a computer?",
|
"What are the main components of a computer?",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
MEM_FRACTION_STATIC = 0.8
|
||||||
|
|
||||||
|
|
||||||
class OperationType(Enum):
|
class OperationType(Enum):
|
||||||
LOAD = "load"
|
LOAD = "load"
|
||||||
@@ -339,6 +341,7 @@ class LoRAUpdateEngineTestSession(LoRAUpdateTestSessionBase):
|
|||||||
lora_paths=self.lora_paths,
|
lora_paths=self.lora_paths,
|
||||||
lora_backend=self.lora_backend,
|
lora_backend=self.lora_backend,
|
||||||
torch_dtype=torch.float16,
|
torch_dtype=torch.float16,
|
||||||
|
mem_fraction_static=MEM_FRACTION_STATIC,
|
||||||
max_loras_per_batch=self.max_loras_per_batch,
|
max_loras_per_batch=self.max_loras_per_batch,
|
||||||
disable_cuda_graph=self.disable_cuda_graph,
|
disable_cuda_graph=self.disable_cuda_graph,
|
||||||
cuda_graph_max_bs=self.cuda_graph_max_bs,
|
cuda_graph_max_bs=self.cuda_graph_max_bs,
|
||||||
@@ -440,6 +443,8 @@ class LoRAUpdateServerTestSession(LoRAUpdateTestSessionBase):
|
|||||||
"42",
|
"42",
|
||||||
"--max-running-request",
|
"--max-running-request",
|
||||||
"1",
|
"1",
|
||||||
|
"--mem-fraction-static",
|
||||||
|
str(MEM_FRACTION_STATIC),
|
||||||
]
|
]
|
||||||
if self.disable_cuda_graph:
|
if self.disable_cuda_graph:
|
||||||
other_args.append("--disable-cuda-graph")
|
other_args.append("--disable-cuda-graph")
|
||||||
|
|||||||
Reference in New Issue
Block a user