From cd8d4b9dfcefffe254c4e354e3a18c7a644c06bd Mon Sep 17 00:00:00 2001 From: Qiaolin Yu Date: Thu, 15 May 2025 13:09:55 -0400 Subject: [PATCH] Fix lora bench (#6302) --- benchmark/lora/lora_bench.py | 2 ++ python/sglang/srt/lora/lora_manager.py | 4 +--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmark/lora/lora_bench.py b/benchmark/lora/lora_bench.py index 1d9394261..199b3f3ab 100644 --- a/benchmark/lora/lora_bench.py +++ b/benchmark/lora/lora_bench.py @@ -170,6 +170,7 @@ async def benchmark( prompt_len=test_prompt_len, output_len=test_output_len, lora_name="dummy", # the lora_name argument will not be used + image_data=None, extra_request_body=extra_request_body, ) test_output = await request_func(request_func_input=test_input) @@ -194,6 +195,7 @@ async def benchmark( prompt_len=prompt_len, output_len=output_len, lora_name="dummy", + image_data=None, extra_request_body=extra_request_body, ) tasks.append( diff --git a/python/sglang/srt/lora/lora_manager.py b/python/sglang/srt/lora/lora_manager.py index 70e6ca838..68b2a3621 100644 --- a/python/sglang/srt/lora/lora_manager.py +++ b/python/sglang/srt/lora/lora_manager.py @@ -170,9 +170,7 @@ class LoRAManager: dim=0, out=self.cuda_graph_batch_info.seg_indptr[1 : bs + 1], ) - self.cuda_graph_batch_info.max_len = int( - torch.max(self.cuda_graph_batch_info.seg_lens[:bs]) - ) + self.cuda_graph_batch_info.max_len = 1 for i, lora_path in enumerate(forward_batch.lora_paths): self.cuda_graph_batch_info.weight_indices[i] = (