chore: improve mmmu benchmark (#7000)

Signed-off-by: Xinyuan Tong <xinyuantong.cs@gmail.com>
Co-authored-by: Xinyuan Tong <xinyuantong.cs@gmail.com>
This commit is contained in:
Mick
2025-07-26 16:19:45 +08:00
committed by GitHub
parent e6312d271d
commit 4fa44d63c6
2 changed files with 23 additions and 13 deletions

View File

@@ -125,7 +125,6 @@ async def eval_mmmu(args) -> None:
client = openai.AsyncOpenAI(
api_key="sk", base_url=f"http://127.0.0.1:{args.port}/v1"
)
semaphore = asyncio.Semaphore(args.concurrency)
start = time.perf_counter()
base_url = f"http://127.0.0.1:{args.port}"
@@ -139,16 +138,26 @@ async def eval_mmmu(args) -> None:
samples = samples[: args.profile_number]
tasks = [
process_sample_with_semaphore(
semaphore, client, sample, sampling_params, lora_path
)
for sample in samples
]
if args.concurrency == 1:
# When concurrency == 1, await each sample one at a time so that samples are
# processed in a consistent order; this is mainly useful for profiling.
for sample in tqdm(samples):
_, response = await process_sample(
client, sample, sampling_params, lora_path
)
process_result(response, sample, answer_dict, out_samples)
else:
semaphore = asyncio.Semaphore(args.concurrency)
tasks = [
process_sample_with_semaphore(
semaphore, client, sample, sampling_params, lora_path
)
for sample in samples
]
for coro in tqdm(asyncio.as_completed(tasks), total=len(tasks)):
sample, response = await coro
process_result(response, sample, answer_dict, out_samples)
for coro in tqdm(asyncio.as_completed(tasks), total=len(tasks)):
sample, response = await coro
process_result(response, sample, answer_dict, out_samples)
if args.profile:
print("Stopping profiler...")