refactor: bug fixes and refactor for vlm (#4661)

2025-03-23 13:48:49 +08:00
parent ca75741e86
commit 11577cedb7
31 changed files with 770 additions and 735 deletions
--- a/benchmark/mmmu/bench_sglang.py
+++ b/benchmark/mmmu/bench_sglang.py
@@ -1,13 +1,14 @@
 """
-    Bench the sglang-hosted vLM with benchmark MMMU
+Bench the sglang-hosted vLM with benchmark MMMU

-    Usage:
-        python benchmark/mmmu/bench_sglang.py --model-path Qwen/Qwen2-VL-7B-Instruct --chat-template qwen2-vl
+Usage:
+    python benchmark/mmmu/bench_sglang.py --model-path Qwen/Qwen2-VL-7B-Instruct --chat-template qwen2-vl

-    The eval output will be logged
+The eval output will be logged
 """

 import argparse
+import time

 import openai
 from data_utils import save_json
@@ -37,6 +38,7 @@ def eval_mmmu(args):
    # had to use an openai server, since SglImage doesn't support image data
    client = openai.Client(api_key="sk", base_url=f"http://127.0.0.1:{args.port}/v1")

+    start = time.time()
    for i, sample in enumerate(tqdm(samples)):
        prompt = sample["final_input_prompt"]
        prefix = prompt.split("<")[0]
@@ -73,6 +75,8 @@ def eval_mmmu(args):
        response = response.choices[0].message.content
        process_result(response, sample, answer_dict, out_samples)

+    print(f"Benchmark time: {time.time() - start}")
+
    args.output_path = f"./val_sglang.json"
    save_json(args.output_path, out_samples)
    eval_result(model_answer_path=args.output_path, answer_dict=answer_dict)