refactor: bug fixes and refactor for vlm (#4661)

This commit is contained in:
Mick
2025-03-23 13:48:49 +08:00
committed by GitHub
parent ca75741e86
commit 11577cedb7
31 changed files with 770 additions and 735 deletions

View File

@@ -1,13 +1,14 @@
"""
Bench the sglang-hosted vLM with benchmark MMMU
Bench the sglang-hosted vLM with benchmark MMMU
Usage:
python benchmark/mmmu/bench_sglang.py --model-path Qwen/Qwen2-VL-7B-Instruct --chat-template qwen2-vl
Usage:
python benchmark/mmmu/bench_sglang.py --model-path Qwen/Qwen2-VL-7B-Instruct --chat-template qwen2-vl
The eval output will be logged
The eval output will be logged
"""
import argparse
import time
import openai
from data_utils import save_json
@@ -37,6 +38,7 @@ def eval_mmmu(args):
# had to use an openai server, since SglImage doesn't support image data
client = openai.Client(api_key="sk", base_url=f"http://127.0.0.1:{args.port}/v1")
start = time.time()
for i, sample in enumerate(tqdm(samples)):
prompt = sample["final_input_prompt"]
prefix = prompt.split("<")[0]
@@ -73,6 +75,8 @@ def eval_mmmu(args):
response = response.choices[0].message.content
process_result(response, sample, answer_dict, out_samples)
print(f"Benchmark time: {time.time() - start}")
args.output_path = f"./val_sglang.json"
save_json(args.output_path, out_samples)
eval_result(model_answer_path=args.output_path, answer_dict=answer_dict)