refactor: bug fixes and refactor for vlm (#4661)
This commit is contained in:
@@ -1,13 +1,14 @@
|
||||
"""
|
||||
Benchmark the sglang-hosted VLM on the MMMU benchmark
|
||||
Benchmark the sglang-hosted VLM on the MMMU benchmark
|
||||
|
||||
Usage:
|
||||
python benchmark/mmmu/bench_sglang.py --model-path Qwen/Qwen2-VL-7B-Instruct --chat-template qwen2-vl
|
||||
Usage:
|
||||
python benchmark/mmmu/bench_sglang.py --model-path Qwen/Qwen2-VL-7B-Instruct --chat-template qwen2-vl
|
||||
|
||||
The eval output will be logged
|
||||
The eval output will be logged
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import time
|
||||
|
||||
import openai
|
||||
from data_utils import save_json
|
||||
@@ -37,6 +38,7 @@ def eval_mmmu(args):
|
||||
# Had to use an OpenAI-compatible server, since SglImage doesn't support image data
|
||||
client = openai.Client(api_key="sk", base_url=f"http://127.0.0.1:{args.port}/v1")
|
||||
|
||||
start = time.time()
|
||||
for i, sample in enumerate(tqdm(samples)):
|
||||
prompt = sample["final_input_prompt"]
|
||||
prefix = prompt.split("<")[0]
|
||||
@@ -73,6 +75,8 @@ def eval_mmmu(args):
|
||||
response = response.choices[0].message.content
|
||||
process_result(response, sample, answer_dict, out_samples)
|
||||
|
||||
print(f"Benchmark time: {time.time() - start}")
|
||||
|
||||
args.output_path = f"./val_sglang.json"
|
||||
save_json(args.output_path, out_samples)
|
||||
eval_result(model_answer_path=args.output_path, answer_dict=answer_dict)
|
||||
|
||||
Reference in New Issue
Block a user