refactor: move image processors to separate files (#4229)

This commit is contained in:
Mick
2025-03-12 03:35:35 +08:00
committed by GitHub
parent 0f2a2e3c19
commit ff2ce0b86f
22 changed files with 1085 additions and 955 deletions

View File

@@ -11,11 +11,16 @@ import argparse
import random
import torch
from bench_sglang import EvalArgs, prepare_samples
from data_utils import save_json
from eval_utils import eval_result, get_sampling_params, parse_multi_choice_response
from eval_utils import (
EvalArgs,
eval_result,
get_sampling_params,
prepare_samples,
process_result,
)
from tqdm import tqdm
from transformers import AutoModelForImageTextToText, AutoProcessor
from transformers import AutoModelForImageTextToText, AutoProcessor, GenerationConfig
@torch.no_grad()
@@ -28,7 +33,6 @@ def eval_mmmu(args):
trust_remote_code=True,
)
model = model.eval().cuda()
model = torch.compile(model)
processor = AutoProcessor.from_pretrained(
args.model_path, torch_dtype="auto", device_map="auto"
@@ -38,6 +42,10 @@ def eval_mmmu(args):
out_samples = dict()
sampling_params = get_sampling_params(eval_args)
generation_config = GenerationConfig(
max_new_tokens=sampling_params["max_new_tokens"],
do_sample=False,
)
answer_dict = {}
for sample in tqdm(samples):
@@ -62,7 +70,6 @@ def eval_mmmu(args):
text = processor.apply_chat_template(
messages, tokenize=False, add_generation_prompt=True
)
inputs = processor(
text=[text],
images=[image],
@@ -70,13 +77,16 @@ def eval_mmmu(args):
return_tensors="pt",
).to(model.device)
generated_ids = model.generate(**inputs, **sampling_params)
generated_ids = model.generate(
**inputs, generation_config=generation_config
)
response = processor.decode(
generated_ids[0],
skip_special_tokens=True,
clean_up_tokenization_spaces=False,
)[len(text) :]
print(f"response: {response}")
else: # multiple images actually
if sample["question_type"] == "multiple-choice":
all_choices = sample["all_choices"]
@@ -85,24 +95,11 @@ def eval_mmmu(args):
else:
response = "INVALID GENERATION FOR MULTIPLE IMAGE INPUTS"
if sample["question_type"] == "multiple-choice":
pred_ans = parse_multi_choice_response(
response, sample["all_choices"], sample["index2ans"]
)
else: # open question
pred_ans = response
out_samples[sample["id"]] = pred_ans
torch.cuda.empty_cache()
# set ground truth answer
answer_dict[sample["id"]] = {
"question_type": sample["question_type"],
"ground_truth": sample["answer"],
}
process_result(response, sample, answer_dict, out_samples)
args.output_path = f"{args.model_path}_val_hf.json"
save_json(args.output_path, out_samples)
eval_result(output_path=args.output_path, answer_dict=answer_dict)
eval_result(model_answer_path=args.output_path, answer_dict=answer_dict)
if __name__ == "__main__":