[GLM4.1V and GLM4.5V] Add vision transformer num_dummy_head support: max tp=4 -> max tp=8 (#9059)

2025-08-18 14:40:13 -07:00
parent 98b44e9e56
commit c2fbf60f39
9 changed files with 150 additions and 102 deletions
--- a/benchmark/mmmu/bench_sglang.py
+++ b/benchmark/mmmu/bench_sglang.py
@@ -187,9 +187,13 @@ async def eval_mmmu(args) -> None:
            print("Profiler stopped")

    print(f"Benchmark time: {time.perf_counter() - start}")
-    args.output_path = f"./val_sglang.json"
+    args.output_path = "./answer_sglang.json"
    save_json(args.output_path, out_samples)
-    eval_result(model_answer_path=args.output_path, answer_dict=answer_dict)
+    eval_result(
+        model_answer_path=args.output_path,
+        answer_dict=answer_dict,
+        eval_output_path="./val_sglang.json",
+    )


 def parse_args():