Support glm4.1v and glm4.5v (#8798)
Signed-off-by: Xinyuan Tong <justinning0323@outlook.com> Signed-off-by: Xinyuan Tong <xinyuantong.cs@gmail.com> Co-authored-by: Xinyuan Tong <justinning0323@outlook.com> Co-authored-by: Xinyuan Tong <115166877+JustinTong0323@users.noreply.github.com> Co-authored-by: Xinyuan Tong <xinyuantong.cs@gmail.com> Co-authored-by: zRzRzRzRzRzRzR <2448370773@qq.com> Co-authored-by: Minglei Zhu <mingleizhu1122@gmail.com> Co-authored-by: Chang Su <csu272@usc.edu>
This commit is contained in:
@@ -27,6 +27,18 @@ python -m sglang.launch_server --model-path microsoft/Phi-4-multimodal-instruct
|
||||
python benchmark/mmmu/bench_sglang.py --concurrency 8 --lora-path vision
|
||||
```
|
||||
|
||||
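You can use `--response-answer-regex` to specify how to extract the answer from the response string: the regex's first capture group is taken as the answer, and the full response is used when the regex does not match. E.g.,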
|
||||
```
|
||||
python3 -m sglang.launch_server --model-path zai-org/GLM-4.1V-9B-Thinking --reasoning-parser glm45
|
||||
|
||||
python3 bench_sglang.py --response-answer-regex "<\|begin_of_box\|>(.*)<\|end_of_box\|>" --concurrency 64
|
||||
```
|
||||
|
||||
You can use `--extra-request-body` to specify additional OpenAI request parameters. E.g.,
|
||||
```
|
||||
python3 bench_sglang.py --extra-request-body '{"max_new_tokens": 128, "temperature": 0.01}'
|
||||
```
|
||||
|
||||
### Evaluate hf
|
||||
|
||||
```
|
||||
|
||||
@@ -11,6 +11,7 @@ The eval output will be logged
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
@@ -145,7 +146,17 @@ async def eval_mmmu(args) -> None:
|
||||
_, response = await process_sample(
|
||||
client, sample, sampling_params, lora_path
|
||||
)
|
||||
process_result(response, sample, answer_dict, out_samples)
|
||||
answer = (
|
||||
re.search(args.response_answer_regex, response)
|
||||
if response is not None
|
||||
else None
|
||||
)
|
||||
process_result(
|
||||
answer.group(1) if answer else response,
|
||||
sample,
|
||||
answer_dict,
|
||||
out_samples,
|
||||
)
|
||||
else:
|
||||
semaphore = asyncio.Semaphore(args.concurrency)
|
||||
tasks = [
|
||||
@@ -157,7 +168,17 @@ async def eval_mmmu(args) -> None:
|
||||
|
||||
for coro in tqdm(asyncio.as_completed(tasks), total=len(tasks)):
|
||||
sample, response = await coro
|
||||
process_result(response, sample, answer_dict, out_samples)
|
||||
answer = (
|
||||
re.search(args.response_answer_regex, response)
|
||||
if response is not None
|
||||
else None
|
||||
)
|
||||
process_result(
|
||||
answer.group(1) if answer else response,
|
||||
sample,
|
||||
answer_dict,
|
||||
out_samples,
|
||||
)
|
||||
|
||||
if args.profile:
|
||||
print("Stopping profiler...")
|
||||
|
||||
@@ -35,6 +35,7 @@ class EvalArgs:
|
||||
profile: bool = False
|
||||
profile_number: int = 5
|
||||
concurrency: int = 1
|
||||
response_answer_regex: str = "(.*)"
|
||||
lora_path: Optional[str] = None
|
||||
|
||||
@staticmethod
|
||||
@@ -92,6 +93,12 @@ class EvalArgs:
|
||||
default=EvalArgs.concurrency,
|
||||
help="Number of concurrent requests to make during evaluation. Default is 1, which means no concurrency.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--response-answer-regex",
|
||||
type=str,
|
||||
default=EvalArgs.response_answer_regex,
|
||||
help="Specific regex to capture the answer from the response, string",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--lora-path",
|
||||
type=str,
|
||||
|
||||
Reference in New Issue
Block a user