ci: improve nightly-ci (#11385)
This commit is contained in:
4
.github/workflows/nightly-test.yml
vendored
4
.github/workflows/nightly-test.yml
vendored
@@ -62,7 +62,7 @@ jobs:
|
|||||||
|
|
||||||
nightly-test-eval-vlms:
|
nightly-test-eval-vlms:
|
||||||
if: github.repository == 'sgl-project/sglang'
|
if: github.repository == 'sgl-project/sglang'
|
||||||
runs-on: 1-gpu-runner
|
runs-on: 2-gpu-runner
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
@@ -79,7 +79,7 @@ jobs:
|
|||||||
|
|
||||||
nightly-test-perf-vlms:
|
nightly-test-perf-vlms:
|
||||||
if: github.repository == 'sgl-project/sglang'
|
if: github.repository == 'sgl-project/sglang'
|
||||||
runs-on: 1-gpu-runner
|
runs-on: 2-gpu-runner
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|||||||
@@ -25,8 +25,10 @@ from typing import List, Optional, Tuple
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import requests
|
import requests
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
from transformers import AutoProcessor, PreTrainedTokenizer
|
||||||
|
|
||||||
from sglang.bench_serving import (
|
from sglang.bench_serving import (
|
||||||
|
get_processor,
|
||||||
get_tokenizer,
|
get_tokenizer,
|
||||||
sample_mmmu_requests,
|
sample_mmmu_requests,
|
||||||
sample_random_requests,
|
sample_random_requests,
|
||||||
@@ -104,8 +106,14 @@ Note: To view the traces through perfetto-ui, please:
|
|||||||
if self.profile_links.extend or self.profile_links.decode:
|
if self.profile_links.extend or self.profile_links.decode:
|
||||||
# Create a combined link or use the first available one
|
# Create a combined link or use the first available one
|
||||||
trace_files = [self.profile_links.extend, self.profile_links.decode]
|
trace_files = [self.profile_links.extend, self.profile_links.decode]
|
||||||
|
if any(trace_file is None for trace_file in trace_files):
|
||||||
|
logger.error("Some trace files are None", f"{trace_files=}")
|
||||||
trace_files_relay_links = [
|
trace_files_relay_links = [
|
||||||
f"[trace]({get_perfetto_relay_link_from_trace_file(trace_file)})"
|
(
|
||||||
|
f"[trace]({get_perfetto_relay_link_from_trace_file(trace_file)})"
|
||||||
|
if trace_file
|
||||||
|
else "N/A"
|
||||||
|
)
|
||||||
for trace_file in trace_files
|
for trace_file in trace_files
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -114,30 +122,31 @@ Note: To view the traces through perfetto-ui, please:
|
|||||||
# Build the row
|
# Build the row
|
||||||
return f"| {self.batch_size} | {self.input_len} | {self.latency:.2f} | {self.input_throughput:.2f} | {self.output_throughput:.2f} | {accept_length} | {itl:.2f} | {input_cost:.2f} | {output_cost:.2f} | {profile_link} |\n"
|
return f"| {self.batch_size} | {self.input_len} | {self.latency:.2f} | {self.input_throughput:.2f} | {self.output_throughput:.2f} | {accept_length} | {itl:.2f} | {input_cost:.2f} | {output_cost:.2f} | {profile_link} |\n"
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def generate_markdown_report(
|
|
||||||
cls, trace_dir, results: List["BenchmarkResult"]
|
|
||||||
) -> str:
|
|
||||||
"""Generate a markdown report from a list of BenchmarkResult object from a single run."""
|
|
||||||
import os
|
|
||||||
|
|
||||||
summary = f"### {results[0].model_path}\n"
|
def generate_markdown_report(trace_dir, results: List["BenchmarkResult"]) -> str:
|
||||||
|
"""Generate a markdown report from a list of BenchmarkResult object from a single run."""
|
||||||
|
import os
|
||||||
|
|
||||||
# summary += (
|
summary = f"### {results[0].model_path}\n"
|
||||||
# f"Input lens: {result.input_len}. Output lens: {result.output_len}.\n"
|
|
||||||
# )
|
|
||||||
summary += "| batch size | input len | latency (s) | input throughput (tok/s) | output throughput (tok/s) | acc length | ITL (ms) | input cost ($/1M) | output cost ($/1M) | profile (extend) | profile (decode)|\n"
|
|
||||||
summary += "| ---------- | --------- | ----------- | ------------------------- | ------------------------- | ---------- | -------- | ----------------- | ------------------ | --------------- | -------------- |\n"
|
|
||||||
|
|
||||||
# all results should share the same isl & osl
|
# summary += (
|
||||||
for result in results:
|
# f"Input lens: {result.input_len}. Output lens: {result.output_len}.\n"
|
||||||
base_url = os.getenv("TRACE_BASE_URL", "").rstrip("/")
|
# )
|
||||||
relay_base = os.getenv("PERFETTO_RELAY_URL", "").rstrip("/")
|
summary += "| batch size | input len | latency (s) | input throughput (tok/s) | output throughput (tok/s) | acc length | ITL (ms) | input cost ($/1M) | output cost ($/1M) | profile (extend) | profile (decode)|\n"
|
||||||
relay_base = "https://docs.sglang.ai/ci-data/pages/perfetto_relay.html"
|
summary += "| ---------- | --------- | ----------- | ------------------------- | ------------------------- | ---------- | -------- | ----------------- | ------------------ | --------------- | -------------- |\n"
|
||||||
# base_url = "https://github.com/sgl-project/ci-data/traces"
|
|
||||||
summary += result.to_markdown_row(trace_dir, base_url, relay_base)
|
|
||||||
|
|
||||||
return summary
|
# all results should share the same isl & osl
|
||||||
|
for result in results:
|
||||||
|
base_url = os.getenv(
|
||||||
|
"TRACE_BASE_URL", "https://github.com/sgl-project/ci-data/traces"
|
||||||
|
).rstrip("/")
|
||||||
|
relay_base = os.getenv(
|
||||||
|
"PERFETTO_RELAY_URL",
|
||||||
|
"https://docs.sglang.ai/ci-data/pages/perfetto_relay.html",
|
||||||
|
).rstrip("/")
|
||||||
|
summary += result.to_markdown_row(trace_dir, base_url, relay_base)
|
||||||
|
|
||||||
|
return summary
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass
|
@dataclasses.dataclass
|
||||||
@@ -288,7 +297,7 @@ def run_one_case(
|
|||||||
input_len_step_percentage: float,
|
input_len_step_percentage: float,
|
||||||
run_name: str,
|
run_name: str,
|
||||||
result_filename: str,
|
result_filename: str,
|
||||||
tokenizer,
|
tokenizer: PreTrainedTokenizer | AutoProcessor,
|
||||||
dataset_name="",
|
dataset_name="",
|
||||||
profile: bool = False,
|
profile: bool = False,
|
||||||
profile_steps: int = 3,
|
profile_steps: int = 3,
|
||||||
@@ -302,9 +311,8 @@ def run_one_case(
|
|||||||
if dataset_name == "mmmu":
|
if dataset_name == "mmmu":
|
||||||
input_requests = sample_mmmu_requests(
|
input_requests = sample_mmmu_requests(
|
||||||
num_requests=batch_size,
|
num_requests=batch_size,
|
||||||
tokenizer=tokenizer,
|
processor=tokenizer,
|
||||||
fixed_output_len=output_len,
|
fixed_output_len=output_len,
|
||||||
apply_chat_template=True,
|
|
||||||
random_sample=False,
|
random_sample=False,
|
||||||
)
|
)
|
||||||
elif dataset_name == "random":
|
elif dataset_name == "random":
|
||||||
@@ -364,6 +372,8 @@ def run_one_case(
|
|||||||
if dataset_name == "mmmu":
|
if dataset_name == "mmmu":
|
||||||
# vlm
|
# vlm
|
||||||
input_ids = []
|
input_ids = []
|
||||||
|
# for vlms, tokenizer is an instance of AutoProcessor
|
||||||
|
tokenizer = tokenizer.tokenizer
|
||||||
for input_req in input_requests:
|
for input_req in input_requests:
|
||||||
input_ids += [tokenizer.encode(input_req.prompt)]
|
input_ids += [tokenizer.encode(input_req.prompt)]
|
||||||
payload["image_data"] = [req.image_data for req in input_requests]
|
payload["image_data"] = [req.image_data for req in input_requests]
|
||||||
@@ -609,7 +619,12 @@ def run_benchmark(server_args: ServerArgs, bench_args: BenchArgs):
|
|||||||
tokenizer_path = server_info["tokenizer_path"]
|
tokenizer_path = server_info["tokenizer_path"]
|
||||||
elif "prefill" in server_info:
|
elif "prefill" in server_info:
|
||||||
tokenizer_path = server_info["prefill"][0]["tokenizer_path"]
|
tokenizer_path = server_info["prefill"][0]["tokenizer_path"]
|
||||||
tokenizer = get_tokenizer(tokenizer_path)
|
|
||||||
|
if bench_args.dataset_name == "mmmu":
|
||||||
|
# mmmu implies this is a MLLM
|
||||||
|
tokenizer = get_processor(tokenizer_path)
|
||||||
|
else:
|
||||||
|
tokenizer = get_tokenizer(tokenizer_path)
|
||||||
|
|
||||||
# warmup
|
# warmup
|
||||||
if not bench_args.skip_warmup:
|
if not bench_args.skip_warmup:
|
||||||
|
|||||||
@@ -12,7 +12,6 @@ python3 -m sglang.bench_serving --backend sglang --dataset-name random --num-pro
|
|||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import asyncio
|
import asyncio
|
||||||
import base64
|
|
||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
@@ -671,7 +670,7 @@ def get_processor(
|
|||||||
if pretrained_model_name_or_path.endswith(
|
if pretrained_model_name_or_path.endswith(
|
||||||
".json"
|
".json"
|
||||||
) or pretrained_model_name_or_path.endswith(".model"):
|
) or pretrained_model_name_or_path.endswith(".model"):
|
||||||
from sglang.srt.hf_transformers_utils import get_processor
|
from sglang.srt.utils.hf_transformers_utils import get_processor
|
||||||
|
|
||||||
return get_processor(pretrained_model_name_or_path)
|
return get_processor(pretrained_model_name_or_path)
|
||||||
|
|
||||||
@@ -935,7 +934,7 @@ async def get_mooncake_request_over_time(
|
|||||||
for i in range(num_rounds):
|
for i in range(num_rounds):
|
||||||
# Add user query for the current round
|
# Add user query for the current round
|
||||||
chat_history.append(
|
chat_history.append(
|
||||||
{"role": "user", "content": f"Round {i+1}: {user_query_base}"}
|
{"role": "user", "content": f"Round {i + 1}: {user_query_base}"}
|
||||||
)
|
)
|
||||||
|
|
||||||
# Form the full prompt from history
|
# Form the full prompt from history
|
||||||
@@ -964,7 +963,7 @@ async def get_mooncake_request_over_time(
|
|||||||
|
|
||||||
def sample_mmmu_requests(
|
def sample_mmmu_requests(
|
||||||
num_requests: int,
|
num_requests: int,
|
||||||
processor: AutoProcessor,
|
processor: AutoProcessor | AutoTokenizer,
|
||||||
fixed_output_len: Optional[int] = None,
|
fixed_output_len: Optional[int] = None,
|
||||||
random_sample: bool = True,
|
random_sample: bool = True,
|
||||||
) -> List[DatasetRow]:
|
) -> List[DatasetRow]:
|
||||||
@@ -973,9 +972,7 @@ def sample_mmmu_requests(
|
|||||||
|
|
||||||
Args:
|
Args:
|
||||||
num_requests: Number of requests to sample.
|
num_requests: Number of requests to sample.
|
||||||
tokenizer: Tokenizer to use for token counting.
|
|
||||||
fixed_output_len: If provided, use this fixed output length for all requests.
|
fixed_output_len: If provided, use this fixed output length for all requests.
|
||||||
apply_chat_template: Whether to apply the chat template to the prompt.
|
|
||||||
random_sample: Whether to randomly sample or take the first N.
|
random_sample: Whether to randomly sample or take the first N.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
@@ -1282,11 +1279,11 @@ def parse_image_resolution(image_resolution: str) -> Tuple[int, int]:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def create_mm_data_row(text_prompt, images, images_base64, output_len, processor):
|
def create_mm_data_row(text_prompt, images: list, images_base64, output_len, processor):
|
||||||
try:
|
try:
|
||||||
content_items = [
|
content_items = [
|
||||||
{"type": "image_url", "image_url": {"url": img_url}}
|
{"type": "image", "image": {"url": image_base64}}
|
||||||
for img_url in images_base64
|
for image_base64 in images_base64
|
||||||
]
|
]
|
||||||
content_items.append({"type": "text", "text": text_prompt})
|
content_items.append({"type": "text", "text": text_prompt})
|
||||||
prompt_str = processor.apply_chat_template(
|
prompt_str = processor.apply_chat_template(
|
||||||
@@ -1294,7 +1291,9 @@ def create_mm_data_row(text_prompt, images, images_base64, output_len, processor
|
|||||||
add_generation_prompt=True,
|
add_generation_prompt=True,
|
||||||
tokenize=False,
|
tokenize=False,
|
||||||
)
|
)
|
||||||
except Exception:
|
except Exception as e:
|
||||||
|
# Note (Xinyuan): This is a workaround for an issue where some tokenizers do not support content as a list. (e.g. InternVL)
|
||||||
|
print(f"Error applying chat template: {e}, fallback to <image> tag")
|
||||||
# Some tokenizers do not support list content; fall back to a placeholder in the text
|
# Some tokenizers do not support list content; fall back to a placeholder in the text
|
||||||
prompt_str = f"<image>{text_prompt}"
|
prompt_str = f"<image>{text_prompt}"
|
||||||
|
|
||||||
@@ -1425,7 +1424,7 @@ def sample_image_requests(
|
|||||||
print(f"#Input tokens: {np.sum([x.prompt_len for x in dataset])}")
|
print(f"#Input tokens: {np.sum([x.prompt_len for x in dataset])}")
|
||||||
print(f"#Output tokens: {np.sum([x.output_len for x in dataset])}")
|
print(f"#Output tokens: {np.sum([x.output_len for x in dataset])}")
|
||||||
print(
|
print(
|
||||||
f"\nCreated {len(dataset)} {image_content} {image_format} images with average {total_image_bytes//num_requests} bytes per request"
|
f"\nCreated {len(dataset)} {image_content} {image_format} images with average {total_image_bytes // num_requests} bytes per request"
|
||||||
)
|
)
|
||||||
return dataset
|
return dataset
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ import subprocess
|
|||||||
import time
|
import time
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from sglang.bench_one_batch_server import BenchmarkResult
|
from sglang.bench_one_batch_server import BenchmarkResult, generate_markdown_report
|
||||||
from sglang.srt.utils import kill_process_tree
|
from sglang.srt.utils import kill_process_tree
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
@@ -41,7 +41,7 @@ class TestNightlyTextModelsPerformance(unittest.TestCase):
|
|||||||
|
|
||||||
def test_bench_one_batch(self):
|
def test_bench_one_batch(self):
|
||||||
all_benchmark_results = []
|
all_benchmark_results = []
|
||||||
|
all_model_succeed = True
|
||||||
for model_setup in self.models:
|
for model_setup in self.models:
|
||||||
benchmark_results = []
|
benchmark_results = []
|
||||||
with self.subTest(model=model_setup.model_path):
|
with self.subTest(model=model_setup.model_path):
|
||||||
@@ -113,19 +113,21 @@ class TestNightlyTextModelsPerformance(unittest.TestCase):
|
|||||||
# Clean up JSON file
|
# Clean up JSON file
|
||||||
os.remove(json_output_file)
|
os.remove(json_output_file)
|
||||||
else:
|
else:
|
||||||
|
all_model_succeed = False
|
||||||
print(f"Warning: JSON output file {json_output_file} not found")
|
print(f"Warning: JSON output file {json_output_file} not found")
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
kill_process_tree(process.pid)
|
kill_process_tree(process.pid)
|
||||||
|
|
||||||
report_part = BenchmarkResult.generate_markdown_report(
|
report_part = generate_markdown_report(PROFILE_DIR, benchmark_results)
|
||||||
PROFILE_DIR, benchmark_results
|
|
||||||
)
|
|
||||||
self.full_report += report_part + "\n"
|
self.full_report += report_part + "\n"
|
||||||
|
|
||||||
if is_in_ci():
|
if is_in_ci():
|
||||||
write_github_step_summary(self.full_report)
|
write_github_step_summary(self.full_report)
|
||||||
|
|
||||||
|
if not all_model_succeed:
|
||||||
|
raise AssertionError("Some models failed the perf tests.")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
import json
|
import json
|
||||||
import unittest
|
import unittest
|
||||||
import warnings
|
import warnings
|
||||||
from functools import partial
|
|
||||||
from types import SimpleNamespace
|
from types import SimpleNamespace
|
||||||
|
|
||||||
from sglang.srt.utils import kill_process_tree
|
from sglang.srt.utils import kill_process_tree
|
||||||
@@ -26,16 +25,19 @@ MODEL_THRESHOLDS = {
|
|||||||
"Efficient-Large-Model/NVILA-Lite-2B-hf-0626"
|
"Efficient-Large-Model/NVILA-Lite-2B-hf-0626"
|
||||||
): ModelEvalMetrics(0.305, 23.8),
|
): ModelEvalMetrics(0.305, 23.8),
|
||||||
ModelLaunchSettings("google/gemma-3-4b-it"): ModelEvalMetrics(0.360, 10.9),
|
ModelLaunchSettings("google/gemma-3-4b-it"): ModelEvalMetrics(0.360, 10.9),
|
||||||
ModelLaunchSettings("google/gemma-3n-E4B-it"): ModelEvalMetrics(0.360, 15.3),
|
ModelLaunchSettings("google/gemma-3n-E4B-it"): ModelEvalMetrics(0.360, 17.7),
|
||||||
ModelLaunchSettings("mistral-community/pixtral-12b"): ModelEvalMetrics(0.360, 16.6),
|
ModelLaunchSettings("mistral-community/pixtral-12b"): ModelEvalMetrics(0.360, 16.6),
|
||||||
ModelLaunchSettings("moonshotai/Kimi-VL-A3B-Instruct"): ModelEvalMetrics(
|
ModelLaunchSettings("moonshotai/Kimi-VL-A3B-Instruct"): ModelEvalMetrics(
|
||||||
0.330, 22.3
|
0.330, 22.3
|
||||||
),
|
),
|
||||||
ModelLaunchSettings("openbmb/MiniCPM-o-2_6"): ModelEvalMetrics(0.330, 29.3),
|
ModelLaunchSettings("openbmb/MiniCPM-o-2_6"): ModelEvalMetrics(0.330, 29.3),
|
||||||
ModelLaunchSettings("openbmb/MiniCPM-v-2_6"): ModelEvalMetrics(0.270, 24.5),
|
ModelLaunchSettings("openbmb/MiniCPM-v-2_6"): ModelEvalMetrics(0.259, 36.3),
|
||||||
ModelLaunchSettings("OpenGVLab/InternVL2_5-2B"): ModelEvalMetrics(0.300, 14.0),
|
ModelLaunchSettings("OpenGVLab/InternVL2_5-2B"): ModelEvalMetrics(0.300, 17.0),
|
||||||
ModelLaunchSettings("Qwen/Qwen2-VL-7B-Instruct"): ModelEvalMetrics(0.310, 83.3),
|
ModelLaunchSettings("Qwen/Qwen2-VL-7B-Instruct"): ModelEvalMetrics(0.310, 83.3),
|
||||||
ModelLaunchSettings("Qwen/Qwen2.5-VL-7B-Instruct"): ModelEvalMetrics(0.340, 31.9),
|
ModelLaunchSettings("Qwen/Qwen2.5-VL-7B-Instruct"): ModelEvalMetrics(0.340, 31.9),
|
||||||
|
ModelLaunchSettings(
|
||||||
|
"Qwen/Qwen3-VL-30B-A3B-Instruct", extra_args=["--tp=2"]
|
||||||
|
): ModelEvalMetrics(0.29, 29.1),
|
||||||
ModelLaunchSettings(
|
ModelLaunchSettings(
|
||||||
"unsloth/Mistral-Small-3.1-24B-Instruct-2503"
|
"unsloth/Mistral-Small-3.1-24B-Instruct-2503"
|
||||||
): ModelEvalMetrics(0.310, 16.7),
|
): ModelEvalMetrics(0.310, 16.7),
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ import subprocess
|
|||||||
import unittest
|
import unittest
|
||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
from sglang.bench_one_batch_server import BenchmarkResult
|
from sglang.bench_one_batch_server import BenchmarkResult, generate_markdown_report
|
||||||
from sglang.srt.utils import kill_process_tree
|
from sglang.srt.utils import kill_process_tree
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
@@ -27,6 +27,7 @@ MODEL_DEFAULTS = [
|
|||||||
ModelLaunchSettings(
|
ModelLaunchSettings(
|
||||||
"google/gemma-3-27b-it",
|
"google/gemma-3-27b-it",
|
||||||
),
|
),
|
||||||
|
ModelLaunchSettings("Qwen/Qwen3-VL-30B-A3B-Instruct", extra_args=["--tp=2"]),
|
||||||
# "OpenGVLab/InternVL2_5-2B",
|
# "OpenGVLab/InternVL2_5-2B",
|
||||||
# buggy in official transformers impl
|
# buggy in official transformers impl
|
||||||
# "openbmb/MiniCPM-V-2_6",
|
# "openbmb/MiniCPM-V-2_6",
|
||||||
@@ -45,9 +46,7 @@ class TestNightlyVLMModelsPerformance(unittest.TestCase):
|
|||||||
cls.models = []
|
cls.models = []
|
||||||
model_paths = parse_models(nightly_vlm_models_str)
|
model_paths = parse_models(nightly_vlm_models_str)
|
||||||
for model_path in model_paths:
|
for model_path in model_paths:
|
||||||
cls.models.append(
|
cls.models.append(ModelLaunchSettings(model_path))
|
||||||
ModelLaunchSettings(model_path, extra_args=VLM_EXTRA_ARGS)
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
cls.models = MODEL_DEFAULTS
|
cls.models = MODEL_DEFAULTS
|
||||||
|
|
||||||
@@ -60,6 +59,7 @@ class TestNightlyVLMModelsPerformance(unittest.TestCase):
|
|||||||
|
|
||||||
def test_bench_one_batch(self):
|
def test_bench_one_batch(self):
|
||||||
all_benchmark_results = []
|
all_benchmark_results = []
|
||||||
|
all_model_succeed = True
|
||||||
|
|
||||||
for model_setup in self.models:
|
for model_setup in self.models:
|
||||||
benchmark_results = []
|
benchmark_results = []
|
||||||
@@ -112,7 +112,6 @@ class TestNightlyVLMModelsPerformance(unittest.TestCase):
|
|||||||
f"Error running benchmark for {model_setup.model_path} with batch size:"
|
f"Error running benchmark for {model_setup.model_path} with batch size:"
|
||||||
)
|
)
|
||||||
print(result.stderr)
|
print(result.stderr)
|
||||||
# Continue to next batch size even if one fails
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
print(f"Output for {model_setup.model_path} with batch size:")
|
print(f"Output for {model_setup.model_path} with batch size:")
|
||||||
@@ -136,19 +135,24 @@ class TestNightlyVLMModelsPerformance(unittest.TestCase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
all_model_succeed = False
|
||||||
print(f"Warning: JSON output file {json_output_file} not found")
|
print(f"Warning: JSON output file {json_output_file} not found")
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
kill_process_tree(process.pid)
|
kill_process_tree(process.pid)
|
||||||
|
|
||||||
report_part = BenchmarkResult.generate_markdown_report(
|
report_part = generate_markdown_report(
|
||||||
PROFILE_DIR, benchmark_results
|
PROFILE_DIR,
|
||||||
|
benchmark_results,
|
||||||
)
|
)
|
||||||
self.full_report += report_part + "\n"
|
self.full_report += report_part + "\n"
|
||||||
|
|
||||||
if is_in_ci():
|
if is_in_ci():
|
||||||
write_github_step_summary(self.full_report)
|
write_github_step_summary(self.full_report)
|
||||||
|
|
||||||
|
if not all_model_succeed:
|
||||||
|
raise AssertionError("Some models failed the perf tests.")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
Reference in New Issue
Block a user