ci: improve nightly-ci (#11385)
This commit is contained in:
@@ -3,7 +3,7 @@ import subprocess
|
||||
import time
|
||||
import unittest
|
||||
|
||||
from sglang.bench_one_batch_server import BenchmarkResult
|
||||
from sglang.bench_one_batch_server import BenchmarkResult, generate_markdown_report
|
||||
from sglang.srt.utils import kill_process_tree
|
||||
from sglang.test.test_utils import (
|
||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
@@ -41,7 +41,7 @@ class TestNightlyTextModelsPerformance(unittest.TestCase):
|
||||
|
||||
def test_bench_one_batch(self):
|
||||
all_benchmark_results = []
|
||||
|
||||
all_model_succeed = True
|
||||
for model_setup in self.models:
|
||||
benchmark_results = []
|
||||
with self.subTest(model=model_setup.model_path):
|
||||
@@ -113,19 +113,21 @@ class TestNightlyTextModelsPerformance(unittest.TestCase):
|
||||
# Clean up JSON file
|
||||
os.remove(json_output_file)
|
||||
else:
|
||||
all_model_succeed = False
|
||||
print(f"Warning: JSON output file {json_output_file} not found")
|
||||
|
||||
finally:
|
||||
kill_process_tree(process.pid)
|
||||
|
||||
report_part = BenchmarkResult.generate_markdown_report(
|
||||
PROFILE_DIR, benchmark_results
|
||||
)
|
||||
report_part = generate_markdown_report(PROFILE_DIR, benchmark_results)
|
||||
self.full_report += report_part + "\n"
|
||||
|
||||
if is_in_ci():
|
||||
write_github_step_summary(self.full_report)
|
||||
|
||||
if not all_model_succeed:
|
||||
raise AssertionError("Some models failed the perf tests.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import json
|
||||
import unittest
|
||||
import warnings
|
||||
from functools import partial
|
||||
from types import SimpleNamespace
|
||||
|
||||
from sglang.srt.utils import kill_process_tree
|
||||
@@ -26,16 +25,19 @@ MODEL_THRESHOLDS = {
|
||||
"Efficient-Large-Model/NVILA-Lite-2B-hf-0626"
|
||||
): ModelEvalMetrics(0.305, 23.8),
|
||||
ModelLaunchSettings("google/gemma-3-4b-it"): ModelEvalMetrics(0.360, 10.9),
|
||||
ModelLaunchSettings("google/gemma-3n-E4B-it"): ModelEvalMetrics(0.360, 15.3),
|
||||
ModelLaunchSettings("google/gemma-3n-E4B-it"): ModelEvalMetrics(0.360, 17.7),
|
||||
ModelLaunchSettings("mistral-community/pixtral-12b"): ModelEvalMetrics(0.360, 16.6),
|
||||
ModelLaunchSettings("moonshotai/Kimi-VL-A3B-Instruct"): ModelEvalMetrics(
|
||||
0.330, 22.3
|
||||
),
|
||||
ModelLaunchSettings("openbmb/MiniCPM-o-2_6"): ModelEvalMetrics(0.330, 29.3),
|
||||
ModelLaunchSettings("openbmb/MiniCPM-v-2_6"): ModelEvalMetrics(0.270, 24.5),
|
||||
ModelLaunchSettings("OpenGVLab/InternVL2_5-2B"): ModelEvalMetrics(0.300, 14.0),
|
||||
ModelLaunchSettings("openbmb/MiniCPM-v-2_6"): ModelEvalMetrics(0.259, 36.3),
|
||||
ModelLaunchSettings("OpenGVLab/InternVL2_5-2B"): ModelEvalMetrics(0.300, 17.0),
|
||||
ModelLaunchSettings("Qwen/Qwen2-VL-7B-Instruct"): ModelEvalMetrics(0.310, 83.3),
|
||||
ModelLaunchSettings("Qwen/Qwen2.5-VL-7B-Instruct"): ModelEvalMetrics(0.340, 31.9),
|
||||
ModelLaunchSettings(
|
||||
"Qwen/Qwen3-VL-30B-A3B-Instruct", extra_args=["--tp=2"]
|
||||
): ModelEvalMetrics(0.29, 29.1),
|
||||
ModelLaunchSettings(
|
||||
"unsloth/Mistral-Small-3.1-24B-Instruct-2503"
|
||||
): ModelEvalMetrics(0.310, 16.7),
|
||||
|
||||
@@ -3,7 +3,7 @@ import subprocess
|
||||
import unittest
|
||||
import warnings
|
||||
|
||||
from sglang.bench_one_batch_server import BenchmarkResult
|
||||
from sglang.bench_one_batch_server import BenchmarkResult, generate_markdown_report
|
||||
from sglang.srt.utils import kill_process_tree
|
||||
from sglang.test.test_utils import (
|
||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
@@ -27,6 +27,7 @@ MODEL_DEFAULTS = [
|
||||
ModelLaunchSettings(
|
||||
"google/gemma-3-27b-it",
|
||||
),
|
||||
ModelLaunchSettings("Qwen/Qwen3-VL-30B-A3B-Instruct", extra_args=["--tp=2"]),
|
||||
# "OpenGVLab/InternVL2_5-2B",
|
||||
# buggy in official transformers impl
|
||||
# "openbmb/MiniCPM-V-2_6",
|
||||
@@ -45,9 +46,7 @@ class TestNightlyVLMModelsPerformance(unittest.TestCase):
|
||||
cls.models = []
|
||||
model_paths = parse_models(nightly_vlm_models_str)
|
||||
for model_path in model_paths:
|
||||
cls.models.append(
|
||||
ModelLaunchSettings(model_path, extra_args=VLM_EXTRA_ARGS)
|
||||
)
|
||||
cls.models.append(ModelLaunchSettings(model_path))
|
||||
else:
|
||||
cls.models = MODEL_DEFAULTS
|
||||
|
||||
@@ -60,6 +59,7 @@ class TestNightlyVLMModelsPerformance(unittest.TestCase):
|
||||
|
||||
def test_bench_one_batch(self):
|
||||
all_benchmark_results = []
|
||||
all_model_succeed = True
|
||||
|
||||
for model_setup in self.models:
|
||||
benchmark_results = []
|
||||
@@ -112,7 +112,6 @@ class TestNightlyVLMModelsPerformance(unittest.TestCase):
|
||||
f"Error running benchmark for {model_setup.model_path} with batch size:"
|
||||
)
|
||||
print(result.stderr)
|
||||
# Continue to next batch size even if one fails
|
||||
continue
|
||||
|
||||
print(f"Output for {model_setup.model_path} with batch size:")
|
||||
@@ -136,19 +135,24 @@ class TestNightlyVLMModelsPerformance(unittest.TestCase):
|
||||
)
|
||||
|
||||
else:
|
||||
all_model_succeed = False
|
||||
print(f"Warning: JSON output file {json_output_file} not found")
|
||||
|
||||
finally:
|
||||
kill_process_tree(process.pid)
|
||||
|
||||
report_part = BenchmarkResult.generate_markdown_report(
|
||||
PROFILE_DIR, benchmark_results
|
||||
report_part = generate_markdown_report(
|
||||
PROFILE_DIR,
|
||||
benchmark_results,
|
||||
)
|
||||
self.full_report += report_part + "\n"
|
||||
|
||||
if is_in_ci():
|
||||
write_github_step_summary(self.full_report)
|
||||
|
||||
if not all_model_succeed:
|
||||
raise AssertionError("Some models failed the perf tests.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
Reference in New Issue
Block a user