ci: unify the model launch method of nightly ci (#11230)
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
import json
|
||||
import unittest
|
||||
import warnings
|
||||
from functools import partial
|
||||
from types import SimpleNamespace
|
||||
|
||||
from sglang.srt.utils import kill_process_tree
|
||||
@@ -8,8 +9,8 @@ from sglang.test.run_eval import run_eval
|
||||
from sglang.test.test_utils import (
|
||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
DEFAULT_URL_FOR_TEST,
|
||||
ModelDeploySetup,
|
||||
ModelEvalMetrics,
|
||||
ModelLaunchSettings,
|
||||
check_evaluation_test_results,
|
||||
popen_launch_server,
|
||||
write_results_to_json,
|
||||
@@ -17,25 +18,29 @@ from sglang.test.test_utils import (
|
||||
|
||||
MODEL_THRESHOLDS = {
|
||||
# Conservative thresholds on 100 MMMU samples, especially for latency thresholds
|
||||
ModelDeploySetup("deepseek-ai/deepseek-vl2-small"): ModelEvalMetrics(0.330, 56.1),
|
||||
ModelDeploySetup("deepseek-ai/Janus-Pro-7B"): ModelEvalMetrics(0.285, 39.9),
|
||||
ModelDeploySetup("Efficient-Large-Model/NVILA-Lite-2B-hf-0626"): ModelEvalMetrics(
|
||||
0.305, 23.8
|
||||
ModelLaunchSettings("deepseek-ai/deepseek-vl2-small"): ModelEvalMetrics(
|
||||
0.330, 56.1
|
||||
),
|
||||
ModelDeploySetup("google/gemma-3-4b-it"): ModelEvalMetrics(0.360, 10.9),
|
||||
ModelDeploySetup("google/gemma-3n-E4B-it"): ModelEvalMetrics(0.360, 15.3),
|
||||
ModelDeploySetup("mistral-community/pixtral-12b"): ModelEvalMetrics(0.360, 16.6),
|
||||
ModelDeploySetup("moonshotai/Kimi-VL-A3B-Instruct"): ModelEvalMetrics(0.330, 22.3),
|
||||
ModelDeploySetup("openbmb/MiniCPM-o-2_6"): ModelEvalMetrics(0.330, 29.3),
|
||||
ModelDeploySetup("openbmb/MiniCPM-v-2_6"): ModelEvalMetrics(0.270, 24.5),
|
||||
ModelDeploySetup("OpenGVLab/InternVL2_5-2B"): ModelEvalMetrics(0.300, 14.0),
|
||||
ModelDeploySetup("Qwen/Qwen2-VL-7B-Instruct"): ModelEvalMetrics(0.310, 83.3),
|
||||
ModelDeploySetup("Qwen/Qwen2.5-VL-7B-Instruct"): ModelEvalMetrics(0.340, 31.9),
|
||||
ModelDeploySetup("unsloth/Mistral-Small-3.1-24B-Instruct-2503"): ModelEvalMetrics(
|
||||
0.310, 16.7
|
||||
ModelLaunchSettings("deepseek-ai/Janus-Pro-7B"): ModelEvalMetrics(0.285, 40.3),
|
||||
ModelLaunchSettings(
|
||||
"Efficient-Large-Model/NVILA-Lite-2B-hf-0626"
|
||||
): ModelEvalMetrics(0.305, 23.8),
|
||||
ModelLaunchSettings("google/gemma-3-4b-it"): ModelEvalMetrics(0.360, 10.9),
|
||||
ModelLaunchSettings("google/gemma-3n-E4B-it"): ModelEvalMetrics(0.360, 15.3),
|
||||
ModelLaunchSettings("mistral-community/pixtral-12b"): ModelEvalMetrics(0.360, 16.6),
|
||||
ModelLaunchSettings("moonshotai/Kimi-VL-A3B-Instruct"): ModelEvalMetrics(
|
||||
0.330, 22.3
|
||||
),
|
||||
ModelDeploySetup("XiaomiMiMo/MiMo-VL-7B-RL"): ModelEvalMetrics(0.28, 32.0),
|
||||
ModelDeploySetup("zai-org/GLM-4.1V-9B-Thinking"): ModelEvalMetrics(0.280, 30.4),
|
||||
ModelLaunchSettings("openbmb/MiniCPM-o-2_6"): ModelEvalMetrics(0.330, 29.3),
|
||||
ModelLaunchSettings("openbmb/MiniCPM-v-2_6"): ModelEvalMetrics(0.270, 24.5),
|
||||
ModelLaunchSettings("OpenGVLab/InternVL2_5-2B"): ModelEvalMetrics(0.300, 14.0),
|
||||
ModelLaunchSettings("Qwen/Qwen2-VL-7B-Instruct"): ModelEvalMetrics(0.310, 83.3),
|
||||
ModelLaunchSettings("Qwen/Qwen2.5-VL-7B-Instruct"): ModelEvalMetrics(0.340, 31.9),
|
||||
ModelLaunchSettings(
|
||||
"unsloth/Mistral-Small-3.1-24B-Instruct-2503"
|
||||
): ModelEvalMetrics(0.310, 16.7),
|
||||
ModelLaunchSettings("XiaomiMiMo/MiMo-VL-7B-RL"): ModelEvalMetrics(0.28, 32.0),
|
||||
ModelLaunchSettings("zai-org/GLM-4.1V-9B-Thinking"): ModelEvalMetrics(0.280, 30.4),
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user