### What this PR does / why we need it?
**Scope of Changes**:
| File Path |
| :--- |
| `tests/e2e/310p/multicard/test_vl_model_multicard.py` |
| `tests/e2e/310p/singlecard/test_vl_model_singlecard.py` |
| `tests/e2e/310p/test_utils.py` |
| `tests/e2e/conftest.py` |
| `tests/e2e/model_utils.py` |
| `tests/e2e/models/conftest.py` |
| `tests/e2e/models/test_lm_eval_correctness.py` |
| `tests/e2e/multicard/2-cards/spec_decode/test_spec_decode.py` |
| `tests/e2e/multicard/2-cards/test_aclgraph_capture_replay.py` |
| `tests/e2e/multicard/2-cards/test_data_parallel.py` |
| `tests/e2e/multicard/2-cards/test_disaggregated_encoder.py` |
| `tests/e2e/multicard/2-cards/test_expert_parallel.py` |
| `tests/e2e/multicard/2-cards/test_external_launcher.py` |
| `tests/e2e/multicard/2-cards/test_full_graph_mode.py` |
| `tests/e2e/multicard/2-cards/test_ilama_lora_tp2.py` |
| `tests/e2e/multicard/2-cards/test_offline_inference_distributed.py` |
| `tests/e2e/multicard/2-cards/test_offline_weight_load.py` |
| `tests/e2e/multicard/2-cards/test_pipeline_parallel.py` |
| `tests/e2e/multicard/2-cards/test_prefix_caching.py` |
| `tests/e2e/multicard/2-cards/test_quantization.py` |
| `tests/e2e/multicard/2-cards/test_qwen3_moe.py` |
| `tests/e2e/multicard/2-cards/test_qwen3_moe_routing_replay.py` |
| `tests/e2e/multicard/2-cards/test_qwen3_performance.py` |
| `tests/e2e/multicard/2-cards/test_shared_expert_dp.py` |
| `tests/e2e/multicard/2-cards/test_single_request_aclgraph.py` |
| `tests/e2e/multicard/2-cards/test_sp_pass.py` |
### Does this PR introduce _any_ user-facing change?
### How was this patch tested?
- vLLM version: v0.15.0
- vLLM main: 9562912cea
Signed-off-by: MrZ20 <2609716663@qq.com>
Co-authored-by: wangxiyuan <wangxiyuan1007@gmail.com>
Representative hunks from the diff, showing the formatting-only nature of the changes (line wrapping and quote style):

```diff
@@ -55,16 +55,12 @@ def report_dir(pytestconfig):
 def pytest_generate_tests(metafunc):
     if "config_filename" in metafunc.fixturenames:
         if metafunc.config.getoption("--config-list-file"):
             rel_path = metafunc.config.getoption("--config-list-file")
             config_list_file = Path(rel_path).resolve()
             config_dir = config_list_file.parent
             with open(config_list_file, encoding="utf-8") as f:
-                configs = [
-                    config_dir / line.strip() for line in f
-                    if line.strip() and not line.startswith("#")
-                ]
+                configs = [config_dir / line.strip() for line in f if line.strip() and not line.startswith("#")]
             metafunc.parametrize("config_filename", configs)
         else:
             single_config = metafunc.config.getoption("--config")
```
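The hunk above (apparently from `tests/e2e/models/conftest.py`) collapses the config-list comprehension onto one line without changing behaviour: blank lines and `#` comments are skipped, and each remaining entry is resolved relative to the list file's directory. A standalone sketch of that parsing, with hypothetical file names, is:

```python
# Standalone sketch of the list-file handling above; file names are hypothetical.
from pathlib import Path

list_text = """\
# accuracy configs: blank lines and '#' comments are skipped
Qwen2.5-7B-Instruct.yaml

Qwen3-30B-A3B.yaml
"""
config_list_file = Path("configs/accuracy.txt")
config_list_file.parent.mkdir(parents=True, exist_ok=True)
config_list_file.write_text(list_text, encoding="utf-8")

config_dir = config_list_file.resolve().parent
with open(config_list_file, encoding="utf-8") as f:
    configs = [config_dir / line.strip() for line in f if line.strip() and not line.startswith("#")]

print(configs)  # two absolute paths, one per non-blank, non-comment line
```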
```diff
@@ -24,16 +24,15 @@ class EnvConfig:
 
 @pytest.fixture
 def env_config() -> EnvConfig:
-    return EnvConfig(vllm_version=os.getenv('VLLM_VERSION', 'unknown'),
-                     vllm_commit=os.getenv('VLLM_COMMIT', 'unknown'),
-                     vllm_ascend_version=os.getenv('VLLM_ASCEND_VERSION',
-                                                   'unknown'),
-                     vllm_ascend_commit=os.getenv('VLLM_ASCEND_COMMIT',
-                                                  'unknown'),
-                     cann_version=os.getenv('CANN_VERSION', 'unknown'),
-                     torch_version=os.getenv('TORCH_VERSION', 'unknown'),
-                     torch_npu_version=os.getenv('TORCH_NPU_VERSION',
-                                                 'unknown'))
+    return EnvConfig(
+        vllm_version=os.getenv("VLLM_VERSION", "unknown"),
+        vllm_commit=os.getenv("VLLM_COMMIT", "unknown"),
+        vllm_ascend_version=os.getenv("VLLM_ASCEND_VERSION", "unknown"),
+        vllm_ascend_commit=os.getenv("VLLM_ASCEND_COMMIT", "unknown"),
+        cann_version=os.getenv("CANN_VERSION", "unknown"),
+        torch_version=os.getenv("TORCH_VERSION", "unknown"),
+        torch_npu_version=os.getenv("TORCH_NPU_VERSION", "unknown"),
+    )
 
 
 def build_model_args(eval_config, tp_size):
```
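The fixture builds an `EnvConfig` purely from environment variables. The class body is not part of this diff; judging from the keyword arguments it is presumably a small dataclass of strings, roughly:

```python
# Hedged sketch: a plausible EnvConfig inferred from the fixture's keyword
# arguments; the real class in test_lm_eval_correctness.py may differ.
from dataclasses import dataclass


@dataclass
class EnvConfig:
    vllm_version: str
    vllm_commit: str
    vllm_ascend_version: str
    vllm_ascend_commit: str
    cann_version: str
    torch_version: str
    torch_npu_version: str
```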
```diff
@@ -48,9 +47,13 @@ def build_model_args(eval_config, tp_size):
         "max_model_len": max_model_len,
     }
     for s in [
-            "max_images", "gpu_memory_utilization", "enable_expert_parallel",
-            "tensor_parallel_size", "enforce_eager", "enable_thinking",
-            "quantization"
+        "max_images",
+        "gpu_memory_utilization",
+        "enable_expert_parallel",
+        "tensor_parallel_size",
+        "enforce_eager",
+        "enable_thinking",
+        "quantization",
     ]:
         val = eval_config.get(s, None)
         if val is not None:
```
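`build_model_args` copies a fixed allowlist of optional keys from the eval config into the model args; keys missing from the config are skipped. The body of the `if val is not None:` branch is not shown in this hunk, so the sketch below assumes a plain `model_args[s] = val`, and the config values are made up:

```python
# Illustration only: the allowlist copy loop with a made-up eval config.
# The `if val is not None:` branch is assumed to be a plain assignment.
eval_config = {
    "model_name": "Qwen/Qwen2.5-7B-Instruct",  # hypothetical
    "tensor_parallel_size": 2,
    "enforce_eager": True,
}
model_args = {"max_model_len": 4096}  # other base keys omitted for brevity

for s in [
    "max_images",
    "gpu_memory_utilization",
    "enable_expert_parallel",
    "tensor_parallel_size",
    "enforce_eager",
    "enable_thinking",
    "quantization",
]:
    val = eval_config.get(s, None)
    if val is not None:
        model_args[s] = val  # keys absent from the config are simply skipped

print(model_args)  # {'max_model_len': 4096, 'tensor_parallel_size': 2, 'enforce_eager': True}
```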
```diff
@@ -68,7 +71,7 @@ def generate_report(tp_size, eval_config, report_data, report_dir, env_config):
     model_args = build_model_args(eval_config, tp_size)
 
     parallel_mode = f"TP{model_args.get('tensor_parallel_size', 1)}"
-    if model_args.get('enable_expert_parallel', False):
+    if model_args.get("enable_expert_parallel", False):
         parallel_mode += " + EP"
 
     execution_model = f"{'Eager' if model_args.get('enforce_eager', False) else 'ACLGraph'}"
```
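The report header strings are derived from the model args: `TP<n>`, an optional ` + EP` suffix, and an Eager/ACLGraph execution label. With hypothetical args:

```python
# Hypothetical model args; the Eager/ACLGraph label depends on enforce_eager.
model_args = {"tensor_parallel_size": 2, "enable_expert_parallel": True, "enforce_eager": False}

parallel_mode = f"TP{model_args.get('tensor_parallel_size', 1)}"
if model_args.get("enable_expert_parallel", False):
    parallel_mode += " + EP"
execution_model = f"{'Eager' if model_args.get('enforce_eager', False) else 'ACLGraph'}"

print(parallel_mode, execution_model)  # TP2 + EP ACLGraph
```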
```diff
@@ -93,17 +96,16 @@ def generate_report(tp_size, eval_config, report_data, report_dir, env_config):
         num_fewshot=eval_config.get("num_fewshot", "N/A"),
         rows=report_data["rows"],
         parallel_mode=parallel_mode,
-        execution_model=execution_model)
+        execution_model=execution_model,
+    )
 
-    report_output = os.path.join(
-        report_dir, f"{os.path.basename(eval_config['model_name'])}.md")
+    report_output = os.path.join(report_dir, f"{os.path.basename(eval_config['model_name'])}.md")
     os.makedirs(os.path.dirname(report_output), exist_ok=True)
-    with open(report_output, 'w', encoding='utf-8') as f:
+    with open(report_output, "w", encoding="utf-8") as f:
         f.write(report_content)
 
 
-def test_lm_eval_correctness_param(config_filename, tp_size, report_dir,
-                                   env_config):
+def test_lm_eval_correctness_param(config_filename, tp_size, report_dir, env_config):
     eval_config = yaml.safe_load(config_filename.read_text(encoding="utf-8"))
     model_args = build_model_args(eval_config, tp_size)
     success = True
```
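The report file name comes from the basename of the configured model, so an HF-style `org/model` id maps to `<model>.md` under `report_dir`. For example (paths and model name are hypothetical):

```python
import os

report_dir = "./reports"                 # hypothetical directory
model_name = "Qwen/Qwen2.5-7B-Instruct"  # hypothetical model name
report_output = os.path.join(report_dir, f"{os.path.basename(model_name)}.md")
print(report_output)  # ./reports/Qwen2.5-7B-Instruct.md
```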
```diff
@@ -135,25 +137,26 @@ def test_lm_eval_correctness_param(config_filename, tp_size, report_dir,
             metric_name = metric["name"]
             ground_truth = metric["value"]
             measured_value = round(task_result[metric_name], 4)
-            task_success = bool(
-                np.isclose(ground_truth, measured_value, rtol=RTOL))
+            task_success = bool(np.isclose(ground_truth, measured_value, rtol=RTOL))
             success = success and task_success
 
-            print(f"{task_name} | {metric_name}: "
-                  f"ground_truth={ground_truth} | measured={measured_value} | "
-                  f"success={'✅' if task_success else '❌'}")
+            print(
+                f"{task_name} | {metric_name}: "
+                f"ground_truth={ground_truth} | measured={measured_value} | "
+                f"success={'✅' if task_success else '❌'}"
+            )
 
-            report_data["rows"].append({
-                "task":
-                task_name,
-                "metric":
-                metric_name,
-                "value":
-                f"✅{measured_value}" if success else f"❌{measured_value}",
-                "stderr":
-                task_result[
-                    metric_name.replace(',', '_stderr,') if metric_name ==
-                    "acc,none" else metric_name.replace(',', '_stderr,')]
-            })
+            report_data["rows"].append(
+                {
+                    "task": task_name,
+                    "metric": metric_name,
+                    "value": f"✅{measured_value}" if success else f"❌{measured_value}",
+                    "stderr": task_result[
+                        metric_name.replace(",", "_stderr,")
+                        if metric_name == "acc,none"
+                        else metric_name.replace(",", "_stderr,")
+                    ],
+                }
+            )
     generate_report(tp_size, eval_config, report_data, report_dir, env_config)
     assert success
```
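Each metric is accepted when the measured value is within a relative tolerance of the ground truth via `np.isclose(..., rtol=RTOL)`. The value of `RTOL` is defined outside this hunk, so the sketch below assumes `RTOL = 0.03` purely for illustration:

```python
# RTOL = 0.03 is assumed here; the real constant is defined elsewhere in the test.
import numpy as np

RTOL = 0.03
ground_truth = 0.8123               # expected metric value from the YAML config
measured_value = round(0.80411, 4)  # value reported by lm-eval, rounded as in the test

task_success = bool(np.isclose(ground_truth, measured_value, rtol=RTOL))
print(task_success)  # True: |0.8123 - 0.8041| <= RTOL * |0.8041| (plus a tiny atol)
```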