[CI] Migrate e2e test runner to hk (#5344)

### What this PR does / why we need it?
This patch adds new runner labels for the HK region and migrates the e2e
single-card tests to the new runner.

- vLLM version: release/v0.13.0
- vLLM main:
bc0a5a0c08

---------

Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
Li Wang
2026-01-26 09:00:51 +08:00
committed by GitHub
parent 99bdd7363c
commit ca297eb57f
8 changed files with 40 additions and 23 deletions

View File

@@ -400,9 +400,14 @@ def test_llama_qwen_eagle_acceptance(
compilation_config=compilation_config,
async_scheduling=async_scheduling,
) as llm:
_ = llm.generate(prompts, sampling_params)
outputs = llm.model.generate(prompts, sampling_params)
metrics = llm.model.get_metrics()
for output in outputs:
prompt = output.prompt
generated_text = output.outputs[0].text
output_tokens = output.outputs[0].token_ids
print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
print(f"Output tokens: {output_tokens}")
num_drafts = 0
num_accepted_tokens_per_pos = [0] * num_speculative_tokens
for metric in metrics:
@@ -418,7 +423,10 @@ def test_llama_qwen_eagle_acceptance(
num_accepted_tokens / num_drafts
for num_accepted_tokens in num_accepted_tokens_per_pos
]
golden = BASELINES[method]
if method == "eagle":
golden = [0.7313432835820896, 0.373134328358209, 0.19402985074626866]
else:
golden = [0.68, 0.40, 0.18]
match = all(abs(a - b) < 0.08 for a, b in zip(acceptance_per_pos, golden))
if not match: