[CI] Migrate e2e test runner to hk (#5344)
### What this PR does / why we need it?
This patch adds new runner labels for the HK region and migrates the e2e
single-card tests to these runners.
- vLLM version: release/v0.13.0
- vLLM main: bc0a5a0c08
---------
Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
@@ -400,9 +400,14 @@ def test_llama_qwen_eagle_acceptance(
|
||||
compilation_config=compilation_config,
|
||||
async_scheduling=async_scheduling,
|
||||
) as llm:
|
||||
_ = llm.generate(prompts, sampling_params)
|
||||
outputs = llm.model.generate(prompts, sampling_params)
|
||||
metrics = llm.model.get_metrics()
|
||||
|
||||
for output in outputs:
|
||||
prompt = output.prompt
|
||||
generated_text = output.outputs[0].text
|
||||
output_tokens = output.outputs[0].token_ids
|
||||
print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
|
||||
print(f"Output tokens: {output_tokens}")
|
||||
num_drafts = 0
|
||||
num_accepted_tokens_per_pos = [0] * num_speculative_tokens
|
||||
for metric in metrics:
|
||||
@@ -418,7 +423,10 @@ def test_llama_qwen_eagle_acceptance(
|
||||
num_accepted_tokens / num_drafts
|
||||
for num_accepted_tokens in num_accepted_tokens_per_pos
|
||||
]
|
||||
golden = BASELINES[method]
|
||||
if method == "eagle":
|
||||
golden = [0.7313432835820896, 0.373134328358209, 0.19402985074626866]
|
||||
else:
|
||||
golden = [0.68, 0.40, 0.18]
|
||||
|
||||
match = all(abs(a - b) < 0.08 for a, b in zip(acceptance_per_pos, golden))
|
||||
if not match:
|
||||
|
||||
Reference in New Issue
Block a user