[E2E] Refactor the e2e testcases. (#4789)
### What this PR does / why we need it?
Refactor the e2e testcases.
- tests/e2e/multicard/test_weight_loader.py: Remove the unused code.
- tests/e2e/singlecard/multi-modal/test_internvl.py: Move to accuracy
test.
- tests/e2e/singlecard/test_aclgraph.py: Rename the file.
- tests/e2e/singlecard/test_embedding_aclgraph.py: Combine with
tests/e2e/singlecard/test_bge_model.py.
- tests/e2e/singlecard/test_completion_with_prompt_embeds.py: Remove the
eager-mode case and change the model to Qwen3-0.6B.
- tests/e2e/singlecard/test_quantization.py: Modify model to
Qwen3-0.6B-W8A8
- tests/e2e/singlecard/test_vlm.py: Modify model to Qwen3-VL-8B
- vLLM version: v0.12.0
- vLLM main:
ad32e3e19c
---------
Signed-off-by: menogrey <1299267905@qq.com>
This commit is contained in:
@@ -24,7 +24,6 @@ from tests.e2e.utils import check_embeddings_close
|
||||
|
||||
MODELS = [
|
||||
"Qwen/Qwen3-Embedding-0.6B", # lasttoken
|
||||
"BAAI/bge-small-en-v1.5", # cls_token
|
||||
"intfloat/multilingual-e5-small" # mean_tokens
|
||||
]
|
||||
|
||||
@@ -57,3 +56,45 @@ def test_embed_models_correctness(model: str):
|
||||
name_1="vllm",
|
||||
tol=1e-2,
|
||||
)
|
||||
|
||||
|
||||
def test_bge_model_correctness():
    """Compare BAAI/bge-m3 embeddings across HF and two vLLM run modes.

    The model is fetched with ``snapshot_download`` and the same two queries
    are embedded three times: by ``VllmRunner`` with ``enforce_eager=False``
    (labelled "aclgraph"), by ``VllmRunner`` with ``enforce_eager=True``
    (labelled "eager"), and by ``HfRunner`` as a sentence-transformer in
    float32. ``check_embeddings_close`` is then applied to HF vs. eager and
    to eager vs. aclgraph, each with ``tol=1e-2``.
    """
    prompts = ['What is the capital of China?', 'Explain gravity']
    model_path = snapshot_download("BAAI/bge-m3")

    # Run the pooling model once per mode, keyed by the enforce_eager flag.
    # Order matters for parity with the original: graph mode first, then eager.
    vllm_embeddings = {}
    for eager_flag in (False, True):
        with VllmRunner(
                model_path,
                runner="pooling",
                enforce_eager=eager_flag,
        ) as pooling_runner:
            vllm_embeddings[eager_flag] = pooling_runner.embed(prompts)

    aclgraph_embeddings = vllm_embeddings[False]
    eager_embeddings = vllm_embeddings[True]

    # Reference embeddings from the HF sentence-transformer path.
    with HfRunner(
            model_path,
            dtype="float32",
            is_sentence_transformer=True,
    ) as reference_runner:
        reference_embeddings = reference_runner.encode(prompts)

    # HF reference against vLLM eager output.
    check_embeddings_close(
        embeddings_0_lst=reference_embeddings,
        embeddings_1_lst=eager_embeddings,
        name_0="hf",
        name_1="vllm",
        tol=1e-2,
    )

    # vLLM eager output against the enforce_eager=False output.
    check_embeddings_close(
        embeddings_0_lst=eager_embeddings,
        embeddings_1_lst=aclgraph_embeddings,
        name_0="eager",
        name_1="aclgraph",
        tol=1e-2,
    )
|
||||
|
||||
Reference in New Issue
Block a user