diff --git a/tests/e2e/nightly/features/test_qwen3_32b_int8_a3_feature_stack3.py b/tests/e2e/nightly/features/test_qwen3_32b_int8_a3_feature_stack3.py
index 72194e64..2edae400 100644
--- a/tests/e2e/nightly/features/test_qwen3_32b_int8_a3_feature_stack3.py
+++ b/tests/e2e/nightly/features/test_qwen3_32b_int8_a3_feature_stack3.py
@@ -20,7 +20,7 @@
 from vllm.utils.network_utils import get_open_port
 from tests.e2e.conftest import RemoteOpenAIServer
 from tools.aisbench import run_aisbench_cases
-from tools.send_request import send_text_request
+from tools.send_request import send_v1_chat_completions
 
 MODELS = [
     "vllm-ascend/Qwen3-32B-W8A8",
@@ -90,9 +90,9 @@ async def test_models(model: str, tp_size: int) -> None:
                              server_port=port,
                              env_dict=env_dict,
                              auto_port=False) as server:
-        send_text_request(prompts[0],
-                          model,
-                          server,
-                          request_args=api_keyword_args)
+        send_v1_chat_completions(prompts[0],
+                                 model,
+                                 server,
+                                 request_args=api_keyword_args)
         # aisbench test
         run_aisbench_cases(model, port, aisbench_cases)
diff --git a/tools/send_request.py b/tools/send_request.py
index f0bb69a7..faad3156 100644
--- a/tools/send_request.py
+++ b/tools/send_request.py
@@ -2,22 +2,36 @@
 from typing import Any
 
 import requests
 
-data: dict[str, Any] = {
-    "messages": [{
-        "role": "user",
-        "content": "",
-    }],
-}
-
-def send_text_request(prompt, model, server, request_args=None):
-    data["messages"][0]["content"] = prompt
-    data["model"] = model
-    url = server.url_for("v1", "chat", "completions")
+def send_v1_completions(prompt, model, server, request_args=None):
+    data: dict[str, Any] = {"model": model, "prompt": prompt}
     if request_args:
         data.update(request_args)
+    url = server.url_for("v1", "completions")
     response = requests.post(url, json=data)
-    print("Status Code:", response.status_code)
+    print(f"Status Code: {response.status_code}")
     response_json = response.json()
-    print("Response:", response_json)
-    assert response_json["choices"][0]["message"]["content"], "empty response"
+    print(f"Response json: {response_json}")
+    response_text = response_json["choices"][0]["text"]
+    print(f"Response: {response_text}")
+    assert response_text, "empty response"
+
+
+def send_v1_chat_completions(prompt, model, server, request_args=None):
+    data: dict[str, Any] = {
+        "model": model,
+        "messages": [{
+            "role": "user",
+            "content": prompt,
+        }],
+    }
+    if request_args:
+        data.update(request_args)
+    url = server.url_for("v1", "chat", "completions")
+    response = requests.post(url, json=data)
+    print(f"Status Code: {response.status_code}")
+    response_json = response.json()
+    print(f"Response json: {response_json}")
+    response_text = response_json["choices"][0]["message"]["content"]
+    print(f"Response: {response_text}")
+    assert response_text, "empty response"