Sync from v0.13
This commit is contained in:
155
tests/entrypoints/openai/test_messages.py
Normal file
155
tests/entrypoints/openai/test_messages.py
Normal file
@@ -0,0 +1,155 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import anthropic
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
|
||||
from ...utils import RemoteOpenAIServer
|
||||
|
||||
MODEL_NAME = "Qwen/Qwen3-0.6B"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def server(): # noqa: F811
|
||||
args = [
|
||||
"--max-model-len",
|
||||
"2048",
|
||||
"--enforce-eager",
|
||||
"--enable-auto-tool-choice",
|
||||
"--tool-call-parser",
|
||||
"hermes",
|
||||
"--served-model-name",
|
||||
"claude-3-7-sonnet-latest",
|
||||
]
|
||||
|
||||
with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
|
||||
yield remote_server
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
|
||||
async def client(server):
|
||||
async with server.get_async_client_anthropic() as async_client:
|
||||
yield async_client
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_simple_messages(client: anthropic.AsyncAnthropic):
|
||||
resp = await client.messages.create(
|
||||
model="claude-3-7-sonnet-latest",
|
||||
max_tokens=1024,
|
||||
messages=[{"role": "user", "content": "how are you!"}],
|
||||
)
|
||||
assert resp.stop_reason == "end_turn"
|
||||
assert resp.role == "assistant"
|
||||
|
||||
print(f"Anthropic response: {resp.model_dump_json()}")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_system_message(client: anthropic.AsyncAnthropic):
|
||||
resp = await client.messages.create(
|
||||
model="claude-3-7-sonnet-latest",
|
||||
max_tokens=1024,
|
||||
system="you are a helpful assistant",
|
||||
messages=[{"role": "user", "content": "how are you!"}],
|
||||
)
|
||||
assert resp.stop_reason == "end_turn"
|
||||
assert resp.role == "assistant"
|
||||
|
||||
print(f"Anthropic response: {resp.model_dump_json()}")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_anthropic_streaming(client: anthropic.AsyncAnthropic):
|
||||
resp = await client.messages.create(
|
||||
model="claude-3-7-sonnet-latest",
|
||||
max_tokens=1024,
|
||||
messages=[{"role": "user", "content": "how are you!"}],
|
||||
stream=True,
|
||||
)
|
||||
|
||||
first_chunk = None
|
||||
chunk_count = 0
|
||||
async for chunk in resp:
|
||||
chunk_count += 1
|
||||
if first_chunk is None and chunk.type == "message_start":
|
||||
first_chunk = chunk
|
||||
print(chunk.model_dump_json())
|
||||
|
||||
assert chunk_count > 0
|
||||
assert first_chunk is not None, "message_start chunk was never observed"
|
||||
assert first_chunk.message is not None, "first chunk should include message"
|
||||
assert first_chunk.message.usage is not None, (
|
||||
"first chunk should include usage stats"
|
||||
)
|
||||
assert first_chunk.message.usage.output_tokens == 0
|
||||
assert first_chunk.message.usage.input_tokens > 5
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_anthropic_tool_call(client: anthropic.AsyncAnthropic):
|
||||
resp = await client.messages.create(
|
||||
model="claude-3-7-sonnet-latest",
|
||||
max_tokens=1024,
|
||||
messages=[
|
||||
{"role": "user", "content": "What's the weather like in New York today?"}
|
||||
],
|
||||
tools=[
|
||||
{
|
||||
"name": "get_current_weather",
|
||||
"description": "Useful for querying the weather in a specified city.",
|
||||
"input_schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "City or region, for example: "
|
||||
"New York, London, Tokyo, etc.",
|
||||
}
|
||||
},
|
||||
"required": ["location"],
|
||||
},
|
||||
}
|
||||
],
|
||||
stream=False,
|
||||
)
|
||||
assert resp.stop_reason == "tool_use"
|
||||
assert resp.role == "assistant"
|
||||
|
||||
print(f"Anthropic response: {resp.model_dump_json()}")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_anthropic_tool_call_streaming(client: anthropic.AsyncAnthropic):
|
||||
resp = await client.messages.create(
|
||||
model="claude-3-7-sonnet-latest",
|
||||
max_tokens=1024,
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What's the weather like in New York today?",
|
||||
}
|
||||
],
|
||||
tools=[
|
||||
{
|
||||
"name": "get_current_weather",
|
||||
"description": "Useful for querying the weather in a specified city.",
|
||||
"input_schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "City or region, for example: "
|
||||
"New York, London, Tokyo, etc.",
|
||||
}
|
||||
},
|
||||
"required": ["location"],
|
||||
},
|
||||
}
|
||||
],
|
||||
stream=True,
|
||||
)
|
||||
|
||||
async for chunk in resp:
|
||||
print(chunk.model_dump_json())
|
||||
Reference in New Issue
Block a user