Sync from v0.13

2026-01-19 10:38:50 +08:00
parent b2ef04d792
commit 5aef6c175a
3714 changed files with 854317 additions and 89342 deletions
--- a/tests/entrypoints/openai/test_messages.py
+++ b/tests/entrypoints/openai/test_messages.py
@@ -0,0 +1,155 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import anthropic
+import pytest
+import pytest_asyncio
+
+from ...utils import RemoteOpenAIServer
+
+MODEL_NAME = "Qwen/Qwen3-0.6B"
+
+
+@pytest.fixture(scope="module")
+def server():  # noqa: F811
+    args = [
+        "--max-model-len",
+        "2048",
+        "--enforce-eager",
+        "--enable-auto-tool-choice",
+        "--tool-call-parser",
+        "hermes",
+        "--served-model-name",
+        "claude-3-7-sonnet-latest",
+    ]
+
+    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
+        yield remote_server
+
+
+@pytest_asyncio.fixture
+async def client(server):
+    async with server.get_async_client_anthropic() as async_client:
+        yield async_client
+
+
+@pytest.mark.asyncio
+async def test_simple_messages(client: anthropic.AsyncAnthropic):
+    resp = await client.messages.create(
+        model="claude-3-7-sonnet-latest",
+        max_tokens=1024,
+        messages=[{"role": "user", "content": "how are you!"}],
+    )
+    assert resp.stop_reason == "end_turn"
+    assert resp.role == "assistant"
+
+    print(f"Anthropic response: {resp.model_dump_json()}")
+
+
+@pytest.mark.asyncio
+async def test_system_message(client: anthropic.AsyncAnthropic):
+    resp = await client.messages.create(
+        model="claude-3-7-sonnet-latest",
+        max_tokens=1024,
+        system="you are a helpful assistant",
+        messages=[{"role": "user", "content": "how are you!"}],
+    )
+    assert resp.stop_reason == "end_turn"
+    assert resp.role == "assistant"
+
+    print(f"Anthropic response: {resp.model_dump_json()}")
+
+
+@pytest.mark.asyncio
+async def test_anthropic_streaming(client: anthropic.AsyncAnthropic):
+    resp = await client.messages.create(
+        model="claude-3-7-sonnet-latest",
+        max_tokens=1024,
+        messages=[{"role": "user", "content": "how are you!"}],
+        stream=True,
+    )
+
+    first_chunk = None
+    chunk_count = 0
+    async for chunk in resp:
+        chunk_count += 1
+        if first_chunk is None and chunk.type == "message_start":
+            first_chunk = chunk
+        print(chunk.model_dump_json())
+
+    assert chunk_count > 0
+    assert first_chunk is not None, "message_start chunk was never observed"
+    assert first_chunk.message is not None, "first chunk should include message"
+    assert first_chunk.message.usage is not None, (
+        "first chunk should include usage stats"
+    )
+    assert first_chunk.message.usage.output_tokens == 0
+    assert first_chunk.message.usage.input_tokens > 5
+
+
+@pytest.mark.asyncio
+async def test_anthropic_tool_call(client: anthropic.AsyncAnthropic):
+    resp = await client.messages.create(
+        model="claude-3-7-sonnet-latest",
+        max_tokens=1024,
+        messages=[
+            {"role": "user", "content": "What's the weather like in New York today?"}
+        ],
+        tools=[
+            {
+                "name": "get_current_weather",
+                "description": "Useful for querying the weather in a specified city.",
+                "input_schema": {
+                    "type": "object",
+                    "properties": {
+                        "location": {
+                            "type": "string",
+                            "description": "City or region, for example: "
+                            "New York, London, Tokyo, etc.",
+                        }
+                    },
+                    "required": ["location"],
+                },
+            }
+        ],
+        stream=False,
+    )
+    assert resp.stop_reason == "tool_use"
+    assert resp.role == "assistant"
+
+    print(f"Anthropic response: {resp.model_dump_json()}")
+
+
+@pytest.mark.asyncio
+async def test_anthropic_tool_call_streaming(client: anthropic.AsyncAnthropic):
+    resp = await client.messages.create(
+        model="claude-3-7-sonnet-latest",
+        max_tokens=1024,
+        messages=[
+            {
+                "role": "user",
+                "content": "What's the weather like in New York today?",
+            }
+        ],
+        tools=[
+            {
+                "name": "get_current_weather",
+                "description": "Useful for querying the weather in a specified city.",
+                "input_schema": {
+                    "type": "object",
+                    "properties": {
+                        "location": {
+                            "type": "string",
+                            "description": "City or region, for example: "
+                            "New York, London, Tokyo, etc.",
+                        }
+                    },
+                    "required": ["location"],
+                },
+            }
+        ],
+        stream=True,
+    )
+
+    async for chunk in resp:
+        print(chunk.model_dump_json())