add qwen3

2026-02-04 17:22:39 +08:00
parent d1c0f68ab4
commit 8511fe8530
1932 changed files with 300426 additions and 0 deletions
--- a/vllm-v0.6.2/tests/entrypoints/openai/test_shutdown.py
+++ b/vllm-v0.6.2/tests/entrypoints/openai/test_shutdown.py
@@ -0,0 +1,48 @@
+import json
+import os
+
+import openai
+import pytest
+
+from ...utils import RemoteOpenAIServer
+
+MODEL_NAME = "meta-llama/Llama-3.2-1B"
+
+
+@pytest.mark.asyncio
+async def test_shutdown_on_engine_failure(tmp_path):
+    # Check that the API server process exits once the engine has crashed.
+    # A bogus LoRA adapter triggers the crash (fails when that bug is fixed).
+    adapter_path = tmp_path / "bad_adapter"
+    os.mkdir(adapter_path)
+    with open(adapter_path / "adapter_model_config.json", "w") as f:
+        json.dump({"not": "real"}, f)
+    with open(adapter_path / "adapter_model.safetensors", "wb") as f:
+        f.write(b"this is fake")
+
+    # dtype, max-len etc. are set so that this can run in CI
+    args = [
+        "--dtype",
+        "bfloat16",
+        "--max-model-len",
+        "8192",
+        "--enforce-eager",
+        "--max-num-seqs",
+        "128",
+        "--enable-lora",
+        "--lora-modules",
+        f"bad-adapter={tmp_path / 'bad_adapter'}",
+    ]
+
+    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
+        async with remote_server.get_async_client() as client:
+            # The request must fail with a connection or server-side error
+            with pytest.raises(
+                (openai.APIConnectionError, openai.InternalServerError)):
+                # Requesting the bad adapter crashes the engine
+                await client.completions.create(model="bad-adapter",
+                                                prompt="Hello, my name is")
+
+            # Now the server should shut down on its own within the timeout
+            return_code = remote_server.proc.wait(timeout=8)
+            assert return_code is not None