[router]: Add Embedding routing logic (#10129)
Signed-off-by: Jintao Zhang <zhangjintao9020@gmail.com> Co-authored-by: Waël Boukhobza <wawa_wael@live.fr>
This commit is contained in:
@@ -715,6 +715,29 @@ def e2e_router_only_rr():
|
||||
_terminate(proc)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def e2e_embedding_model() -> str:
    """Embedding model to use for E2E tests.

    Defaults to an E5 Mistral model, can be overridden via the
    E2E_EMBEDDING_MODEL env var. A variable that is set but empty is treated
    the same as an unset one, so the default is used instead of an empty
    model name.

    Returns:
        The model identifier as a string.
    """
    import os

    # `os.getenv(name, default)` only falls back when the variable is absent;
    # `or` also covers the "exported but empty" case.
    return os.getenv("E2E_EMBEDDING_MODEL") or "intfloat/e5-mistral-7b-instruct"
|
||||
|
||||
|
||||
@pytest.fixture
def e2e_primary_embedding_worker(e2e_embedding_model: str):
    """Start one embedding worker for the requested model and hand it to the test.

    A port is obtained from ``_find_available_port`` and the worker is started
    with ``_popen_launch_worker`` on ``http://127.0.0.1:<port>``.

    Yields:
        A ``SimpleNamespace`` with ``proc`` (the value returned by the
        launcher) and ``url`` (the worker's base URL).

    The process is passed to ``_terminate`` on teardown, whether or not the
    test body succeeded.
    """
    worker_url = f"http://127.0.0.1:{_find_available_port()}"
    worker_proc = _popen_launch_worker(e2e_embedding_model, worker_url)
    try:
        yield SimpleNamespace(proc=worker_proc, url=worker_url)
    finally:
        _terminate(worker_proc)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def e2e_primary_worker(e2e_model: str):
|
||||
port = _find_available_port()
|
||||
|
||||
38
sgl-router/py_test/e2e/test_e2e_embeddings.py
Normal file
38
sgl-router/py_test/e2e/test_e2e_embeddings.py
Normal file
@@ -0,0 +1,38 @@
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_embeddings_basic(
    e2e_router_only_rr, e2e_primary_embedding_worker, e2e_embedding_model
):
    """Send a two-input embedding request through the router to one worker.

    The embedding worker is attached to the router via ``/add_worker``, a
    request is posted to ``/v1/embeddings``, and the response is checked for
    HTTP 200, exactly two ``data`` items, and a non-empty ``embedding`` list
    on each item. The worker is detached again afterwards.
    """
    base = e2e_router_only_rr.url
    worker_url = e2e_primary_embedding_worker.url

    # Attach embedding worker to router-only instance
    r = requests.post(f"{base}/add_worker", params={"url": worker_url}, timeout=180)
    r.raise_for_status()

    try:
        # Simple embedding request with two inputs
        payload = {
            "model": e2e_embedding_model,
            "input": [
                "the quick brown fox",
                "jumps over the lazy dog",
            ],
        }
        r = requests.post(f"{base}/v1/embeddings", json=payload, timeout=120)

        assert r.status_code == 200, f"unexpected status: {r.status_code} {r.text}"

        data = r.json()
        assert "data" in data and isinstance(data["data"], list)
        assert len(data["data"]) == 2

        # Validate shape of embedding objects
        for item in data["data"]:
            assert "embedding" in item and isinstance(item["embedding"], list)
            # Ensure non-empty vectors
            assert len(item["embedding"]) > 0
    finally:
        # Detach the worker so the router is not left pointing at a backend
        # that the worker fixture shuts down on teardown.
        # NOTE(review): the router fixture's scope is not visible here; this
        # matters if it is shared with other tests -- confirm.
        try:
            requests.post(
                f"{base}/remove_worker", params={"url": worker_url}, timeout=60
            )
        except requests.RequestException:
            # Deliberately best-effort: a cleanup failure must not mask the
            # outcome of the assertions above.
            pass
|
||||
Reference in New Issue
Block a user