Added async_encode method to Engine (#4701)

This commit is contained in:
Steven Shimizu
2025-05-10 18:58:40 -07:00
committed by GitHub
parent 66fc63d6b1
commit 03dd785cd0
2 changed files with 44 additions and 0 deletions

View File

@@ -185,6 +185,35 @@ class TestSRTEngine(CustomTestCase):
result = throughput_test(server_args=server_args, bench_args=bench_args)
self.assertGreater(result["total_throughput"], 3000)
def test_8_engine_async_encode_consistency(self):
    """Check that ``engine.async_encode`` agrees with ``engine.encode``.

    An embedding engine is created with a fixed ``random_seed`` and with
    ``disable_radix_cache=True``, the same prompt is embedded once through
    the synchronous API and once through the asynchronous API, and the two
    embeddings are compared with ``torch.allclose``.
    """
    prompt = "Today is a sunny day and I like"
    model_path = DEFAULT_SMALL_EMBEDDING_MODEL_NAME_FOR_TEST

    engine = sgl.Engine(
        model_path=model_path,
        is_embedding=True,
        random_seed=42,
        disable_radix_cache=True,
    )
    try:
        # Get sync and async embeddings
        out1 = torch.tensor(engine.encode(prompt)["embedding"])

        # NOTE(review): the coroutine is driven on the loop returned by
        # asyncio.get_event_loop() rather than via asyncio.run(), which
        # would create a fresh loop. Presumably the engine binds background
        # work to the loop used by the sync call above -- confirm before
        # changing how the coroutine is run.
        loop = asyncio.get_event_loop()
        out2 = torch.tensor(
            loop.run_until_complete(engine.async_encode(prompt))["embedding"]
        )
    finally:
        # Shut the engine down even when an encode call raises, so a
        # failure here does not leave it running for the remaining tests.
        engine.shutdown()

    print("\n==== Shapes ====")
    print(f"sync shape: {out1.shape}")
    print(f"async shape: {out2.shape}")

    self.assertTrue(
        torch.allclose(out1, out2, atol=1e-5, rtol=1e-3),
        "Sync and async embeddings are not equal within tolerance",
    )
# Run this module's test cases when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()