Abort disconnected requests (#457)

This commit is contained in:
Lianmin Zheng
2024-05-20 18:41:21 -07:00
committed by GitHub
parent 3e684be7a3
commit 8dbdc018a3
8 changed files with 202 additions and 132 deletions

View File

@@ -11,6 +11,7 @@ import transformers
import uvloop
import zmq
import zmq.asyncio
from fastapi import BackgroundTasks
from sglang.srt.hf_transformers_utils import (
get_config,
@@ -165,7 +166,7 @@ class TokenizerManager:
while True:
try:
await asyncio.wait_for(event.wait(), timeout=5)
await asyncio.wait_for(event.wait(), timeout=4)
except asyncio.TimeoutError:
if request is not None and await request.is_disconnected():
self.abort_request(rid)
@@ -243,7 +244,7 @@ class TokenizerManager:
while True:
try:
await asyncio.wait_for(state.event.wait(), timeout=5)
await asyncio.wait_for(state.event.wait(), timeout=4)
break
except asyncio.TimeoutError:
if request is not None and await request.is_disconnected():
@@ -270,10 +271,26 @@ class TokenizerManager:
self.send_to_router.send_pyobj(req)
def abort_request(self, rid):
if rid not in self.rid_to_state:
return
del self.rid_to_state[rid]
req = AbortReq(rid)
self.send_to_router.send_pyobj(req)
def create_abort_task(self, obj):
# Abort the request if the client is disconnected.
async def abort_request():
await asyncio.sleep(3)
if obj.is_single:
self.abort_request(obj.rid)
else:
for rid in obj.rids:
self.abort_request(rid)
background_tasks = BackgroundTasks()
background_tasks.add_task(abort_request)
return background_tasks
def create_handle_loop(self):
self.to_create_loop = False
loop = asyncio.get_event_loop()