Simplify tokenizer manager (#2254)

This commit is contained in:
Lianmin Zheng
2024-11-29 02:18:51 -08:00
committed by GitHub
parent 8b48496aaf
commit fe97a2d40f
7 changed files with 34 additions and 103 deletions

View File

@@ -45,8 +45,6 @@ from sglang.srt.managers.io_struct import (
EmbeddingReqInput,
FlushCacheReq,
GenerateReqInput,
GetMemPoolSizeReq,
GetMemPoolSizeReqOutput,
OpenSessionReqInput,
OpenSessionReqOutput,
ProfileReq,
@@ -218,7 +216,7 @@ class TokenizerManager:
input_ids = obj.input_ids
if self.is_generation:
image_inputs = await self.image_processor.process_images_async(
image_inputs: Dict = await self.image_processor.process_images_async(
obj.image_data, input_text or input_ids, obj
)
if image_inputs and "input_ids" in image_inputs:
@@ -406,25 +404,6 @@ class TokenizerManager:
req = ProfileReq.STOP_PROFILE
self.send_to_scheduler.send_pyobj(req)
async def get_memory_pool_size(self):
    """Ask the scheduler for its memory pool size and wait for the answer.

    A ``GetMemPoolSizeReq`` is sent through ``self.send_to_scheduler`` and the
    reply is awaited on the ``self.mem_pool_size`` future, which must be
    resolved by whatever code receives ``GetMemPoolSizeReqOutput`` messages.

    Returns:
        ``reply.size`` when ``server_args.dp_size == 1``; otherwise a list
        holding the ``size`` of every element of the awaited reply.
    """
    # Lazily start the receive loop on first use.
    if self.to_create_loop:
        self.create_handle_loop()

    self.send_to_scheduler.send_pyobj(GetMemPoolSizeReq())

    # FIXME: Each request should have its own future instead of using
    # `self.mem_pool_size`; overlapping calls overwrite each other's future.
    self.mem_pool_size = asyncio.Future()

    collect_from_all_ranks = self.server_args.dp_size != 1
    if collect_from_all_ranks:
        # Fresh accumulator for the per-rank replies of this request.
        self.mem_pool_size_tmp = []

    reply = await self.mem_pool_size

    if collect_from_all_ranks:
        return [item.size for item in reply]
    return reply.size
async def update_weights(
self, obj: UpdateWeightReqInput, request: Optional[fastapi.Request] = None
):
@@ -552,15 +531,6 @@ class TokenizerManager:
if len(self.model_update_tmp) == self.server_args.dp_size:
self.model_update_result.set_result(self.model_update_tmp)
continue
elif isinstance(recv_obj, GetMemPoolSizeReqOutput):
if self.server_args.dp_size == 1:
self.mem_pool_size.set_result(recv_obj)
else: # self.server_args.dp_size > 1
self.mem_pool_size_tmp.append(recv_obj)
# set the future once all results are received
if len(self.mem_pool_size_tmp) == self.server_args.dp_size:
self.mem_pool_size.set_result(self.mem_pool_size_tmp)
continue
elif isinstance(recv_obj, OpenSessionReqOutput):
self.session_futures[recv_obj.session_id].set_result(
recv_obj.session_id