fix MultiTokenizerWrapper name (#10049)
Signed-off-by: huanglong <huanglong@linux.alibaba.com>
This commit is contained in:
@@ -1195,7 +1195,7 @@ class MultiTokenizerRegisterReq:
|
|||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class MultiTokenizerWarpper:
|
class MultiTokenizerWrapper:
|
||||||
worker_id: int
|
worker_id: int
|
||||||
obj: Optional[Any] = None
|
obj: Optional[Any] = None
|
||||||
|
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ from sglang.srt.managers.io_struct import (
|
|||||||
BatchStrOut,
|
BatchStrOut,
|
||||||
BatchTokenIDOut,
|
BatchTokenIDOut,
|
||||||
MultiTokenizerRegisterReq,
|
MultiTokenizerRegisterReq,
|
||||||
MultiTokenizerWarpper,
|
MultiTokenizerWrapper,
|
||||||
)
|
)
|
||||||
from sglang.srt.managers.tokenizer_manager import TokenizerManager, _Communicator
|
from sglang.srt.managers.tokenizer_manager import TokenizerManager, _Communicator
|
||||||
from sglang.srt.server_args import PortArgs, ServerArgs
|
from sglang.srt.server_args import PortArgs, ServerArgs
|
||||||
@@ -441,7 +441,7 @@ class MultiTokenizerRouter(TokenizerManager, MultiTokenizerMixin):
|
|||||||
|
|
||||||
async def _distribute_result_to_workers(self, recv_obj):
|
async def _distribute_result_to_workers(self, recv_obj):
|
||||||
"""Distribute result to corresponding workers based on rid"""
|
"""Distribute result to corresponding workers based on rid"""
|
||||||
if isinstance(recv_obj, MultiTokenizerWarpper):
|
if isinstance(recv_obj, MultiTokenizerWrapper):
|
||||||
worker_ids = [recv_obj.worker_id]
|
worker_ids = [recv_obj.worker_id]
|
||||||
recv_obj = recv_obj.obj
|
recv_obj = recv_obj.obj
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -85,7 +85,7 @@ from sglang.srt.managers.io_struct import (
|
|||||||
LoadLoRAAdapterReqInput,
|
LoadLoRAAdapterReqInput,
|
||||||
LoadLoRAAdapterReqOutput,
|
LoadLoRAAdapterReqOutput,
|
||||||
MultiTokenizerRegisterReq,
|
MultiTokenizerRegisterReq,
|
||||||
MultiTokenizerWarpper,
|
MultiTokenizerWrapper,
|
||||||
OpenSessionReqInput,
|
OpenSessionReqInput,
|
||||||
OpenSessionReqOutput,
|
OpenSessionReqOutput,
|
||||||
ProfileReq,
|
ProfileReq,
|
||||||
@@ -1096,13 +1096,13 @@ class Scheduler(
|
|||||||
self.send_to_tokenizer.send_pyobj(abort_req)
|
self.send_to_tokenizer.send_pyobj(abort_req)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# If it is a MultiTokenizerWarpper, unwrap it and handle the inner request.
|
# If it is a MultiTokenizerWrapper, unwrap it and handle the inner request.
|
||||||
if isinstance(recv_req, MultiTokenizerWarpper):
|
if isinstance(recv_req, MultiTokenizerWrapper):
|
||||||
worker_id = recv_req.worker_id
|
worker_id = recv_req.worker_id
|
||||||
recv_req = recv_req.obj
|
recv_req = recv_req.obj
|
||||||
output = self._request_dispatcher(recv_req)
|
output = self._request_dispatcher(recv_req)
|
||||||
if output is not None:
|
if output is not None:
|
||||||
output = MultiTokenizerWarpper(worker_id, output)
|
output = MultiTokenizerWrapper(worker_id, output)
|
||||||
self.send_to_tokenizer.send_pyobj(output)
|
self.send_to_tokenizer.send_pyobj(output)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|||||||
@@ -94,7 +94,7 @@ from sglang.srt.managers.io_struct import (
|
|||||||
LoadLoRAAdapterReqInput,
|
LoadLoRAAdapterReqInput,
|
||||||
LoadLoRAAdapterReqOutput,
|
LoadLoRAAdapterReqOutput,
|
||||||
LoRAUpdateResult,
|
LoRAUpdateResult,
|
||||||
MultiTokenizerWarpper,
|
MultiTokenizerWrapper,
|
||||||
OpenSessionReqInput,
|
OpenSessionReqInput,
|
||||||
OpenSessionReqOutput,
|
OpenSessionReqOutput,
|
||||||
ProfileReq,
|
ProfileReq,
|
||||||
@@ -1118,7 +1118,7 @@ class TokenizerManager:
|
|||||||
self, obj: UpdateWeightFromDiskReqInput
|
self, obj: UpdateWeightFromDiskReqInput
|
||||||
) -> Tuple[bool, str]:
|
) -> Tuple[bool, str]:
|
||||||
if self.server_args.tokenizer_worker_num > 1:
|
if self.server_args.tokenizer_worker_num > 1:
|
||||||
obj = MultiTokenizerWarpper(self.worker_id, obj)
|
obj = MultiTokenizerWrapper(self.worker_id, obj)
|
||||||
self.send_to_scheduler.send_pyobj(obj)
|
self.send_to_scheduler.send_pyobj(obj)
|
||||||
self.model_update_result = asyncio.Future()
|
self.model_update_result = asyncio.Future()
|
||||||
if self.server_args.dp_size == 1:
|
if self.server_args.dp_size == 1:
|
||||||
@@ -1339,7 +1339,7 @@ class TokenizerManager:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
if self.server_args.tokenizer_worker_num > 1:
|
if self.server_args.tokenizer_worker_num > 1:
|
||||||
obj = MultiTokenizerWarpper(self.worker_id, obj)
|
obj = MultiTokenizerWrapper(self.worker_id, obj)
|
||||||
self.send_to_scheduler.send_pyobj(obj)
|
self.send_to_scheduler.send_pyobj(obj)
|
||||||
|
|
||||||
self.session_futures[obj.session_id] = asyncio.Future()
|
self.session_futures[obj.session_id] = asyncio.Future()
|
||||||
@@ -2165,7 +2165,7 @@ class _Communicator(Generic[T]):
|
|||||||
|
|
||||||
if obj:
|
if obj:
|
||||||
if _Communicator.enable_multi_tokenizer:
|
if _Communicator.enable_multi_tokenizer:
|
||||||
obj = MultiTokenizerWarpper(worker_id=os.getpid(), obj=obj)
|
obj = MultiTokenizerWrapper(worker_id=os.getpid(), obj=obj)
|
||||||
self._sender.send_pyobj(obj)
|
self._sender.send_pyobj(obj)
|
||||||
|
|
||||||
self._result_event = asyncio.Event()
|
self._result_event = asyncio.Event()
|
||||||
|
|||||||
Reference in New Issue
Block a user