Improve process creation (#1534)

This commit is contained in:
Lianmin Zheng
2024-09-29 02:36:12 -07:00
committed by GitHub
parent fd9ad817ec
commit 048685430d
15 changed files with 270 additions and 677 deletions

View File

@@ -16,6 +16,8 @@ limitations under the License.
"""DetokenizerManager is a process that detokenizes the token ids."""
import dataclasses
import logging
from collections import OrderedDict
from typing import List
import zmq
@@ -29,8 +31,11 @@ from sglang.srt.managers.io_struct import (
)
from sglang.srt.managers.schedule_batch import FINISH_MATCHED_STR
from sglang.srt.server_args import PortArgs, ServerArgs
from sglang.srt.utils import configure_logger, kill_parent_process
from sglang.utils import find_printable_text, get_exception_traceback
logger = logging.getLogger(__name__)
@dataclasses.dataclass
class DecodeStatus:
@@ -53,8 +58,8 @@ class DetokenizerManager:
):
# Init inter-process communication
context = zmq.Context(2)
self.recv_from_router = context.socket(zmq.PULL)
self.recv_from_router.bind(f"tcp://127.0.0.1:{port_args.detokenizer_port}")
self.recv_from_scheduler = context.socket(zmq.PULL)
self.recv_from_scheduler.bind(f"tcp://127.0.0.1:{port_args.detokenizer_port}")
self.send_to_tokenizer = context.socket(zmq.PUSH)
self.send_to_tokenizer.connect(f"tcp://127.0.0.1:{port_args.tokenizer_port}")
@@ -68,13 +73,13 @@ class DetokenizerManager:
trust_remote_code=server_args.trust_remote_code,
)
self.decode_status = {}
self.decode_status = LimitedCapacityDict()
def handle_loop(self):
def event_loop(self):
"""The event loop that handles requests"""
while True:
recv_obj = self.recv_from_router.recv_pyobj()
recv_obj = self.recv_from_scheduler.recv_pyobj()
if isinstance(recv_obj, BatchEmbeddingOut):
# If it is embedding model, no detokenization is needed.
@@ -165,15 +170,29 @@ class DetokenizerManager:
)
def start_detokenizer_process(
class LimitedCapacityDict(OrderedDict):
def __init__(self, capacity=1 << 15, *args, **kwargs):
super().__init__(*args, **kwargs)
self.capacity = capacity
def __setitem__(self, key, value):
if len(self) >= self.capacity:
# Remove the oldest element (first item in the dict)
self.popitem(last=False)
# Set the new item
super().__setitem__(key, value)
def run_detokenizer_process(
server_args: ServerArgs,
port_args: PortArgs,
pipe_writer,
):
configure_logger(server_args)
try:
manager = DetokenizerManager(server_args, port_args)
manager.event_loop()
except Exception:
pipe_writer.send(get_exception_traceback())
raise
pipe_writer.send("init ok")
manager.handle_loop()
msg = get_exception_traceback()
logger.error(msg)
kill_parent_process()