# sglang/python/sglang/srt/managers/detokenizer_manager.py

"""
Copyright 2023-2024 SGLang Team
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

"""DetokenizerManager is a process that detokenizes the token ids."""

import asyncio
import dataclasses
from typing import List

import uvloop
import zmq
import zmq.asyncio

from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.managers.io_struct import (
    BatchEmbeddingOut,
    BatchStrOut,
    BatchTokenIDOut,
    UpdateWeightReqOutput,
)
from sglang.srt.managers.schedule_batch import FINISH_MATCHED_STR
from sglang.srt.server_args import PortArgs, ServerArgs
from sglang.utils import find_printable_text, get_exception_traceback

asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())


@dataclasses.dataclass
class DecodeStatus:
    """Per-request bookkeeping for incremental detokenization.

    One instance is kept per request id by ``DetokenizerManager`` and is
    updated every time a new batch containing that request arrives.
    """

    # Version id of the request; a mismatch with the incoming vid makes the
    # manager discard this status and start a fresh one.
    vid: int
    # Text that has already been committed for this request.
    decoded_text: str
    # Token ids received for this request in the latest batch.
    decode_ids: List[int]
    # Start index in decode_ids of the window handed to the tokenizer.
    surr_offset: int
    # End index in decode_ids of the already-read part of that window; the
    # text decoded from ids past this index is the new text.
    read_offset: int


class DetokenizerManager:
    """DetokenizerManager is a process that detokenizes the token ids.

    It pulls objects from the router over a ZMQ PULL socket, converts
    ``BatchTokenIDOut`` batches into ``BatchStrOut`` batches by incremental
    decoding, and pushes the result to the tokenizer over a ZMQ PUSH socket.
    Every other kind of message is forwarded without detokenization.
    """

    def __init__(
        self,
        server_args: ServerArgs,
        port_args: PortArgs,
    ):
        """Create the sockets, load the tokenizer and reset the decode state.

        Args:
            server_args: Provides ``skip_tokenizer_init`` and, when a tokenizer
                is loaded, ``tokenizer_path``, ``tokenizer_mode`` and
                ``trust_remote_code``.
            port_args: Provides ``detokenizer_port`` (bound on localhost) and
                ``tokenizer_port`` (connected to on localhost).
        """
        # Init inter-process communication
        # (the positional argument of the pyzmq Context is its io_threads count)
        context = zmq.asyncio.Context(2)
        self.recv_from_router = context.socket(zmq.PULL)
        self.recv_from_router.bind(f"tcp://127.0.0.1:{port_args.detokenizer_port}")

        self.send_to_tokenizer = context.socket(zmq.PUSH)
        self.send_to_tokenizer.connect(f"tcp://127.0.0.1:{port_args.tokenizer_port}")

        if server_args.skip_tokenizer_init:
            self.tokenizer = None
        else:
            self.tokenizer = get_tokenizer(
                server_args.tokenizer_path,
                tokenizer_mode=server_args.tokenizer_mode,
                trust_remote_code=server_args.trust_remote_code,
            )

        # Maps a request id to its DecodeStatus. Entries are removed once the
        # request reports a finish reason (see _detokenize_batch).
        self.decode_status = {}

    async def handle_loop(self):
        """The event loop that handles requests.

        Runs forever: receives one object from the router, then either
        forwards it as-is or detokenizes it and sends a ``BatchStrOut``.

        NOTE(review): the ``send_pyobj`` calls on the asyncio socket are not
        awaited, matching the original behavior - confirm this is intended.
        """

        while True:
            recv_obj = await self.recv_from_router.recv_pyobj()

            if isinstance(recv_obj, BatchEmbeddingOut):
                # If it is embedding model, no detokenization is needed.
                self.send_to_tokenizer.send_pyobj(
                    BatchEmbeddingOut(
                        rids=recv_obj.rids,
                        embeddings=recv_obj.embeddings,
                        meta_info=recv_obj.meta_info,
                        finished_reason=recv_obj.finished_reason,
                    )
                )
                continue
            elif isinstance(recv_obj, UpdateWeightReqOutput):
                # If it is a weight update request, no detokenization is needed.
                self.send_to_tokenizer.send_pyobj(recv_obj)
                continue
            elif self.tokenizer is None:
                # If the tokenizer is skipped, no detokenization is needed
                self.send_to_tokenizer.send_pyobj(recv_obj)
                continue

            assert isinstance(recv_obj, BatchTokenIDOut)

            self.send_to_tokenizer.send_pyobj(
                BatchStrOut(
                    rids=recv_obj.rids,
                    output_strs=self._detokenize_batch(recv_obj),
                    meta_info=recv_obj.meta_info,
                    finished_reason=recv_obj.finished_reason,
                )
            )

    def _detokenize_batch(self, recv_obj: BatchTokenIDOut) -> List[str]:
        """Incrementally decode one batch and return one string per request.

        Args:
            recv_obj: The batch of token ids; its per-request lists are read
                at the same index as ``recv_obj.rids``.

        Returns:
            The full output text of every request in the batch, in batch
            order, with a matched stop string (and what follows) trimmed.
        """
        # Initialize decode status
        statuses, read_ids, surr_ids = [], [], []
        for i, rid in enumerate(recv_obj.rids):
            vid = recv_obj.vids[i]
            s = self.decode_status.get(rid)
            if s is None or s.vid != vid:
                # Unknown request, or its version changed: start over from
                # the state carried by the batch itself.
                s = DecodeStatus(
                    vid=vid,
                    decoded_text=recv_obj.decoded_texts[i],
                    decode_ids=recv_obj.decode_ids[i],
                    surr_offset=0,
                    read_offset=recv_obj.read_offsets[i],
                )
                self.decode_status[rid] = s
            else:
                s.decode_ids = recv_obj.decode_ids[i]

            statuses.append(s)
            read_ids.append(s.decode_ids[s.surr_offset :])
            surr_ids.append(s.decode_ids[s.surr_offset : s.read_offset])

        # TODO(lmzheng): handle skip_special_tokens/spaces_between_special_tokens per request
        decode_kwargs = dict(
            skip_special_tokens=recv_obj.skip_special_tokens[0],
            spaces_between_special_tokens=recv_obj.spaces_between_special_tokens[0],
        )
        surr_texts = self.tokenizer.batch_decode(surr_ids, **decode_kwargs)
        read_texts = self.tokenizer.batch_decode(read_ids, **decode_kwargs)

        # Incremental decoding
        output_strs = []
        for i, s in enumerate(statuses):
            finished_reason = recv_obj.finished_reason[i]
            # The new text is what the full window decodes to beyond the
            # already-read prefix.
            new_text = read_texts[i][len(surr_texts[i]) :]
            if finished_reason is None:
                # Streaming chunk: update the decode status
                # A trailing U+FFFD (replacement character) means the last
                # character is still incomplete, so do not commit it yet.
                if len(new_text) > 0 and not new_text.endswith("\ufffd"):
                    s.decoded_text = s.decoded_text + new_text
                    s.surr_offset = s.read_offset
                    s.read_offset = len(s.decode_ids)
                    new_text = ""
                else:
                    new_text = find_printable_text(new_text)
            else:
                # Final chunk for this request: drop its state so that
                # decode_status does not grow without bound.
                self.decode_status.pop(recv_obj.rids[i], None)

            output_str = s.decoded_text + new_text

            # Trim stop str. TODO(lmzheng): handle the case where multiple stop strs are hit
            if isinstance(finished_reason, FINISH_MATCHED_STR):
                pos = output_str.find(finished_reason.matched)
                if pos != -1:
                    output_str = output_str[:pos]

            output_strs.append(output_str)

        return output_strs


def start_detokenizer_process(
    server_args: ServerArgs,
    port_args: PortArgs,
    pipe_writer,
):
    """Build a DetokenizerManager and run its handle loop in this process.

    The outcome of the construction is reported through ``pipe_writer``: the
    exception traceback text if it fails (the exception is then re-raised),
    or the string "init ok" if it succeeds.

    Args:
        server_args: Passed through to ``DetokenizerManager``.
        port_args: Passed through to ``DetokenizerManager``.
        pipe_writer: Object whose ``send`` method receives the init result.
    """
    try:
        detokenizer = DetokenizerManager(server_args, port_args)
    except Exception:
        # Report the failure before propagating it.
        pipe_writer.send(get_exception_traceback())
        raise
    else:
        pipe_writer.send("init ok")

    # Drive the handle loop on the current event loop until it completes.
    asyncio.get_event_loop().run_until_complete(detokenizer.handle_loop())
-												chore: add copyright for srt (#790)


											
										
										
											2024-07-28 23:07:12 +10:00
+								"""
 								Copyright 2023-2024 SGLang Team
 								Licensed under the Apache License, Version 2.0 (the "License");
 								you may not use this file except in compliance with the License.
 								You may obtain a copy of the License at
 								    http://www.apache.org/licenses/LICENSE-2.0
 								Unless required by applicable law or agreed to in writing, software
 								distributed under the License is distributed on an "AS IS" BASIS,
 								WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 								See the License for the specific language governing permissions and
 								limitations under the License.
 								"""
-												Improve doc strings (#518)

											
										
										
											2024-06-08 02:06:52 -07:00
+								"""DetokenizerManager is a process that detokenizes the token ids."""
-												Higher priority for user input of max_prefill_tokens & format (#540)


											
										
										
											2024-06-12 21:48:40 -07:00
-												release initial code

Co-authored-by: Ying Sheng <sqy1415@gmail.com>
Co-authored-by: Liangsheng Yin <hnyls2002@gmail.com>
Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
Co-authored-by: parasol-aser <3848358+parasol-aser@users.noreply.github.com>
Co-authored-by: LiviaSun <33578456+ChuyueSun@users.noreply.github.com>
Co-authored-by: Cody Yu <hao.yu.cody@gmail.com>

											
										
										
											2024-01-08 04:37:50 +00:00
+								import asyncio
-												Detokenize incrementally when streaming (#653)


											
										
										
											2024-07-18 17:57:40 -07:00
+								import dataclasses
 								from typing import List
-												release initial code

Co-authored-by: Ying Sheng <sqy1415@gmail.com>
Co-authored-by: Liangsheng Yin <hnyls2002@gmail.com>
Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
Co-authored-by: parasol-aser <3848358+parasol-aser@users.noreply.github.com>
Co-authored-by: LiviaSun <33578456+ChuyueSun@users.noreply.github.com>
Co-authored-by: Cody Yu <hao.yu.cody@gmail.com>

											
										
										
											2024-01-08 04:37:50 +00:00
 								import uvloop
 								import zmq
 								import zmq.asyncio
-												add `.isort.cfg` (#378)


											
										
										
											2024-04-22 22:38:09 +08:00
-												release initial code

Co-authored-by: Ying Sheng <sqy1415@gmail.com>
Co-authored-by: Liangsheng Yin <hnyls2002@gmail.com>
Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
Co-authored-by: parasol-aser <3848358+parasol-aser@users.noreply.github.com>
Co-authored-by: LiviaSun <33578456+ChuyueSun@users.noreply.github.com>
Co-authored-by: Cody Yu <hao.yu.cody@gmail.com>

											
										
										
											2024-01-08 04:37:50 +00:00
+								from sglang.srt.hf_transformers_utils import get_tokenizer
-												Add e5-mistral embedding model - step 3/3 (#988)


											
										
										
											2024-08-08 16:31:19 -07:00
+								from sglang.srt.managers.io_struct import (
 								    BatchEmbeddingOut,
 								    BatchStrOut,
 								    BatchTokenIDOut,
-												[Feat] Support update weights without restart server (#1157)


											
										
										
											2024-08-20 13:48:24 -07:00
+								    UpdateWeightReqOutput,
-												Add e5-mistral embedding model - step 3/3 (#988)


											
										
										
											2024-08-08 16:31:19 -07:00
+								)
-												Code structure refactor (#807)


											
										
										
											2024-07-29 23:04:48 -07:00
+								from sglang.srt.managers.schedule_batch import FINISH_MATCHED_STR
-												release initial code

Co-authored-by: Ying Sheng <sqy1415@gmail.com>
Co-authored-by: Liangsheng Yin <hnyls2002@gmail.com>
Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
Co-authored-by: parasol-aser <3848358+parasol-aser@users.noreply.github.com>
Co-authored-by: LiviaSun <33578456+ChuyueSun@users.noreply.github.com>
Co-authored-by: Cody Yu <hao.yu.cody@gmail.com>

											
										
										
											2024-01-08 04:37:50 +00:00
+								from sglang.srt.server_args import PortArgs, ServerArgs
-												Test the case when max_new_tokens is very large (#1038)


											
										
										
											2024-08-11 16:41:03 -07:00
+								from sglang.utils import find_printable_text, get_exception_traceback
-												release initial code

Co-authored-by: Ying Sheng <sqy1415@gmail.com>
Co-authored-by: Liangsheng Yin <hnyls2002@gmail.com>
Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
Co-authored-by: parasol-aser <3848358+parasol-aser@users.noreply.github.com>
Co-authored-by: LiviaSun <33578456+ChuyueSun@users.noreply.github.com>
Co-authored-by: Cody Yu <hao.yu.cody@gmail.com>

											
										
										
											2024-01-08 04:37:50 +00:00
 								asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
-												Detokenize incrementally when streaming (#653)


											
										
										
											2024-07-18 17:57:40 -07:00
+								@dataclasses.dataclass
 								class DecodeStatus:
-												[Minor] Improve logging and rename the health check endpoint name (#1180)


											
										
										
											2024-08-21 19:24:36 -07:00
+								    """Store the status of incremental decoding."""
-												Fix jump forward when streaming (#665)


											
										
										
											2024-07-19 16:42:06 -07:00
+								    vid: int
-												Detokenize incrementally when streaming (#653)


											
										
										
											2024-07-18 17:57:40 -07:00
+								    decoded_text: str
 								    decode_ids: List[int]
 								    surr_offset: int
 								    read_offset: int
-												release initial code

Co-authored-by: Ying Sheng <sqy1415@gmail.com>
Co-authored-by: Liangsheng Yin <hnyls2002@gmail.com>
Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
Co-authored-by: parasol-aser <3848358+parasol-aser@users.noreply.github.com>
Co-authored-by: LiviaSun <33578456+ChuyueSun@users.noreply.github.com>
Co-authored-by: Cody Yu <hao.yu.cody@gmail.com>

											
										
										
											2024-01-08 04:37:50 +00:00
+								class DetokenizerManager:
-												[Minor] Improve logging and rename the health check endpoint name (#1180)


											
										
										
											2024-08-21 19:24:36 -07:00
+								    """DetokenizerManager is a process that detokenizes the token ids."""
-												release initial code

Co-authored-by: Ying Sheng <sqy1415@gmail.com>
Co-authored-by: Liangsheng Yin <hnyls2002@gmail.com>
Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
Co-authored-by: parasol-aser <3848358+parasol-aser@users.noreply.github.com>
Co-authored-by: LiviaSun <33578456+ChuyueSun@users.noreply.github.com>
Co-authored-by: Cody Yu <hao.yu.cody@gmail.com>

											
										
										
											2024-01-08 04:37:50 +00:00
+								    def __init__(
 								        self,
 								        server_args: ServerArgs,
 								        port_args: PortArgs,
 								    ):
-												[Minor] Improve the function organization in TokenizerManager & improve loggers (#1208)


											
										
										
											2024-08-25 14:46:34 -07:00
+								        # Init inter-process communication
-												release initial code

Co-authored-by: Ying Sheng <sqy1415@gmail.com>
Co-authored-by: Liangsheng Yin <hnyls2002@gmail.com>
Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
Co-authored-by: parasol-aser <3848358+parasol-aser@users.noreply.github.com>
Co-authored-by: LiviaSun <33578456+ChuyueSun@users.noreply.github.com>
Co-authored-by: Cody Yu <hao.yu.cody@gmail.com>

											
										
										
											2024-01-08 04:37:50 +00:00
+								        context = zmq.asyncio.Context(2)
 								        self.recv_from_router = context.socket(zmq.PULL)
 								        self.recv_from_router.bind(f"tcp://127.0.0.1:{port_args.detokenizer_port}")
 								        self.send_to_tokenizer = context.socket(zmq.PUSH)
 								        self.send_to_tokenizer.connect(f"tcp://127.0.0.1:{port_args.tokenizer_port}")
-												Add skip_tokenizer_init args. (#959)

Co-authored-by: lzhang <zhanglei@modelbest.cn>
											
										
										
											2024-08-10 03:14:13 +08:00
+								        if server_args.skip_tokenizer_init:
 								            self.tokenizer = None
 								        else:
 								            self.tokenizer = get_tokenizer(
 								                server_args.tokenizer_path,
 								                tokenizer_mode=server_args.tokenizer_mode,
 								                trust_remote_code=server_args.trust_remote_code,
 								            )
-												release initial code

Co-authored-by: Ying Sheng <sqy1415@gmail.com>
Co-authored-by: Liangsheng Yin <hnyls2002@gmail.com>
Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
Co-authored-by: parasol-aser <3848358+parasol-aser@users.noreply.github.com>
Co-authored-by: LiviaSun <33578456+ChuyueSun@users.noreply.github.com>
Co-authored-by: Cody Yu <hao.yu.cody@gmail.com>

											
										
										
											2024-01-08 04:37:50 +00:00
-												Detokenize incrementally when streaming (#653)


											
										
										
											2024-07-18 17:57:40 -07:00
+								        self.decode_status = {}
-												release initial code

Co-authored-by: Ying Sheng <sqy1415@gmail.com>
Co-authored-by: Liangsheng Yin <hnyls2002@gmail.com>
Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
Co-authored-by: parasol-aser <3848358+parasol-aser@users.noreply.github.com>
Co-authored-by: LiviaSun <33578456+ChuyueSun@users.noreply.github.com>
Co-authored-by: Cody Yu <hao.yu.cody@gmail.com>

											
										
										
											2024-01-08 04:37:50 +00:00
+								    async def handle_loop(self):
-												[Minor] Improve the function organization in TokenizerManager & improve loggers (#1208)


											
										
										
											2024-08-25 14:46:34 -07:00
+								        """The event loop that handles requests"""
-												release initial code

Co-authored-by: Ying Sheng <sqy1415@gmail.com>
Co-authored-by: Liangsheng Yin <hnyls2002@gmail.com>
Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
Co-authored-by: parasol-aser <3848358+parasol-aser@users.noreply.github.com>
Co-authored-by: LiviaSun <33578456+ChuyueSun@users.noreply.github.com>
Co-authored-by: Cody Yu <hao.yu.cody@gmail.com>

											
										
										
											2024-01-08 04:37:50 +00:00
+								        while True:
-												[Minor] Improve the function organization in TokenizerManager & improve loggers (#1208)


											
										
										
											2024-08-25 14:46:34 -07:00
+								            recv_obj = await self.recv_from_router.recv_pyobj()
-												Add e5-mistral embedding model - step 3/3 (#988)


											
										
										
											2024-08-08 16:31:19 -07:00
 								            if isinstance(recv_obj, BatchEmbeddingOut):
-												[Minor] Improve the function organization in TokenizerManager & improve loggers (#1208)


											
										
										
											2024-08-25 14:46:34 -07:00
+								                # If it is embedding model, no detokenization is needed.
-												Add e5-mistral embedding model - step 3/3 (#988)


											
										
										
											2024-08-08 16:31:19 -07:00
+								                self.send_to_tokenizer.send_pyobj(
 								                    BatchEmbeddingOut(
 								                        rids=recv_obj.rids,
 								                        embeddings=recv_obj.embeddings,
 								                        meta_info=recv_obj.meta_info,
 								                        finished_reason=recv_obj.finished_reason,
 								                    )
 								                )
 								                continue
-												[Minor] Improve the function organization in TokenizerManager & improve loggers (#1208)


											
										
										
											2024-08-25 14:46:34 -07:00
+								            elif isinstance(recv_obj, UpdateWeightReqOutput):
 								                # If it is a weight update request, no detokenization is needed.
 								                self.send_to_tokenizer.send_pyobj(recv_obj)
 								                continue
 								            elif self.tokenizer is None:
 								                # If the tokenizer is skipped, no detokenization is needed
-												[Feat] Support update weights without restart server (#1157)


											
										
										
											2024-08-20 13:48:24 -07:00
+								                self.send_to_tokenizer.send_pyobj(recv_obj)
 								                continue
-												Fix rid state map leak + Refractor .finished (#505)

Co-authored-by: ZX <zx@lbx.dev>
											
										
										
											2024-06-08 04:20:40 +08:00
+								            assert isinstance(recv_obj, BatchTokenIDOut)
-												Detokenize incrementally when streaming (#653)


											
										
										
											2024-07-18 17:57:40 -07:00
+								            bs = len(recv_obj.rids)
 								            # Initialize decode status
 								            read_ids, surr_ids = [], []
 								            for i in range(bs):
 								                rid = recv_obj.rids[i]
-												Fix jump forward when streaming (#665)


											
										
										
											2024-07-19 16:42:06 -07:00
+								                vid = recv_obj.vids[i]
 								                if rid not in self.decode_status or self.decode_status[rid].vid != vid:
-												Detokenize incrementally when streaming (#653)


											
										
										
											2024-07-18 17:57:40 -07:00
+								                    s = DecodeStatus(
-												Fix jump forward when streaming (#665)


											
										
										
											2024-07-19 16:42:06 -07:00
+								                        vid=vid,
-												Detokenize incrementally when streaming (#653)


											
										
										
											2024-07-18 17:57:40 -07:00
+								                        decoded_text=recv_obj.decoded_texts[i],
 								                        decode_ids=recv_obj.decode_ids[i],
 								                        surr_offset=0,
 								                        read_offset=recv_obj.read_offsets[i],
 								                    )
 								                    self.decode_status[rid] = s
 								                else:
 								                    s = self.decode_status[rid]
 								                    s.decode_ids = recv_obj.decode_ids[i]
 								                read_ids.append(s.decode_ids[s.surr_offset :])
 								                surr_ids.append(s.decode_ids[s.surr_offset : s.read_offset])
-												Fix rid state map leak + Refractor .finished (#505)

Co-authored-by: ZX <zx@lbx.dev>
											
										
										
											2024-06-08 04:20:40 +08:00
 								            # TODO(lmzheng): handle skip_special_tokens/spaces_between_special_tokens per request
-												Decode Incrementally (#517)


											
										
										
											2024-06-12 14:39:12 +08:00
+								            surr_texts = self.tokenizer.batch_decode(
-												Detokenize incrementally when streaming (#653)


											
										
										
											2024-07-18 17:57:40 -07:00
+								                surr_ids,
-												Decode Incrementally (#517)


											
										
										
											2024-06-12 14:39:12 +08:00
+								                skip_special_tokens=recv_obj.skip_special_tokens[0],
 								                spaces_between_special_tokens=recv_obj.spaces_between_special_tokens[0],
 								            )
 								            read_texts = self.tokenizer.batch_decode(
-												Detokenize incrementally when streaming (#653)


											
										
										
											2024-07-18 17:57:40 -07:00
+								                read_ids,
-												Fix rid state map leak + Refractor .finished (#505)

Co-authored-by: ZX <zx@lbx.dev>
											
										
										
											2024-06-08 04:20:40 +08:00
+								                skip_special_tokens=recv_obj.skip_special_tokens[0],
-												Decode Incrementally (#517)


											
										
										
											2024-06-12 14:39:12 +08:00
+								                spaces_between_special_tokens=recv_obj.spaces_between_special_tokens[0],
-												Fix rid state map leak + Refractor .finished (#505)

Co-authored-by: ZX <zx@lbx.dev>
											
										
										
											2024-06-08 04:20:40 +08:00
+								            )
-												[Minor] Improve the function organization in TokenizerManager & improve loggers (#1208)


											
										
										
											2024-08-25 14:46:34 -07:00
+								            # Incremental decoding
-												Decode Incrementally (#517)


											
										
										
											2024-06-12 14:39:12 +08:00
+								            output_strs = []
-												Detokenize incrementally when streaming (#653)


											
										
										
											2024-07-18 17:57:40 -07:00
+								            for i in range(bs):
 								                s = self.decode_status[recv_obj.rids[i]]
-												Decode Incrementally (#517)


											
										
										
											2024-06-12 14:39:12 +08:00
+								                new_text = read_texts[i][len(surr_texts[i]) :]
-												Fix streaming (#600)


											
										
										
											2024-07-07 01:55:58 -07:00
+								                if recv_obj.finished_reason[i] is None:
-												Detokenize incrementally when streaming (#653)


											
										
										
											2024-07-18 17:57:40 -07:00
+								                    # Streaming chunk: update the decode status
 								                    if len(new_text) > 0 and not new_text.endswith("<EFBFBD>"):
 								                        s.decoded_text = s.decoded_text + new_text
 								                        s.surr_offset = s.read_offset
 								                        s.read_offset = len(s.decode_ids)
 								                        new_text = ""
 								                    else:
 								                        new_text = find_printable_text(new_text)
 								                output_strs.append(s.decoded_text + new_text)
-												Fix rid state map leak + Refractor .finished (#505)

Co-authored-by: ZX <zx@lbx.dev>
											
										
										
											2024-06-08 04:20:40 +08:00
-												Improve the code style: more comments and remove useless packages (#1139)


											
										
										
											2024-08-17 14:37:52 -07:00
+								                # Trim stop str. TODO(lmzheng): handle the case where multiple stop strs are hit
-												Fix rid state map leak + Refractor .finished (#505)

Co-authored-by: ZX <zx@lbx.dev>
											
										
										
											2024-06-08 04:20:40 +08:00
+								                if isinstance(recv_obj.finished_reason[i], FINISH_MATCHED_STR):
 								                    pos = output_strs[i].find(recv_obj.finished_reason[i].matched)
 								                    if pos != -1:
 								                        output_strs[i] = output_strs[i][:pos]
 								            self.send_to_tokenizer.send_pyobj(
 								                BatchStrOut(
 								                    rids=recv_obj.rids,
-												* fix(detokenizer_manager.py): fix truncated decoded output (#586)

Co-authored-by: hnyls2002 <hnyls2002@gmail.com>
											
										
										
											2024-07-07 05:53:22 +08:00
+								                    output_strs=output_strs,
-												Fix rid state map leak + Refractor .finished (#505)

Co-authored-by: ZX <zx@lbx.dev>
											
										
										
											2024-06-08 04:20:40 +08:00
+								                    meta_info=recv_obj.meta_info,
 								                    finished_reason=recv_obj.finished_reason,
-												release initial code

Co-authored-by: Ying Sheng <sqy1415@gmail.com>
Co-authored-by: Liangsheng Yin <hnyls2002@gmail.com>
Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
Co-authored-by: parasol-aser <3848358+parasol-aser@users.noreply.github.com>
Co-authored-by: LiviaSun <33578456+ChuyueSun@users.noreply.github.com>
Co-authored-by: Cody Yu <hao.yu.cody@gmail.com>

											
										
										
											2024-01-08 04:37:50 +00:00
+								                )
-												Fix rid state map leak + Refractor .finished (#505)

Co-authored-by: ZX <zx@lbx.dev>
											
										
										
											2024-06-08 04:20:40 +08:00
+								            )
-												release initial code

Co-authored-by: Ying Sheng <sqy1415@gmail.com>
Co-authored-by: Liangsheng Yin <hnyls2002@gmail.com>
Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
Co-authored-by: parasol-aser <3848358+parasol-aser@users.noreply.github.com>
Co-authored-by: LiviaSun <33578456+ChuyueSun@users.noreply.github.com>
Co-authored-by: Cody Yu <hao.yu.cody@gmail.com>

											
										
										
											2024-01-08 04:37:50 +00:00
 								def start_detokenizer_process(
 								    server_args: ServerArgs,
 								    port_args: PortArgs,
 								    pipe_writer,
 								):
 								    try:
 								        manager = DetokenizerManager(server_args, port_args)
-												Crash the server when error or OOM happens (#514)


											
										
										
											2024-06-07 19:22:34 -07:00
+								    except Exception:
-												release initial code

Co-authored-by: Ying Sheng <sqy1415@gmail.com>
Co-authored-by: Liangsheng Yin <hnyls2002@gmail.com>
Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
Co-authored-by: parasol-aser <3848358+parasol-aser@users.noreply.github.com>
Co-authored-by: LiviaSun <33578456+ChuyueSun@users.noreply.github.com>
Co-authored-by: Cody Yu <hao.yu.cody@gmail.com>

											
										
										
											2024-01-08 04:37:50 +00:00
+								        pipe_writer.send(get_exception_traceback())
 								        raise
 								    pipe_writer.send("init ok")
 								    loop = asyncio.get_event_loop()
 								    loop.run_until_complete(manager.handle_loop())