From 616b59f384ad13b824fa8bb634444b43967f8c8a Mon Sep 17 00:00:00 2001
From: rainred <107027757+gryffindor-rr@users.noreply.github.com>
Date: Wed, 14 Aug 2024 15:28:04 +0800
Subject: [PATCH] [Feature] modify Runtime to support skip_tokenizer_init (#1088)

Co-authored-by: lzhang
---
Review notes (ignored by git-am):
  * When server_args.skip_tokenizer_init is set, `prompt` is sent to the
    server under the "input_ids" key instead of "text".
  * Streamed chunks are parsed with json.loads(), which returns a dict, so
    the presence of the "text" field is tested with `"text" in data`.
    The original revision of this patch used hasattr(data, "text"), which
    is always False for a dict and made every chunk take the
    `yield data` branch.
  * The `index` blob hash below refers to the original post-image; it is
    informational only for a plain `git apply`.

 python/sglang/srt/server.py | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/python/sglang/srt/server.py b/python/sglang/srt/server.py
index 7331425fa..8f735ac0c 100644
--- a/python/sglang/srt/server.py
+++ b/python/sglang/srt/server.py
@@ -533,11 +533,18 @@ class Runtime:
         prompt: str,
         sampling_params: Optional[Dict] = None,
     ):
-        json_data = {
-            "text": prompt,
-            "sampling_params": sampling_params,
-            "stream": True,
-        }
+        if self.server_args.skip_tokenizer_init:
+            json_data = {
+                "input_ids": prompt,
+                "sampling_params": sampling_params,
+                "stream": True,
+            }
+        else:
+            json_data = {
+                "text": prompt,
+                "sampling_params": sampling_params,
+                "stream": True,
+            }
         pos = 0
 
         timeout = aiohttp.ClientTimeout(total=3 * 3600)
@@ -549,10 +556,13 @@ class Runtime:
                         if chunk == "data: [DONE]\n\n":
                             break
                         data = json.loads(chunk[5:].strip("\n"))
-                        cur = data["text"][pos:]
-                        if cur:
-                            yield cur
-                        pos += len(cur)
+                        if "text" in data:
+                            cur = data["text"][pos:]
+                            if cur:
+                                yield cur
+                            pos += len(cur)
+                        else:
+                            yield data
 
     add_request = async_generate
 