[Feature] modify Runtime to support skip_tokenizer_init (#1088)

Co-authored-by: lzhang <zhanglei@modelbest.cn>
This commit is contained in:
rainred
2024-08-14 15:28:04 +08:00
committed by GitHub
parent c8423ca311
commit 616b59f384

View File

@@ -533,11 +533,18 @@ class Runtime:
prompt: str,
sampling_params: Optional[Dict] = None,
):
json_data = {
"text": prompt,
"sampling_params": sampling_params,
"stream": True,
}
if self.server_args.skip_tokenizer_init:
json_data = {
"input_ids": prompt,
"sampling_params": sampling_params,
"stream": True,
}
else:
json_data = {
"text": prompt,
"sampling_params": sampling_params,
"stream": True,
}
pos = 0
timeout = aiohttp.ClientTimeout(total=3 * 3600)
@@ -549,10 +556,13 @@ class Runtime:
if chunk == "data: [DONE]\n\n":
break
data = json.loads(chunk[5:].strip("\n"))
cur = data["text"][pos:]
if cur:
yield cur
pos += len(cur)
if hasattr(data, "text"):
cur = data["text"][pos:]
if cur:
yield cur
pos += len(cur)
else:
yield data
add_request = async_generate