[Feature] modify Runtime to support skip_tokenizer_init (#1088)
Co-authored-by: lzhang <zhanglei@modelbest.cn>
This commit is contained in:
@@ -533,11 +533,18 @@ class Runtime:
|
|||||||
prompt: str,
|
prompt: str,
|
||||||
sampling_params: Optional[Dict] = None,
|
sampling_params: Optional[Dict] = None,
|
||||||
):
|
):
|
||||||
json_data = {
|
if self.server_args.skip_tokenizer_init:
|
||||||
"text": prompt,
|
json_data = {
|
||||||
"sampling_params": sampling_params,
|
"input_ids": prompt,
|
||||||
"stream": True,
|
"sampling_params": sampling_params,
|
||||||
}
|
"stream": True,
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
json_data = {
|
||||||
|
"text": prompt,
|
||||||
|
"sampling_params": sampling_params,
|
||||||
|
"stream": True,
|
||||||
|
}
|
||||||
pos = 0
|
pos = 0
|
||||||
|
|
||||||
timeout = aiohttp.ClientTimeout(total=3 * 3600)
|
timeout = aiohttp.ClientTimeout(total=3 * 3600)
|
||||||
@@ -549,10 +556,13 @@ class Runtime:
|
|||||||
if chunk == "data: [DONE]\n\n":
|
if chunk == "data: [DONE]\n\n":
|
||||||
break
|
break
|
||||||
data = json.loads(chunk[5:].strip("\n"))
|
data = json.loads(chunk[5:].strip("\n"))
|
||||||
cur = data["text"][pos:]
|
if hasattr(data, "text"):
|
||||||
if cur:
|
cur = data["text"][pos:]
|
||||||
yield cur
|
if cur:
|
||||||
pos += len(cur)
|
yield cur
|
||||||
|
pos += len(cur)
|
||||||
|
else:
|
||||||
|
yield data
|
||||||
|
|
||||||
add_request = async_generate
|
add_request = async_generate
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user