Allow disabling batch decoding. (#11944)
This commit is contained in:
@@ -108,6 +108,7 @@ class DetokenizerManager(MultiHttpWorkerDetokenizerMixin):
|
||||
)
|
||||
|
||||
self.is_tool_call_parser_gpt_oss = server_args.tool_call_parser == "gpt-oss"
|
||||
self.disable_tokenizer_batch_decode = server_args.disable_tokenizer_batch_decode
|
||||
|
||||
def event_loop(self):
|
||||
"""The event loop that handles requests"""
|
||||
@@ -176,17 +177,39 @@ class DetokenizerManager(MultiHttpWorkerDetokenizerMixin):
|
||||
)
|
||||
surr_ids.append(s.decode_ids[s.surr_offset : s.read_offset])
|
||||
|
||||
# TODO(lmzheng): handle skip_special_tokens/spaces_between_special_tokens per request
|
||||
surr_texts = self.tokenizer.batch_decode(
|
||||
surr_ids,
|
||||
skip_special_tokens=recv_obj.skip_special_tokens[0],
|
||||
spaces_between_special_tokens=recv_obj.spaces_between_special_tokens[0],
|
||||
)
|
||||
read_texts = self.tokenizer.batch_decode(
|
||||
read_ids,
|
||||
skip_special_tokens=recv_obj.skip_special_tokens[0],
|
||||
spaces_between_special_tokens=recv_obj.spaces_between_special_tokens[0],
|
||||
)
|
||||
# TODO(lmzheng): better handle skip_special_tokens/spaces_between_special_tokens per request
|
||||
if self.disable_tokenizer_batch_decode:
|
||||
surr_texts = [
|
||||
self.tokenizer.decode(
|
||||
surr, skip_special_tokens=skip, spaces_between_special_tokens=space
|
||||
)
|
||||
for surr, skip, space in zip(
|
||||
surr_ids,
|
||||
recv_obj.skip_special_tokens,
|
||||
recv_obj.spaces_between_special_tokens,
|
||||
)
|
||||
]
|
||||
read_texts = [
|
||||
self.tokenizer.decode(
|
||||
read, skip_special_tokens=skip, spaces_between_special_tokens=space
|
||||
)
|
||||
for read, skip, space in zip(
|
||||
read_ids,
|
||||
recv_obj.skip_special_tokens,
|
||||
recv_obj.spaces_between_special_tokens,
|
||||
)
|
||||
]
|
||||
else:
|
||||
surr_texts = self.tokenizer.batch_decode(
|
||||
surr_ids,
|
||||
skip_special_tokens=recv_obj.skip_special_tokens[0],
|
||||
spaces_between_special_tokens=recv_obj.spaces_between_special_tokens[0],
|
||||
)
|
||||
read_texts = self.tokenizer.batch_decode(
|
||||
read_ids,
|
||||
skip_special_tokens=recv_obj.skip_special_tokens[0],
|
||||
spaces_between_special_tokens=recv_obj.spaces_between_special_tokens[0],
|
||||
)
|
||||
|
||||
# Incremental decoding
|
||||
output_strs = []
|
||||
|
||||
@@ -433,6 +433,7 @@ class ServerArgs:
|
||||
enable_symm_mem: bool = False
|
||||
disable_flashinfer_cutlass_moe_fp4_allgather: bool = False
|
||||
enable_tokenizer_batch_encode: bool = False
|
||||
disable_tokenizer_batch_decode: bool = False
|
||||
disable_outlines_disk_cache: bool = False
|
||||
disable_custom_all_reduce: bool = False
|
||||
enable_mscclpp: bool = False
|
||||
@@ -2898,6 +2899,11 @@ class ServerArgs:
|
||||
action="store_true",
|
||||
help="Enable batch tokenization for improved performance when processing multiple text inputs. Do not use with image inputs, pre-tokenized input_ids, or input_embeds.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--disable-tokenizer-batch-decode",
|
||||
action="store_true",
|
||||
help="Disable batch decoding when decoding multiple completions.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--disable-outlines-disk-cache",
|
||||
action="store_true",
|
||||
|
||||
Reference in New Issue
Block a user