Support incremental streaming of logprob/token_ids between scheduler and detokenizer (#6225)

Co-authored-by: SangBin Cho <rkooo567@gmail.com>
This commit is contained in:
Lianmin Zheng
2025-05-12 14:33:38 -07:00
committed by GitHub
parent f1c896007a
commit d18c6b3358
9 changed files with 257 additions and 86 deletions

View File

@@ -41,7 +41,7 @@ class BaseGrammarObject:
raise NotImplementedError()
def is_terminated(self):
-raise NotImplementedError()
+return False
def allocate_vocab_mask(
self, vocab_size: int, batch_size: int, device