fix: kimi k2 xgrammar crash (#8367)
Co-authored-by: cicirori <32845984+cicirori@users.noreply.github.com>
Co-authored-by: gongwei-130 <56567052+gongwei-130@users.noreply.github.com>
This commit is contained in:
@@ -431,6 +431,7 @@ class Req:
|
|||||||
bootstrap_port: Optional[int] = None,
|
bootstrap_port: Optional[int] = None,
|
||||||
bootstrap_room: Optional[int] = None,
|
bootstrap_room: Optional[int] = None,
|
||||||
data_parallel_rank: Optional[int] = None,
|
data_parallel_rank: Optional[int] = None,
|
||||||
|
vocab_size: Optional[int] = None,
|
||||||
):
|
):
|
||||||
# Input and output info
|
# Input and output info
|
||||||
self.rid = rid
|
self.rid = rid
|
||||||
@@ -480,6 +481,7 @@ class Req:
|
|||||||
self.to_abort_message: str = None
|
self.to_abort_message: str = None
|
||||||
self.stream = stream
|
self.stream = stream
|
||||||
self.eos_token_ids = eos_token_ids
|
self.eos_token_ids = eos_token_ids
|
||||||
|
self.vocab_size = vocab_size
|
||||||
|
|
||||||
# For incremental decoding
|
# For incremental decoding
|
||||||
# ----- | --------- read_ids -------|
|
# ----- | --------- read_ids -------|
|
||||||
@@ -713,6 +715,14 @@ class Req:
|
|||||||
self.finished_reason = FINISH_MATCHED_TOKEN(matched=last_token_id)
|
self.finished_reason = FINISH_MATCHED_TOKEN(matched=last_token_id)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
if last_token_id > self.vocab_size or last_token_id < 0:
|
||||||
|
if self.sampling_params.stop_token_ids:
|
||||||
|
self.output_ids[-1] = next(iter(self.sampling_params.stop_token_ids))
|
||||||
|
if self.eos_token_ids:
|
||||||
|
self.output_ids[-1] = next(iter(self.eos_token_ids))
|
||||||
|
self.finished_reason = FINISH_MATCHED_STR(matched="NaN happened")
|
||||||
|
return
|
||||||
|
|
||||||
# Check stop strings
|
# Check stop strings
|
||||||
if len(self.sampling_params.stop_strs) > 0:
|
if len(self.sampling_params.stop_strs) > 0:
|
||||||
tail_str = self.tokenizer.decode(
|
tail_str = self.tokenizer.decode(
|
||||||
|
|||||||
@@ -1129,6 +1129,7 @@ class Scheduler(
|
|||||||
bootstrap_port=recv_req.bootstrap_port,
|
bootstrap_port=recv_req.bootstrap_port,
|
||||||
bootstrap_room=recv_req.bootstrap_room,
|
bootstrap_room=recv_req.bootstrap_room,
|
||||||
data_parallel_rank=recv_req.data_parallel_rank,
|
data_parallel_rank=recv_req.data_parallel_rank,
|
||||||
|
vocab_size=self.model_config.vocab_size,
|
||||||
)
|
)
|
||||||
req.tokenizer = self.tokenizer
|
req.tokenizer = self.tokenizer
|
||||||
|
|
||||||
@@ -1395,8 +1396,10 @@ class Scheduler(
|
|||||||
logger.info(f)
|
logger.info(f)
|
||||||
|
|
||||||
if self.enable_metrics:
|
if self.enable_metrics:
|
||||||
cache_hit_rate = adder.log_hit_tokens / (
|
total_tokens = adder.log_input_tokens + adder.log_hit_tokens
|
||||||
adder.log_input_tokens + adder.log_hit_tokens
|
|
||||||
|
cache_hit_rate = (
|
||||||
|
adder.log_hit_tokens / total_tokens if total_tokens > 0 else 0.0
|
||||||
)
|
)
|
||||||
self.stats.num_running_reqs = running_bs
|
self.stats.num_running_reqs = running_bs
|
||||||
self.stats.num_used_tokens = num_used
|
self.stats.num_used_tokens = num_used
|
||||||
|
|||||||
Reference in New Issue
Block a user