From 3c79ad35cae8c7883b2d0d9f067b46804c24f544 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Sun, 1 Dec 2024 23:36:28 -0800 Subject: [PATCH] [Fix] Fix the padded hash value for image tokens (#2309) --- python/sglang/srt/managers/scheduler.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py index 16e7691c1..153d6f6f5 100644 --- a/python/sglang/srt/managers/scheduler.py +++ b/python/sglang/srt/managers/scheduler.py @@ -1170,6 +1170,14 @@ class Scheduler: + 1 : len(req.fill_ids) - req.last_update_decode_tokens ] + + # Clip the padded hash values from image tokens. + # Otherwise, it will lead to detokenization errors. + input_token_ids = [ + x if x < self.model_config.vocab_size - 1 else 0 + for x in input_token_ids + ] + req.input_token_logprobs = list(zip(input_token_logprobs, input_token_ids)) if (