From d3fc86a43e2287e0446a4b3c9acf1300611f1f85 Mon Sep 17 00:00:00 2001 From: Haotian Liu <6631389+haotian-liu@users.noreply.github.com> Date: Wed, 24 Jan 2024 14:23:27 -0600 Subject: [PATCH] Improve Chinese character streaming when the last char is half Chinese word. (#95) --- python/sglang/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/sglang/utils.py b/python/sglang/utils.py index ac103415e..dc4ea45b7 100644 --- a/python/sglang/utils.py +++ b/python/sglang/utils.py @@ -154,6 +154,9 @@ def find_printable_text(text): # If the last token is a CJK character, we print the characters. elif len(text) > 0 and _is_chinese_char(ord(text[-1])): return text + # Otherwise, if the penultimate character is a CJK character, we print the characters except for the last one. + elif len(text) > 1 and _is_chinese_char(ord(text[-2])): + return text[:-1] # Otherwise, prints until the last space char (simple heuristic to avoid printing incomplete words, # which may change with the subsequent token -- there are probably smarter ways to do this!) else: