diff --git a/python/sglang/srt/openai_api/adapter.py b/python/sglang/srt/openai_api/adapter.py index 8972d42bc..0556f852a 100644 --- a/python/sglang/srt/openai_api/adapter.py +++ b/python/sglang/srt/openai_api/adapter.py @@ -941,7 +941,13 @@ def v1_chat_generate_request( ) if assistant_prefix: - prompt_ids += tokenizer_manager.tokenizer.encode(assistant_prefix) + encoded = tokenizer_manager.tokenizer.encode(assistant_prefix) + if ( + encoded + and encoded[0] == tokenizer_manager.tokenizer.bos_token_id + ): + encoded = encoded[1:] + prompt_ids += encoded stop = request.stop image_data = None modalities = []