diff --git a/python/sglang/srt/managers/detokenizer_manager.py b/python/sglang/srt/managers/detokenizer_manager.py index 6e90d19cd..4ae31ecc8 100644 --- a/python/sglang/srt/managers/detokenizer_manager.py +++ b/python/sglang/srt/managers/detokenizer_manager.py @@ -75,8 +75,8 @@ class DetokenizerManager: self.decode_status = LimitedCapacityDict() - def trim_eos(self, output: Union[str, List[int]], finished_reason, no_eos_trim): - if no_eos_trim: + def trim_eos(self, output: Union[str, List[int]], finished_reason, no_stop_trim): + if no_stop_trim: return output # Trim stop str. TODO(lmzheng): handle the case where multiple stop strs are hit @@ -141,7 +141,7 @@ class DetokenizerManager: self.trim_eos( s.decode_ids[s.surr_offset :], recv_obj.finished_reason[i], - recv_obj.no_eos_trim[i], + recv_obj.no_stop_trim[i], ) ) surr_ids.append(s.decode_ids[s.surr_offset : s.read_offset]) @@ -177,7 +177,7 @@ class DetokenizerManager: self.trim_eos( s.decoded_text + new_text, recv_obj.finished_reason[i], - recv_obj.no_eos_trim[i], + recv_obj.no_stop_trim[i], ) ) diff --git a/python/sglang/srt/managers/io_struct.py b/python/sglang/srt/managers/io_struct.py index 9cc847706..80816c28e 100644 --- a/python/sglang/srt/managers/io_struct.py +++ b/python/sglang/srt/managers/io_struct.py @@ -295,7 +295,7 @@ class BatchTokenIDOut: spaces_between_special_tokens: List[bool] meta_info: List[Dict] finished_reason: List[BaseFinishReason] - no_eos_trim: List[bool] + no_stop_trim: List[bool] @dataclass diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py index e59679ffa..42c2a2841 100644 --- a/python/sglang/srt/managers/scheduler.py +++ b/python/sglang/srt/managers/scheduler.py @@ -885,7 +885,7 @@ class Scheduler: output_read_offsets = [] output_skip_special_tokens = [] output_spaces_between_special_tokens = [] - output_no_eos_trim = [] + output_no_stop_trim = [] else: # embedding or reward model output_embeddings = [] unfinished_indices = [] @@ -917,7 +917,7 @@ class Scheduler: output_spaces_between_special_tokens.append( req.sampling_params.spaces_between_special_tokens ) - output_no_eos_trim.append(req.sampling_params.no_eos_trim) + output_no_stop_trim.append(req.sampling_params.no_stop_trim) meta_info = { "prompt_tokens": len(req.origin_input_ids), @@ -968,7 +968,7 @@ class Scheduler: output_spaces_between_special_tokens, output_meta_info, output_finished_reason, - output_no_eos_trim, + output_no_stop_trim, ) ) else: # embedding or reward model diff --git a/python/sglang/srt/openai_api/adapter.py b/python/sglang/srt/openai_api/adapter.py index a3638d601..5c8990c69 100644 --- a/python/sglang/srt/openai_api/adapter.py +++ b/python/sglang/srt/openai_api/adapter.py @@ -494,7 +494,7 @@ def v1_generate_request( request.logprobs if request.logprobs is not None else 0 ) sampling_params = [] - if isinstance(request.no_eos_trim, list): + if isinstance(request.no_stop_trim, list): num_reqs = len(request.prompt) else: num_reqs = 1 @@ -514,10 +514,10 @@ def v1_generate_request( "json_schema": request.json_schema, "n": request.n, "ignore_eos": request.ignore_eos, - "no_eos_trim": ( - request.no_eos_trim - if not isinstance(request.no_eos_trim, list) - else request.no_eos_trim[i] + "no_stop_trim": ( + request.no_stop_trim + if not isinstance(request.no_stop_trim, list) + else request.no_stop_trim[i] ), } ) diff --git a/python/sglang/srt/openai_api/protocol.py b/python/sglang/srt/openai_api/protocol.py index 4b382240a..1da27af28 100644 --- a/python/sglang/srt/openai_api/protocol.py +++ b/python/sglang/srt/openai_api/protocol.py @@ -174,7 +174,7 @@ class CompletionRequest(BaseModel): min_tokens: int = 0 repetition_penalty: Optional[float] = 1.0 stop_token_ids: Optional[List[int]] = Field(default_factory=list) - no_eos_trim: Union[bool, List[bool]] = False + no_stop_trim: Union[bool, List[bool]] = False class CompletionResponseChoice(BaseModel): diff --git a/python/sglang/srt/sampling/sampling_params.py b/python/sglang/srt/sampling/sampling_params.py index 6e497ea7b..55ec00fc1 100644 --- a/python/sglang/srt/sampling/sampling_params.py +++ b/python/sglang/srt/sampling/sampling_params.py @@ -40,7 +40,7 @@ class SamplingParams: regex: Optional[str] = None, n: int = 1, json_schema: Optional[str] = None, - no_eos_trim: bool = False, + no_stop_trim: bool = False, ) -> None: self.temperature = temperature self.top_p = top_p @@ -61,7 +61,7 @@ class SamplingParams: self.regex = regex self.n = n self.json_schema = json_schema - self.no_eos_trim = no_eos_trim + self.no_stop_trim = no_stop_trim # Process some special cases if self.temperature < _SAMPLING_EPS: