Support the `n` sampling parameter in OpenAI API completions (#3446)

Co-authored-by: Shan Yu <shanyu1@g.ucla.edu>
Co-authored-by: Yineng Zhang <me@zhyncs.com>
Co-authored-by: chuyue sun <chuyue@lmsys.us-northcentral1-a.compute.internal>
2025-03-19 22:46:46 -07:00
parent df7014a8d2
commit fad86a6863
5 changed files with 114 additions and 8 deletions
--- a/python/sglang/lang/interpreter.py
+++ b/python/sglang/lang/interpreter.py
@@ -566,13 +566,13 @@ class StreamExecutor:
    def _execute_gen(self, expr: SglGen):
        sampling_params = self._resolve_sampling_params(expr.sampling_params)
        name = expr.name
-
        if not self.stream:
            if self.num_api_spec_tokens is None:
                comp, meta_info = self.backend.generate(
                    self,
                    sampling_params=sampling_params,
                )
+
            else:
                if self.backend.is_chat_model:
                    # Speculative execution on models with only chat interface.
@@ -587,8 +587,11 @@ class StreamExecutor:

                else:  # Speculative execution on models with completion interface
                    comp, meta_info = self._spec_gen(sampling_params)
-
-            self.text_ += comp
+            if isinstance(comp, list):
+                self.text_ += comp[0]
+            else:
+                assert isinstance(comp, str)
+                self.text_ += comp

            self.variables[name] = comp
            self.meta_info[name] = meta_info
@@ -747,6 +750,7 @@ class StreamExecutor:
        for item in [
            "max_new_tokens",
            "min_new_tokens",
+            "n",
            "stop",
            "stop_token_ids",
            "temperature",