Support the `n` sampling parameter in OpenAI API completions (#3446)

Co-authored-by: Shan Yu <shanyu1@g.ucla.edu>
Co-authored-by: Yineng Zhang <me@zhyncs.com>
Co-authored-by: chuyue sun <chuyue@lmsys.us-northcentral1-a.compute.internal>
This commit is contained in:
Chuyue Sun
2025-03-19 22:46:46 -07:00
committed by GitHub
parent df7014a8d2
commit fad86a6863
5 changed files with 114 additions and 8 deletions

View File

@@ -566,13 +566,13 @@ class StreamExecutor:
def _execute_gen(self, expr: SglGen):
sampling_params = self._resolve_sampling_params(expr.sampling_params)
name = expr.name
if not self.stream:
if self.num_api_spec_tokens is None:
comp, meta_info = self.backend.generate(
self,
sampling_params=sampling_params,
)
else:
if self.backend.is_chat_model:
# Speculative execution on models with only chat interface.
@@ -587,8 +587,11 @@ class StreamExecutor:
else: # Speculative execution on models with completion interface
comp, meta_info = self._spec_gen(sampling_params)
self.text_ += comp
if isinstance(comp, list):
self.text_ += comp[0]
else:
assert isinstance(comp, str)
self.text_ += comp
self.variables[name] = comp
self.meta_info[name] = meta_info
@@ -747,6 +750,7 @@ class StreamExecutor:
for item in [
"max_new_tokens",
"min_new_tokens",
"n",
"stop",
"stop_token_ids",
"temperature",