Support n in OpenAI API completions (#3446)
Co-authored-by: Shan Yu <shanyu1@g.ucla.edu>
Co-authored-by: Yineng Zhang <me@zhyncs.com>
Co-authored-by: chuyue sun <chuyue@lmsys.us-northcentral1-a.compute.internal>
This commit is contained in:
@@ -566,13 +566,13 @@ class StreamExecutor:
|
||||
def _execute_gen(self, expr: SglGen):
|
||||
sampling_params = self._resolve_sampling_params(expr.sampling_params)
|
||||
name = expr.name
|
||||
|
||||
if not self.stream:
|
||||
if self.num_api_spec_tokens is None:
|
||||
comp, meta_info = self.backend.generate(
|
||||
self,
|
||||
sampling_params=sampling_params,
|
||||
)
|
||||
|
||||
else:
|
||||
if self.backend.is_chat_model:
|
||||
# Speculative execution on models with only chat interface.
|
||||
@@ -587,8 +587,11 @@ class StreamExecutor:
|
||||
|
||||
else: # Speculative execution on models with completion interface
|
||||
comp, meta_info = self._spec_gen(sampling_params)
|
||||
|
||||
self.text_ += comp
|
||||
if isinstance(comp, list):
|
||||
self.text_ += comp[0]
|
||||
else:
|
||||
assert isinstance(comp, str)
|
||||
self.text_ += comp
|
||||
|
||||
self.variables[name] = comp
|
||||
self.meta_info[name] = meta_info
|
||||
@@ -747,6 +750,7 @@ class StreamExecutor:
|
||||
for item in [
|
||||
"max_new_tokens",
|
||||
"min_new_tokens",
|
||||
"n",
|
||||
"stop",
|
||||
"stop_token_ids",
|
||||
"temperature",
|
||||
|
||||
Reference in New Issue
Block a user