support more options for usage in stream mode (#985)

Co-authored-by: Ying Sheng <sqy1415@gmail.com>
This commit is contained in:
yichuan~
2024-08-08 17:41:57 +08:00
committed by GitHub
parent 1ac304eeb4
commit 3a79613c28
4 changed files with 96 additions and 28 deletions

View File

@@ -98,10 +98,17 @@ class TestOpenAIServer(unittest.TestCase):
echo=echo,
logprobs=logprobs,
stream=True,
stream_options={"include_usage": True},
)
first = True
for response in generator:
usage = response.usage
if usage is not None:
assert usage.prompt_tokens > 0
assert usage.completion_tokens > 0
assert usage.total_tokens > 0
continue
if logprobs:
assert response.choices[0].logprobs
assert isinstance(response.choices[0].logprobs.tokens[0], str)
@@ -122,12 +129,8 @@ class TestOpenAIServer(unittest.TestCase):
prompt
), f"{response.choices[0].text} and all args {echo} {logprobs} {token_input} {first}"
first = False
assert response.id
assert response.created
assert response.usage.prompt_tokens > 0
assert response.usage.completion_tokens > 0
assert response.usage.total_tokens > 0
def run_chat_completion(self, logprobs, parallel_sample_num):
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
@@ -179,11 +182,20 @@ class TestOpenAIServer(unittest.TestCase):
logprobs=logprobs is not None and logprobs > 0,
top_logprobs=logprobs,
stream=True,
stream_options={"include_usage": True},
)
is_first = True
for response in generator:
usage = response.usage
if usage is not None:
assert usage.prompt_tokens > 0
assert usage.completion_tokens > 0
assert usage.total_tokens > 0
continue
data = response.choices[0].delta
if is_first:
data.role == "assistant"
is_first = False