Fix prompt len in parallel sampling (#928)

This commit is contained in:
yichuan
2024-08-05 15:56:08 +08:00
committed by GitHub
parent 399cad91f3
commit fd7926e46e
2 changed files with 11 additions and 15 deletions

View File

@@ -45,11 +45,6 @@ class TestOpenAIServer(unittest.TestCase):
prompt_arg = prompt_input
num_choices = 1
if parallel_sample_num:
# FIXME: This is wrong. We should not count the prompt tokens multiple times for
# parallel sampling.
num_prompt_tokens *= parallel_sample_num
response = client.completions.create(
model=self.model,
prompt=prompt_arg,