fix benchmark (#743)

Co-authored-by: hnyls2002 <hnyls2002@gmail.com>
Co-authored-by: Ying Sheng <sqy1415@gmail.com>
2024-07-26 21:26:13 +10:00
parent 01fbb11bb7
commit 768e05d08f
6 changed files with 103 additions and 180 deletions
--- a/python/sglang/bench_serving.py
+++ b/python/sglang/bench_serving.py
@@ -369,7 +369,7 @@ def sample_random_requests(
 ) -> List[Tuple[str, int, int]]:

    input_lens = np.random.randint(
-        int(input_len * range_ratio),
+        max(int(input_len * range_ratio), 1),
        input_len + 1,
        size=num_prompts,
    )
@@ -415,7 +415,7 @@ def sample_random_requests(
            prompt_token_ids = tokenizer(prompt).input_ids
            prompt_len = len(prompt_token_ids)

-            if prompt_len <= input_lens[i]:
+            if prompt_len > input_lens[i]:
                input_ids = prompt_token_ids[: input_lens[i]]
            else:
                ratio = (input_lens[i] + prompt_len - 1) // prompt_len
@@ -935,7 +935,7 @@ if __name__ == "__main__":
    parser.add_argument(
        "--random-range-ratio",
        type=float,
-        default=1.0,
+        default=0.0,
        help="Range of sampled ratio of input/output length, "
        "used only for random dataset.",
    )
--- a/python/sglang/global_config.py
+++ b/python/sglang/global_config.py
@@ -17,7 +17,7 @@ class GlobalConfig:

        # Runtime constants: New generation token ratio estimation
        self.init_new_token_ratio = 0.7
-        self.base_min_new_token_ratio = 0.2
+        self.base_min_new_token_ratio = 0.1
        self.new_token_ratio_decay = 0.001
        self.new_token_ratio_recovery = 0.05