Improve test cases for eagle infer (#7173)

2025-06-13 22:25:13 -07:00
parent 50876abc47
commit ba589b88fc
3 changed files with 44 additions and 32 deletions
--- a/test/srt/run_suite.py
+++ b/test/srt/run_suite.py
@@ -31,8 +31,8 @@ suites = {
        TestFile("test_block_int8.py", 22),
        TestFile("test_create_kvindices.py", 2),
        TestFile("test_chunked_prefill.py", 313),
-        TestFile("test_eagle_infer_a.py", 300),
+        TestFile("test_eagle_infer_a.py", 370),
-        TestFile("test_eagle_infer_b.py", 300),
+        TestFile("test_eagle_infer_b.py", 270),
        TestFile("test_ebnf_constrained.py", 108),
        TestFile("test_enable_thinking.py", 70),
        TestFile("test_embedding_openai_server.py", 141),
--- a/test/srt/test_eagle_infer_a.py
+++ b/test/srt/test_eagle_infer_a.py
@@ -129,7 +129,7 @@ class TestEAGLEEngine(CustomTestCase):
            output["meta_info"]["completion_tokens"]
            / output["meta_info"]["e2e_latency"]
        )
-        print(f"{acc_length=}")
+        print(f"{acc_length=:.4f}, {speed=}")
        if engine.server_args.model_path == DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST:
            self.assertGreater(acc_length, 3.6)
--- a/test/srt/test_eagle_infer_b.py
+++ b/test/srt/test_eagle_infer_b.py
@@ -10,7 +10,6 @@ from types import SimpleNamespace
 import numpy as np
 import requests
 import torch
 from sglang.srt.utils import kill_process_tree
 from sglang.test.few_shot_gsm8k import run_eval
@@ -24,10 +23,6 @@ from sglang.test.test_utils import (
    run_logprob_check,
 )
 torch_dtype = torch.float16
 prefill_tolerance = 5e-2
 decode_tolerance: float = 5e-2
 class TestEAGLEServer(CustomTestCase):
    PROMPTS = [
@@ -202,7 +197,11 @@ class TestEAGLEServer(CustomTestCase):
        """Test the output logprobs are close to the input logprobs if we run a prefill again."""
        def run_generate(
-            prompt, return_logprob=False, max_new_tokens=512, logprob_start_len=-1
+            prompt,
+            return_logprob=False,
+            max_new_tokens=512,
+            logprob_start_len=-1,
+            temperature=1.0,
        ):
            if isinstance(prompt, str):
@@ -215,20 +214,27 @@ class TestEAGLEServer(CustomTestCase):
                json={
                    **prompt_kwargs,
                    "sampling_params": {
-                        "temperature": 1.0,
+                        "temperature": temperature,
                        "max_new_tokens": max_new_tokens,
                        "ignore_eos": True,
                    },
                    "return_logprob": return_logprob,
                    "return_text_in_logprobs": True,
                    "logprob_start_len": logprob_start_len,
                    "temp_scaled_logprobs": True,
                },
            )
            return response.json()
        prompt = "I have a very good idea on how to"
-        gen = run_generate(prompt, return_logprob=True, logprob_start_len=0)
+        for temperature in [1.0]:
+            gen = run_generate(
+                prompt,
+                return_logprob=True,
+                logprob_start_len=0,
+                temperature=temperature,
+            )
            output_logprobs = np.array(
                [x[0] for x in gen["meta_info"]["output_token_logprobs"]]
            )
@@ -239,12 +245,18 @@ class TestEAGLEServer(CustomTestCase):
            new_prompt = input_tokens + output_tokens
            score = run_generate(
-            new_prompt, return_logprob=True, logprob_start_len=0, max_new_tokens=0
+                new_prompt,
+                return_logprob=True,
+                logprob_start_len=0,
+                max_new_tokens=0,
+                temperature=temperature,
            )
            output_logprobs_score = np.array(
                [
                    x[0]
-                for x in score["meta_info"]["input_token_logprobs"][num_prompts_tokens:]
+                    for x in score["meta_info"]["input_token_logprobs"][
+                        num_prompts_tokens:
+                    ]
                ]
            )
@@ -253,7 +265,7 @@ class TestEAGLEServer(CustomTestCase):
            diff = np.abs(output_logprobs - output_logprobs_score)
            max_diff = np.max(diff)
-        self.assertLess(max_diff, 0.25)
+            self.assertLess(max_diff, 0.255)
    def test_logprob_mixed(self):
        args = []