[Test] Add basic matched stop for beta eagle (#11833)

2025-10-20 01:17:00 +08:00
parent 48738af7f9
commit 7a020e0f3b
4 changed files with 201 additions and 222 deletions
--- a/test/srt/openai_server/validation/test_matched_stop.py
+++ b/test/srt/openai_server/validation/test_matched_stop.py
@@ -1,10 +1,8 @@
-import json
 import unittest

-import requests
-
 from sglang.srt.sampling.sampling_params import MAX_LEN, get_max_seq_length
 from sglang.srt.utils import kill_process_tree
+from sglang.test.kit_matched_stop import MatchedStopMixin
 from sglang.test.test_utils import (
    DEFAULT_MODEL_NAME_FOR_TEST,
    DEFAULT_URL_FOR_TEST,
@@ -12,15 +10,8 @@ from sglang.test.test_utils import (
    popen_launch_server,
 )

-MANY_NEW_TOKENS_PROMPT = """
-Please write an extremely detailed and vivid fantasy story, set in a world full of intricate magic systems, political intrigue, and complex characters.
-Ensure that you thoroughly describe every scene, character's motivations, and the environment. Include long, engaging dialogues and elaborate on the inner thoughts of the characters.
-Each section should be as comprehensive as possible to create a rich and immersive experience for the reader.
-The story should span multiple events, challenges, and character developments over time. Aim to make the story at least 3,000 words long.
-"""

-
-class TestMatchedStop(CustomTestCase):
+class TestMatchedStop(CustomTestCase, MatchedStopMixin):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_MODEL_NAME_FOR_TEST
@@ -36,138 +27,6 @@ class TestMatchedStop(CustomTestCase):
    def tearDownClass(cls):
        kill_process_tree(cls.process.pid)

-    def run_completions_generation(
-        self,
-        prompt=MANY_NEW_TOKENS_PROMPT,
-        max_tokens=1,
-        stop=None,
-        stop_regex=None,
-        finish_reason=None,
-        matched_stop=None,
-    ):
-        payload = {
-            "prompt": prompt,
-            "model": self.model,
-            "temperature": 0,
-            "top_p": 1,
-            "max_tokens": max_tokens,
-        }
-
-        if stop is not None:
-            payload["stop"] = stop
-
-        if stop_regex is not None:
-            payload["stop_regex"] = stop_regex
-
-        response_completions = requests.post(
-            self.base_url + "/v1/completions",
-            json=payload,
-        )
-        print(json.dumps(response_completions.json()))
-        print("=" * 100)
-
-        assert (
-            response_completions.json()["choices"][0]["finish_reason"] == finish_reason
-        )
-        assert response_completions.json()["choices"][0]["matched_stop"] == matched_stop
-
-    def run_chat_completions_generation(
-        self,
-        prompt=MANY_NEW_TOKENS_PROMPT,
-        max_tokens=1,
-        stop=None,
-        stop_regex=None,
-        finish_reason=None,
-        matched_stop=None,
-    ):
-        chat_payload = {
-            "model": self.model,
-            "messages": [
-                {"role": "system", "content": "You are a helpful AI assistant"},
-                {"role": "user", "content": prompt},
-            ],
-            "temperature": 0,
-            "top_p": 1,
-            "max_tokens": max_tokens,
-        }
-
-        if stop is not None:
-            chat_payload["stop"] = stop
-
-        if stop_regex is not None:
-            chat_payload["stop_regex"] = stop_regex
-
-        response_chat = requests.post(
-            self.base_url + "/v1/chat/completions",
-            json=chat_payload,
-        )
-        print(json.dumps(response_chat.json()))
-        print("=" * 100)
-
-        assert response_chat.json()["choices"][0]["finish_reason"] == finish_reason
-        assert response_chat.json()["choices"][0]["matched_stop"] == matched_stop
-
-    def test_finish_stop_str(self):
-        self.run_completions_generation(
-            max_tokens=1000, stop="\n", finish_reason="stop", matched_stop="\n"
-        )
-        self.run_chat_completions_generation(
-            max_tokens=1000, stop="\n", finish_reason="stop", matched_stop="\n"
-        )
-
-    def test_finish_stop_regex_str(self):
-        STOP_REGEX_STR = r"and|or"
-        self.run_completions_generation(
-            max_tokens=1000,
-            stop_regex=STOP_REGEX_STR,
-            finish_reason="stop",
-            matched_stop=STOP_REGEX_STR,
-        )
-        self.run_chat_completions_generation(
-            max_tokens=1000,
-            stop_regex=STOP_REGEX_STR,
-            finish_reason="stop",
-            matched_stop=STOP_REGEX_STR,
-        )
-
-        # Match a complete sentence
-        STOP_REGEX_STR_SENTENCE = r"[.!?]\s*$"
-        self.run_chat_completions_generation(
-            max_tokens=1000,
-            stop_regex=STOP_REGEX_STR_SENTENCE,
-            finish_reason="stop",
-            matched_stop=STOP_REGEX_STR_SENTENCE,
-        )
-
-    def test_finish_stop_eos(self):
-        llama_format_prompt = """
-        <|begin_of_text|><|start_header_id|>system<|end_header_id|>
-        You are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>
-
-        What is 2 + 2?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
-        """
-        eos_token_id = 128009
-        self.run_completions_generation(
-            prompt=llama_format_prompt,
-            max_tokens=1000,
-            finish_reason="stop",
-            matched_stop=eos_token_id,
-        )
-        self.run_chat_completions_generation(
-            prompt="What is 2 + 2?",
-            max_tokens=1000,
-            finish_reason="stop",
-            matched_stop=eos_token_id,
-        )
-
-    def test_finish_length(self):
-        self.run_completions_generation(
-            max_tokens=5, finish_reason="length", matched_stop=None
-        )
-        self.run_chat_completions_generation(
-            max_tokens=5, finish_reason="length", matched_stop=None
-        )
-

 class TestRegexPatternMaxLength(unittest.TestCase):
    @classmethod