Support Eagle2 for Triton backend (#3466)

This commit is contained in:
Ke Bao
2025-02-10 20:00:42 +08:00
committed by GitHub
parent cddb1cdf8f
commit 2d61132374
5 changed files with 285 additions and 41 deletions

View File

@@ -193,5 +193,34 @@ class TestEAGLEServer(unittest.TestCase):
self.assertGreater(metrics["accuracy"], 0.20)
class TestEAGLEServerTriton(TestEAGLEServer):
    """Run the ``TestEAGLEServer`` suite against a Triton-backed server.

    Only ``setUpClass`` is overridden here; every other attribute and test
    method is inherited from ``TestEAGLEServer``.
    """

    @classmethod
    def setUpClass(cls):
        """Launch the server with EAGLE speculative decoding on Triton.

        Stores the server URL on ``cls.base_url`` and the value returned by
        ``popen_launch_server`` on ``cls.process``.
        """
        # Speculative-decoding configuration (algorithm, draft model, tree shape).
        speculative_flags = [
            "--speculative-algorithm",
            "EAGLE",
            "--speculative-draft-model-path",
            DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST,
            "--speculative-num-steps",
            "5",
            "--speculative-eagle-topk",
            "8",
            "--speculative-num-draft-tokens",
            "64",
        ]
        memory_flags = ["--mem-fraction-static", "0.7"]
        backend_flags = [
            "--attention-backend",
            "triton",
            # TODO: Support CUDA graph with this backend; disabled until then.
            "--disable-cuda-graph",
        ]

        cls.base_url = DEFAULT_URL_FOR_TEST
        cls.process = popen_launch_server(
            DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            # Concatenation keeps the flags in the same order as a single list.
            other_args=speculative_flags + memory_flags + backend_flags,
        )
# Run the test cases in this module only when the file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    unittest.main()