[Eagle] Remove the greedy branch and some redundant code (#4363)

Co-authored-by: Sehoon Kim <sehoon@x.ai>
This commit is contained in:
Ying Sheng
2025-03-16 02:48:55 -07:00
committed by GitHub
parent 9971dc2283
commit 1b859295f4
14 changed files with 380 additions and 672 deletions

View File

@@ -122,8 +122,8 @@ class TestEAGLEEngine(unittest.TestCase):
def _test_acc_length(self, engine):
prompt = [
- "Human: Give me a fully functional FastAPI server. Show the python code.\n\nAssistant:"
- ] * 5
+ "Human: Give me a fully functional FastAPI server. Show the python code.\n\nAssistant:",
+ ] * 5 # test batched generation
sampling_params = {"temperature": 0, "max_new_tokens": 512}
output = engine.generate(prompt, sampling_params)
output = output[0]

View File

@@ -67,7 +67,7 @@ class TestFlashinferMLANoRagged(unittest.TestCase):
"--enable-torch-compile",
"--disable-cuda-graph",
"--cuda-graph-max-bs",
- "2",
+ "4",
"--enable-flashinfer-mla",
"--flashinfer-mla-disable-ragged",
]
@@ -109,7 +109,7 @@ class TestFlashinferMLAMTP(unittest.TestCase):
other_args.extend(
[
"--cuda-graph-max-bs",
- "2",
+ "4",
"--disable-radix",
"--enable-torch-compile",
"--torch-compile-max-bs",