[Eagle] Remove the greedy branch and some redundant code (#4363)

Co-authored-by: Sehoon Kim <sehoon@x.ai>
This commit is contained in:
Ying Sheng
2025-03-16 02:48:55 -07:00
committed by GitHub
parent 9971dc2283
commit 1b859295f4
14 changed files with 380 additions and 672 deletions

View File

@@ -67,7 +67,7 @@ class TestFlashinferMLANoRagged(unittest.TestCase):
"--enable-torch-compile",
"--disable-cuda-graph",
"--cuda-graph-max-bs",
-"2",
+"4",
"--enable-flashinfer-mla",
"--flashinfer-mla-disable-ragged",
]
@@ -109,7 +109,7 @@ class TestFlashinferMLAMTP(unittest.TestCase):
other_args.extend(
[
"--cuda-graph-max-bs",
-"2",
+"4",
"--disable-radix",
"--enable-torch-compile",
"--torch-compile-max-bs",