Add V2-lite model test (#7390)
Co-authored-by: DiweiSun <105627594+DiweiSun@users.noreply.github.com>
This commit is contained in:
@@ -67,7 +67,7 @@ class TestFlashMLAAttnBackend(unittest.TestCase):
|
||||
|
||||
class TestFlashMLAAttnLatency(unittest.TestCase):
|
||||
def test_latency(self):
|
||||
output_throughput = run_bench_one_batch(
|
||||
_, output_throughput, _ = run_bench_one_batch(
|
||||
DEFAULT_MODEL_NAME_FOR_TEST_MLA,
|
||||
[
|
||||
"--attention-backend",
|
||||
|
||||
Reference in New Issue
Block a user