Reduce computation and communication in DP attention (#4521)

This commit is contained in:
Cheng Wan
2025-03-18 16:41:36 -04:00
committed by GitHub
parent 9e0186f352
commit 3196999f63
5 changed files with 70 additions and 80 deletions

View File

@@ -11,7 +11,7 @@ from sglang.test.test_utils import (
)
-class TestDPAttention(unittest.TestCase):
+class TestDPAttentionDP2TP2(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST
@@ -59,7 +59,3 @@ class TestDPAttention(unittest.TestCase):
metrics = run_eval(args)
print(f"{metrics=}")
self.assertGreater(metrics["score"], 0.8)
-if __name__ == "__main__":
-unittest.main()