Update the mixtral to use the better FusedMoE layer (#1081)

This commit is contained in:
Lianmin Zheng
2024-08-13 15:44:25 -07:00
committed by GitHub
parent 312e849255
commit ad3e4f1619
4 changed files with 57 additions and 258 deletions

View File

@@ -84,7 +84,7 @@ class TestServingThroughput(unittest.TestCase):
if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
# A100 (PCIE) performance
assert res["output_throughput"] > 950
assert res["output_throughput"] > 940
def test_default_with_chunked_prefill(self):
res = self.run_test(