Support serving DeepSeek-R1-Channel-INT8 with 32 L40S. (#4418)

This commit is contained in:
Wenbo Yang
2025-03-17 15:03:43 +08:00
committed by GitHub
parent 0f52fb55ec
commit 75b656488a
7 changed files with 489 additions and 11 deletions

View File

@@ -37,7 +37,7 @@ class TestInt8Gemm(unittest.TestCase):
print(f"M={M}, N={N}, K={K}, with_bias={with_bias}, out_dtype={out_dtype}: OK")
def test_accuracy(self):
-Ms = [1, 128, 512, 1024, 4096, 8192]
+Ms = [1, 16, 32, 64, 128, 512, 1024, 4096, 8192]
Ns = [16, 128, 512, 1024, 4096, 8192, 16384]
Ks = [512, 1024, 4096, 8192, 16384]
bias_opts = [True, False]