Support hidden_dim not divisible by 16 in Triton FP8 GEMM (#9093)

Co-authored-by: Xiaoyu Zhang <35585791+BBuf@users.noreply.github.com>
2025-08-12 21:21:55 -07:00
parent 13c48dcf88
commit 930fe467bd
4 changed files with 332 additions and 7 deletions
--- a/test/srt/run_suite.py
+++ b/test/srt/run_suite.py
@@ -57,6 +57,7 @@ suites = {
        TestFile("quant/test_block_int8.py", 22),
        TestFile("quant/test_fp8_kernel.py", 8),
        TestFile("quant/test_int8_kernel.py", 8),
+        TestFile("quant/test_triton_scaled_mm.py", 8),
        TestFile("quant/test_w8a8_quantization.py", 46),
        TestFile("rl/test_update_weights_from_disk.py", 114),
        TestFile("rl/test_update_weights_from_tensor.py", 48),