Support hidden_dim not divisible by 16 in the Triton FP8 GEMM kernel (#9093)

Co-authored-by: Xiaoyu Zhang <35585791+BBuf@users.noreply.github.com>
This commit is contained in:
Stefan He
2025-08-12 21:21:55 -07:00
committed by GitHub
parent 13c48dcf88
commit 930fe467bd
4 changed files with 332 additions and 7 deletions

View File

@@ -57,6 +57,7 @@ suites = {
TestFile("quant/test_block_int8.py", 22),
TestFile("quant/test_fp8_kernel.py", 8),
TestFile("quant/test_int8_kernel.py", 8),
TestFile("quant/test_triton_scaled_mm.py", 8),
TestFile("quant/test_w8a8_quantization.py", 46),
TestFile("rl/test_update_weights_from_disk.py", 114),
TestFile("rl/test_update_weights_from_tensor.py", 48),