Support hidden_dim not divisible by 16 in the Triton FP8 GEMM (#9093)
Co-authored-by: Xiaoyu Zhang <35585791+BBuf@users.noreply.github.com>
This commit is contained in:
@@ -57,6 +57,7 @@ suites = {
|
||||
TestFile("quant/test_block_int8.py", 22),
|
||||
TestFile("quant/test_fp8_kernel.py", 8),
|
||||
TestFile("quant/test_int8_kernel.py", 8),
|
||||
TestFile("quant/test_triton_scaled_mm.py", 8),
|
||||
TestFile("quant/test_w8a8_quantization.py", 46),
|
||||
TestFile("rl/test_update_weights_from_disk.py", 114),
|
||||
TestFile("rl/test_update_weights_from_tensor.py", 48),
|
||||
|
||||
Reference in New Issue
Block a user