Enable optional FP32 compute for LM Head (#10729)
Thanks to the MiniMax Team and Chenyang Zhao for their support.
This commit is contained in:
@@ -59,6 +59,7 @@ suites = {
|
||||
TestFile("quant/test_int8_kernel.py", 8),
|
||||
TestFile("quant/test_triton_scaled_mm.py", 8),
|
||||
TestFile("quant/test_w8a8_quantization.py", 46),
|
||||
TestFile("rl/test_fp32_lm_head.py", 30),
|
||||
TestFile("rl/test_update_weights_from_disk.py", 114),
|
||||
TestFile("rl/test_update_weights_from_tensor.py", 48),
|
||||
TestFile("test_abort.py", 51),
|
||||
|
||||
Reference in New Issue
Block a user