Support Llama4 fp8 inference (#5194)
Co-authored-by: laixinn <xielx@shanghaitech.edu.cn>
Co-authored-by: sleepcoo <sleepcoo@gmail.com>
Co-authored-by: zhyncs <me@zhyncs.com>
This commit is contained in:
@@ -76,6 +76,7 @@ suites = {
        TestFile("test_create_kvindices.py", 2),
        TestFile("test_hicache.py", 60),
        TestFile("test_hicache_mla.py", 90),
        TestFile("test_triton_moe_channel_fp8_kernel.py", 25),
    ],
    "per-commit-2-gpu": [
        TestFile("models/lora/test_lora_tp.py", 300),
|
||||
Reference in New Issue
Block a user