Support Llama4 fp8 inference (#5194)

Co-authored-by: laixinn <xielx@shanghaitech.edu.cn>
Co-authored-by: sleepcoo <sleepcoo@gmail.com>
Co-authored-by: zhyncs <me@zhyncs.com>
This commit is contained in:
HandH1998
2025-04-09 20:14:34 +08:00
committed by GitHub
parent 86a876d883
commit 4065248214
14 changed files with 537 additions and 106 deletions

View File

@@ -76,6 +76,7 @@ suites = {
TestFile("test_create_kvindices.py", 2),
TestFile("test_hicache.py", 60),
TestFile("test_hicache_mla.py", 90),
TestFile("test_triton_moe_channel_fp8_kernel.py", 25),
],
"per-commit-2-gpu": [
TestFile("models/lora/test_lora_tp.py", 300),