feat: support flashinfer mla with prefix cache (#3643)

This commit is contained in:
Yineng Zhang
2025-02-18 02:06:43 +08:00
committed by GitHub
parent c38f3aed24
commit 714f3e6362
4 changed files with 107 additions and 31 deletions

View File

@@ -177,6 +177,7 @@ class ModelRunner:
"enable_ep_moe": server_args.enable_ep_moe,
"device": server_args.device,
"enable_flashinfer_mla": server_args.enable_flashinfer_mla,
"disable_radix_cache": server_args.disable_radix_cache,
}
)