[Revision] Replace enable_flashinfer_mla argument with attention_backend (#5052)

This commit is contained in:
Baizhou Zhang
2025-04-05 01:23:02 -07:00
committed by GitHub
parent ca8d02abd5
commit efbae697b3
9 changed files with 92 additions and 82 deletions

View File

@@ -71,8 +71,6 @@ class FlashInferMLAAttnBackend(AttentionBackend):
self.device = model_runner.device
self.skip_prefill = skip_prefill
global_config.enable_flashinfer_mla = True
# Allocate buffers
global global_workspace_buffer
if global_workspace_buffer is None: