Replace enable_flashinfer_mla argument with attention_backend (#5005)

commit e8999b13b7
parent 772d2a191d
Author: Baizhou Zhang
Committed by: GitHub
Date: 2025-04-03 02:53:58 -07:00
8 changed files with 21 additions and 20 deletions
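In practice, this change means the FlashInfer MLA path is now selected through the existing attention_backend server argument instead of a dedicated boolean flag. A minimal sketch of the new call site, assuming the ServerArgs dataclass and assuming the value "flashinfer" routes MLA models to FlashInferMLAAttnBackend (the exact backend value is an assumption, not taken from this hunk):

from sglang.srt.server_args import ServerArgs

# Before this commit (removed): a dedicated flag.
#   server_args = ServerArgs(model_path=..., enable_flashinfer_mla=True)

# After this commit: the backend is chosen via attention_backend.
server_args = ServerArgs(
    model_path="deepseek-ai/DeepSeek-V3",  # hypothetical model path
    attention_backend="flashinfer",  # assumed value that maps MLA models to FlashInferMLAAttnBackend
)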


@@ -71,8 +71,6 @@ class FlashInferMLAAttnBackend(AttentionBackend):
         self.device = model_runner.device
         self.skip_prefill = skip_prefill
 
-        global_config.enable_flashinfer_mla = True
-
         # Allocate buffers
         global global_workspace_buffer
         if global_workspace_buffer is None:
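The surviving context lines show a lazily allocated, process-wide FlashInfer workspace buffer: the backend removes only the enable_flashinfer_mla side effect while the buffer logic is unchanged. A hedged sketch of that lazy-init pattern in isolation, with a placeholder buffer size (the real size is not shown in this hunk):

import torch

global_workspace_buffer = None  # shared across all backend instances in the process

def _get_workspace_buffer(device: torch.device) -> torch.Tensor:
    # Allocate the workspace once on first use, then reuse it.
    # 128 MiB is a placeholder size, not the value sglang actually uses.
    global global_workspace_buffer
    if global_workspace_buffer is None:
        global_workspace_buffer = torch.empty(
            128 * 1024 * 1024, dtype=torch.uint8, device=device
        )
    return global_workspace_buffer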