Revert "Replace enable_flashinfer_mla argument with attention_backend" (#5048)

This commit is contained in:
Lianmin Zheng
2025-04-03 13:30:56 -07:00
committed by GitHub
parent b8b6008f47
commit 74885a848b
8 changed files with 20 additions and 21 deletions

View File

@@ -71,6 +71,8 @@ class FlashInferMLAAttnBackend(AttentionBackend):
self.device = model_runner.device
self.skip_prefill = skip_prefill
global_config.enable_flashinfer_mla = True
# Allocate buffers
global global_workspace_buffer
if global_workspace_buffer is None: