Revert "Replace enable_flashinfer_mla argument with attention_backend" (#5048)
This commit is contained in:
@@ -71,6 +71,8 @@ class FlashInferMLAAttnBackend(AttentionBackend):
|
||||
self.device = model_runner.device
|
||||
self.skip_prefill = skip_prefill
|
||||
|
||||
global_config.enable_flashinfer_mla = True
|
||||
|
||||
# Allocate buffers
|
||||
global global_workspace_buffer
|
||||
if global_workspace_buffer is None:
|
||||
|
||||
Reference in New Issue
Block a user