Deprecate disable-mla (#5481)

This commit is contained in:
Baizhou Zhang
2025-04-17 01:43:14 -07:00
committed by GitHub
parent 81c891111f
commit 4fb05583ef
9 changed files with 188 additions and 575 deletions

View File

@@ -299,9 +299,7 @@ class FlashAttentionBackend(AttentionBackend):
self.kv_cache_dtype = model_runner.kv_cache_dtype
self.kv_cache_dtype_str = model_runner.server_args.kv_cache_dtype
self.page_size = model_runner.page_size
-        self.use_mla = (
-            model_runner.model_config.attention_arch == AttentionArch.MLA
-        ) and (not global_server_args_dict["disable_mla"])
+        self.use_mla = model_runner.model_config.attention_arch == AttentionArch.MLA
self.skip_prefill = skip_prefill
self.topk = topk