Replace enable_flashinfer_mla argument with attention_backend (#5005)

This commit is contained in:
Baizhou Zhang
2025-04-03 02:53:58 -07:00
committed by GitHub
parent 772d2a191d
commit e8999b13b7
8 changed files with 21 additions and 20 deletions

View File

@@ -179,7 +179,7 @@ class ServerArgs:
tool_call_parser: Optional[str] = None
enable_hierarchical_cache: bool = False
hicache_ratio: float = 2.0
enable_flashinfer_mla: bool = False
enable_flashinfer_mla: bool = False # TODO: remove this argument
enable_flashmla: bool = False
flashinfer_mla_disable_ragged: bool = False
warmups: Optional[str] = None
@@ -836,7 +836,7 @@ class ServerArgs:
parser.add_argument(
"--enable-flashinfer-mla",
action="store_true",
help="Enable FlashInfer MLA optimization",
help="Enable FlashInfer MLA optimization. This argument will be deprecated soon! Please use '--attention-backend flashinfer' instead for switching on flashinfer mla!",
)
parser.add_argument(
"--enable-flashmla",