Replace enable_flashinfer_mla argument with attention_backend (#5005)
This commit is contained in:
@@ -179,7 +179,7 @@ class ServerArgs:
|
||||
tool_call_parser: Optional[str] = None
|
||||
enable_hierarchical_cache: bool = False
|
||||
hicache_ratio: float = 2.0
|
||||
enable_flashinfer_mla: bool = False
|
||||
enable_flashinfer_mla: bool = False # TODO: remove this argument
|
||||
enable_flashmla: bool = False
|
||||
flashinfer_mla_disable_ragged: bool = False
|
||||
warmups: Optional[str] = None
|
||||
@@ -836,7 +836,7 @@ class ServerArgs:
|
||||
parser.add_argument(
|
||||
"--enable-flashinfer-mla",
|
||||
action="store_true",
|
||||
help="Enable FlashInfer MLA optimization",
|
||||
help="Enable FlashInfer MLA optimization. This argument will be deprecated soon! Please use '--attention-backend flashinfer' instead for switching on FlashInfer MLA!",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--enable-flashmla",
|
||||
|
||||
Reference in New Issue
Block a user