Deprecate --disable-flashinfer and introduce --attention-backend (#1380)

This commit is contained in:
Lianmin Zheng
2024-09-10 17:11:16 -07:00
committed by GitHub
parent 3a6e8b6d78
commit 46094e0c1b
13 changed files with 99 additions and 61 deletions

View File

@@ -425,7 +425,7 @@ def _set_envs_and_config(server_args: ServerArgs):
maybe_set_triton_cache_manager()
# Check flashinfer version
-if not server_args.disable_flashinfer:
+if server_args.attention_backend == "flashinfer":
assert_pkg_version(
"flashinfer",
"0.1.6",