Refactor attention backend (#1381)

This commit is contained in:
Lianmin Zheng
2024-09-11 11:44:26 -07:00
committed by GitHub
parent c03cece42f
commit fec185ce0c
16 changed files with 568 additions and 564 deletions

View File

@@ -19,7 +19,8 @@ class TestServingThroughput(unittest.TestCase):
other_args = []
if disable_radix_cache:
other_args.append("--disable-radix-cache")
-other_args.extend(["--attention-backend", attention_backend])
+if attention_backend:
+other_args.extend(["--attention-backend", attention_backend])
other_args.extend(["--chunked-prefill-size", str(chunked_prefill_size)])
model = DEFAULT_MODEL_NAME_FOR_TEST