Refactor attention backend (#1381)
This commit is contained in:
@@ -19,7 +19,8 @@ class TestServingThroughput(unittest.TestCase):
|
||||
other_args = []
|
||||
if disable_radix_cache:
|
||||
other_args.append("--disable-radix-cache")
|
||||
other_args.extend(["--attention-backend", attention_backend])
|
||||
if attention_backend:
|
||||
other_args.extend(["--attention-backend", attention_backend])
|
||||
other_args.extend(["--chunked-prefill-size", str(chunked_prefill_size)])
|
||||
|
||||
model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||
|
||||
Reference in New Issue
Block a user