[Fix] Update AITER to v0.1.5.post4 and refine HIP attention backend selection (#11161)
This commit is contained in:
@@ -42,7 +42,7 @@ ENV BUILD_TRITON="0"
|
||||
ENV BUILD_LLVM="0"
|
||||
ENV BUILD_AITER_ALL="1"
|
||||
ENV BUILD_MOONCAKE="1"
|
||||
ENV AITER_COMMIT="v0.1.5.post3"
|
||||
ENV AITER_COMMIT="v0.1.5.post4"
|
||||
ENV NO_DEPS_FLAG="--no-deps"
|
||||
|
||||
# ===============================
|
||||
|
||||
@@ -536,9 +536,7 @@ class ModelRunner:
|
||||
elif _is_hip:
|
||||
head_num = self.model_config.get_num_kv_heads(self.tp_size)
|
||||
# TODO: aiter currently only supports a head number of 16 or 128
|
||||
if (
|
||||
head_num == 128 or head_num == 16
|
||||
) and self.spec_algorithm.is_none():
|
||||
if head_num == 128 or head_num == 16:
|
||||
server_args.attention_backend = "aiter"
|
||||
else:
|
||||
server_args.attention_backend = "triton"
|
||||
|
||||
Reference in New Issue
Block a user