[Fix] Update to v0.1.5.post4 and refine HIP attention backend selection (#11161)

This commit is contained in:
jacky.cheng
2025-10-03 12:19:30 +08:00
committed by GitHub
parent a2faf8940c
commit b00a0c786f
2 changed files with 2 additions and 4 deletions

View File

@@ -42,7 +42,7 @@ ENV BUILD_TRITON="0"
ENV BUILD_LLVM="0" ENV BUILD_LLVM="0"
ENV BUILD_AITER_ALL="1" ENV BUILD_AITER_ALL="1"
ENV BUILD_MOONCAKE="1" ENV BUILD_MOONCAKE="1"
ENV AITER_COMMIT="v0.1.5.post3" ENV AITER_COMMIT="v0.1.5.post4"
ENV NO_DEPS_FLAG="--no-deps" ENV NO_DEPS_FLAG="--no-deps"
# =============================== # ===============================

View File

@@ -536,9 +536,7 @@ class ModelRunner:
elif _is_hip: elif _is_hip:
head_num = self.model_config.get_num_kv_heads(self.tp_size) head_num = self.model_config.get_num_kv_heads(self.tp_size)
# TODO: aiter currently only supports a head number of 16 or 128 # TODO: aiter currently only supports a head number of 16 or 128
if ( if head_num == 128 or head_num == 16:
head_num == 128 or head_num == 16
) and self.spec_algorithm.is_none():
server_args.attention_backend = "aiter" server_args.attention_backend = "aiter"
else: else:
server_args.attention_backend = "triton" server_args.attention_backend = "triton"