Revert "ROCm: Flex Attention Enablement with custom backends (#4178)" (#4186)

2025-03-07 10:27:52 -08:00
parent 0beea4503f
commit eb61f5c9af
7 changed files with 35 additions and 1434 deletions
--- a/docker/Dockerfile.rocm
+++ b/docker/Dockerfile.rocm
@@ -2,7 +2,7 @@
 #   docker build --build-arg SGL_BRANCH=v0.4.3.post4 -t v0.4.3.post4-rocm630 -f Dockerfile.rocm .

 # default base image
-ARG BASE_IMAGE="rocm/sgl-dev:20250114vllm-blas-flash"
+ARG BASE_IMAGE="rocm/sgl-dev:vllm20250114"

 FROM $BASE_IMAGE AS base
 USER root
@@ -16,10 +16,10 @@ ARG SGL_BRANCH=${SGL_DEFAULT}
 ARG TRITON_REPO="https://github.com/ROCm/triton.git"
 ARG TRITON_COMMIT="improve_fa_decode_3.0.0"

+
 ARG AITER_REPO="https://github.com/ROCm/aiter.git"
 ARG AITER_COMMIT="testx"

-
 RUN git clone ${SGL_REPO} \
    && cd sglang \
    && if [ "${SGL_BRANCH}" = ${SGL_DEFAULT} ]; then \
@@ -59,7 +59,6 @@ RUN git clone ${AITER_REPO} \
    && git submodule update --init --recursive \
    && PREBUILD_KERNELS=1 GPU_ARCHS=gfx942 python3 setup.py develop

-
 # Copy config files to support MI300X in virtualized environments (MI300X_VF).  Symlinks will not be created in image build.
 RUN find /sgl-workspace/sglang/python/sglang/srt/layers/quantization/configs/ \
         /sgl-workspace/sglang/python/sglang/srt/layers/moe/fused_moe_triton/configs/ \