ROCm: Flex Attention Enablement with custom backends (#4178)
Co-authored-by: linsun12 <linsun12@amd.com>
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
# docker build --build-arg SGL_BRANCH=v0.4.3.post4 -t v0.4.3.post4-rocm630 -f Dockerfile.rocm .
|
||||
|
||||
# default base image
|
||||
ARG BASE_IMAGE="rocm/sgl-dev:vllm20250114"
|
||||
ARG BASE_IMAGE="rocm/sgl-dev:20250114vllm-blas-flash"
|
||||
|
||||
FROM $BASE_IMAGE AS base
|
||||
USER root
|
||||
@@ -16,10 +16,10 @@ ARG SGL_BRANCH=${SGL_DEFAULT}
|
||||
ARG TRITON_REPO="https://github.com/ROCm/triton.git"
|
||||
ARG TRITON_COMMIT="improve_fa_decode_3.0.0"
|
||||
|
||||
|
||||
ARG AITER_REPO="https://github.com/ROCm/aiter.git"
|
||||
ARG AITER_COMMIT="testx"
|
||||
|
||||
|
||||
RUN git clone ${SGL_REPO} \
|
||||
&& cd sglang \
|
||||
&& if [ "${SGL_BRANCH}" = ${SGL_DEFAULT} ]; then \
|
||||
@@ -59,6 +59,7 @@ RUN git clone ${AITER_REPO} \
|
||||
&& git submodule update --init --recursive \
|
||||
&& PREBUILD_KERNELS=1 GPU_ARCHS=gfx942 python3 setup.py develop
|
||||
|
||||
|
||||
# Copy config files to support MI300X in virtualized environments (MI300X_VF). Symlinks are not created during the image build.
|
||||
RUN find /sgl-workspace/sglang/python/sglang/srt/layers/quantization/configs/ \
|
||||
/sgl-workspace/sglang/python/sglang/srt/layers/moe/fused_moe_triton/configs/ \
|
||||
|
||||
Reference in New Issue
Block a user