diff --git a/.github/workflows/pr-test-amd.yml b/.github/workflows/pr-test-amd.yml index 3ba24d979..51ea12ea5 100644 --- a/.github/workflows/pr-test-amd.yml +++ b/.github/workflows/pr-test-amd.yml @@ -291,7 +291,7 @@ jobs: bash scripts/amd_ci_exec.sh python3 run_suite.py --suite per-commit-8-gpu-amd --timeout-per-file 3600 - name: Run CustomAllReduce test - timeout-minutes: 10 + timeout-minutes: 20 run: | bash scripts/amd_ci_exec.sh -e CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python3 -m unittest test_custom_allreduce.TestCustomAllReduce diff --git a/python/sglang/srt/layers/attention/aiter_backend.py b/python/sglang/srt/layers/attention/aiter_backend.py index 7e6b9936e..cea097cb0 100644 --- a/python/sglang/srt/layers/attention/aiter_backend.py +++ b/python/sglang/srt/layers/attention/aiter_backend.py @@ -720,11 +720,6 @@ class AiterIndicesUpdaterPrefill: self.req_to_token = model_runner.req_to_token_pool.req_to_token self.update = self.update_single_wrapper - # get the last index of the pool - self.pool_size = ( - model_runner.token_to_kv_pool.size + model_runner.token_to_kv_pool.page_size - ) - 1 - self.kv_indices = None self.max_q_len = 0 self.max_kv_len = 0 @@ -769,9 +764,8 @@ class AiterIndicesUpdaterPrefill: # but the 0 location will be made nan (noqa) in cuda graph capture mode # this will cause the output tensor value becomes nan # WA is to assure that last index of pool not changed - kv_indices = torch.full( - (paged_kernel_lens_sum + 128,), - self.pool_size, + kv_indices = torch.empty( + paged_kernel_lens_sum + 256, dtype=torch.int32, device=req_pool_indices.device, ) @@ -785,6 +779,9 @@ class AiterIndicesUpdaterPrefill: self.req_to_token.shape[1], ) + token_num = kv_indptr[-1] + kv_indices[token_num:] = kv_indices[0] + self.max_kv_len = torch.max(paged_kernel_lens).item() extend_lens = seq_lens - prefix_lens diff --git a/scripts/amd_ci_start_container.sh b/scripts/amd_ci_start_container.sh index 239fd3770..9ce33549b 100755 --- a/scripts/amd_ci_start_container.sh +++ b/scripts/amd_ci_start_container.sh @@ -124,6 +124,7 @@ echo "Starting container: ci_sglang" docker run -dt --user root --device=/dev/kfd $DEVICE_FLAG \ -v "${GITHUB_WORKSPACE:-$PWD}:/sglang-checkout" \ --ipc=host --group-add video \ + --shm-size 32g \ --cap-add=SYS_PTRACE \ -e HF_TOKEN="${HF_TOKEN:-}" \ --security-opt seccomp=unconfined \