Fix potential memory fault issue and ncclSystemError in CI test (#8681)
Co-authored-by: wunhuang <wunhuang@amd.com>
This commit is contained in:
2
.github/workflows/pr-test-amd.yml
vendored
2
.github/workflows/pr-test-amd.yml
vendored
@@ -291,7 +291,7 @@ jobs:
|
|||||||
bash scripts/amd_ci_exec.sh python3 run_suite.py --suite per-commit-8-gpu-amd --timeout-per-file 3600
|
bash scripts/amd_ci_exec.sh python3 run_suite.py --suite per-commit-8-gpu-amd --timeout-per-file 3600
|
||||||
|
|
||||||
- name: Run CustomAllReduce test
|
- name: Run CustomAllReduce test
|
||||||
timeout-minutes: 10
|
timeout-minutes: 20
|
||||||
run: |
|
run: |
|
||||||
bash scripts/amd_ci_exec.sh -e CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python3 -m unittest test_custom_allreduce.TestCustomAllReduce
|
bash scripts/amd_ci_exec.sh -e CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python3 -m unittest test_custom_allreduce.TestCustomAllReduce
|
||||||
|
|
||||||
|
|||||||
@@ -720,11 +720,6 @@ class AiterIndicesUpdaterPrefill:
|
|||||||
self.req_to_token = model_runner.req_to_token_pool.req_to_token
|
self.req_to_token = model_runner.req_to_token_pool.req_to_token
|
||||||
self.update = self.update_single_wrapper
|
self.update = self.update_single_wrapper
|
||||||
|
|
||||||
# get the last index of the pool
|
|
||||||
self.pool_size = (
|
|
||||||
model_runner.token_to_kv_pool.size + model_runner.token_to_kv_pool.page_size
|
|
||||||
) - 1
|
|
||||||
|
|
||||||
self.kv_indices = None
|
self.kv_indices = None
|
||||||
self.max_q_len = 0
|
self.max_q_len = 0
|
||||||
self.max_kv_len = 0
|
self.max_kv_len = 0
|
||||||
@@ -769,9 +764,8 @@ class AiterIndicesUpdaterPrefill:
|
|||||||
# but the 0 location will be made nan (noqa) in cuda graph capture mode
|
# but the 0 location will be made nan (noqa) in cuda graph capture mode
|
||||||
# this will cause the output tensor values to become nan
|
# this will cause the output tensor values to become nan
|
||||||
# WA is to ensure that the last index of the pool is not changed
|
# WA is to ensure that the last index of the pool is not changed
|
||||||
kv_indices = torch.full(
|
kv_indices = torch.empty(
|
||||||
(paged_kernel_lens_sum + 128,),
|
paged_kernel_lens_sum + 256,
|
||||||
self.pool_size,
|
|
||||||
dtype=torch.int32,
|
dtype=torch.int32,
|
||||||
device=req_pool_indices.device,
|
device=req_pool_indices.device,
|
||||||
)
|
)
|
||||||
@@ -785,6 +779,9 @@ class AiterIndicesUpdaterPrefill:
|
|||||||
self.req_to_token.shape[1],
|
self.req_to_token.shape[1],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
token_num = kv_indptr[-1]
|
||||||
|
kv_indices[token_num:] = kv_indices[0]
|
||||||
|
|
||||||
self.max_kv_len = torch.max(paged_kernel_lens).item()
|
self.max_kv_len = torch.max(paged_kernel_lens).item()
|
||||||
|
|
||||||
extend_lens = seq_lens - prefix_lens
|
extend_lens = seq_lens - prefix_lens
|
||||||
|
|||||||
@@ -124,6 +124,7 @@ echo "Starting container: ci_sglang"
|
|||||||
docker run -dt --user root --device=/dev/kfd $DEVICE_FLAG \
|
docker run -dt --user root --device=/dev/kfd $DEVICE_FLAG \
|
||||||
-v "${GITHUB_WORKSPACE:-$PWD}:/sglang-checkout" \
|
-v "${GITHUB_WORKSPACE:-$PWD}:/sglang-checkout" \
|
||||||
--ipc=host --group-add video \
|
--ipc=host --group-add video \
|
||||||
|
--shm-size 32g \
|
||||||
--cap-add=SYS_PTRACE \
|
--cap-add=SYS_PTRACE \
|
||||||
-e HF_TOKEN="${HF_TOKEN:-}" \
|
-e HF_TOKEN="${HF_TOKEN:-}" \
|
||||||
--security-opt seccomp=unconfined \
|
--security-opt seccomp=unconfined \
|
||||||
|
|||||||
Reference in New Issue
Block a user