From 8ea7df61146f87dc635ca8144c36247305435f57 Mon Sep 17 00:00:00 2001
From: kk <43161300+kkHuang-amd@users.noreply.github.com>
Date: Wed, 11 Jun 2025 00:08:10 +0800
Subject: [PATCH] [WA] fix output data is nan in CI test
 "test_moe_eval_accuracy_large.py" (#7021)

Co-authored-by: wunhuang
Co-authored-by: HAI
---
 .../srt/layers/attention/aiter_backend.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/python/sglang/srt/layers/attention/aiter_backend.py b/python/sglang/srt/layers/attention/aiter_backend.py
index 49200c071..ff7b9dc7e 100644
--- a/python/sglang/srt/layers/attention/aiter_backend.py
+++ b/python/sglang/srt/layers/attention/aiter_backend.py
@@ -717,6 +717,11 @@ class AiterIndicesUpdaterPrefill:
         self.req_to_token = model_runner.req_to_token_pool.req_to_token
         self.update = self.update_single_wrapper
 
+        # get the last index of the pool
+        self.pool_size = (
+            model_runner.token_to_kv_pool.size + model_runner.token_to_kv_pool.page_size
+        ) - 1
+
         self.kv_indices = None
         self.max_q_len = 0
         self.max_kv_len = 0
@@ -754,8 +759,16 @@ class AiterIndicesUpdaterPrefill:
             # Normal extend
             kv_indptr[1 : bs + 1] = torch.cumsum(paged_kernel_lens, dim=0)
             kv_indptr = kv_indptr[: bs + 1]
-            kv_indices = torch.empty(
-                paged_kernel_lens_sum + 256,
+
+            # (TODO: Kk) WA - CI test_moe_eval_accuracy_large.py
+            # mha_batch_prefill reads 128 data to do computation
+            # if real data is not long enough then original padding value 0 is used
+            # but the 0 location will be made nan (noqa) in cuda graph capture mode
+            # this will cause the output tensor value to become nan
+            # WA is to ensure that the last index of the pool is not changed
+            kv_indices = torch.full(
+                (paged_kernel_lens_sum + 128,),
+                self.pool_size,
                 dtype=torch.int32,
                 device=req_pool_indices.device,
             )