Files
xc-llm-ascend/tests/e2e/nightly/ops/test_gating_top_k_softmax.py
Li Wang 60ee4af6d0 [CI] Add custom op to nightly (#3765)
### What this PR does / why we need it?
1. Add custom op to nightly tests, fix
https://github.com/vllm-project/vllm-ascend/pull/3665
2. Correctly pass github secrets when using workflow_call, see
https://docs.github.com/en/actions/how-tos/reuse-automations/reuse-workflows
3. Fix the single node mutual cancellation issue

- vLLM version: v0.11.0rc3
- vLLM main:
c9461e05a4

---------

Signed-off-by: wangli <wangli858794774@gmail.com>
2025-10-27 14:07:03 +08:00

38 lines
1.1 KiB
Python

import pytest
import torch
import torch_npu
@pytest.mark.parametrize(
    'B',
    [1, 16, 64, 128, 32768],
)
@pytest.mark.parametrize(
    'D',
    [8, 16, 32, 64, 128],
)
@pytest.mark.parametrize(
    'top_k',
    [1, 2, 4, 8],
)
@pytest.mark.parametrize(
    "dtype, atol, rtol",
    [
        (torch.float16, 1e-3, 1e-3),
        (torch.bfloat16, 1e-3, 1e-3),
    ],
)
def test_quant_fpx_linear(B: int, D: int, top_k: int, dtype, atol, rtol):
    """Check ``torch_npu.npu_moe_gating_top_k_softmax`` against a reference.

    The reference is ``softmax`` over the last dimension followed by
    ``topk``, both computed on the same NPU tensor.

    Args:
        B: Number of rows in the random input.
        D: Number of columns in the random input (softmax dimension).
        top_k: Number of entries selected per row.
        dtype: Floating-point dtype of the input tensor.
        atol: Absolute tolerance passed to ``torch.allclose``.
        rtol: Relative tolerance passed to ``torch.allclose``.
    """
    x = torch.rand((B, D), dtype=dtype).to("npu")
    # The `finished` argument is not exercised by this test; None is passed.
    finished = None
    y, expert_idx, _row_idx = torch_npu.npu_moe_gating_top_k_softmax(
        x, finished, k=top_k)

    probs = x.softmax(dim=-1)
    topk_weights, _topk_ids = probs.topk(top_k, dim=-1)

    # The comparison results must be asserted; a bare torch.allclose(...)
    # call only returns a bool and never fails the test.
    assert torch.allclose(y, topk_weights, atol=atol, rtol=rtol), \
        "top-k softmax weights differ from the reference"

    # Validate the returned indices by looking up the reference probability
    # at each of them instead of comparing index values directly: low
    # precision random inputs can contain equal values within a row, and the
    # tie-breaking order of the two implementations may differ
    # (NOTE(review): tie order of the NPU op is not visible here - confirm).
    # gather() requires int64 indices.
    picked = probs.gather(-1, expert_idx.to(torch.int64))
    assert torch.allclose(picked, topk_weights, atol=atol, rtol=rtol), \
        "returned expert indices do not select the top-k probabilities"