[CI] Add Triton Ascend in CI (#4921)

Add triton-ascend to the unit tests (UT) and end-to-end (e2e) tests.

- vLLM version: v0.12.0
- vLLM main: ad32e3e19c
---------
Signed-off-by: Meihan-chen <jcccx.cmh@gmail.com>
This commit is contained in:
meihanc
2025-12-23 12:47:35 +08:00
committed by GitHub
parent 2e010e12dd
commit 592cfb6a6f
8 changed files with 85 additions and 36 deletions

View File

@@ -127,19 +127,32 @@ class TestAscendRejectionSampler(TestBase):
x = torch.tensor([10, 20, 30])
cu_num_tokens = torch.tensor([2, 5, 7])
num_tokens = 7
# Test PyTorch path
with patch("vllm_ascend.sample.rejection_sampler.HAS_TRITON", False):
with patch("vllm_ascend.sample.rejection_sampler.expand_pytorch"
) as mock_pytorch:
expand_batch_to_tokens(x, cu_num_tokens, num_tokens)
mock_pytorch.assert_called_once()
args = mock_pytorch.call_args[0]
assert (args[1] == x).all()
assert (args[2] == cu_num_tokens).all()
with patch("vllm_ascend.sample.rejection_sampler.expand_pytorch"
) as mock_kernel:
expand_batch_to_tokens(x, cu_num_tokens, num_tokens)
mock_kernel.assert_called_once()
args = mock_kernel.call_args[0]
assert (args[1] == x).all()
assert (args[2] == cu_num_tokens).all()
# Test Triton kernel path
with patch("vllm_ascend.sample.rejection_sampler.HAS_TRITON", True):
with patch("vllm_ascend.sample.rejection_sampler.expand_kernel"
) as mock_triton:
expand_batch_to_tokens(x, cu_num_tokens, num_tokens)
# grid = triton.cdiv(n, BLOCK_SIZE) = triton.cdiv(3, 2) = 2
mock_triton.__getitem__.assert_called_once_with((2, ))
call_args = mock_triton.__getitem__.return_value.call_args[0]
assert (call_args[1] == x).all()
assert (call_args[2] == cu_num_tokens).all()
# Run actual function
result = expand_batch_to_tokens(x, cu_num_tokens, num_tokens)
expected = torch.tensor([10, 10, 20, 20, 20, 30, 30])
assert torch.equal(result, expected)
with patch("vllm_ascend.sample.rejection_sampler.HAS_TRITON", False):
result = expand_batch_to_tokens(x, cu_num_tokens, num_tokens)
expected = torch.tensor([10, 10, 20, 20, 20, 30, 30])
assert torch.equal(result, expected)
def test_sample_recovered_tokens_pytorch_ngram(self):
"""Test recovered token sampling under n-gram mode"""