From cd58a643c598622fdf5a853764f851a5074d0328 Mon Sep 17 00:00:00 2001 From: lio <101571019+lio1226@users.noreply.github.com> Date: Fri, 24 Oct 2025 11:20:57 +0800 Subject: [PATCH] [UT] Fix test_sample_recovered_tokens_pytorch_autoregressive (#3434) ### What this PR does / why we need it? There is a bug in the 'test_rejection_sampler' unit test. > def test_sample_recovered_tokens_pytorch_autoregressive(self): > output_token_ids = torch.empty(2, dtype=torch.int32) > cu_num_draft_tokens = torch.tensor([1, 1]) > draft_token_ids = torch.tensor([0, 1]) Since len(draft_token_ids) = 2, cu_num_draft_tokens should be torch.tensor([1, 2]) or torch.tensor([2, 2]). I fixed it by setting cu_num_draft_tokens = torch.tensor([1, 2]). With this fix, the test passes against both the original and the optimized implementations. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? NA - vLLM version: v0.11.0rc3 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0 --------- Signed-off-by: lio <1983142975@qq.com> --- tests/ut/sample/test_rejection_sampler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/ut/sample/test_rejection_sampler.py b/tests/ut/sample/test_rejection_sampler.py index adbf376d..f2f8ac19 100644 --- a/tests/ut/sample/test_rejection_sampler.py +++ b/tests/ut/sample/test_rejection_sampler.py @@ -174,7 +174,7 @@ class TestAscendRejectionSampler(TestBase): def test_sample_recovered_tokens_pytorch_autoregressive(self): """Test recovered token sampling for autoregressive models""" output_token_ids = torch.empty(2, dtype=torch.int32) - cu_num_draft_tokens = torch.tensor([1, 1]) + cu_num_draft_tokens = torch.tensor([1, 2]) draft_token_ids = torch.tensor([0, 1]) draft_probs = torch.tensor([ [0.6, 0.1, 0.3], @@ -201,3 +201,4 @@ class TestAscendRejectionSampler(TestBase): IS_NGRAM=False, ) assert output_token_ids[0].item() == 0 + assert output_token_ids[1].item() == 0