From cd58a643c598622fdf5a853764f851a5074d0328 Mon Sep 17 00:00:00 2001 From: lio <101571019+lio1226@users.noreply.github.com> Date: Fri, 24 Oct 2025 11:20:57 +0800 Subject: [PATCH] [UT] Fix test_sample_recovered_tokens_pytorch_autoregressive (#3434) ### What this PR does / why we need it? There is a bug in the 'test_rejection_sampler' unit test. > def test_sample_recovered_tokens_pytorch_autoregressive(self): > output_token_ids = torch.empty(2, dtype=torch.int32) > cu_num_draft_tokens = torch.tensor([1, 1]) > draft_token_ids = torch.tensor([0, 1]) Since len(draft_token_ids) = 2, cu_num_draft_tokens should be torch.tensor([1, 2]) or torch.tensor([2, 2]). I fixed it by setting cu_num_draft_tokens = torch.tensor([1, 2]). With this fix, the test passes against both the original and the optimized implementations. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? NA - vLLM version: v0.11.0rc3 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0 --------- Signed-off-by: lio <1983142975@qq.com> --- tests/ut/sample/test_rejection_sampler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/ut/sample/test_rejection_sampler.py b/tests/ut/sample/test_rejection_sampler.py index adbf376d..f2f8ac19 100644 --- a/tests/ut/sample/test_rejection_sampler.py +++ b/tests/ut/sample/test_rejection_sampler.py @@ -174,7 +174,7 @@ class TestAscendRejectionSampler(TestBase): def test_sample_recovered_tokens_pytorch_autoregressive(self): """Test recovered token sampling for autoregressive models""" output_token_ids = torch.empty(2, dtype=torch.int32) - cu_num_draft_tokens = torch.tensor([1, 1]) + cu_num_draft_tokens = torch.tensor([1, 2]) draft_token_ids = torch.tensor([0, 1]) draft_probs = torch.tensor([ [0.6, 0.1, 0.3], @@ -201,3 +201,4 @@ class TestAscendRejectionSampler(TestBase): IS_NGRAM=False, ) assert output_token_ids[0].item() == 0 + assert output_token_ids[1].item() == 0