[Refactor]Refactor sampler (#2050)

Refactor the Sampler implementation from a patch-based approach to inheriting from the vLLM Sampler interface. Next step: make the `TopKTopPSampler` op in vLLM support the custom-op registration mechanism. - vLLM version: v0.10.0 - vLLM main: 61a6905ab0 Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
2025-07-30 08:47:22 +08:00
parent b6a7f07c70
commit 9b67c87b14
8 changed files with 108 additions and 150 deletions
--- a/tests/ut/patch/worker/patch_common/test_patch_sampler.py
+++ b/tests/ut/patch/worker/patch_common/test_patch_sampler.py
@@ -1,46 +0,0 @@
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# This file is a part of the vllm-ascend project.
-
-import importlib
-import os
-from unittest import mock
-
-import torch
-from vllm.v1.sample.ops import topk_topp_sampler
-
-from tests.ut.base import TestBase
-
-
-class TestTopKTopPSamplerOptimize(TestBase):
-
-    @mock.patch.dict(os.environ,
-                     {"VLLM_ASCEND_ENABLE_TOPK_TOPP_OPTIMIZATION": "1"})
-    @mock.patch("torch_npu.npu_top_k_top_p")
-    def test_npu_topk_topp_called_when_optimized(self, mock_npu_op):
-        # We have to patch and reload because the patch will take effect
-        # only after VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE is set.
-        import vllm_ascend.patch.worker.patch_common.patch_sampler
-        importlib.reload(vllm_ascend.patch.worker.patch_common.patch_sampler)
-
-        mock_npu_op.return_value = (torch.randn(1, 3))
-        sampler = topk_topp_sampler.TopKTopPSampler()
-
-        logits = torch.tensor([[1.0, 2.0, 3.0]])
-        k = torch.tensor([2])
-        p = torch.tensor([0.9])
-        generators = {0: torch.Generator()}
-        generators[0].manual_seed(42)
-
-        sampler.forward_native(logits, generators, k, p)
-        mock_npu_op.assert_called_once_with(logits, p, k)
--- a/tests/ut/sample/test_sampler.py
+++ b/tests/ut/sample/test_sampler.py
@@ -0,0 +1,32 @@
+from unittest import mock
+
+import torch
+
+from tests.ut.base import TestBase
+from vllm_ascend.sample.sampler import AscendSampler, AscendTopKTopPSampler
+
+
+class TestAscendSampler(TestBase):
+
+    def test_init_with_raw_logprobs(self):
+        sampler = AscendSampler(logprobs_mode="raw_logprobs")
+        self.assertEqual(sampler.logprobs_mode, "raw_logprobs")
+        self.assertTrue(hasattr(sampler, 'topk_topp_sampler'))
+        self.assertIsInstance(sampler.topk_topp_sampler, AscendTopKTopPSampler)
+
+
+class TestAscendTopKTopPSampler(TestBase):
+
+    @mock.patch("torch_npu.npu_top_k_top_p")
+    def test_npu_topk_topp_called_when_optimized(self, mock_npu_op):
+        mock_npu_op.return_value = (torch.randn(1, 3))
+        sampler = AscendTopKTopPSampler()
+
+        logits = torch.tensor([[1.0, 2.0, 3.0]])
+        k = torch.tensor([2])
+        p = torch.tensor([0.9])
+        generators = {0: torch.Generator()}
+        generators[0].manual_seed(42)
+
+        sampler.forward_native(logits, generators, k, p)
+        mock_npu_op.assert_called_once_with(logits, p, k)