From 670c2397b8a749c163edadcc4d72c1fe29da4ba4 Mon Sep 17 00:00:00 2001
From: xyDong0223 <dongxinyu23@gmail.com>
Date: Wed, 10 Dec 2025 21:52:48 +0800
Subject: [PATCH] [Kernel] Add opt-in fast random sampling (FAST_RANDOM_SAMPLE=1) on Kunlun P

---
 vllm_kunlun/v1/sample/ops/topk_topp_sampler.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/vllm_kunlun/v1/sample/ops/topk_topp_sampler.py b/vllm_kunlun/v1/sample/ops/topk_topp_sampler.py
index e175040..e45c426 100644
--- a/vllm_kunlun/v1/sample/ops/topk_topp_sampler.py
+++ b/vllm_kunlun/v1/sample/ops/topk_topp_sampler.py
@@ -151,7 +151,12 @@ def random_sample(
     # not have its own seed. Then, we overwrite the values for the requests
     # that have their own seeds.
     if len(generators) != probs.shape[0]:
-        q.exponential_()
+        if os.getenv('FAST_RANDOM_SAMPLE') == "1":
+            q.uniform_()
+            q = -torch.log(q)
+            q = q.clamp(min=1e-4)
+        else:
+            q.exponential_()
     if generators:
         # TODO(woosuk): This can be slow because we handle each request
         # one by one. Optimize this.