Upgrade to 0.11.1 newest vllm commit (#3762)

### What this PR does / why we need it?

Adapts vllm-ascend to vLLM main commit c9461e05a4:

- Fix the `spec decode rejection sampler`, caused by https://github.com/vllm-project/vllm/pull/26060
- Fix some `import` statements, caused by https://github.com/vllm-project/vllm/pull/27374
- Fix `scheduler_config.send_delta_data`, caused by https://github.com/vllm-project/vllm-ascend/pull/3719
- Fix `init_with_cudagraph_sizes`, caused by https://github.com/vllm-project/vllm/pull/26016
- Fix the `vl model` handling of replacing PatchEmbed's conv3d with a linear layer, caused by https://github.com/vllm-project/vllm/pull/27418

### Does this PR introduce _any_ user-facing change?

N/A

### How was this patch tested?

CI passed with newly added and existing tests.

- vLLM version: v0.11.0rc3
- vLLM main: c9461e05a4

---------

Signed-off-by: Icey <1790571317@qq.com>
2025-10-28 14:55:03 +08:00
parent f846bd20e4
commit a7450db1bd
12 changed files with 175 additions and 51 deletions
--- a/vllm_ascend/sample/rejection_sampler.py
+++ b/vllm_ascend/sample/rejection_sampler.py
@@ -5,10 +5,17 @@ import torch
 import torch.nn as nn
 import vllm.v1.sample.rejection_sampler as rs
 from vllm.v1.sample.metadata import SamplingMetadata
-from vllm.v1.sample.rejection_sampler import (RejectionSampler, compute_probs,
+from vllm.v1.sample.rejection_sampler import (RejectionSampler,
                                              generate_uniform_probs)
 from vllm.v1.spec_decode.metadata import SpecDecodeMetadata

+from vllm_ascend.utils import vllm_version_is
+
+if vllm_version_is("0.11.0"):
+    from vllm.v1.sample.rejection_sampler import compute_probs
+else:
+    from vllm.v1.sample.rejection_sampler import apply_sampling_constraints
+
 PLACEHOLDER_TOKEN_ID = -1
 GREEDY_TEMPERATURE = -1
 # Maximum number of speculative draft tokens allowed per request in a single
@@ -82,11 +89,19 @@ class AscendRejectionSampler(RejectionSampler, nn.Module):
        # [num_tokens, vocab_size]
        # NOTE(woosuk): `target_logits` can be updated in place inside the
        # `compute_probs` function.
-        target_probs = compute_probs(
-            target_logits,
-            metadata.cu_num_draft_tokens,
-            sampling_metadata,
-        )
+        if vllm_version_is("0.11.0"):
+            target_probs = compute_probs(
+                target_logits,
+                metadata.cu_num_draft_tokens,
+                sampling_metadata,
+            )
+        else:
+            target_logits = apply_sampling_constraints(
+                target_logits,
+                metadata.cu_num_draft_tokens,
+                sampling_metadata,
+            )
+            target_probs = target_logits.softmax(dim=-1, dtype=torch.float32)

        output_token_ids = rejection_sample(
            metadata.draft_token_ids,