Tiny skip_sample adjust (#11225)

This commit is contained in:
Liangsheng Yin
2025-10-05 23:41:04 +08:00
committed by GitHub
parent 85c1f79377
commit 4cb5a5235e
5 changed files with 17 additions and 18 deletions

View File

@@ -663,7 +663,11 @@ class Req:
@property
def is_prefill_only(self) -> bool:
    """Check if this request is prefill-only (no token generation needed).

    Returns:
        True only when the request asks for zero new tokens
        (``sampling_params.max_new_tokens == 0``) and no speculative
        algorithm is configured in ``global_server_args_dict``.
    """
    # NOTE: when spec is enabled, prefill_only optimizations are disabled
    return (
        self.sampling_params.max_new_tokens == 0
        and global_server_args_dict["speculative_algorithm"] is None
    )
def add_latency(self, stage: RequestStage):
if self.metrics_collector is None: