Tiny skip_sample adjust (#11225)
This commit is contained in:
@@ -663,7 +663,11 @@ class Req:
|
||||
@property
def is_prefill_only(self) -> bool:
    """Check if this request is prefill-only (no token generation needed).

    Returns:
        True only when the request asks for zero new tokens
        (``sampling_params.max_new_tokens == 0``) and no speculative
        algorithm is configured in ``global_server_args_dict``;
        False otherwise.
    """
    # NOTE: when spec is enabled, prefill_only optimizations are disabled,
    # so the request is never reported as prefill-only in that case.
    no_generation_requested = self.sampling_params.max_new_tokens == 0
    return (
        no_generation_requested
        and global_server_args_dict["speculative_algorithm"] is None
    )
|
||||
|
||||
def add_latency(self, stage: RequestStage):
|
||||
if self.metrics_collector is None:
|
||||
|
||||
Reference in New Issue
Block a user