feat: add priority-based scheduling with priority-based request acceptance and preemption (#8746)

This commit is contained in:
harrisonlimh
2025-09-16 17:10:10 -07:00
committed by GitHub
parent f949ad5794
commit 14fdd52740
16 changed files with 822 additions and 71 deletions

View File

@@ -738,6 +738,7 @@ class TokenizerManager(TokenizerCommunicatorMixin):
custom_logit_processor=obj.custom_logit_processor,
return_hidden_states=obj.return_hidden_states,
data_parallel_rank=obj.data_parallel_rank,
priority=obj.priority,
)
elif isinstance(obj, EmbeddingReqInput):
tokenized_obj = TokenizedEmbeddingReqInput(
@@ -747,6 +748,7 @@ class TokenizerManager(TokenizerCommunicatorMixin):
mm_inputs,
token_type_ids,
sampling_params,
priority=obj.priority,
)
return tokenized_obj