feat: add priority-based scheduling with priority-based request acceptance and preemption (#8746)
This commit is contained in:
@@ -738,6 +738,7 @@ class TokenizerManager(TokenizerCommunicatorMixin):
|
||||
custom_logit_processor=obj.custom_logit_processor,
|
||||
return_hidden_states=obj.return_hidden_states,
|
||||
data_parallel_rank=obj.data_parallel_rank,
|
||||
priority=obj.priority,
|
||||
)
|
||||
elif isinstance(obj, EmbeddingReqInput):
|
||||
tokenized_obj = TokenizedEmbeddingReqInput(
|
||||
@@ -747,6 +748,7 @@ class TokenizerManager(TokenizerCommunicatorMixin):
|
||||
mm_inputs,
|
||||
token_type_ids,
|
||||
sampling_params,
|
||||
priority=obj.priority,
|
||||
)
|
||||
|
||||
return tokenized_obj
|
||||
|
||||
Reference in New Issue
Block a user