[BugFix][Core] Fix a bug running multi-modal with ascend_scheduler (#3675)
This PR fixes a bug that occurs when running multi-modal models with
AscendScheduler. The bug was introduced by PR #2372, which used the same
parameter names as vLLM but with different default values.
For now, the bug is fixed by changing the default values of these two
parameters to align with vLLM.
- vLLM version: v0.11.0rc3
- vLLM main:
17c540a993
Signed-off-by: hw_whx <wanghexiang7@huawei.com>
Co-authored-by: hw_whx <wanghexiang7@huawei.com>
This commit is contained in:
@@ -26,7 +26,7 @@ MAX_INT = 2147483647
|
||||
@dataclass
|
||||
class AscendSchedulerConfig(SchedulerConfig):
|
||||
enable_chunked_prefill: bool = False
|
||||
max_long_partial_prefills: int = MAX_INT
|
||||
max_long_partial_prefills: int = 1
|
||||
long_prefill_token_threshold: int = MAX_INT
|
||||
policy: str = "fcfs"
|
||||
scheduler_cls: Union[str, Type[object]] = (
|
||||
@@ -73,9 +73,9 @@ class AscendSchedulerConfig(SchedulerConfig):
|
||||
"max_num_batched_tokens and makes vLLM reject longer "
|
||||
"sequences. Please increase max_num_batched_tokens or "
|
||||
"decrease max_model_len.")
|
||||
# concurrent partial prefills. Default is inf
|
||||
# concurrent partial prefills. Default is 1 meaning not enabled.
|
||||
if self.max_long_partial_prefills is None:
|
||||
self.max_long_partial_prefills = MAX_INT
|
||||
self.max_long_partial_prefills = 1
|
||||
self.long_prefill_token_threshold = MAX_INT
|
||||
|
||||
if self.long_prefill_token_threshold is None or \
|
||||
@@ -105,4 +105,4 @@ class AscendSchedulerConfig(SchedulerConfig):
|
||||
if getattr(self, "scheduler_delay_factor", 0) > 0:
|
||||
raise NotImplementedError(
|
||||
"currently AscendScheduler doesn't support scheduler_delay_factor."
|
||||
)
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user