main add ascend scheduler support multimodal (#2844)
### What this PR does / why we need it?
On main, AscendScheduler does not support multimodal models, because it lacks
scheduled_encoder_inputs, which is needed for multimodal model inference.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
vLLM version: main@93e28e6862669e3b5cf47cea9f782a65ec47e155
- vLLM version: v0.10.2rc2
- vLLM main:
15b8fef453
---------
Signed-off-by: fan2956 <zhoufan53@huawei.com>
Co-authored-by: zhoufan2956 <zhoufan2956@163.com>
This commit is contained in:
@@ -75,13 +75,6 @@ class TestAscendSchedulerConfig(TestBase):
|
||||
str(context.exception),
|
||||
)
|
||||
|
||||
def test_not_implemented_multimodal(self):
    """Initializing from a multimodal SchedulerConfig must raise NotImplementedError."""
    multimodal_config = SchedulerConfig(is_multimodal_model=True)
    with self.assertRaises(NotImplementedError) as context:
        AscendSchedulerConfig.initialize_from_config(multimodal_config, {})
    # The error text should tell the user that only LLM models are supported.
    message = str(context.exception)
    self.assertIn("currently AscendScheduler only supports LLM models",
                  message)
|
||||
def test_not_implemented_send_delta_data(self):
|
||||
with self.assertRaises(NotImplementedError) as context:
|
||||
AscendSchedulerConfig.initialize_from_config(
|
||||
@@ -118,6 +111,11 @@ class TestAscendSchedulerConfig(TestBase):
|
||||
self.assertEqual(ascend_config.max_num_encoder_input_tokens, 8192)
|
||||
self.assertEqual(ascend_config.encoder_cache_size, 8192)
|
||||
|
||||
def test_valid_config_with_multimodal(self):
    """A multimodal SchedulerConfig should initialize successfully and keep its flag."""
    multimodal_config = SchedulerConfig(is_multimodal_model=True)
    config = AscendSchedulerConfig.initialize_from_config(multimodal_config, {})
    # The resulting Ascend config must still report itself as multimodal.
    self.assertTrue(config.is_multimodal_model)
def test_valid_config_with_chunked_prefill(self):
|
||||
ascend_config = AscendSchedulerConfig.initialize_from_config(
|
||||
self.basic_scheduler_config,
|
||||
|
||||
Reference in New Issue
Block a user