Upgrade to new vllm commit (#3719)
### What this PR does / why we need it? Upgrade to new vllm commit:c9461e05a4- Fix many imports, caused by https://github.com/vllm-project/vllm/pull/26908 - Fix import ```sha256```, caused by https://github.com/vllm-project/vllm/pull/27169 - Remove ```SchedulerConfig.send_delta_data```, caused by https://github.com/vllm-project/vllm/pull/27142 - Fix ```FusedMoE``` because of dual stream execution, caused by https://github.com/vllm-project/vllm/pull/26440 ### Does this PR introduce _any_ user-facing change? N/A ### How was this patch tested? CI passed with new added/existing test. - vLLM version: v0.11.0rc3 - vLLM main:17c540a993--------- Signed-off-by: MengqingCao <cmq0113@163.com> Signed-off-by: Icey <1790571317@qq.com> Co-authored-by: MengqingCao <cmq0113@163.com>
This commit is contained in:
@@ -24,6 +24,9 @@ import vllm.envs as envs_vllm
|
||||
from vllm.logger import logger
|
||||
from vllm.platforms import Platform, PlatformEnum
|
||||
|
||||
# todo: please remove it when solve cuda hard code in vllm
|
||||
os.environ["VLLM_DISABLE_SHARED_EXPERTS_STREAM"] = "True"
|
||||
|
||||
from vllm_ascend.ascend_config import (check_ascend_config, get_ascend_config,
|
||||
init_ascend_config)
|
||||
from vllm_ascend.torchair.utils import (check_torchair_cache_exist,
|
||||
@@ -142,7 +145,6 @@ class NPUPlatform(Platform):
|
||||
if not model_config.is_multimodal_model and \
|
||||
structured_outputs_config.backend == "auto" and \
|
||||
not getattr(scheduler_config, "scheduler_delay_factor", 0) > 0 and \
|
||||
not scheduler_config.send_delta_data and \
|
||||
scheduler_config.policy == "fcfs":
|
||||
ascend_scheduler_config.enabled = True
|
||||
chunked_prefill_enabled_in_ascend_scheduler = getattr(
|
||||
|
||||
Reference in New Issue
Block a user