[CI] Upgrade vLLM to 20250919 (6d8246aa) and fix some broken issues (#2907)

### What this PR does / why we need it?
1. This PR bumps the vLLM commit to
6d8246aaff
2. Adapt to the upstream changes in https://github.com/vllm-project/vllm/pull/24548
about multi-modal kwargs, keeping both vLLM main and `v0.10.2` supported
3. Adapt to the `metadata_builder` changes introduced by
https://github.com/vllm-project/vllm/pull/23693
4. Adapt to the `structured_outputs_config` changes introduced by
https://github.com/vllm-project/vllm/pull/22772
5. Adapt to the `moe_config` changes introduced by
https://github.com/vllm-project/vllm/pull/22537

Co-authored-by:  MengqingCao <cmq0113@163.com>
Co-authored-by:  Yikun Jiang <yikunkero@gmail.com>


- vLLM version: v0.10.2
- vLLM main:
c60e6137f0

---------

Signed-off-by: wangli <wangli858794774@gmail.com>
Signed-off-by: MengqingCao <cmq0113@163.com>
Co-authored-by: MengqingCao <cmq0113@163.com>
This commit is contained in:
Li Wang
2025-09-20 17:37:57 +08:00
committed by GitHub
parent 53ecd89e8f
commit 12bcbd02bb
14 changed files with 359 additions and 143 deletions

View File

@@ -31,7 +31,7 @@ from vllm_ascend.ascend_config import (check_ascend_config, get_ascend_config,
from vllm_ascend.torchair.utils import (check_torchair_cache_exist,
delete_torchair_cache_file)
from vllm_ascend.utils import (ASCEND_QUANTIZATION_METHOD, is_310p,
update_aclgraph_sizes)
update_aclgraph_sizes, vllm_version_is)
if TYPE_CHECKING:
from vllm.config import ModelConfig, VllmConfig
@@ -128,9 +128,12 @@ class NPUPlatform(Platform):
model_config = vllm_config.model_config
parallel_config = vllm_config.parallel_config
cache_config = vllm_config.cache_config
decoding_config = vllm_config.decoding_config
scheduler_config = vllm_config.scheduler_config
ascend_scheduler_config = ascend_config.ascend_scheduler_config
if vllm_version_is("0.10.2"):
structured_outputs_config = vllm_config.decoding_config
else:
structured_outputs_config = vllm_config.structured_outputs_config
if model_config is not None and not model_config.use_mla:
logger.info(
@@ -138,7 +141,7 @@ class NPUPlatform(Platform):
"as the performance of operators supporting this feature "
"functionality is currently suboptimal.")
if not model_config.is_multimodal_model and \
decoding_config.backend == "auto" and \
structured_outputs_config.backend == "auto" and \
not scheduler_config.delay_factor > 0 and \
not scheduler_config.send_delta_data and \
scheduler_config.policy == "fcfs":