Upgrade to vllm 0.17.0 corex v4.1 overlay

This commit is contained in:
2026-04-29 19:38:22 +08:00
parent 8fac6062e4
commit 938d0854a5
430 changed files with 35969 additions and 14511 deletions

View File

@@ -419,6 +419,7 @@ class EngineArgs:
enable_expert_parallel: bool = ParallelConfig.enable_expert_parallel
moe_backend: MoEBackend = KernelConfig.moe_backend
all2all_backend: All2AllBackend = ParallelConfig.all2all_backend
enable_elastic_ep: bool = ParallelConfig.enable_elastic_ep
enable_dbo: bool = ParallelConfig.enable_dbo
ubatch_size: int = ParallelConfig.ubatch_size
dbo_decode_token_threshold: int = ParallelConfig.dbo_decode_token_threshold
@@ -896,6 +897,9 @@ class EngineArgs:
"--ubatch-size",
**parallel_kwargs["ubatch_size"],
)
parallel_group.add_argument(
"--enable-elastic-ep", **parallel_kwargs["enable_elastic_ep"]
)
parallel_group.add_argument(
"--dbo-decode-token-threshold",
**parallel_kwargs["dbo_decode_token_threshold"],
@@ -1321,6 +1325,7 @@ class EngineArgs:
"launched vLLM.",
self.seed,
)
return ModelConfig(
model=self.model,
model_weights=self.model_weights,
@@ -1697,6 +1702,7 @@ class EngineArgs:
is_moe_model=model_config.is_moe,
enable_expert_parallel=self.enable_expert_parallel,
all2all_backend=self.all2all_backend,
enable_elastic_ep=self.enable_elastic_ep,
enable_dbo=self.enable_dbo,
ubatch_size=self.ubatch_size,
dbo_decode_token_threshold=self.dbo_decode_token_threshold,
@@ -1905,6 +1911,7 @@ class EngineArgs:
performance_mode=self.performance_mode,
weight_transfer_config=self.weight_transfer_config,
)
return config
def _check_feature_supported(self):
@@ -2074,20 +2081,19 @@ class EngineArgs:
)
# Disable chunked prefill and prefix caching for:
# POWER (ppc64le)/RISCV CPUs in V1
# RISCV CPUs in V1
if current_platform.is_cpu() and current_platform.get_cpu_architecture() in (
CpuArchEnum.POWERPC,
CpuArchEnum.RISCV,
):
logger.info(
"Chunked prefill is not supported for POWER, "
"and RISC-V CPUs; "
"Chunked prefill is not supported for "
"RISC-V CPUs; "
"disabling it for V1 backend."
)
self.enable_chunked_prefill = False
logger.info(
"Prefix caching is not supported for POWER, "
"and RISC-V CPUs; "
"Prefix caching is not supported for "
"RISC-V CPUs; "
"disabling it for V1 backend."
)
self.enable_prefix_caching = False
@@ -2181,14 +2187,10 @@ class AsyncEngineArgs(EngineArgs):
"--enable-log-requests",
action=argparse.BooleanOptionalAction,
default=AsyncEngineArgs.enable_log_requests,
help="Enable logging requests.",
)
parser.add_argument(
"--disable-log-requests",
action=argparse.BooleanOptionalAction,
default=not AsyncEngineArgs.enable_log_requests,
help="[DEPRECATED] Disable logging requests.",
deprecated=True,
help="Enable logging request information, dependent on log level:\n"
"- INFO: Request ID, parameters and LoRA request.\n"
"- DEBUG: Prompt inputs (e.g: text, token IDs).\n"
"You can set the minimum log level via `VLLM_LOGGING_LEVEL`.",
)
current_platform.pre_register_and_update(parser)
return parser