Upgrade to vllm 0.17.0 corex v4.1 overlay

2026-04-29 19:38:22 +08:00
parent 8fac6062e4
commit 938d0854a5
430 changed files with 35969 additions and 14511 deletions
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -419,6 +419,7 @@ class EngineArgs:
    enable_expert_parallel: bool = ParallelConfig.enable_expert_parallel
    moe_backend: MoEBackend = KernelConfig.moe_backend
    all2all_backend: All2AllBackend = ParallelConfig.all2all_backend
+    enable_elastic_ep: bool = ParallelConfig.enable_elastic_ep
    enable_dbo: bool = ParallelConfig.enable_dbo
    ubatch_size: int = ParallelConfig.ubatch_size
    dbo_decode_token_threshold: int = ParallelConfig.dbo_decode_token_threshold
@@ -896,6 +897,9 @@ class EngineArgs:
            "--ubatch-size",
            **parallel_kwargs["ubatch_size"],
        )
+        parallel_group.add_argument(
+            "--enable-elastic-ep", **parallel_kwargs["enable_elastic_ep"]
+        )
        parallel_group.add_argument(
            "--dbo-decode-token-threshold",
            **parallel_kwargs["dbo_decode_token_threshold"],
@@ -1321,6 +1325,7 @@ class EngineArgs:
                "launched vLLM.",
                self.seed,
            )
+
        return ModelConfig(
            model=self.model,
            model_weights=self.model_weights,
@@ -1697,6 +1702,7 @@ class EngineArgs:
            is_moe_model=model_config.is_moe,
            enable_expert_parallel=self.enable_expert_parallel,
            all2all_backend=self.all2all_backend,
+            enable_elastic_ep=self.enable_elastic_ep,
            enable_dbo=self.enable_dbo,
            ubatch_size=self.ubatch_size,
            dbo_decode_token_threshold=self.dbo_decode_token_threshold,
@@ -1905,6 +1911,7 @@ class EngineArgs:
            performance_mode=self.performance_mode,
            weight_transfer_config=self.weight_transfer_config,
        )
+
        return config

    def _check_feature_supported(self):
@@ -2074,20 +2081,19 @@ class EngineArgs:
            )

        # Disable chunked prefill and prefix caching for:
-        # POWER (ppc64le)/RISCV CPUs in V1
+        # RISCV CPUs in V1
        if current_platform.is_cpu() and current_platform.get_cpu_architecture() in (
-            CpuArchEnum.POWERPC,
            CpuArchEnum.RISCV,
        ):
            logger.info(
-                "Chunked prefill is not supported for POWER, "
-                "and RISC-V CPUs; "
+                "Chunked prefill is not supported for"
+                "RISC-V CPUs; "
                "disabling it for V1 backend."
            )
            self.enable_chunked_prefill = False
            logger.info(
-                "Prefix caching is not supported for POWER, "
-                "and RISC-V CPUs; "
+                "Prefix caching is not supported for "
+                "RISC-V CPUs; "
                "disabling it for V1 backend."
            )
            self.enable_prefix_caching = False
@@ -2181,14 +2187,10 @@ class AsyncEngineArgs(EngineArgs):
            "--enable-log-requests",
            action=argparse.BooleanOptionalAction,
            default=AsyncEngineArgs.enable_log_requests,
-            help="Enable logging requests.",
-        )
-        parser.add_argument(
-            "--disable-log-requests",
-            action=argparse.BooleanOptionalAction,
-            default=not AsyncEngineArgs.enable_log_requests,
-            help="[DEPRECATED] Disable logging requests.",
-            deprecated=True,
+            help="Enable logging request information, dependant on log level:\n"
+            "- INFO: Request ID, parameters and LoRA request.\n"
+            "- DEBUG: Prompt inputs (e.g: text, token IDs).\n"
+            "You can set the minimum log level via `VLLM_LOGGING_LEVEL`.",
        )
        current_platform.pre_register_and_update(parser)
        return parser