Upgrade to vllm 0.17.0 corex v4.1 overlay

This commit is contained in:
2026-04-29 19:38:22 +08:00
parent 8fac6062e4
commit 938d0854a5
430 changed files with 35969 additions and 14511 deletions

View File

@@ -143,7 +143,8 @@ class BaseFrontendArgs:
templates and other tokenizer configuration."""
enable_log_outputs: bool = False
"""If set to True, log model outputs (generations).
Requires --enable-log-requests."""
Requires `--enable-log-requests`. As with `--enable-log-requests`,
information is logged at INFO level at most."""
enable_log_deltas: bool = True
"""If set to False, output deltas will not be logged. Relevant only if
`--enable-log-outputs` is set.
@@ -277,6 +278,10 @@ class FrontendArgs(BaseFrontendArgs):
Enable offline FastAPI documentation for air-gapped environments.
Uses vendored static assets bundled with vLLM.
"""
use_gpu_for_pooling_score: bool = False
"""If set, run pooling score MaxSim on GPU in the API server process.
Can significantly improve late-interaction scoring performance.
https://github.com/vllm-project/vllm/pull/35330"""
@classmethod
def _customize_cli_kwargs(