Upgrade to vllm 0.17.0 corex v4.1 overlay
This commit is contained in:
@@ -143,7 +143,8 @@ class BaseFrontendArgs:
|
||||
templates and other tokenizer configuration."""
|
||||
enable_log_outputs: bool = False
|
||||
"""If set to True, log model outputs (generations).
|
||||
Requires --enable-log-requests."""
|
||||
Requires `--enable-log-requests`. As with `--enable-log-requests`,
|
||||
information is only logged at INFO level at maximum."""
|
||||
enable_log_deltas: bool = True
|
||||
"""If set to False, output deltas will not be logged. Relevant only if
|
||||
--enable-log-outputs is set.
|
||||
@@ -277,6 +278,10 @@ class FrontendArgs(BaseFrontendArgs):
|
||||
Enable offline FastAPI documentation for air-gapped environments.
|
||||
Uses vendored static assets bundled with vLLM.
|
||||
"""
|
||||
use_gpu_for_pooling_score: bool = False
|
||||
"""If set, run pooling score MaxSim on GPU in the API server process.
|
||||
Can significantly improve late-interaction scoring performance.
|
||||
https://github.com/vllm-project/vllm/pull/35330"""
|
||||
|
||||
@classmethod
|
||||
def _customize_cli_kwargs(
|
||||
|
||||
Reference in New Issue
Block a user