From 810874ddb81313138f1ead7142f48050da18183f Mon Sep 17 00:00:00 2001 From: Lu Xinlong Date: Fri, 26 Jun 2026 13:27:52 +0800 Subject: [PATCH] enable prefix caching and lower gpu-memory-utilization to 0.9 --- computility-run.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/computility-run.yaml b/computility-run.yaml index ae58bdf..6447abd 100644 --- a/computility-run.yaml +++ b/computility-run.yaml @@ -10,7 +10,7 @@ command: - --max-model-len - '100000' - --gpu-memory-utilization - - '0.95' + - '0.9' - --trust-remote-code - -tp - '4' @@ -28,6 +28,7 @@ command: - qwen3_coder - --reasoning-parser - qwen3 + - --enable-prefix-caching env: - name: VLLM_ENGINE_ITERATION_TIMEOUT_S value: 3600