Enable prefix caching and lower GPU memory utilization to 0.9

2026-06-26 13:27:52 +08:00
parent c84151eef9
commit 810874ddb8
1 changed file with 2 additions and 1 deletion
--- a/computility-run.yaml
+++ b/computility-run.yaml
@@ -10,7 +10,7 @@ command:
    - --max-model-len
    - '100000'
    - --gpu-memory-utilization
-    - '0.95'
+    - '0.9'
    - --trust-remote-code
    - -tp
    - '4'
@@ -28,6 +28,7 @@ command:
    - qwen3_coder
    - --reasoning-parser
    - qwen3
+    - --enable-prefix-caching
 env:
    - name: VLLM_ENGINE_ITERATION_TIMEOUT_S
      value: 3600