diff --git a/computility-run.yaml b/computility-run.yaml index ae58bdf..6447abd 100644 --- a/computility-run.yaml +++ b/computility-run.yaml @@ -10,7 +10,7 @@ command: - --max-model-len - '100000' - --gpu-memory-utilization - - '0.95' + - '0.9' - --trust-remote-code - -tp - '4' @@ -28,6 +28,7 @@ command: - qwen3_coder - --reasoning-parser - qwen3 + - --enable-prefix-caching env: - name: VLLM_ENGINE_ITERATION_TIMEOUT_S value: 3600