Enable prefix caching and lower GPU memory utilization from 0.95 to 0.9

This commit is contained in:
2026-06-26 13:27:52 +08:00
parent c84151eef9
commit 810874ddb8

View File

@@ -10,7 +10,7 @@ command:
- --max-model-len - --max-model-len
- '100000' - '100000'
- --gpu-memory-utilization - --gpu-memory-utilization
- '0.95' - '0.9'
- --trust-remote-code - --trust-remote-code
- -tp - -tp
- '4' - '4'
@@ -28,6 +28,7 @@ command:
- qwen3_coder - qwen3_coder
- --reasoning-parser - --reasoning-parser
- qwen3 - qwen3
- --enable-prefix-caching
env: env:
- name: VLLM_ENGINE_ITERATION_TIMEOUT_S - name: VLLM_ENGINE_ITERATION_TIMEOUT_S
value: 3600 value: 3600