enable prefix caching
This commit is contained in:
@@ -10,7 +10,7 @@ command:
|
||||
- --max-model-len
|
||||
- '100000'
|
||||
- --gpu-memory-utilization
|
||||
- - '0.95'
|
||||
+ - '0.9'
|
||||
- --trust-remote-code
|
||||
- -tp
|
||||
- '4'
|
||||
@@ -28,6 +28,7 @@ command:
|
||||
- qwen3_coder
|
||||
- --reasoning-parser
|
||||
- qwen3
|
||||
+ - --enable-prefix-caching
|
||||
env:
|
||||
- name: VLLM_ENGINE_ITERATION_TIMEOUT_S
|
||||
value: 3600
|
||||
|
||||
Reference in New Issue
Block a user