Add README and start commands
This commit is contained in:
33
computility-run.yaml
Normal file
33
computility-run.yaml
Normal file
@@ -0,0 +1,33 @@
|
||||
# Run spec that serves the model mounted at /model through vLLM's
# OpenAI-compatible API server, published under the model name "llm".

# Number of GPUs requested for the job. The -tp argument in the command
# below is set to the same number; change the two together.
gpu_num: 4

# Server start command, one argv token per list item. Numeric arguments are
# quoted so YAML keeps them as strings instead of converting them to numbers.
command:
  - python3
  - -m
  - vllm.entrypoints.openai.api_server
  - --model
  - /model
  - --served-model-name
  - llm
  - --max-model-len
  - '100000'
  - --gpu-memory-utilization
  - '0.95'
  - --trust-remote-code
  - -tp
  - '4'
  - --max-num-seqs
  - '1'
  - --disable-log-requests
  - --disable-frontend-multiprocessing
  - --max-num-batched-tokens
  - '4096'
  - --enable-chunked-prefill
  - --max-seq-len-to-capture
  - '32768'
  - --enable-auto-tool-choice
  - --tool-call-parser
  - qwen3_coder
  - --reasoning-parser
  - qwen3
  # NOTE(review): which of the flags above are accepted depends on the
  # installed vLLM version -- confirm against the image this spec runs in.

# Environment variables passed to the server process.
env:
  # The value is quoted for the same reason as the numeric arguments above:
  # environment variable values are strings, and an unquoted 3600 is read by
  # YAML as an integer.
  # NOTE(review): assumes the platform's env schema accepts (or requires) a
  # string value, as Kubernetes-style specs do -- confirm.
  - name: VLLM_ENGINE_ITERATION_TIMEOUT_S
    value: '3600'
|
||||
Reference in New Issue
Block a user