Add README and start commands

2026-06-23 17:17:22 +08:00
parent b5806731e0
commit 72aa7e690a
3 changed files with 63 additions and 144 deletions
--- a/computility-run.yaml
+++ b/computility-run.yaml
@@ -0,0 +1,33 @@
+gpu_num: 4  # NOTE(review): `-tp` below is also '4'; presumably the two must stay equal — confirm
+command:  # argv list: each flag and its value are separate items, so item order is significant
+    - python3
+    - -m
+    - vllm.entrypoints.openai.api_server
+    - --model
+    - /model  # NOTE(review): presumably the path where the model weights are mounted — confirm
+    - --served-model-name
+    - llm
+    - --max-model-len
+    - '100000'  # numeric arguments are quoted so they load as strings, not integers/floats
+    - --gpu-memory-utilization
+    - '0.95'
+    - --trust-remote-code
+    - -tp
+    - '4'  # same number as `gpu_num` above
+    - --max-num-seqs
+    - '1'
+    - --disable-log-requests
+    - --disable-frontend-multiprocessing
+    - --max-num-batched-tokens
+    - '4096'
+    - --enable-chunked-prefill
+    - --max-seq-len-to-capture
+    - '32768'
+    - --enable-auto-tool-choice
+    - --tool-call-parser
+    - qwen3_coder  # NOTE(review): parser names suggest a Qwen3-family model under /model — confirm
+    - --reasoning-parser
+    - qwen3
+env:  # environment variables for the command, as a list of name/value pairs
+    - name: VLLM_ENGINE_ITERATION_TIMEOUT_S  # NOTE(review): `_S` suffix suggests seconds — confirm
+      value: '3600'  # quoted: env values are strings; unquoted 3600 would load as an integer