enginex-vllm-bi100-qwen36/computility-run.yaml

# Run spec: serves the model mounted at /model through vLLM's
# OpenAI-compatible API server. Numeric argument values are quoted so that
# they reach argv as strings rather than YAML ints/floats.
gpu_num: 4
command:
    # Entry point: python3 -m vllm.entrypoints.openai.api_server
    - python3
    - -m
    - vllm.entrypoints.openai.api_server
    # Model location and the model name advertised to API clients.
    - --model
    - /model
    - --served-model-name
    - llm
    - --max-model-len
    - '100000'
    - --gpu-memory-utilization
    - '0.95'
    - --trust-remote-code
    # Same value as gpu_num above; presumably must be kept in sync with it.
    - -tp
    - '4'
    # Only one sequence is scheduled at a time.
    - --max-num-seqs
    - '1'
    - --disable-log-requests
    - --disable-frontend-multiprocessing
    - --max-num-batched-tokens
    - '4096'
    - --enable-chunked-prefill
    - --max-seq-len-to-capture
    - '32768'
    # Tool calling and reasoning output use the Qwen3 parsers.
    - --enable-auto-tool-choice
    - --tool-call-parser
    - qwen3_coder
    - --reasoning-parser
    - qwen3
env:
    - name: VLLM_ENGINE_ITERATION_TIMEOUT_S
      # Quoted: environment variable values are strings, and this keeps the
      # value consistent with the quoted numeric arguments in `command`.
      value: '3600'
Add README and start commands 2026-06-23 17:17:22 +08:00			`gpu_num: 4`
			`command:`
			`- python3`
			`- -m`
			`- vllm.entrypoints.openai.api_server`
			`- --model`
			`- /model`
			`- --served-model-name`
			`- llm`
			`- --max-model-len`
			`- '100000'`
			`- --gpu-memory-utilization`
			`- '0.95'`
			`- --trust-remote-code`
			`- -tp`
			`- '4'`
			`- --max-num-seqs`
			`- '1'`
			`- --disable-log-requests`
			`- --disable-frontend-multiprocessing`
			`- --max-num-batched-tokens`
			`- '4096'`
			`- --enable-chunked-prefill`
			`- --max-seq-len-to-capture`
			`- '32768'`
			`- --enable-auto-tool-choice`
			`- --tool-call-parser`
			`- qwen3_coder`
			`- --reasoning-parser`
			`- qwen3`
			`env:`
			`- name: VLLM_ENGINE_ITERATION_TIMEOUT_S`
			`value: 3600`