add qwen3

This commit is contained in:
Chranos
2026-02-04 17:22:39 +08:00
parent d1c0f68ab4
commit 8511fe8530
1932 changed files with 300426 additions and 0 deletions

View File

@@ -0,0 +1,48 @@
#!/bin/bash
#
# Launch the vLLM OpenAI-compatible API server (vllm.entrypoints.openai.api_server)
# and mirror its combined stdout/stderr into output/server/server.log.
#
# Toggles (edit the assignments below; a value > 0 enables the feature):
#   use_ray   - pass --worker-use-ray and run `ray stop --force` beforehand
#   use_pp    - use --pipeline-parallel-size=8; otherwise --tensor-parallel-size=8
#   use_eager - pass --enforce-eager
#
# Exit status: that of the server pipeline (see pipefail below).

# Without pipefail the pipeline's status is tee's, so a crashed server would
# still make this script report success.
set -o pipefail

LOG_DIR="output/server"

# Start every run from an empty log directory. Options come before the operand
# (the original `rm output/server -rf` only works with GNU argument permutation).
rm -rf -- "${LOG_DIR}"
mkdir -p -- "${LOG_DIR}" || {
  echo "cannot create log directory: ${LOG_DIR}" >&2
  exit 1
}

PORT=32345
use_ray=0
use_pp=1
use_eager=0

# Optional flags are collected in an array so that a disabled toggle adds no
# argument at all, without relying on unquoted word-splitting.
extra_args=()
if ((use_eager > 0)); then
  extra_args+=(--enforce-eager)
fi
if ((use_ray > 0)); then
  extra_args+=(--worker-use-ray)
  # NOTE(review): presumably clears Ray processes left over from an earlier
  # run before the server starts new workers -- confirm.
  ray stop --force
fi

# Exported so the server process inherits it; by its name this is vLLM's
# per-iteration engine timeout in seconds.
export VLLM_ENGINE_ITERATION_TIMEOUT_S=180

MODEL_PATH="/data/vllm/sq_per_token_per_channel/deepseek_v2_temp"

# 8-way parallelism: pipeline-parallel when use_pp > 0, tensor-parallel otherwise.
if ((use_pp > 0)); then
  parallel_option="--pipeline-parallel-size=8"
else
  parallel_option="--tensor-parallel-size=8"
fi

server_args=(
  --disable-log-requests
  --port "${PORT}"
  --model "${MODEL_PATH}"
  --trust-remote-code
  --swap-space 16
  "${parallel_option}"
  --max-num-batched-tokens=40960
  --max-model-len=1034
  --block-size=16
  --dtype=bfloat16
  --max-seq-len-to-capture=1034
  --max-num-seqs=40
  --quantization=smoothquant
  "${extra_args[@]}"
)

# stderr is merged into stdout so the log file captures both streams.
python -m vllm.entrypoints.openai.api_server "${server_args[@]}" 2>&1 \
  | tee "${LOG_DIR}/server.log"