From 30d08ced2d46d1d2fd548a498cbd36e82c6b8cd0 Mon Sep 17 00:00:00 2001 From: pz1116 <47019764+Pz1116@users.noreply.github.com> Date: Thu, 23 Apr 2026 12:07:47 +0800 Subject: [PATCH] [Doc][0.18.0] Fix kv pool CLI flag typo and formatting (#8608) ### What this PR does / why we need it? Fix kv pool CLI flag typo and formatting ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? Signed-off-by: Pz1116 --- .../user_guide/feature_guide/kv_pool.md | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/docs/source/user_guide/feature_guide/kv_pool.md b/docs/source/user_guide/feature_guide/kv_pool.md index 8edb4d73..886bcbb9 100644 --- a/docs/source/user_guide/feature_guide/kv_pool.md +++ b/docs/source/user_guide/feature_guide/kv_pool.md @@ -153,7 +153,7 @@ The content of the multi_producer.sh script: ```shell export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH -export PYTHONHASHSEED=0  +export PYTHONHASHSEED=0 export PYTHONPATH=$PYTHONPATH:/xxxxx/vllm export MOONCAKE_CONFIG_PATH="/xxxxxx/mooncake.json" export ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 @@ -232,7 +232,7 @@ The content of multi_consumer.sh: ```shell export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH export PYTHONPATH=$PYTHONPATH:/xxxxx/vllm -export PYTHONHASHSEED=0  +export PYTHONHASHSEED=0 export MOONCAKE_CONFIG_PATH="/xxxxx/mooncake.json" export ASCEND_RT_VISIBLE_DEVICES=4,5,6,7 export ACL_OP_INIT_MODE=1 @@ -349,7 +349,7 @@ export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packa export PYTHONPATH=$PYTHONPATH:/xxxxx/vllm export MOONCAKE_CONFIG_PATH="/xxxxxx/mooncake.json" export ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 -export PYTHONHASHSEED=0  +export PYTHONHASHSEED=0 export ACL_OP_INIT_MODE=1 #A3 export ASCEND_ENABLE_USE_FABRIC_MEM=1 @@ -415,10 +415,10 @@ This is because HCCL one-sided communication connections are created lazily afte ### Configuring the memcache Config File - config Path:/usr/local/memcache_hybrid/latest/config/ -    **Config file parameters description**: +config Path:/usr/local/memcache_hybrid/latest/config/ +**Config file parameters description**: -    Set TLS certificate configurations. If TLS is disabled, you do not need to upload a certificate. If TLS is enabled, you need to upload a certificate. +Set TLS certificate configurations. If TLS is disabled, you do not need to upload a certificate. If TLS is enabled, you need to upload a certificate. ```shell # mmc-meta.conf @@ -650,7 +650,7 @@ vllm serve xxxxxxx/Qwen3-32B \ --max-num-batched-tokens 16384 \ --trust-remote-code \ --gpu-memory-utilization 0.9 \ - --max-num_seqs 20 \ + --max-num-seqs 20 \ --no-enable-prefix-caching \ --kv-transfer-config \ '{ @@ -729,7 +729,7 @@ vllm serve xxxxxxx/Qwen3-32B \ --max-num-batched-tokens 16384 \ --trust-remote-code \ --gpu-memory-utilization 0.9 \ - --max-num_seqs 20 \ + --max-num-seqs 20 \ --no-enable-prefix-caching \ --kv-transfer-config \ '{ @@ -796,7 +796,7 @@ python -m vllm.entrypoints.openai.api_server \ --data-parallel-size 2 \ --tensor-parallel-size 8 \ --port 30050 \ - --max-num_seqs 20 \ + --max-num-seqs 20 \ --max-model-len 32768 \ --max-num-batched-tokens 16384 \ --enable_expert_parallel \ @@ -869,7 +869,7 @@ python -m vllm.entrypoints.openai.api_server \ --enforce-eager\ --quantization ascend \ --no-enable-prefix-caching \ - --max-num_seqs 20 \ + --max-num-seqs 20 \ --speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}' \ --enable_expert_parallel \ --gpu-memory-utilization 0.9 \ @@ -967,7 +967,7 @@ vllm serve xxxxxxx/DeepSeek-R1 \ --trust-remote-code \ --gpu-memory-utilization 0.9 \ --quantization ascend \ - --max-num_seqs 20 \ + --max-num-seqs 20 \ --enable-expert-parallel \ --no-enable-prefix-caching \ --kv-transfer-config \ @@ -1032,7 +1032,7 @@ vllm serve xxxxxxx/DeepSeek-R1 \ --trust-remote-code \ --gpu-memory-utilization 0.9 \ --quantization ascend \ - --max-num_seqs 20 \ + --max-num-seqs 20 \ --enable-expert-parallel \ --no-enable-prefix-caching \ --kv-transfer-config \ @@ -1081,7 +1081,7 @@ python -m vllm.entrypoints.openai.api_server \ -dp 2 \ -tp 8 \ --port 30050 \ - --max-num_seqs 20 \ + --max-num-seqs 20 \ --max-model-len 32768 \ --max-num-batched-tokens 16384 \ --speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}' \