diff --git a/docs/source/developer_guide/contribution/testing.md b/docs/source/developer_guide/contribution/testing.md index d1773086..7781bf18 100644 --- a/docs/source/developer_guide/contribution/testing.md +++ b/docs/source/developer_guide/contribution/testing.md @@ -66,6 +66,7 @@ export DEVICE=/dev/davinci0 export IMAGE=quay.io/ascend/vllm-ascend:main docker run --rm \ --name vllm-ascend \ + --shm-size=1g \ --device $DEVICE \ --device /dev/davinci_manager \ --device /dev/devmm_svm \ @@ -101,6 +102,7 @@ pip install -r requirements-dev.txt export IMAGE=quay.io/ascend/vllm-ascend:main docker run --rm \ --name vllm-ascend \ + --shm-size=1g \ --device /dev/davinci0 \ --device /dev/davinci1 \ --device /dev/davinci2 \ diff --git a/docs/source/developer_guide/evaluation/using_evalscope.md b/docs/source/developer_guide/evaluation/using_evalscope.md index 859f1c20..24488c64 100644 --- a/docs/source/developer_guide/evaluation/using_evalscope.md +++ b/docs/source/developer_guide/evaluation/using_evalscope.md @@ -13,6 +13,7 @@ export DEVICE=/dev/davinci7 # Update the vllm-ascend image export IMAGE=quay.io/ascend/vllm-ascend:|vllm_ascend_version| docker run --rm \ +--shm-size=1g \ --name vllm-ascend \ --device $DEVICE \ --device /dev/davinci_manager \ diff --git a/docs/source/developer_guide/evaluation/using_lm_eval.md b/docs/source/developer_guide/evaluation/using_lm_eval.md index 799eff1c..2ceadd3f 100644 --- a/docs/source/developer_guide/evaluation/using_lm_eval.md +++ b/docs/source/developer_guide/evaluation/using_lm_eval.md @@ -13,6 +13,7 @@ export DEVICE=/dev/davinci7 export IMAGE=quay.io/ascend/vllm-ascend:|vllm_ascend_version| docker run --rm \ --name vllm-ascend \ +--shm-size=1g \ --device $DEVICE \ --device /dev/davinci_manager \ --device /dev/devmm_svm \ @@ -141,6 +142,7 @@ export DEVICE=/dev/davinci7 export IMAGE=quay.io/ascend/vllm-ascend:|vllm_ascend_version| docker run --rm \ --name vllm-ascend \ +--shm-size=1g \ --device $DEVICE \ --device /dev/davinci_manager \ --device /dev/devmm_svm \ diff --git a/docs/source/developer_guide/evaluation/using_opencompass.md b/docs/source/developer_guide/evaluation/using_opencompass.md index 4edc292f..0a0d99f6 100644 --- a/docs/source/developer_guide/evaluation/using_opencompass.md +++ b/docs/source/developer_guide/evaluation/using_opencompass.md @@ -13,6 +13,7 @@ export DEVICE=/dev/davinci7 export IMAGE=quay.io/ascend/vllm-ascend:|vllm_ascend_version| docker run --rm \ --name vllm-ascend \ +--shm-size=1g \ --device $DEVICE \ --device /dev/davinci_manager \ --device /dev/devmm_svm \ diff --git a/docs/source/developer_guide/performance/optimization_and_tuning.md b/docs/source/developer_guide/performance/optimization_and_tuning.md index 61e761ab..cfd5e443 100644 --- a/docs/source/developer_guide/performance/optimization_and_tuning.md +++ b/docs/source/developer_guide/performance/optimization_and_tuning.md @@ -14,6 +14,7 @@ export DEVICE=/dev/davinci0 export IMAGE=m.daocloud.io/quay.io/ascend/cann:|cann_image_tag| docker run --rm \ --name performance-test \ +--shm-size=1g \ --device $DEVICE \ --device /dev/davinci_manager \ --device /dev/devmm_svm \ diff --git a/docs/source/developer_guide/performance/performance_benchmark.md b/docs/source/developer_guide/performance/performance_benchmark.md index e08d7693..263950d4 100644 --- a/docs/source/developer_guide/performance/performance_benchmark.md +++ b/docs/source/developer_guide/performance/performance_benchmark.md @@ -12,6 +12,7 @@ export DEVICE=/dev/davinci7 export IMAGE=m.daocloud.io/quay.io/ascend/vllm-ascend:|vllm_ascend_version| docker run --rm \ --name vllm-ascend \ +--shm-size=1g \ --device $DEVICE \ --device /dev/davinci_manager \ --device /dev/devmm_svm \ diff --git a/docs/source/installation.md b/docs/source/installation.md index 03194659..f1256405 100644 --- a/docs/source/installation.md +++ b/docs/source/installation.md @@ -53,6 +53,7 @@ export DEVICE=/dev/davinci7 export IMAGE=quay.io/ascend/cann:|cann_image_tag| docker run --rm \ --name vllm-ascend-env \ + --shm-size=1g \ --device $DEVICE \ --device /dev/davinci_manager \ --device /dev/devmm_svm \ @@ -207,6 +208,7 @@ export DEVICE=/dev/davinci7 export IMAGE=quay.io/ascend/vllm-ascend:|vllm_ascend_version| docker run --rm \ --name vllm-ascend-env \ + --shm-size=1g \ --device $DEVICE \ --device /dev/davinci_manager \ --device /dev/devmm_svm \ diff --git a/docs/source/quick_start.md b/docs/source/quick_start.md index d61394fa..95f7f9d1 100644 --- a/docs/source/quick_start.md +++ b/docs/source/quick_start.md @@ -23,6 +23,7 @@ export DEVICE=/dev/davinci0 export IMAGE=quay.io/ascend/vllm-ascend:|vllm_ascend_version| docker run --rm \ --name vllm-ascend \ +--shm-size=1g \ --device $DEVICE \ --device /dev/davinci_manager \ --device /dev/devmm_svm \ @@ -52,6 +53,7 @@ export DEVICE=/dev/davinci0 export IMAGE=quay.io/ascend/vllm-ascend:|vllm_ascend_version|-openeuler docker run --rm \ --name vllm-ascend \ +--shm-size=1g \ --device $DEVICE \ --device /dev/davinci_manager \ --device /dev/devmm_svm \ diff --git a/docs/source/tutorials/multi-node_dsv3.2.md b/docs/source/tutorials/multi-node_dsv3.2.md index 39633e44..5bc57c96 100644 --- a/docs/source/tutorials/multi-node_dsv3.2.md +++ b/docs/source/tutorials/multi-node_dsv3.2.md @@ -105,6 +105,7 @@ export NAME=vllm-ascend docker run --rm \ --name $NAME \ --net=host \ +--shm-size=1g \ --device /dev/davinci0 \ --device /dev/davinci1 \ --device /dev/davinci2 \ @@ -145,6 +146,7 @@ export NAME=vllm-ascend docker run --rm \ --name $NAME \ --net=host \ +--shm-size=1g \ --device /dev/davinci0 \ --device /dev/davinci1 \ --device /dev/davinci2 \ diff --git a/docs/source/tutorials/multi_node.md b/docs/source/tutorials/multi_node.md index 00a61a67..5f7ae3db 100644 --- a/docs/source/tutorials/multi_node.md +++ b/docs/source/tutorials/multi_node.md @@ -70,6 +70,7 @@ export NAME=vllm-ascend docker run --rm \ --name $NAME \ --net=host \ +--shm-size=1g \ --device /dev/davinci0 \ --device /dev/davinci1 \ --device /dev/davinci2 \ diff --git a/docs/source/tutorials/multi_node_kimi.md b/docs/source/tutorials/multi_node_kimi.md index 09ee2ca2..b8502a39 100644 --- a/docs/source/tutorials/multi_node_kimi.md +++ b/docs/source/tutorials/multi_node_kimi.md @@ -18,6 +18,7 @@ export NAME=vllm-ascend docker run --rm \ --name $NAME \ --net=host \ +--shm-size=1g \ --device /dev/davinci0 \ --device /dev/davinci1 \ --device /dev/davinci2 \ diff --git a/docs/source/tutorials/multi_node_qwen3vl.md b/docs/source/tutorials/multi_node_qwen3vl.md index 4ebedfaf..25be3281 100644 --- a/docs/source/tutorials/multi_node_qwen3vl.md +++ b/docs/source/tutorials/multi_node_qwen3vl.md @@ -18,6 +18,7 @@ export IMAGE=quay.io/ascend/vllm-ascend:|vllm_ascend_version| docker run --rm \ --name vllm-ascend \ --net=host \ +--shm-size=1g \ --device /dev/davinci0 \ --device /dev/davinci1 \ --device /dev/davinci2 \ diff --git a/docs/source/tutorials/multi_node_ray.md b/docs/source/tutorials/multi_node_ray.md index ad1a8d61..d8ae41bb 100644 --- a/docs/source/tutorials/multi_node_ray.md +++ b/docs/source/tutorials/multi_node_ray.md @@ -65,6 +65,7 @@ export NAME=vllm-ascend docker run --rm \ --name $NAME \ --net=host \ +--shm-size=1g \ --device /dev/davinci0 \ --device /dev/davinci1 \ --device /dev/davinci2 \ diff --git a/docs/source/tutorials/multi_npu.md b/docs/source/tutorials/multi_npu.md index e59b7256..cbca7559 100644 --- a/docs/source/tutorials/multi_npu.md +++ b/docs/source/tutorials/multi_npu.md @@ -10,6 +10,7 @@ Run docker container: export IMAGE=quay.io/ascend/vllm-ascend:|vllm_ascend_version| docker run --rm \ --name vllm-ascend \ +--shm-size=1g \ --device /dev/davinci0 \ --device /dev/davinci1 \ --device /dev/davinci2 \ diff --git a/docs/source/tutorials/multi_npu_moge.md b/docs/source/tutorials/multi_npu_moge.md index 7fc85e10..135cdab7 100644 --- a/docs/source/tutorials/multi_npu_moge.md +++ b/docs/source/tutorials/multi_npu_moge.md @@ -10,6 +10,7 @@ Run container: export IMAGE=quay.io/ascend/vllm-ascend:|vllm_ascend_version| docker run --rm \ --name vllm-ascend \ +--shm-size=1g \ --device /dev/davinci0 \ --device /dev/davinci1 \ --device /dev/davinci2 \ diff --git a/docs/source/tutorials/multi_npu_quantization.md b/docs/source/tutorials/multi_npu_quantization.md index 63bc489f..a1f3a3be 100644 --- a/docs/source/tutorials/multi_npu_quantization.md +++ b/docs/source/tutorials/multi_npu_quantization.md @@ -11,6 +11,7 @@ w8a8 quantization feature is supported by v0.8.4rc2 or higher export IMAGE=m.daocloud.io/quay.io/ascend/vllm-ascend:|vllm_ascend_version| docker run --rm \ --name vllm-ascend \ +--shm-size=1g \ --device /dev/davinci0 \ --device /dev/davinci1 \ --device /dev/davinci2 \ diff --git a/docs/source/tutorials/multi_npu_qwen3_moe.md b/docs/source/tutorials/multi_npu_qwen3_moe.md index 4ac9bcad..4b163ec2 100644 --- a/docs/source/tutorials/multi_npu_qwen3_moe.md +++ b/docs/source/tutorials/multi_npu_qwen3_moe.md @@ -10,6 +10,7 @@ Run docker container: export IMAGE=quay.io/ascend/vllm-ascend:|vllm_ascend_version| docker run --rm \ --name vllm-ascend \ +--shm-size=1g \ --device /dev/davinci0 \ --device /dev/davinci1 \ --device /dev/davinci2 \ diff --git a/docs/source/tutorials/multi_npu_qwen3_next.md b/docs/source/tutorials/multi_npu_qwen3_next.md index 4fa58611..d7e9d736 100644 --- a/docs/source/tutorials/multi_npu_qwen3_next.md +++ b/docs/source/tutorials/multi_npu_qwen3_next.md @@ -13,6 +13,7 @@ Run docker container: # Update the vllm-ascend image export IMAGE=quay.io/ascend/vllm-ascend:|vllm_ascend_version| docker run --rm \ +--shm-size=1g \ --name vllm-ascend-qwen3 \ --device /dev/davinci0 \ --device /dev/davinci1 \ diff --git a/docs/source/tutorials/single_node_300i.md b/docs/source/tutorials/single_node_300i.md index 4109495f..f877591a 100644 --- a/docs/source/tutorials/single_node_300i.md +++ b/docs/source/tutorials/single_node_300i.md @@ -17,6 +17,7 @@ Run docker container: export IMAGE=quay.io/ascend/vllm-ascend:v0.10.0rc1-310p docker run --rm \ --name vllm-ascend \ +--shm-size=1g \ --device /dev/davinci0 \ --device /dev/davinci1 \ --device /dev/davinci2 \ diff --git a/docs/source/tutorials/single_npu.md b/docs/source/tutorials/single_npu.md index fc8a2669..e1ec1ce3 100644 --- a/docs/source/tutorials/single_npu.md +++ b/docs/source/tutorials/single_npu.md @@ -12,6 +12,7 @@ Run docker container: export IMAGE=quay.io/ascend/vllm-ascend:|vllm_ascend_version| docker run --rm \ --name vllm-ascend \ +--shm-size=1g \ --device /dev/davinci0 \ --device /dev/davinci_manager \ --device /dev/devmm_svm \ @@ -117,6 +118,7 @@ Run docker container to start the vLLM server on a single NPU: export IMAGE=quay.io/ascend/vllm-ascend:|vllm_ascend_version| docker run --rm \ --name vllm-ascend \ +--shm-size=1g \ --device /dev/davinci0 \ --device /dev/davinci_manager \ --device /dev/devmm_svm \ @@ -143,6 +145,7 @@ vllm serve Qwen/Qwen3-8B --max_model_len 26240 export IMAGE=quay.io/ascend/vllm-ascend:|vllm_ascend_version| docker run --rm \ --name vllm-ascend \ +--shm-size=1g \ --device /dev/davinci0 \ --device /dev/davinci_manager \ --device /dev/devmm_svm \ diff --git a/docs/source/tutorials/single_npu_audio.md b/docs/source/tutorials/single_npu_audio.md index 137d7611..c04c4882 100644 --- a/docs/source/tutorials/single_npu_audio.md +++ b/docs/source/tutorials/single_npu_audio.md @@ -12,6 +12,7 @@ Run docker container: export IMAGE=quay.io/ascend/vllm-ascend:|vllm_ascend_version| docker run --rm \ --name vllm-ascend \ +--shm-size=1g \ --device /dev/davinci0 \ --device /dev/davinci_manager \ --device /dev/devmm_svm \ diff --git a/docs/source/tutorials/single_npu_multimodal.md b/docs/source/tutorials/single_npu_multimodal.md index a678ec71..45aeeaa7 100644 --- a/docs/source/tutorials/single_npu_multimodal.md +++ b/docs/source/tutorials/single_npu_multimodal.md @@ -12,6 +12,7 @@ Run docker container: export IMAGE=quay.io/ascend/vllm-ascend:|vllm_ascend_version| docker run --rm \ --name vllm-ascend \ +--shm-size=1g \ --device /dev/davinci0 \ --device /dev/davinci_manager \ --device /dev/devmm_svm \ @@ -128,6 +129,7 @@ Run docker container to start the vLLM server on a single NPU: export IMAGE=quay.io/ascend/vllm-ascend:|vllm_ascend_version| docker run --rm \ --name vllm-ascend \ +--shm-size=1g \ --device /dev/davinci0 \ --device /dev/davinci_manager \ --device /dev/devmm_svm \ diff --git a/docs/source/tutorials/single_npu_qwen3_embedding.md b/docs/source/tutorials/single_npu_qwen3_embedding.md index b4325097..4462d8eb 100644 --- a/docs/source/tutorials/single_npu_qwen3_embedding.md +++ b/docs/source/tutorials/single_npu_qwen3_embedding.md @@ -12,6 +12,7 @@ Take Qwen3-Embedding-8B model as an example, first run the docker container with export IMAGE=quay.io/ascend/vllm-ascend:|vllm_ascend_version| docker run --rm \ --name vllm-ascend \ +--shm-size=1g \ --device /dev/davinci0 \ --device /dev/davinci_manager \ --device /dev/devmm_svm \ diff --git a/docs/source/tutorials/single_npu_qwen3_quantization.md b/docs/source/tutorials/single_npu_qwen3_quantization.md index 30924808..46b84322 100644 --- a/docs/source/tutorials/single_npu_qwen3_quantization.md +++ b/docs/source/tutorials/single_npu_qwen3_quantization.md @@ -11,6 +11,7 @@ w4a8 quantization feature is supported by v0.9.1rc2 or higher export IMAGE=m.daocloud.io/quay.io/ascend/vllm-ascend:|vllm_ascend_version| docker run --rm \ --name vllm-ascend \ +--shm-size=1g \ --device /dev/davinci0 \ --device /dev/davinci_manager \ --device /dev/devmm_svm \