From 41aba1cfc11c925e75d221bbb8b8121028e4729f Mon Sep 17 00:00:00 2001 From: Li Wang Date: Thu, 13 Mar 2025 15:24:05 +0800 Subject: [PATCH] [Doc]Fix tutorial doc expression (#319) Fix tutorial doc expression Signed-off-by: wangli --- docs/source/tutorials/multi_npu.md | 4 ++-- docs/source/tutorials/single_npu.md | 4 ++-- docs/source/tutorials/single_npu_multimodal.md | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/source/tutorials/multi_npu.md b/docs/source/tutorials/multi_npu.md index c1e5680..e59b725 100644 --- a/docs/source/tutorials/multi_npu.md +++ b/docs/source/tutorials/multi_npu.md @@ -30,10 +30,10 @@ docker run --rm \ Setup environment variables: ```bash -# Use Modelscope mirror to speed up model download +# Load model from ModelScope to speed up download export VLLM_USE_MODELSCOPE=True -# To avoid NPU out of memory, set `max_split_size_mb` to any value lower than you need to allocate for Qwen2.5-7B-Instruct +# Set `max_split_size_mb` to reduce memory fragmentation and avoid out-of-memory errors export PYTORCH_NPU_ALLOC_CONF=max_split_size_mb:256 ``` diff --git a/docs/source/tutorials/single_npu.md b/docs/source/tutorials/single_npu.md index d5bba0a..63e9331 100644 --- a/docs/source/tutorials/single_npu.md +++ b/docs/source/tutorials/single_npu.md @@ -29,10 +29,10 @@ docker run --rm \ Setup environment variables: ```bash -# Use Modelscope mirror to speed up model download +# Load model from ModelScope to speed up download export VLLM_USE_MODELSCOPE=True -# To avoid NPU out of memory, set `max_split_size_mb` to any value lower than you need to allocate for Qwen2.5-7B-Instruct +# Set `max_split_size_mb` to reduce memory fragmentation and avoid out-of-memory errors export PYTORCH_NPU_ALLOC_CONF=max_split_size_mb:256 ``` diff --git a/docs/source/tutorials/single_npu_multimodal.md b/docs/source/tutorials/single_npu_multimodal.md index 2d2d531..c893090 100644 --- a/docs/source/tutorials/single_npu_multimodal.md +++ 
b/docs/source/tutorials/single_npu_multimodal.md @@ -29,10 +29,10 @@ docker run --rm \ Setup environment variables: ```bash -# Use Modelscope mirror to speed up model download +# Load model from ModelScope to speed up download export VLLM_USE_MODELSCOPE=True -# To avoid NPU out of memory, set `max_split_size_mb` to any value lower than you need to allocate for Qwen2.5-7B-Instruct +# Set `max_split_size_mb` to reduce memory fragmentation and avoid out-of-memory errors export PYTORCH_NPU_ALLOC_CONF=max_split_size_mb:256 ```