diff --git a/docs/source/tutorials/multi_npu.md b/docs/source/tutorials/multi_npu.md index c1e5680..e59b725 100644 --- a/docs/source/tutorials/multi_npu.md +++ b/docs/source/tutorials/multi_npu.md @@ -30,10 +30,10 @@ docker run --rm \ Setup environment variables: ```bash -# Use Modelscope mirror to speed up model download +# Load model from ModelScope to speed up download export VLLM_USE_MODELSCOPE=True -# To avoid NPU out of memory, set `max_split_size_mb` to any value lower than you need to allocate for Qwen2.5-7B-Instruct +# Set `max_split_size_mb` to reduce memory fragmentation and avoid out-of-memory errors export PYTORCH_NPU_ALLOC_CONF=max_split_size_mb:256 ``` diff --git a/docs/source/tutorials/single_npu.md b/docs/source/tutorials/single_npu.md index d5bba0a..63e9331 100644 --- a/docs/source/tutorials/single_npu.md +++ b/docs/source/tutorials/single_npu.md @@ -29,10 +29,10 @@ docker run --rm \ Setup environment variables: ```bash -# Use Modelscope mirror to speed up model download +# Load model from ModelScope to speed up download export VLLM_USE_MODELSCOPE=True -# To avoid NPU out of memory, set `max_split_size_mb` to any value lower than you need to allocate for Qwen2.5-7B-Instruct +# Set `max_split_size_mb` to reduce memory fragmentation and avoid out-of-memory errors export PYTORCH_NPU_ALLOC_CONF=max_split_size_mb:256 ``` diff --git a/docs/source/tutorials/single_npu_multimodal.md b/docs/source/tutorials/single_npu_multimodal.md index 2d2d531..c893090 100644 --- a/docs/source/tutorials/single_npu_multimodal.md +++ b/docs/source/tutorials/single_npu_multimodal.md @@ -29,10 +29,10 @@ docker run --rm \ Setup environment variables: ```bash -# Use Modelscope mirror to speed up model download +# Load model from ModelScope to speed up download export VLLM_USE_MODELSCOPE=True -# To avoid NPU out of memory, set `max_split_size_mb` to any value lower than you need to allocate for Qwen2.5-7B-Instruct +# Set `max_split_size_mb` to reduce memory fragmentation and avoid out-of-memory 
errors export PYTORCH_NPU_ALLOC_CONF=max_split_size_mb:256 ```