diff --git a/docs/source/conf.py b/docs/source/conf.py
index 393e674..40a1ad9 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -61,14 +61,17 @@ myst_enable_extensions = ["colon_fence", "substitution"]
 # Change this when cut down release
 myst_substitutions = {
     # the branch of vllm, used in vllm clone
-    # such as 'main', 'v0.7.1'
+    # - main branch: 'main'
+    # - vX.Y.Z branch: 'vX.Y.Z'
     'vllm_version': 'main',
     # the branch of vllm-ascend, used in vllm-ascend clone and image tag
-    # such as 'main', 'v0.7.1-dev', 'v0.7.1rc1'
+    # - main branch: 'main'
+    # - vX.Y.Z branch: latest vllm-ascend release tag
     'vllm_ascend_version': 'main',
-    # the newest release version of vllm, used in quick start or container image tag.
+    # the newest release version of vllm-ascend and matched vLLM, used in pip install.
     # This value should be updated when cut down release.
-    'vllm_newest_release_version': "v0.7.1rc1",
+    'pip_vllm_ascend_version': "v0.7.1rc1",
+    'pip_vllm_version': "v0.7.1",
 }
 
 # Add any paths that contain templates here, relative to this directory.
diff --git a/docs/source/installation.md b/docs/source/installation.md
index ea0c26c..ae1caa9 100644
--- a/docs/source/installation.md
+++ b/docs/source/installation.md
@@ -114,15 +114,18 @@ Once it's done, you can start to set up `vllm` and `vllm-ascend`.
 
 You can install `vllm` and `vllm-ascend` from **pre-built wheel**:
 
-```bash
+```{code-block} bash
+   :substitutions:
+
 # Install vllm from source, since `pip install vllm` doesn't work on CPU currently.
 # It'll be fixed in the next vllm release, e.g. v0.7.3.
-git clone --branch v0.7.1 https://github.com/vllm-project/vllm
+git clone --branch |pip_vllm_version| https://github.com/vllm-project/vllm
+
 cd vllm
 VLLM_TARGET_DEVICE=empty pip install . --extra-index https://download.pytorch.org/whl/cpu/
 
 # Install vllm-ascend from pypi.
-pip install vllm-ascend --extra-index https://download.pytorch.org/whl/cpu/
+pip install vllm-ascend==|pip_vllm_ascend_version| --extra-index https://download.pytorch.org/whl/cpu/
 
 # Once the packages are installed, you need to install `torch-npu` manually,
 # because that vllm-ascend relies on an unreleased version of torch-npu.
@@ -146,7 +149,7 @@ or build from **source code**:
 
 ```{code-block} bash
    :substitutions:
-git clone --depth 1 --branch |vllm_version| https://github.com/vllm-project/vllm 
+git clone --depth 1 --branch |vllm_version| https://github.com/vllm-project/vllm
 cd vllm
 VLLM_TARGET_DEVICE=empty pip install . --extra-index https://download.pytorch.org/whl/cpu/
 
diff --git a/docs/source/quick_start.md b/docs/source/quick_start.md
index d3a7fd2..ff3a9f3 100644
--- a/docs/source/quick_start.md
+++ b/docs/source/quick_start.md
@@ -12,7 +12,7 @@
 :substitutions:
 
 # You can change version a suitable one base on your requirement, e.g. main
-export IMAGE=quay.io/ascend:|vllm_newest_release_version|
+export IMAGE=quay.io/ascend/vllm-ascend:|vllm_ascend_version|
 
 docker run \
 --name vllm-ascend \
diff --git a/docs/source/tutorials.md b/docs/source/tutorials.md
index d400855..9428a32 100644
--- a/docs/source/tutorials.md
+++ b/docs/source/tutorials.md
@@ -6,7 +6,8 @@
 
 Run docker container:
 
-```bash
+```{code-block} bash
+   :substitutions:
 docker run \
 --name vllm-ascend \
 --device /dev/davinci0 \
@@ -20,7 +21,7 @@ docker run \
 -v /etc/ascend_install.info:/etc/ascend_install.info \
 -v /root/.cache:/root/.cache \
 -p 8000:8000 \
--it quay.io/ascend/vllm-ascend:v0.7.1rc1 bash
+-it quay.io/ascend/vllm-ascend:|vllm_ascend_version| bash
 ```
 
 Setup environment variables:
@@ -67,7 +68,9 @@ Prompt: 'The future of AI is', Generated text: ' following you. As the technolog
 
 Run docker container to start the vLLM server on a single NPU:
 
-```bash
+```{code-block} bash
+   :substitutions:
+
 docker run \
 --name vllm-ascend \
 --device /dev/davinci0 \
@@ -83,7 +86,7 @@ docker run \
 -p 8000:8000 \
 -e VLLM_USE_MODELSCOPE=True \
 -e PYTORCH_NPU_ALLOC_CONF=max_split_size_mb:256 \
--it quay.io/ascend/vllm-ascend:v0.7.1rc1 \
+-it quay.io/ascend/vllm-ascend:|vllm_ascend_version| \
 vllm serve Qwen/Qwen2.5-7B-Instruct --max_model_len 26240
 ```
 
@@ -131,7 +134,9 @@ INFO 02-13 08:34:35 logger.py:39] Received request cmpl-574f00e342904692a73fb6c1
 
 Run docker container:
 
-```bash
+```{code-block} bash
+   :substitutions:
+
 docker run \
 --name vllm-ascend \
 --device /dev/davinci0 \
@@ -146,7 +151,7 @@ docker run \
 -v /etc/ascend_install.info:/etc/ascend_install.info \
 -v /root/.cache:/root/.cache \
 -p 8000:8000 \
--it quay.io/ascend/vllm-ascend:v0.7.1rc1 bash
+-it quay.io/ascend/vllm-ascend:|vllm_ascend_version| bash
 ```
 
 Setup environment variables: