diff --git a/docs/source/conf.py b/docs/source/conf.py index be147ca..5d8f199 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -67,10 +67,10 @@ myst_substitutions = { # the branch of vllm-ascend, used in vllm-ascend clone and image tag # - main branch: 'main' # - vX.Y.Z branch: latest vllm-ascend release tag - 'vllm_ascend_version': 'v0.8.4rc1', + 'vllm_ascend_version': 'v0.8.4rc2', # the newest release version of vllm-ascend and matched vLLM, used in pip install. # This value should be updated when cut down release. - 'pip_vllm_ascend_version': "0.8.4rc1", + 'pip_vllm_ascend_version': "0.8.4rc2", 'pip_vllm_version': "0.8.4", # CANN image tag 'cann_image_tag': "8.0.0-910b-ubuntu22.04-py3.10", diff --git a/docs/source/developer_guide/versioning_policy.md b/docs/source/developer_guide/versioning_policy.md index ec5ff80..bfa4b50 100644 --- a/docs/source/developer_guide/versioning_policy.md +++ b/docs/source/developer_guide/versioning_policy.md @@ -80,6 +80,7 @@ Following is the Release Compatibility Matrix for vLLM Ascend Plugin: | vllm-ascend | vLLM | Python | Stable CANN | PyTorch/torch_npu | |--------------|--------------|----------------| --- | --- | +| v0.8.4rc2 | v0.8.4 | >= 3.9, < 3.12 | 8.0.0 | 2.5.1 / 2.5.1 | | v0.8.4rc1 | v0.8.4 | >= 3.9, < 3.12 | 8.0.0 | 2.5.1 / 2.5.1.dev20250320 | | v0.7.3rc2 | v0.7.3 | >= 3.9, < 3.12 | 8.0.0 | 2.5.1 / 2.5.1.dev20250320 | | v0.7.3rc1 | v0.7.3 | >= 3.9, < 3.12 | 8.0.0 | 2.5.1 / 2.5.1.dev20250308 | @@ -92,6 +93,7 @@ Following is the Release Compatibility Matrix for vLLM Ascend Plugin: | Date | Event | |------------|-------------------------------------------| | End of 2025.04 | v0.7.x Final release, v0.7.3 | +| 2025.04.28 | Release candidates, v0.8.4rc2 | | 2025.04.18 | Release candidates, v0.8.4rc1 | | 2025.03.28 | Release candidates, v0.7.3rc2 | | 2025.03.14 | Release candidates, v0.7.3rc1 | diff --git a/docs/source/faqs.md b/docs/source/faqs.md index fbe9883..288689b 100644 --- a/docs/source/faqs.md +++ b/docs/source/faqs.md @@ -6,6 +6,7 @@ - [[v0.7.3rc1] FAQ & Feedback](https://github.com/vllm-project/vllm-ascend/issues/267) - [[v0.7.3rc2] FAQ & Feedback](https://github.com/vllm-project/vllm-ascend/issues/418) - [[v0.8.4rc1] FAQ & Feedback](https://github.com/vllm-project/vllm-ascend/issues/546) +- [[v0.8.4rc2] FAQ & Feedback](https://github.com/vllm-project/vllm-ascend/issues/707) ## General FAQs diff --git a/docs/source/user_guide/release_notes.md b/docs/source/user_guide/release_notes.md index 6cef9de..7597a13 100644 --- a/docs/source/user_guide/release_notes.md +++ b/docs/source/user_guide/release_notes.md @@ -1,5 +1,25 @@ # Release note +## v0.8.4rc2 + +This is the second release candidate of v0.8.4 for vllm-ascend. Please follow the [official doc](https://vllm-ascend.readthedocs.io/en/) to start the journey. Some experimental features are included in this version, such as W8A8 quantization and EP/DP support. We'll make them stable enough in the next release. + +### Highlights +- Qwen3 and Qwen3MOE is supported now. Please follow the [official doc](https://vllm-ascend.readthedocs.io/en/latest/tutorials/single_npu.html) to run the quick demo. [#709](https://github.com/vllm-project/vllm-ascend/pull/709) +- Ascend W8A8 quantization method is supported now. Please take the [official doc](https://vllm-ascend.readthedocs.io/en/latest/tutorials/multi_npu_quantization.html) for example. Any [feedback](https://github.com/vllm-project/vllm-ascend/issues/619) is welcome. [#580](https://github.com/vllm-project/vllm-ascend/pull/580) +- DeepSeek V3/R1 works with DP, TP and MTP now. Please note that it's still in experimental status. Let us know if you hit any problem. [#429](https://github.com/vllm-project/vllm-ascend/pull/429) [#585](https://github.com/vllm-project/vllm-ascend/pull/585) [#626](https://github.com/vllm-project/vllm-ascend/pull/626) [#636](https://github.com/vllm-project/vllm-ascend/pull/636) [#671](https://github.com/vllm-project/vllm-ascend/pull/671) + +### Core +- ACLGraph feature is supported with V1 engine now. It's disabled by default because this feature rely on CANN 8.1 release. We'll make it avaiable by default in the next release [#426](https://github.com/vllm-project/vllm-ascend/pull/426) +- Upgrade PyTorch to 2.5.1. vLLM Ascend no longer relies on the dev version of torch-npu now. Now users don't need to install the torch-npu by hand. The 2.5.1 version of torch-npu will be installed automaticlly. [#661](https://github.com/vllm-project/vllm-ascend/pull/661) + +### Other +- MiniCPM model works now. [#645](https://github.com/vllm-project/vllm-ascend/pull/645) +- openEuler container image supported with `v0.8.4-openeuler` tag and customs Ops build is enabled by default for openEuler OS. [#689](https://github.com/vllm-project/vllm-ascend/pull/689) +- Fix ModuleNotFoundError bug to make Lora work [#600](https://github.com/vllm-project/vllm-ascend/pull/600) +- Add "Using EvalScope evaluation" doc [#611](https://github.com/vllm-project/vllm-ascend/pull/611) +- Add a `VLLM_VERSION` environment to make vLLM version configurable to help developer set correct vLLM version if the code of vLLM is changed by hand locally. [#651](https://github.com/vllm-project/vllm-ascend/pull/651) + ## v0.8.4rc1 This is the first release candidate of v0.8.4 for vllm-ascend. Please follow the [official doc](https://vllm-ascend.readthedocs.io/en/) to start the journey. From this version, vllm-ascend will follow the newest version of vllm and release every two weeks. For example, if vllm releases v0.8.5 in the next two weeks, vllm-ascend will release v0.8.5rc1 instead of v0.8.4rc2. Please find the detail from the [official documentation](https://vllm-ascend.readthedocs.io/en/latest/developer_guide/versioning_policy.html#release-window).