From ec27af346ae812f4bd29565d3eb46f0dfc653ba9 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Tue, 6 May 2025 23:46:35 +0800 Subject: [PATCH] [Doc] Add 0.8.5rc1 release note (#756) ### What this PR does / why we need it? Add 0.8.5rc1 release note and bump vllm version to v0.8.5.post1 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? CI passed --------- Signed-off-by: Yikun Jiang --- README.md | 2 +- docs/source/conf.py | 8 +++---- .../developer_guide/versioning_policy.md | 4 +++- docs/source/faqs.md | 1 + docs/source/installation.md | 15 ++---------- docs/source/user_guide/release_notes.md | 23 +++++++++++++++++++ docs/source/user_guide/suppoted_features.md | 18 +++++++++------ 7 files changed, 45 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 9cbaf51..3db2c65 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ By using vLLM Ascend plugin, popular open-source models, including Transformer-l - OS: Linux - Software: * Python >= 3.9, < 3.12 - * CANN >= 8.1.rc1 + * CANN >= 8.1.RC1 * PyTorch >= 2.5.1, torch-npu >= 2.5.1 * vLLM (the same version as vllm-ascend) diff --git a/docs/source/conf.py b/docs/source/conf.py index 1dc45f7..09abc2d 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -63,15 +63,15 @@ myst_substitutions = { # the branch of vllm, used in vllm clone # - main branch: 'main' # - vX.Y.Z branch: 'vX.Y.Z' - 'vllm_version': 'v0.8.4', + 'vllm_version': 'v0.8.5.post1', # the branch of vllm-ascend, used in vllm-ascend clone and image tag # - main branch: 'main' # - vX.Y.Z branch: latest vllm-ascend release tag - 'vllm_ascend_version': 'v0.8.4rc2', + 'vllm_ascend_version': 'v0.8.5rc1', # the newest release version of vllm-ascend and matched vLLM, used in pip install. # This value should be updated when cut down release. 
- 'pip_vllm_ascend_version': "0.8.4rc2", - 'pip_vllm_version': "0.8.4", + 'pip_vllm_ascend_version': "0.8.5rc1", + 'pip_vllm_version': "0.8.5.post1", # CANN image tag 'cann_image_tag': "8.1.rc1-910b-ubuntu22.04-py3.10", } diff --git a/docs/source/developer_guide/versioning_policy.md b/docs/source/developer_guide/versioning_policy.md index bfa4b50..4397890 100644 --- a/docs/source/developer_guide/versioning_policy.md +++ b/docs/source/developer_guide/versioning_policy.md @@ -80,6 +80,7 @@ Following is the Release Compatibility Matrix for vLLM Ascend Plugin: | vllm-ascend | vLLM | Python | Stable CANN | PyTorch/torch_npu | |--------------|--------------|----------------| --- | --- | +| v0.8.5rc1 | v0.8.5.post1 | >= 3.9, < 3.12 | 8.1.RC1 | 2.5.1 / 2.5.1 | | v0.8.4rc2 | v0.8.4 | >= 3.9, < 3.12 | 8.0.0 | 2.5.1 / 2.5.1 | | v0.8.4rc1 | v0.8.4 | >= 3.9, < 3.12 | 8.0.0 | 2.5.1 / 2.5.1.dev20250320 | | v0.7.3rc2 | v0.7.3 | >= 3.9, < 3.12 | 8.0.0 | 2.5.1 / 2.5.1.dev20250320 | @@ -92,7 +93,8 @@ Following is the Release Compatibility Matrix for vLLM Ascend Plugin: | Date | Event | |------------|-------------------------------------------| -| End of 2025.04 | v0.7.x Final release, v0.7.3 | +| Early of 2025.05 | v0.7.x Final release, v0.7.3 | +| 2025.05.06 | Release candidates, v0.8.5rc1 | | 2025.04.28 | Release candidates, v0.8.4rc2 | | 2025.04.18 | Release candidates, v0.8.4rc1 | | 2025.03.28 | Release candidates, v0.7.3rc2 | diff --git a/docs/source/faqs.md b/docs/source/faqs.md index f954098..d4326fa 100644 --- a/docs/source/faqs.md +++ b/docs/source/faqs.md @@ -7,6 +7,7 @@ - [[v0.7.3rc2] FAQ & Feedback](https://github.com/vllm-project/vllm-ascend/issues/418) - [[v0.8.4rc1] FAQ & Feedback](https://github.com/vllm-project/vllm-ascend/issues/546) - [[v0.8.4rc2] FAQ & Feedback](https://github.com/vllm-project/vllm-ascend/issues/707) +- [[v0.8.5rc1] FAQ & Feedback](https://github.com/vllm-project/vllm-ascend/issues/754) ## General FAQs diff --git a/docs/source/installation.md 
b/docs/source/installation.md index 6314e31..8fbe182 100644 --- a/docs/source/installation.md +++ b/docs/source/installation.md @@ -11,7 +11,7 @@ This document describes how to install vllm-ascend manually. | Software | Supported version | Note | |-----------|-------------------|----------------------------------------| - | CANN | >= 8.1.rc1 | Required for vllm-ascend and torch-npu | + | CANN | >= 8.1.RC1 | Required for vllm-ascend and torch-npu | | torch-npu | >= 2.5.1 | Required for vllm-ascend | | torch | >= 2.5.1 | Required for torch-npu and vllm | @@ -135,23 +135,12 @@ Then you can install `vllm` and `vllm-ascend` from **pre-built wheel**: :substitutions: # Install vllm-project/vllm from pypi -# (v0.8.4 aarch64 is unsupported see detail in below note) -# pip install vllm==|pip_vllm_version| -# Install vLLM -git clone --depth 1 --branch |vllm_version| https://github.com/vllm-project/vllm -cd vllm -VLLM_TARGET_DEVICE=empty pip install -v -e . -cd .. +pip install vllm==|pip_vllm_version| # Install vllm-project/vllm-ascend from pypi. pip install vllm-ascend==|pip_vllm_ascend_version| ``` -```{note} -There was a installation bug on vLLM v0.8.4 aarch64: [No matching distribution found for triton](https://github.com/vllm-project/vllm-ascend/issues/581). -If you failed to install vLLM due to it, please build from source code. -``` - :::{dropdown} Click here to see "Build from source code" or build from **source code**: diff --git a/docs/source/user_guide/release_notes.md b/docs/source/user_guide/release_notes.md index 7597a13..a8dce9a 100644 --- a/docs/source/user_guide/release_notes.md +++ b/docs/source/user_guide/release_notes.md @@ -1,5 +1,28 @@ # Release note +## v0.8.5rc1 + +This is the 1st release candidate of v0.8.5 for vllm-ascend. Please follow the [official doc](https://vllm-ascend.readthedocs.io/en/) to start the journey. 
Now you can enable the V1 engine by setting the environment variable `VLLM_USE_V1=1`, see the feature support status of vLLM Ascend [here](https://vllm-ascend.readthedocs.io/en/latest/user_guide/suppoted_features.html). + +### Highlights +- Upgrade CANN version to 8.1.RC1 to support chunked prefill and automatic prefix caching (`--enable_prefix_caching`) when V1 is enabled [#747](https://github.com/vllm-project/vllm-ascend/pull/747) +- Optimize Qwen2 VL and Qwen 2.5 VL [#701](https://github.com/vllm-project/vllm-ascend/pull/701) +- Improve Deepseek V3 eager mode and graph mode performance; now you can use `--additional_config={'enable_graph_mode': True}` to enable graph mode. [#598](https://github.com/vllm-project/vllm-ascend/pull/598) [#719](https://github.com/vllm-project/vllm-ascend/pull/719) + +### Core +- Upgrade vLLM to 0.8.5.post1 [#715](https://github.com/vllm-project/vllm-ascend/pull/715) +- Fix early return in CustomDeepseekV2MoE.forward during profile_run [#682](https://github.com/vllm-project/vllm-ascend/pull/682) +- Adapts for new quant model generated by modelslim [#719](https://github.com/vllm-project/vllm-ascend/pull/719) +- Initial support on P2P Disaggregated Prefill based on llm_datadist [#694](https://github.com/vllm-project/vllm-ascend/pull/694) +- Use `/vllm-workspace` as code path and include `.git` in container image to fix an issue when starting vllm under `/workspace` [#726](https://github.com/vllm-project/vllm-ascend/pull/726) +- Optimize NPU memory usage to make DeepSeek R1 W8A8 32K model len work. 
[#728](https://github.com/vllm-project/vllm-ascend/pull/728) +- Fix `PYTHON_INCLUDE_PATH` typo in setup.py [#762](https://github.com/vllm-project/vllm-ascend/pull/762) + +### Other +- Add Qwen3-0.6B test [#717](https://github.com/vllm-project/vllm-ascend/pull/717) +- Add nightly CI [#668](https://github.com/vllm-project/vllm-ascend/pull/668) +- Add accuracy test report [#542](https://github.com/vllm-project/vllm-ascend/pull/542) + ## v0.8.4rc2 This is the second release candidate of v0.8.4 for vllm-ascend. Please follow the [official doc](https://vllm-ascend.readthedocs.io/en/) to start the journey. Some experimental features are included in this version, such as W8A8 quantization and EP/DP support. We'll make them stable enough in the next release. diff --git a/docs/source/user_guide/suppoted_features.md b/docs/source/user_guide/suppoted_features.md index 7f45206..e181bee 100644 --- a/docs/source/user_guide/suppoted_features.md +++ b/docs/source/user_guide/suppoted_features.md @@ -6,18 +6,18 @@ You can check the [support status of vLLM V1 Engine][v1_user_guide]. 
Below is th | Feature | vLLM V0 Engine | vLLM V1 Engine | Next Step | |-------------------------------|----------------|----------------|------------------------------------------------------------------------| -| Chunked Prefill | 🚧 WIP | 🚧 WIP | Functional, waiting for CANN 8.1 nnal package release | -| Automatic Prefix Caching | 🚧 WIP | 🚧 WIP | Functional, waiting for CANN 8.1 nnal package release | +| Chunked Prefill | 🚧 WIP | 🟢 Functional | Functional, see detail note: [Chunked Prefill][cp] | +| Automatic Prefix Caching | 🚧 WIP | 🟢 Functional | Functional, see detail note: [vllm-ascend#732][apc] | | LoRA | 🟢 Functional | 🚧 WIP | [vllm-ascend#396][multilora], CI needed, working on V1 support | -| Prompt adapter | No plan | 🟡 Planned | Plan in 2025.06.30 | +| Prompt adapter | 🔴 No plan | 🟡 Planned | Plan in 2025.06.30 | | Speculative decoding | 🟢 Functional | 🚧 WIP | CI needed; working on V1 support | -| Pooling | 🟢 Functional | 🟢 Functional | CI needed and adapting more models; V1 support rely on vLLM support. | +| Pooling | 🟢 Functional | 🟡 Planned | CI needed and adapting more models; V1 support relies on vLLM support. 
| | Enc-dec | 🔴 NO plan | 🟡 Planned | Plan in 2025.06.30 | | Multi Modality | 🟢 Functional | 🟢 Functional | [Tutorial][multimodal], optimizing and adapting more models | | LogProbs | 🟢 Functional | 🟢 Functional | CI needed | | Prompt logProbs | 🟢 Functional | 🟢 Functional | CI needed | | Async output | 🟢 Functional | 🟢 Functional | CI needed | -| Multi step scheduler | 🟢 Functional | 🔴 Deprecated | [vllm#8779][v1_rfc], replaced by [vLLM V1 Scheduler][v1_scheduler]) | +| Multi step scheduler | 🟢 Functional | 🔴 Deprecated | [vllm#8779][v1_rfc], replaced by [vLLM V1 Scheduler][v1_scheduler] | | Best of | 🟢 Functional | 🔴 Deprecated | [vllm#13361][best_of], CI needed | | Beam search | 🟢 Functional | 🟢 Functional | CI needed | | Guided Decoding | 🟢 Functional | 🟢 Functional | [vllm-ascend#177][guided_decoding] | @@ -27,11 +27,12 @@ You can check the [support status of vLLM V1 Engine][v1_user_guide]. Below is th | Data Parallel | 🔴 NO plan | 🟢 Functional | CI needed; No plan on V0 support | | Prefill Decode Disaggregation | 🟢 Functional | 🟢 Functional | 1P1D available, working on xPyD and V1 support. | | Quantization | 🟢 Functional | 🟢 Functional | W8A8 available, CI needed; working on more quantization method support | -| Graph Mode | 🔴 NO plan | 🟢 Functional | Functional, waiting for CANN 8.1 nnal package release | +| Graph Mode | 🔴 NO plan | 🔵 Experimental | Experimental, see detail note: [vllm-ascend#767][graph_mode] | | Sleep Mode | 🟢 Functional | 🟢 Functional | level=1 available, CI needed, working on V1 support | - 🟢 Functional: Fully operational, with ongoing optimizations. -- 🚧 WIP: Under active development +- 🔵 Experimental: Experimental support, interfaces and functions may change. +- 🚧 WIP: Under active development, will be supported soon. - 🟡 Planned: Scheduled for future implementation (some may have open PRs/RFCs). - 🔴 NO plan / Deprecated: No plan for V0 or deprecated by vLLM v1. 
@@ -42,3 +43,6 @@ You can check the [support status of vLLM V1 Engine][v1_user_guide]. Below is th [v1_scheduler]: https://github.com/vllm-project/vllm/blob/main/vllm/v1/core/sched/scheduler.py [v1_rfc]: https://github.com/vllm-project/vllm/issues/8779 [multilora]: https://github.com/vllm-project/vllm-ascend/issues/396 +[graph_mode]: https://github.com/vllm-project/vllm-ascend/issues/767 +[apc]: https://github.com/vllm-project/vllm-ascend/issues/732 +[cp]: https://docs.vllm.ai/en/stable/performance/optimization.html#chunked-prefill