From e66ded56796212209189e75edb84aab367f959f4 Mon Sep 17 00:00:00 2001 From: wangxiyuan Date: Fri, 18 Apr 2025 13:24:36 +0800 Subject: [PATCH] [Doc] Add release note for 0.8.4rc1 (#557) Add release note for 0.8.4rc1, we'll release 0.8.4rc1 now. Signed-off-by: wangxiyuan --- README.md | 2 +- docs/source/conf.py | 8 +++---- .../developer_guide/versioning_policy.md | 7 +++--- docs/source/faqs.md | 23 ++++++++----------- docs/source/user_guide/release_notes.md | 22 ++++++++++++++++++ 5 files changed, 41 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index a02fa44..9bedb0c 100644 --- a/README.md +++ b/README.md @@ -63,7 +63,7 @@ Below is maintained branches: | Branch | Status | Note | |------------|--------------|--------------------------------------| -| main | Maintained | CI commitment for vLLM main branch | +| main | Maintained | CI commitment for vLLM main branch and vLLM 0.8.x branch | | v0.7.1-dev | Unmaintained | Only doc fixed is allowed | | v0.7.3-dev | Maintained | CI commitment for vLLM 0.7.3 version | diff --git a/docs/source/conf.py b/docs/source/conf.py index 411efc6..be147ca 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -63,15 +63,15 @@ myst_substitutions = { # the branch of vllm, used in vllm clone # - main branch: 'main' # - vX.Y.Z branch: 'vX.Y.Z' - 'vllm_version': 'main', + 'vllm_version': 'v0.8.4', # the branch of vllm-ascend, used in vllm-ascend clone and image tag # - main branch: 'main' # - vX.Y.Z branch: latest vllm-ascend release tag - 'vllm_ascend_version': 'main', + 'vllm_ascend_version': 'v0.8.4rc1', # the newest release version of vllm-ascend and matched vLLM, used in pip install. # This value should be updated when cut down release. 
- 'pip_vllm_ascend_version': "0.7.3rc1", - 'pip_vllm_version': "0.7.3", + 'pip_vllm_ascend_version': "0.8.4rc1", + 'pip_vllm_version': "0.8.4", # CANN image tag 'cann_image_tag': "8.0.0-910b-ubuntu22.04-py3.10", } diff --git a/docs/source/developer_guide/versioning_policy.md b/docs/source/developer_guide/versioning_policy.md index 72dbefe..686c376 100644 --- a/docs/source/developer_guide/versioning_policy.md +++ b/docs/source/developer_guide/versioning_policy.md @@ -42,7 +42,7 @@ Usually, each minor version of vLLM (such as 0.7) will correspond to a vllm-asce | Branch | Status | Note | |------------|--------------|--------------------------------------| -| main | Maintained | CI commitment for vLLM main branch | +| main | Maintained | CI commitment for vLLM main branch and vLLM 0.8.x branch | | v0.7.3-dev | Maintained | CI commitment for vLLM 0.7.3 version | | v0.7.1-dev | Unmaintained | Replaced by v0.7.3-dev | @@ -67,6 +67,7 @@ Following is the Release Compatibility Matrix for vLLM Ascend Plugin: | vllm-ascend | vLLM | Python | Stable CANN | PyTorch/torch_npu | |--------------|--------------| --- | --- | --- | +| v0.8.4rc1 | v0.8.4 | 3.9 - 3.12 | 8.0.0 | 2.5.1 / 2.5.1.dev20250320 | | v0.7.3rc2 | v0.7.3 | 3.9 - 3.12 | 8.0.0 | 2.5.1 / 2.5.1.dev20250320 | | v0.7.3rc1 | v0.7.3 | 3.9 - 3.12 | 8.0.0 | 2.5.1 / 2.5.1.dev20250308 | | v0.7.1rc1 | v0.7.1 | 3.9 - 3.12 | 8.0.0 | 2.5.1 / 2.5.1.dev20250218 | @@ -77,8 +78,8 @@ Following is the Release Compatibility Matrix for vLLM Ascend Plugin: | Date | Event | |------------|-------------------------------------------| -| 2025.04.15 | Release candidates, v0.8.Xrc1 | -| 2025.04.15 | Final release, v0.7.3 | +| End of 2025.04 | v0.7.x Final release, v0.7.3 | +| 2025.04.18 | Release candidates, v0.8.4rc1 | | 2025.03.28 | Release candidates, v0.7.3rc2 | | 2025.03.14 | Release candidates, v0.7.3rc1 | | 2025.02.19 | Release candidates, v0.7.1rc1 | diff --git a/docs/source/faqs.md b/docs/source/faqs.md index 9af8ce8..4972dbf 100644 --- 
a/docs/source/faqs.md +++ b/docs/source/faqs.md @@ -5,6 +5,7 @@ - [[v0.7.1rc1] FAQ & Feedback](https://github.com/vllm-project/vllm-ascend/issues/19) - [[v0.7.3rc1] FAQ & Feedback](https://github.com/vllm-project/vllm-ascend/issues/267) - [[v0.7.3rc2] FAQ & Feedback](https://github.com/vllm-project/vllm-ascend/issues/418) +- [[v0.8.4rc1] FAQ & Feedback](https://github.com/vllm-project/vllm-ascend/issues/546) ## General FAQs @@ -66,26 +67,22 @@ import vllm If all above steps are not working, feel free to submit a GitHub issue. -### 7. Does vllm-ascend support Atlas 300I Duo? - -No, vllm-ascend now only supports Atlas A2 series. We are working on it. - -### 8. How does vllm-ascend perform? +### 7. How does vllm-ascend perform? Currently, only some models are improved. Such as `Qwen2 VL`, `Deepseek V3`. Others are not good enough. In the future, we will support graph mode and custom ops to improve the performance of vllm-ascend. And when the official release of vllm-ascend is released, you can install `mindie-turbo` with `vllm-ascend` to speed up the inference as well. -### 9. How vllm-ascend work with vllm? +### 8. How vllm-ascend work with vllm? vllm-ascend is a plugin for vllm. Basically, the version of vllm-ascend is the same as the version of vllm. For example, if you use vllm 0.7.3, you should use vllm-ascend 0.7.3 as well. For main branch, we will make sure `vllm-ascend` and `vllm` are compatible by each commit. -### 10. Does vllm-ascend support Prefill Disaggregation feature? +### 9. Does vllm-ascend support Prefill Disaggregation feature? Currently, only 1P1D is supported by vllm. For vllm-ascend, it'll be done by [this PR](https://github.com/vllm-project/vllm-ascend/pull/432). For NPND, vllm is not stable and fully supported yet. We will make it stable and supported by vllm-ascend in the future. -### 11. Does vllm-ascend support quantization method? +### 10. Does vllm-ascend support quantization method? 
Currently, there is no quantization method supported in vllm-ascend originally. And the quantization supported is working in progress, w8a8 will firstly be supported. -### 12. How to run w8a8 DeepSeek model? +### 11. How to run w8a8 DeepSeek model? Currently, running on v0.7.3, we should run w8a8 with vllm + vllm-ascend + mindie-turbo. And we only need vllm + vllm-ascend when v0.8.X is released. After installing the above packages, you can follow the steps below to run w8a8 DeepSeek: @@ -93,15 +90,15 @@ Currently, running on v0.7.3, we should run w8a8 with vllm + vllm-ascend + mindi 2. Copy the content of `quant_model_description_w8a8_dynamic.json` into the `quantization_config` of `config.json` of the quantized model files. 3. Reference with the quantized DeepSeek model. -### 13. There is not output in log when loading models using vllm-ascend, How to solve it? +### 12. There is no output in the log when loading models using vllm-ascend. How to solve it? If you're using vllm 0.7.3 version, this is a known progress bar display issue in VLLM, which has been resolved in [this PR](https://github.com/vllm-project/vllm/pull/12428), please cherry-pick it locally by yourself. Otherwise, please fill up an issue. -### 14. How vllm-ascend is tested +### 13. How vllm-ascend is tested -vllm-ascend is tested by functionnal test, performance test and accuracy test. +vllm-ascend is tested by functional test, performance test and accuracy test. -- **Functionnal test**: we added CI, includes portion of vllm's native unit tests and vllm-ascend's own unit tests,on vllm-ascend's test, we test basic functional usability for popular models, include `Qwen2.5-7B-Instruct`、 `Qwen2.5-VL-7B-Instruct`、`Qwen2.5-VL-32B-Instruct`、`QwQ-32B`.
+- **Functional test**: we added CI, includes portion of vllm's native unit tests and vllm-ascend's own unit tests; on vllm-ascend's test, we test basic functional usability for popular models, including `Qwen2.5-7B-Instruct`、 `Qwen2.5-VL-7B-Instruct`、`Qwen2.5-VL-32B-Instruct`、`QwQ-32B`. - **Performance test**: we provide [benchmark](https://github.com/vllm-project/vllm-ascend/tree/main/benchmarks) tools for end-to-end performance benchmark which can easily to re-route locally, we'll publish a perf website like [vllm](https://simon-mo-workspace.observablehq.cloud/vllm-dashboard-v0/perf) does to show the performance test results for each pull request diff --git a/docs/source/user_guide/release_notes.md b/docs/source/user_guide/release_notes.md index 5504820..d71ba2f 100644 --- a/docs/source/user_guide/release_notes.md +++ b/docs/source/user_guide/release_notes.md @@ -1,5 +1,27 @@ # Release note +## v0.8.4rc1 + +This is the first release candidate of v0.8.4 for vllm-ascend. Please follow the [official doc](https://vllm-ascend.readthedocs.io/en/) to start the journey. From this version, vllm-ascend will follow the newest version of vllm and release every two weeks. For example, if vllm releases v0.8.5 in the next two weeks, vllm-ascend will release v0.8.5rc1 instead of v0.8.4rc2. Please find the details in the [official documentation](https://vllm-ascend.readthedocs.io/en/latest/developer_guide/versioning_policy.html#release-window). + +### Highlights + +- vLLM V1 engine experimental support is included in this version. You can visit the [official guide](https://docs.vllm.ai/en/latest/getting_started/v1_user_guide.html) to get more details. By default, vLLM will fall back to V0 if V1 doesn't work; please set the `VLLM_USE_V1=1` environment variable if you want to force the use of V1. +- LoRA、Multi-LoRA And Dynamic Serving are supported now. The performance will be improved in the next release. 
Please follow the [official doc](https://docs.vllm.ai/en/latest/features/lora.html) for more usage information. Thanks for the contribution from China Merchants Bank. [#521](https://github.com/vllm-project/vllm-ascend/pull/521). +- Sleep Mode feature is supported. Currently it only works on the V0 engine. V1 engine support will come soon. [#513](https://github.com/vllm-project/vllm-ascend/pull/513) + +### Core + +- The Ascend scheduler is added for the V1 engine. This scheduler has better affinity with Ascend hardware. More scheduler policies will be added in the future. [#543](https://github.com/vllm-project/vllm-ascend/pull/543) +- Disaggregated Prefill feature is supported. Currently only 1P1D works. NPND is under design by the vllm team. vllm-ascend will support it once it's ready from vLLM. Follow the [official guide](https://docs.vllm.ai/en/latest/features/disagg_prefill.html) to use it. [#432](https://github.com/vllm-project/vllm-ascend/pull/432) +- Spec decode feature works now. Currently it only works on the V0 engine. V1 engine support will come soon. [#500](https://github.com/vllm-project/vllm-ascend/pull/500) + +### Other + +- A new communicator `pyhccl` is added. It's used to call the CANN HCCL library directly instead of using `torch.distributed`. More usage of it will be added in the next release. [#503](https://github.com/vllm-project/vllm-ascend/pull/503) +- The custom ops build is enabled by default. You should install packages like `gcc` and `cmake` first to build `vllm-ascend` from source. Set the `COMPILE_CUSTOM_KERNELS=0` environment variable to disable the compilation if you don't need it. [#466](https://github.com/vllm-project/vllm-ascend/pull/466) +- The custom op `rotary embedding` is enabled by default now to improve the performance. [#555](https://github.com/vllm-project/vllm-ascend/pull/555) + ## v0.7.3rc2 This is 2nd release candidate of v0.7.3 for vllm-ascend. Please follow the [official doc](https://vllm-ascend.readthedocs.io/en/v0.7.3-dev) to start the journey.