diff --git a/docs/source/faqs.md b/docs/source/faqs.md index 1b72ad2..b44651e 100644 --- a/docs/source/faqs.md +++ b/docs/source/faqs.md @@ -72,7 +72,20 @@ Find more details [here](https://github.com/vllm-project/vllm-ascend/issu ### 6. How to solve the problem of "Failed to infer device type" or "libatb.so: cannot open shared object file"? -Basicly, the reason is that the NNAL environment is not sourced. Please try `source /usr/local/Ascend/nnal/atb/set_env.sh` to solve the problem. +Basically, the reason is that the NPU environment is not configured correctly. You can: +1. try `source /usr/local/Ascend/nnal/atb/set_env.sh` to enable the NNAL package. +2. try `source /usr/local/Ascend/ascend-toolkit/set_env.sh` to enable the CANN package. +3. try `npu-smi info` to check whether the NPU is working. + +If none of the above steps help, run the following Python code to check whether any of the imports raises an error: + +```python +import torch +import torch_npu +import vllm +``` + +If the problem still persists, feel free to submit a GitHub issue. ### 7. Does vllm-ascend support Atlas 300I Duo? @@ -80,10 +93,10 @@ No, vllm-ascend now only supports Atlas A2 series. We are working on it. ### 8. How does vllm-ascend perform? -Currently, only some models are imporved. Such as `Qwen2 VL`, `Deepseek V3`. Others are not good enough. In the future, we will support graph mode and custom ops to improve the performance of vllm-ascend. And when the official release of vllm-ascend is released, you can install `mindie-turbo` with `vllm-ascend` to speed up the inference as well. +Currently, only some models are improved, such as `Qwen2 VL` and `Deepseek V3`. Others are not good enough. In the future, we will support graph mode and custom ops to improve the performance of vllm-ascend. Once the official release of vllm-ascend is available, you can also install `mindie-turbo` with `vllm-ascend` to speed up inference. ### 9. How vllm-ascend work with vllm? 
-vllm-ascend is a plugin for vllm. Basicly, the version of vllm-ascend is the same as the version of vllm. For example, if you use vllm 0.7.3, you should use vllm-ascend 0.7.3 as well. For main branch, we will make sure `vllm-ascend` and `vllm` are compatible by each commit. +vllm-ascend is a plugin for vllm. Basically, the version of vllm-ascend is the same as the version of vllm. For example, if you use vllm 0.7.3, you should use vllm-ascend 0.7.3 as well. For main branch, we will make sure `vllm-ascend` and `vllm` are compatible by each commit. ### 10. Does vllm-ascend support Prefill Disaggregation feature? diff --git a/pyproject.toml b/pyproject.toml index f8855c4..ac81c21 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,6 +4,7 @@ requires = [ "cmake>=3.26", "decorator", "numpy<2.0.0", + "packaging", "pip", "pybind11", "pyyaml", diff --git a/requirements.txt b/requirements.txt index e20b03e..fec71cb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,7 @@ cmake>=3.26 decorator numpy<2.0.0 +packaging pybind11 pyyaml scipy diff --git a/vllm_ascend/__init__.py b/vllm_ascend/__init__.py index c3b7661..31657de 100644 --- a/vllm_ascend/__init__.py +++ b/vllm_ascend/__init__.py @@ -18,6 +18,10 @@ def register(): """Register the NPU platform.""" + # Adapt the global patch here. + from vllm_ascend.utils import adapt_patch + adapt_patch(is_global_patch=True) + return "vllm_ascend.platform.NPUPlatform" diff --git a/vllm_ascend/patch/__init__.py b/vllm_ascend/patch/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/vllm_ascend/patch/platform/__init__.py b/vllm_ascend/patch/platform/__init__.py new file mode 100644 index 0000000..0fb24d2 --- /dev/null +++ b/vllm_ascend/patch/platform/__init__.py @@ -0,0 +1,27 @@ +# +# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. +# This file is a part of the vllm-ascend project. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import vllm +from packaging.version import Version + +# Import common patches for all versions +from vllm_ascend.patch.platform import patch_common # noqa: F401 + +# Import specific patches for different versions +if Version(vllm.__version__) == Version("0.8.4"): + from vllm_ascend.patch.platform import patch_0_8_4 # noqa: F401 +else: + from vllm_ascend.patch.platform import patch_main # noqa: F401 diff --git a/vllm_ascend/patch/platform/patch_0_8_4/__init__.py b/vllm_ascend/patch/platform/patch_0_8_4/__init__.py new file mode 100644 index 0000000..2ed088b --- /dev/null +++ b/vllm_ascend/patch/platform/patch_0_8_4/__init__.py @@ -0,0 +1,16 @@ +# +# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. +# This file is a part of the vllm-ascend project. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# \ No newline at end of file diff --git a/vllm_ascend/patch/platform/patch_common/__init__.py b/vllm_ascend/patch/platform/patch_common/__init__.py new file mode 100644 index 0000000..2ed088b --- /dev/null +++ b/vllm_ascend/patch/platform/patch_common/__init__.py @@ -0,0 +1,16 @@ +# +# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. +# This file is a part of the vllm-ascend project. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# \ No newline at end of file diff --git a/vllm_ascend/patch/platform/patch_main/__init__.py b/vllm_ascend/patch/platform/patch_main/__init__.py new file mode 100644 index 0000000..2ed088b --- /dev/null +++ b/vllm_ascend/patch/platform/patch_main/__init__.py @@ -0,0 +1,16 @@ +# +# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. +# This file is a part of the vllm-ascend project. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# \ No newline at end of file diff --git a/vllm_ascend/patch/worker/__init__.py b/vllm_ascend/patch/worker/__init__.py new file mode 100644 index 0000000..c2f54a7 --- /dev/null +++ b/vllm_ascend/patch/worker/__init__.py @@ -0,0 +1,27 @@ +# +# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. +# This file is a part of the vllm-ascend project. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import vllm +from packaging.version import Version + +# Import common patches for all versions +from vllm_ascend.patch.worker import patch_common # noqa: F401 + +# Import specific patches for different versions +if Version(vllm.__version__) == Version("0.8.4"): + from vllm_ascend.patch.worker import patch_0_8_4 # noqa: F401 +else: + from vllm_ascend.patch.worker import patch_main # noqa: F401 diff --git a/vllm_ascend/patch/worker/patch_0_8_4/__init__.py b/vllm_ascend/patch/worker/patch_0_8_4/__init__.py new file mode 100644 index 0000000..2ed088b --- /dev/null +++ b/vllm_ascend/patch/worker/patch_0_8_4/__init__.py @@ -0,0 +1,16 @@ +# +# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. +# This file is a part of the vllm-ascend project. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# \ No newline at end of file diff --git a/vllm_ascend/patch/worker/patch_common/__init__.py b/vllm_ascend/patch/worker/patch_common/__init__.py new file mode 100644 index 0000000..2ed088b --- /dev/null +++ b/vllm_ascend/patch/worker/patch_common/__init__.py @@ -0,0 +1,16 @@ +# +# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. +# This file is a part of the vllm-ascend project. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# \ No newline at end of file diff --git a/vllm_ascend/patch/worker/patch_main/__init__.py b/vllm_ascend/patch/worker/patch_main/__init__.py new file mode 100644 index 0000000..2ed088b --- /dev/null +++ b/vllm_ascend/patch/worker/patch_main/__init__.py @@ -0,0 +1,16 @@ +# +# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. +# This file is a part of the vllm-ascend project. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# \ No newline at end of file diff --git a/vllm_ascend/utils.py b/vllm_ascend/utils.py index 0b6c69e..dd83d3d 100644 --- a/vllm_ascend/utils.py +++ b/vllm_ascend/utils.py @@ -51,3 +51,10 @@ def current_stream() -> torch.npu.Stream: # we return the default stream. _current_stream = torch.npu.current_stream() return _current_stream + + +def adapt_patch(is_global_patch: bool = False) -> None: + if is_global_patch: + from vllm_ascend.patch import platform # noqa: F401 + else: + from vllm_ascend.patch import worker # noqa: F401 diff --git a/vllm_ascend/worker/worker.py b/vllm_ascend/worker/worker.py index 306907e..adeae07 100644 --- a/vllm_ascend/worker/worker.py +++ b/vllm_ascend/worker/worker.py @@ -64,6 +64,9 @@ class NPUWorker(LocalOrDistributedWorkerBase): distributed_init_method: str, is_driver_worker: bool = False, model_runner_cls: Optional[Type[ModelRunnerBase]] = None): + # Apply the worker-level vllm patches (vllm_ascend.patch.worker). + from vllm_ascend.utils import adapt_patch + adapt_patch() # Register ops when worker init. from vllm_ascend import ops # noqa: F401 diff --git a/vllm_ascend/worker/worker_v1.py b/vllm_ascend/worker/worker_v1.py index ae50311..056e84e 100644 --- a/vllm_ascend/worker/worker_v1.py +++ b/vllm_ascend/worker/worker_v1.py @@ -55,6 +55,9 @@ class NPUWorker(WorkerBase): # Additional parameters for compatibility with vllm **kwargs): """Initialize the worker for Ascend.""" + # Apply the worker-level vllm patches (vllm_ascend.patch.worker). + from vllm_ascend.utils import adapt_patch + adapt_patch() # Register ops when worker init. from vllm_ascend import ops # noqa: F401