[MISC] Add patch module (#526)
This PR adds a patch module for vllm with two kinds of patches: 1. platform patch: registered when the platform is loaded. 2. worker patch: registered when the worker is started. Each kind is organized as follows: 1. patch_common: patches for both main and the 0.8.4 version. 2. patch_main: patches for the main version. 3. patch_0_8_4: patches for the 0.8.4 version.
This commit is contained in:
@@ -72,7 +72,20 @@ Find more details [<u>here</u>](https://github.com/vllm-project/vllm-ascend/issu
|
||||
|
||||
### 6. How to solve the problem of "Failed to infer device type" or "libatb.so: cannot open shared object file"?
|
||||
|
||||
Basicly, the reason is that the NNAL environment is not sourced. Please try `source /usr/local/Ascend/nnal/atb/set_env.sh` to solve the problem.
|
||||
Basically, the reason is that the NPU environment is not configured correctly. You can:
|
||||
1. try `source /usr/local/Ascend/nnal/atb/set_env.sh` to enable the NNAL package.
|
||||
2. try `source /usr/local/Ascend/ascend-toolkit/set_env.sh` to enable the CANN package.
|
||||
3. try `npu-smi info` to check whether the NPU is working.
|
||||
|
||||
If none of the above steps work, you can run the following Python code to check whether any error is reported:
|
||||
|
||||
```
|
||||
import torch
|
||||
import torch_npu
|
||||
import vllm
|
||||
```
|
||||
|
||||
If none of the above steps work, feel free to submit a GitHub issue.
|
||||
|
||||
### 7. Does vllm-ascend support Atlas 300I Duo?
|
||||
|
||||
@@ -80,10 +93,10 @@ No, vllm-ascend now only supports Atlas A2 series. We are working on it.
|
||||
|
||||
### 8. How does vllm-ascend perform?
|
||||
|
||||
Currently, only some models are imporved. Such as `Qwen2 VL`, `Deepseek V3`. Others are not good enough. In the future, we will support graph mode and custom ops to improve the performance of vllm-ascend. And when the official release of vllm-ascend is released, you can install `mindie-turbo` with `vllm-ascend` to speed up the inference as well.
|
||||
Currently, only some models are improved, such as `Qwen2 VL` and `Deepseek V3`. Others are not good enough. In the future, we will support graph mode and custom ops to improve the performance of vllm-ascend. Once the official version of vllm-ascend is released, you can also install `mindie-turbo` alongside `vllm-ascend` to speed up inference.
|
||||
|
||||
### 9. How does vllm-ascend work with vllm?
|
||||
vllm-ascend is a plugin for vllm. Basicly, the version of vllm-ascend is the same as the version of vllm. For example, if you use vllm 0.7.3, you should use vllm-ascend 0.7.3 as well. For main branch, we will make sure `vllm-ascend` and `vllm` are compatible by each commit.
|
||||
vllm-ascend is a plugin for vllm. Basically, the version of vllm-ascend is the same as the version of vllm. For example, if you use vllm 0.7.3, you should use vllm-ascend 0.7.3 as well. For the main branch, we will make sure `vllm-ascend` and `vllm` are compatible at each commit.
|
||||
|
||||
### 10. Does vllm-ascend support Prefill Disaggregation feature?
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ requires = [
|
||||
"cmake>=3.26",
|
||||
"decorator",
|
||||
"numpy<2.0.0",
|
||||
"packaging",
|
||||
"pip",
|
||||
"pybind11",
|
||||
"pyyaml",
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
cmake>=3.26
|
||||
decorator
|
||||
numpy<2.0.0
|
||||
packaging
|
||||
pybind11
|
||||
pyyaml
|
||||
scipy
|
||||
|
||||
@@ -18,6 +18,10 @@
|
||||
|
||||
def register():
    """Apply the platform-level patches and return the NPU platform class path.

    ``adapt_patch`` is imported at call time and invoked with
    ``is_global_patch=True`` before the dotted path of the platform class
    is returned to the caller.
    """
    from vllm_ascend.utils import adapt_patch

    # The global (platform) patch is adapted first, before the path is returned.
    adapt_patch(is_global_patch=True)

    platform_path = "vllm_ascend.platform.NPUPlatform"
    return platform_path
|
||||
|
||||
|
||||
|
||||
0
vllm_ascend/patch/__init__.py
Normal file
0
vllm_ascend/patch/__init__.py
Normal file
27
vllm_ascend/patch/platform/__init__.py
Normal file
27
vllm_ascend/patch/platform/__init__.py
Normal file
@@ -0,0 +1,27 @@
|
||||
#
|
||||
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||
# This file is a part of the vllm-ascend project.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import vllm
|
||||
from packaging.version import Version
|
||||
|
||||
# Import common patches for all versions
|
||||
from vllm_ascend.patch.platform import patch_common # noqa: F401
|
||||
|
||||
# Import specific patches for different versions
|
||||
if Version(vllm.__version__) == Version("0.8.4"):
|
||||
from vllm_ascend.patch.platform import patch_0_8_4 # noqa: F401
|
||||
else:
|
||||
from vllm_ascend.patch.platform import patch_main # noqa: F401
|
||||
16
vllm_ascend/patch/platform/patch_0_8_4/__init__.py
Normal file
16
vllm_ascend/patch/platform/patch_0_8_4/__init__.py
Normal file
@@ -0,0 +1,16 @@
|
||||
#
|
||||
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||
# This file is a part of the vllm-ascend project.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
16
vllm_ascend/patch/platform/patch_common/__init__.py
Normal file
16
vllm_ascend/patch/platform/patch_common/__init__.py
Normal file
@@ -0,0 +1,16 @@
|
||||
#
|
||||
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||
# This file is a part of the vllm-ascend project.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
16
vllm_ascend/patch/platform/patch_main/__init__.py
Normal file
16
vllm_ascend/patch/platform/patch_main/__init__.py
Normal file
@@ -0,0 +1,16 @@
|
||||
#
|
||||
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||
# This file is a part of the vllm-ascend project.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
27
vllm_ascend/patch/worker/__init__.py
Normal file
27
vllm_ascend/patch/worker/__init__.py
Normal file
@@ -0,0 +1,27 @@
|
||||
#
|
||||
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||
# This file is a part of the vllm-ascend project.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import vllm
|
||||
from packaging.version import Version
|
||||
|
||||
# Import common patches for all versions
|
||||
from vllm_ascend.patch.worker import patch_common # noqa: F401
|
||||
|
||||
# Import specific patches for different versions
|
||||
if Version(vllm.__version__) == Version("0.8.4"):
|
||||
from vllm_ascend.patch.worker import patch_0_8_4 # noqa: F401
|
||||
else:
|
||||
from vllm_ascend.patch.worker import patch_main # noqa: F401
|
||||
16
vllm_ascend/patch/worker/patch_0_8_4/__init__.py
Normal file
16
vllm_ascend/patch/worker/patch_0_8_4/__init__.py
Normal file
@@ -0,0 +1,16 @@
|
||||
#
|
||||
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||
# This file is a part of the vllm-ascend project.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
16
vllm_ascend/patch/worker/patch_common/__init__.py
Normal file
16
vllm_ascend/patch/worker/patch_common/__init__.py
Normal file
@@ -0,0 +1,16 @@
|
||||
#
|
||||
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||
# This file is a part of the vllm-ascend project.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
16
vllm_ascend/patch/worker/patch_main/__init__.py
Normal file
16
vllm_ascend/patch/worker/patch_main/__init__.py
Normal file
@@ -0,0 +1,16 @@
|
||||
#
|
||||
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||
# This file is a part of the vllm-ascend project.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
@@ -51,3 +51,10 @@ def current_stream() -> torch.npu.Stream:
|
||||
# we return the default stream.
|
||||
_current_stream = torch.npu.current_stream()
|
||||
return _current_stream
|
||||
|
||||
|
||||
def adapt_patch(is_global_patch: bool = False):
    """Import the patch package that matches the requested scope.

    The packages are imported only for their import-time effects; nothing
    is returned.

    Args:
        is_global_patch: When true, import ``vllm_ascend.patch.platform``;
            otherwise import ``vllm_ascend.patch.worker``.
    """
    if not is_global_patch:
        # Default path: worker-scoped patches.
        from vllm_ascend.patch import worker  # noqa: F401
        return

    # Global path: platform-scoped patches.
    from vllm_ascend.patch import platform  # noqa: F401
|
||||
|
||||
@@ -64,6 +64,9 @@ class NPUWorker(LocalOrDistributedWorkerBase):
|
||||
distributed_init_method: str,
|
||||
is_driver_worker: bool = False,
|
||||
model_runner_cls: Optional[Type[ModelRunnerBase]] = None):
|
||||
# register patch for vllm
|
||||
from vllm_ascend.utils import adapt_patch
|
||||
adapt_patch()
|
||||
# Register ops when worker init.
|
||||
from vllm_ascend import ops # noqa: F401
|
||||
|
||||
|
||||
@@ -55,6 +55,9 @@ class NPUWorker(WorkerBase):
|
||||
# Additional parameters for compatibility with vllm
|
||||
**kwargs):
|
||||
"""Initialize the worker for Ascend."""
|
||||
# register patch for vllm
|
||||
from vllm_ascend.utils import adapt_patch
|
||||
adapt_patch()
|
||||
# Register ops when worker init.
|
||||
from vllm_ascend import ops # noqa: F401
|
||||
|
||||
|
||||
Reference in New Issue
Block a user