[MISC] Add patch module (#526)
This PR added a patch module for vllm. 1. platform patch: the patch will be registered when the platform is loaded. 2. worker patch: the patch will be registered when the worker is started. The detail is: 1. patch_common: patch for the main and 0.8.4 versions. 2. patch_main: patch for the main version. 3. patch_0_8_4: patch for the 0.8.4 version.
This commit is contained in:
@@ -72,7 +72,20 @@ Find more details [<u>here</u>](https://github.com/vllm-project/vllm-ascend/issu
|
|||||||
|
|
||||||
### 6. How to solve the problem of "Failed to infer device type" or "libatb.so: cannot open shared object file"?
|
### 6. How to solve the problem of "Failed to infer device type" or "libatb.so: cannot open shared object file"?
|
||||||
|
|
||||||
Basicly, the reason is that the NNAL environment is not sourced. Please try `source /usr/local/Ascend/nnal/atb/set_env.sh` to solve the problem.
|
Basically, the reason is that the NPU environment is not configured correctly. You can:
|
||||||
|
1. try `source /usr/local/Ascend/nnal/atb/set_env.sh` to enable NNAL package.
|
||||||
|
2. try `source /usr/local/Ascend/ascend-toolkit/set_env.sh` to enable CANN package.
|
||||||
|
3. try `npu-smi info` to check whether the NPU is working.
|
||||||
|
|
||||||
|
If all above steps are not working, you can try the following code with python to check whether there is any error:
|
||||||
|
|
||||||
|
```
|
||||||
|
import torch
|
||||||
|
import torch_npu
|
||||||
|
import vllm
|
||||||
|
```
|
||||||
|
|
||||||
|
If all above steps are not working, feel free to submit a GitHub issue.
|
||||||
|
|
||||||
### 7. Does vllm-ascend support Atlas 300I Duo?
|
### 7. Does vllm-ascend support Atlas 300I Duo?
|
||||||
|
|
||||||
@@ -80,10 +93,10 @@ No, vllm-ascend now only supports Atlas A2 series. We are working on it.
|
|||||||
|
|
||||||
### 8. How does vllm-ascend perform?
|
### 8. How does vllm-ascend perform?
|
||||||
|
|
||||||
Currently, only some models are imporved. Such as `Qwen2 VL`, `Deepseek V3`. Others are not good enough. In the future, we will support graph mode and custom ops to improve the performance of vllm-ascend. And when the official release of vllm-ascend is released, you can install `mindie-turbo` with `vllm-ascend` to speed up the inference as well.
|
Currently, only some models are improved. Such as `Qwen2 VL`, `Deepseek V3`. Others are not good enough. In the future, we will support graph mode and custom ops to improve the performance of vllm-ascend. And when the official release of vllm-ascend is released, you can install `mindie-turbo` with `vllm-ascend` to speed up the inference as well.
|
||||||
|
|
||||||
### 9. How vllm-ascend work with vllm?
|
### 9. How vllm-ascend work with vllm?
|
||||||
vllm-ascend is a plugin for vllm. Basicly, the version of vllm-ascend is the same as the version of vllm. For example, if you use vllm 0.7.3, you should use vllm-ascend 0.7.3 as well. For main branch, we will make sure `vllm-ascend` and `vllm` are compatible by each commit.
|
vllm-ascend is a plugin for vllm. Basically, the version of vllm-ascend is the same as the version of vllm. For example, if you use vllm 0.7.3, you should use vllm-ascend 0.7.3 as well. For main branch, we will make sure `vllm-ascend` and `vllm` are compatible by each commit.
|
||||||
|
|
||||||
### 10. Does vllm-ascend support Prefill Disaggregation feature?
|
### 10. Does vllm-ascend support Prefill Disaggregation feature?
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ requires = [
|
|||||||
"cmake>=3.26",
|
"cmake>=3.26",
|
||||||
"decorator",
|
"decorator",
|
||||||
"numpy<2.0.0",
|
"numpy<2.0.0",
|
||||||
|
"packaging",
|
||||||
"pip",
|
"pip",
|
||||||
"pybind11",
|
"pybind11",
|
||||||
"pyyaml",
|
"pyyaml",
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
cmake>=3.26
|
cmake>=3.26
|
||||||
decorator
|
decorator
|
||||||
numpy<2.0.0
|
numpy<2.0.0
|
||||||
|
packaging
|
||||||
pybind11
|
pybind11
|
||||||
pyyaml
|
pyyaml
|
||||||
scipy
|
scipy
|
||||||
|
|||||||
@@ -18,6 +18,10 @@
|
|||||||
|
|
||||||
def register():
|
def register():
|
||||||
"""Register the NPU platform."""
|
"""Register the NPU platform."""
|
||||||
|
# Adapt the global patch here.
|
||||||
|
from vllm_ascend.utils import adapt_patch
|
||||||
|
adapt_patch(is_global_patch=True)
|
||||||
|
|
||||||
return "vllm_ascend.platform.NPUPlatform"
|
return "vllm_ascend.platform.NPUPlatform"
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
0
vllm_ascend/patch/__init__.py
Normal file
0
vllm_ascend/patch/__init__.py
Normal file
27
vllm_ascend/patch/platform/__init__.py
Normal file
27
vllm_ascend/patch/platform/__init__.py
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
# This file is a part of the vllm-ascend project.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import vllm
|
||||||
|
from packaging.version import Version
|
||||||
|
|
||||||
|
# Import common patches for all versions
|
||||||
|
from vllm_ascend.patch.platform import patch_common # noqa: F401
|
||||||
|
|
||||||
|
# Import specific patches for different versions
|
||||||
|
if Version(vllm.__version__) == Version("0.8.4"):
|
||||||
|
from vllm_ascend.patch.platform import patch_0_8_4 # noqa: F401
|
||||||
|
else:
|
||||||
|
from vllm_ascend.patch.platform import patch_main # noqa: F401
|
||||||
16
vllm_ascend/patch/platform/patch_0_8_4/__init__.py
Normal file
16
vllm_ascend/patch/platform/patch_0_8_4/__init__.py
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
# This file is a part of the vllm-ascend project.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
16
vllm_ascend/patch/platform/patch_common/__init__.py
Normal file
16
vllm_ascend/patch/platform/patch_common/__init__.py
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
# This file is a part of the vllm-ascend project.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
16
vllm_ascend/patch/platform/patch_main/__init__.py
Normal file
16
vllm_ascend/patch/platform/patch_main/__init__.py
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
# This file is a part of the vllm-ascend project.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
27
vllm_ascend/patch/worker/__init__.py
Normal file
27
vllm_ascend/patch/worker/__init__.py
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
# This file is a part of the vllm-ascend project.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
import vllm
|
||||||
|
from packaging.version import Version
|
||||||
|
|
||||||
|
# Import common patches for all versions
|
||||||
|
from vllm_ascend.patch.worker import patch_common # noqa: F401
|
||||||
|
|
||||||
|
# Import specific patches for different versions
|
||||||
|
if Version(vllm.__version__) == Version("0.8.4"):
|
||||||
|
from vllm_ascend.patch.worker import patch_0_8_4 # noqa: F401
|
||||||
|
else:
|
||||||
|
from vllm_ascend.patch.worker import patch_main # noqa: F401
|
||||||
16
vllm_ascend/patch/worker/patch_0_8_4/__init__.py
Normal file
16
vllm_ascend/patch/worker/patch_0_8_4/__init__.py
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
# This file is a part of the vllm-ascend project.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
16
vllm_ascend/patch/worker/patch_common/__init__.py
Normal file
16
vllm_ascend/patch/worker/patch_common/__init__.py
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
# This file is a part of the vllm-ascend project.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
16
vllm_ascend/patch/worker/patch_main/__init__.py
Normal file
16
vllm_ascend/patch/worker/patch_main/__init__.py
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
# This file is a part of the vllm-ascend project.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
@@ -51,3 +51,10 @@ def current_stream() -> torch.npu.Stream:
|
|||||||
# we return the default stream.
|
# we return the default stream.
|
||||||
_current_stream = torch.npu.current_stream()
|
_current_stream = torch.npu.current_stream()
|
||||||
return _current_stream
|
return _current_stream
|
||||||
|
|
||||||
|
|
||||||
|
def adapt_patch(is_global_patch: bool = False):
|
||||||
|
if is_global_patch:
|
||||||
|
from vllm_ascend.patch import platform # noqa: F401
|
||||||
|
else:
|
||||||
|
from vllm_ascend.patch import worker # noqa: F401
|
||||||
|
|||||||
@@ -64,6 +64,9 @@ class NPUWorker(LocalOrDistributedWorkerBase):
|
|||||||
distributed_init_method: str,
|
distributed_init_method: str,
|
||||||
is_driver_worker: bool = False,
|
is_driver_worker: bool = False,
|
||||||
model_runner_cls: Optional[Type[ModelRunnerBase]] = None):
|
model_runner_cls: Optional[Type[ModelRunnerBase]] = None):
|
||||||
|
# register patch for vllm
|
||||||
|
from vllm_ascend.utils import adapt_patch
|
||||||
|
adapt_patch()
|
||||||
# Register ops when worker init.
|
# Register ops when worker init.
|
||||||
from vllm_ascend import ops # noqa: F401
|
from vllm_ascend import ops # noqa: F401
|
||||||
|
|
||||||
|
|||||||
@@ -55,6 +55,9 @@ class NPUWorker(WorkerBase):
|
|||||||
# Additional parameters for compatibility with vllm
|
# Additional parameters for compatibility with vllm
|
||||||
**kwargs):
|
**kwargs):
|
||||||
"""Initialize the worker for Ascend."""
|
"""Initialize the worker for Ascend."""
|
||||||
|
# register patch for vllm
|
||||||
|
from vllm_ascend.utils import adapt_patch
|
||||||
|
adapt_patch()
|
||||||
# Register ops when worker init.
|
# Register ops when worker init.
|
||||||
from vllm_ascend import ops # noqa: F401
|
from vllm_ascend import ops # noqa: F401
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user