[bugfix] Fix test_camem failure with triton-ascend (#5492)
### What this PR does / why we need it?
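This fixes a bug that occurred when running `test_camem.py` in the
triton-ascend environment, where the test failed with `NPU function error:
aclrtGetMemInfo(ACL_HBM_MEM, &device_free, &device_total)`.
The module-level `import torch_npu._inductor` is removed from the Triton
kernel modules and is instead performed lazily in `NPUWorker` after
`adapt_patch()`, which avoids torch_npu re-initialization in patch.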
- vLLM version: v0.13.0
- vLLM main: 5326c89803
---------
Signed-off-by: Meihan-chen <jcccx.cmh@gmail.com>
This commit is contained in:
11
.github/workflows/_e2e_test.yaml
vendored
11
.github/workflows/_e2e_test.yaml
vendored
@@ -68,15 +68,6 @@ jobs:
|
|||||||
pip install -r requirements-dev.txt
|
pip install -r requirements-dev.txt
|
||||||
pip install -v -e .
|
pip install -v -e .
|
||||||
|
|
||||||
- name: Run vllm-project/vllm-ascend test (non triton)
|
|
||||||
env:
|
|
||||||
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
|
||||||
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
|
|
||||||
if: ${{ inputs.type == 'full' }}
|
|
||||||
run: |
|
|
||||||
pytest -sv --durations=0 tests/e2e/singlecard/test_aclgraph_mem.py
|
|
||||||
pytest -sv --durations=0 tests/e2e/singlecard/test_camem.py
|
|
||||||
|
|
||||||
- name: Install Ascend toolkit & triton_ascend
|
- name: Install Ascend toolkit & triton_ascend
|
||||||
shell: bash -l {0}
|
shell: bash -l {0}
|
||||||
run: |
|
run: |
|
||||||
@@ -94,6 +85,8 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
# pytest -sv --durations=0 tests/e2e/singlecard/test_aclgraph_accuracy.py
|
# pytest -sv --durations=0 tests/e2e/singlecard/test_aclgraph_accuracy.py
|
||||||
# pytest -sv --durations=0 tests/e2e/singlecard/test_quantization.py
|
# pytest -sv --durations=0 tests/e2e/singlecard/test_quantization.py
|
||||||
|
pytest -sv --durations=0 tests/e2e/singlecard/test_aclgraph_mem.py
|
||||||
|
pytest -sv --durations=0 tests/e2e/singlecard/test_camem.py
|
||||||
pytest -sv --durations=0 tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
|
pytest -sv --durations=0 tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
|
||||||
pytest -sv --durations=0 tests/e2e/singlecard/pooling/test_classification.py::test_qwen_pooling_classify_correctness
|
pytest -sv --durations=0 tests/e2e/singlecard/pooling/test_classification.py::test_qwen_pooling_classify_correctness
|
||||||
|
|
||||||
|
|||||||
@@ -1,8 +1,5 @@
|
|||||||
import torch
|
import torch
|
||||||
from vllm.triton_utils import HAS_TRITON, tl, triton
|
from vllm.triton_utils import tl, triton
|
||||||
|
|
||||||
if HAS_TRITON:
|
|
||||||
import torch_npu._inductor # noqa: F401
|
|
||||||
|
|
||||||
from vllm_ascend.ops.triton.triton_utils import get_vectorcore_num
|
from vllm_ascend.ops.triton.triton_utils import get_vectorcore_num
|
||||||
|
|
||||||
|
|||||||
@@ -10,10 +10,7 @@
|
|||||||
# ruff: noqa: E501
|
# ruff: noqa: E501
|
||||||
# mypy: ignore-errors
|
# mypy: ignore-errors
|
||||||
import torch
|
import torch
|
||||||
from vllm.triton_utils import HAS_TRITON, tl, triton
|
from vllm.triton_utils import tl, triton
|
||||||
|
|
||||||
if HAS_TRITON:
|
|
||||||
import torch_npu._inductor # noqa: F401
|
|
||||||
|
|
||||||
|
|
||||||
@triton.jit
|
@triton.jit
|
||||||
|
|||||||
@@ -14,10 +14,7 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
# This file is a part of the vllm-ascend project.
|
# This file is a part of the vllm-ascend project.
|
||||||
#
|
#
|
||||||
from vllm.triton_utils import HAS_TRITON, tl, triton
|
from vllm.triton_utils import tl, triton
|
||||||
|
|
||||||
if HAS_TRITON:
|
|
||||||
import torch_npu._inductor # noqa: F401
|
|
||||||
|
|
||||||
from vllm_ascend.ops.triton.triton_utils import get_vectorcore_num
|
from vllm_ascend.ops.triton.triton_utils import get_vectorcore_num
|
||||||
|
|
||||||
|
|||||||
@@ -88,6 +88,11 @@ class NPUWorker(WorkerBase):
|
|||||||
# register patch for vllm
|
# register patch for vllm
|
||||||
from vllm_ascend.utils import adapt_patch
|
from vllm_ascend.utils import adapt_patch
|
||||||
adapt_patch()
|
adapt_patch()
|
||||||
|
# Import _inductor for graph mode execution with triton
|
||||||
|
# This lazy import avoids torch_npu re-initialization in patch
|
||||||
|
from vllm.triton_utils import HAS_TRITON
|
||||||
|
if HAS_TRITON:
|
||||||
|
import torch_npu._inductor # noqa: F401
|
||||||
# Register ops when worker init.
|
# Register ops when worker init.
|
||||||
from vllm_ascend import ops
|
from vllm_ascend import ops
|
||||||
ops.register_dummy_fusion_op()
|
ops.register_dummy_fusion_op()
|
||||||
|
|||||||
Reference in New Issue
Block a user