[Kernel] add custom op DispatchGmmCombineDecode (#4139)
#### What this PR does / why we need it? add custom opapi DispatchGmmCombineDecode for A3, include kernel inpl, python Api, pytest. vLLM version: v0.11.0 vLLM main:24d6314718- vLLM version: v0.12.0 - vLLM main:ad32e3e19cSigned-off-by: wangqiankun <wangqiankun13@huawei.com> Co-authored-by: wangqiankun <wangqiankun13@huawei.com> Co-authored-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
@@ -138,3 +138,21 @@ jobs:
|
||||
image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a3'
|
||||
tests: ${{ matrix.test_config.tests }}
|
||||
name: ${{ matrix.test_config.name }}
|
||||
custom-ops-tests:
|
||||
name: test ops
|
||||
if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
|
||||
needs: multi-node-tests
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
test_config:
|
||||
- name: custom-op-dispatch_gmm_combine_decode
|
||||
os: linux-aarch64-a3-16
|
||||
tests: tests/e2e/nightly/multicard_ops/test_dispatch_gmm_combine_decode.py
|
||||
uses: ./.github/workflows/_e2e_nightly_single_node.yaml
|
||||
with:
|
||||
runner: ${{ matrix.test_config.os }}
|
||||
vllm: v0.12.0
|
||||
image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a3'
|
||||
tests: ${{ matrix.test_config.tests }}
|
||||
name: ${{ matrix.test_config.name }}
|
||||
|
||||
Reference in New Issue
Block a user