### What this PR does / why we need it?
This PR restores #7029, which adds W8A8C8 support for dsv3.2/glm5 using
the `lightning_indexer_quant` ops in the pd-mix stage.
The original PR was reverted by #7288 because the patch did not work
with the recompute scheduler.
This PR also fixes the patching issue so that it works correctly with
the recompute scheduler.
### Does this PR introduce _any_ user-facing change?
Yes. To enable LI C8, users need to set the `enable_sparse_c8` option to
`"true"` in `additional_config`.
- vLLM version: v0.17.0
- vLLM main:
4034c3d32e
---------
Signed-off-by: rjg-lyh <1318825571@qq.com>
42 lines
1.6 KiB
CMake
42 lines
1.6 KiB
CMake
# This program is free software, you can redistribute it and/or modify it.
# Copyright (c) 2025 Huawei Technologies Co., Ltd.
# This file is a part of the CANN Open Software.
# Licensed under CANN Open Software License Agreement Version 2.0 (the "License").
# Please refer to the License for details. You may not use this file except in compliance with the License.
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
# See LICENSE in the root of the software repository for the full text of the License.
# ======================================================================================================================

# Register the kernel compile options for the LightningIndexerQuant op through
# the project-provided add_ops_compile_options() helper (defined outside this
# file).
#
# -Wno-deprecated-declarations silences deprecation warnings, while -Werror
# promotes every remaining warning to an error. The other flags are
# toolchain-specific switches that are forwarded unchanged.
#
# "-mllvm" and "-cce-aicore-hoist-movemask=false" form a flag/value pair and
# must stay adjacent and in this order.
#
# NOTE(review): -Werror is applied unconditionally; consider gating it behind a
# project option if builds with newer toolchains start failing on new warnings.
add_ops_compile_options(
    OP_NAME LightningIndexerQuant
    OPTIONS
        --cce-auto-sync=off
        -Wno-deprecated-declarations
        -Werror
        -mllvm -cce-aicore-hoist-movemask=false
        --op_relocatable_kernel_binary=true
)

# Publish lightning_indexer_quant_depends to the parent scope so the including
# CMakeLists can read it. PARENT_SCOPE does not define the variable in the
# current scope.
# NOTE(review): the value looks like a source-tree-relative directory for this
# op; confirm against the consumer of this variable.
set(lightning_indexer_quant_depends
    transformer/attention/lightning_indexer_quant
    PARENT_SCOPE
)

# Add the op definition source to the op_host_aclnn target (created elsewhere
# in the project). PRIVATE: the file is compiled into that target only and is
# not propagated to targets that link against it.
target_sources(op_host_aclnn
    PRIVATE
        lightning_indexer_quant_def.cpp
)

# Add the tiling source for this op to the optiling target (created elsewhere
# in the project). PRIVATE: compiled into optiling only.
target_sources(optiling
    PRIVATE
        lightning_indexer_quant_tiling.cpp
)

# When BUILD_OPEN_PROJECT is unset or false, also compile the tiling source
# into the opmaster_ct target. The target is only referenced inside this guard,
# so it is not required to exist when BUILD_OPEN_PROJECT is true.
if(NOT BUILD_OPEN_PROJECT)
    target_sources(opmaster_ct
        PRIVATE
            lightning_indexer_quant_tiling.cpp
    )
endif()

# Make headers under <this directory>/op_host visible when compiling optiling.
# PRIVATE keeps the include path out of optiling's usage requirements.
# The path is quoted so it is always passed as a single argument, even if the
# source directory contains list separators.
target_include_directories(optiling
    PRIVATE
        "${CMAKE_CURRENT_SOURCE_DIR}/op_host"
)

# Add the op prototype source to the opsproto target (created elsewhere in the
# project). PRIVATE: compiled into opsproto only.
target_sources(opsproto
    PRIVATE
        lightning_indexer_quant_proto.cpp
)