[dev] support compressed-tensors w8a8 quantization (#75)

* [dev] support compressed-tensors w8a8 quantization

Co-authored-by: Li Wei <liwei.109@outlook.com>

* [refactor] update KunlunScaleMMKernel impl

* [rebase] resolve conflicts and remove redundant code

---------

Co-authored-by: tangshiwen <tangshiwen@baidu.com>
This commit is contained in:
Li Wei
2026-01-06 13:51:53 +08:00
committed by GitHub
parent ee0f50e68f
commit 515a4eeda9
8 changed files with 952 additions and 523 deletions

View File

@@ -21,7 +21,8 @@ import vllm_kunlun.ops.quantization.awq
import vllm_kunlun.ops.quantization.gptq
import vllm_kunlun.ops.vocab_parallel_embedding
import vllm_kunlun.ops.linear
import vllm_kunlun.ops.quantization.kernels.scaled_mm.cutlass
import vllm_kunlun.ops.vocab_parallel_embedding
import vllm_kunlun.ops.quantization.compressed_tensors_moe
import vllm_kunlun.ops.fused_moe.layer
# import vllm_kunlun.ops.quantization.kernels.scaled_mm.cutlass
import vllm_kunlun.ops.fused_moe.layer
import vllm_kunlun.ops.quantization.compressed_tensors.compressed_tensors
import vllm_kunlun.ops.quantization.compressed_tensors.compressed_tensors_moe
import vllm_kunlun.ops.quantization.kernels.scaled_mm.kunlun