[dev] support compressed-tensors w8a8 quantization (#75)
* [dev] support compressed-tensors w8a8 quantization

  Co-authored-by: Li Wei <liwei.109@outlook.com>

* [refact] update KunlunScaleMMKernel impl
* [rebase] resolve conflicts and remove redundant code

---------

Co-authored-by: tangshiwen <tangshiwen@baidu.com>
```diff
@@ -21,7 +21,8 @@ import vllm_kunlun.ops.quantization.awq
 import vllm_kunlun.ops.quantization.gptq
 import vllm_kunlun.ops.vocab_parallel_embedding
 import vllm_kunlun.ops.linear
-import vllm_kunlun.ops.quantization.kernels.scaled_mm.cutlass
-import vllm_kunlun.ops.vocab_parallel_embedding
-import vllm_kunlun.ops.quantization.compressed_tensors_moe
-import vllm_kunlun.ops.fused_moe.layer
+# import vllm_kunlun.ops.quantization.kernels.scaled_mm.cutlass
+import vllm_kunlun.ops.fused_moe.layer
+import vllm_kunlun.ops.quantization.compressed_tensors.compressed_tensors
+import vllm_kunlun.ops.quantization.compressed_tensors.compressed_tensors_moe
+import vllm_kunlun.ops.quantization.kernels.scaled_mm.kunlun
```
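For context on what the swapped-in scaled_mm kernel computes: in a w8a8 scheme, both weights and activations are quantized to int8, the matmul accumulates in a wider integer type, and the result is dequantized by the product of the two scales. The sketch below is illustrative only, written in plain PyTorch with hypothetical helper names; it is not the actual KunlunScaleMMKernel from this commit nor the compressed-tensors API.

```python
# Illustrative sketch of the math a w8a8 scaled-mm kernel implements.
# NOT the KunlunScaleMMKernel; helper names here are hypothetical.
import torch

def quantize_per_tensor(x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
    """Symmetric per-tensor int8 quantization: x ~= x_q * scale."""
    scale = x.abs().max() / 127.0
    x_q = torch.clamp(torch.round(x / scale), -128, 127).to(torch.int8)
    return x_q, scale

def w8a8_scaled_mm(a: torch.Tensor, w: torch.Tensor) -> torch.Tensor:
    """Quantize activations (a) and weights (w) to int8, matmul with
    int32 accumulation, then dequantize with the product of the scales."""
    a_q, a_scale = quantize_per_tensor(a)
    w_q, w_scale = quantize_per_tensor(w)
    acc = a_q.to(torch.int32) @ w_q.to(torch.int32)  # int32 accumulation
    return acc.to(torch.float32) * (a_scale * w_scale)

# Usage: compare against the fp32 reference matmul.
a = torch.randn(4, 16)
w = torch.randn(16, 8)
print((a @ w - w8a8_scaled_mm(a, w)).abs().max())  # small quantization error
```

A production kernel fuses the int8 matmul and the dequantization into one device kernel and typically takes pre-quantized weights plus precomputed scales as inputs; the per-tensor quantization above is only the simplest scale granularity.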