[Feature] Support Mixed-Precision Quantization for MoE (#112)

This commit is contained in:
Shiwen Tang
2026-01-14 18:42:18 +08:00
committed by GitHub
parent 6706651646
commit 8988ad08b2
2 changed files with 24 additions and 3 deletions

View File

@@ -21,7 +21,7 @@ from vllm.distributed import (
tensor_model_parallel_all_gather,
)
from vllm.logger import init_logger
-from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm_kunlun.ops.fused_moe.layer import FusedMoE
from vllm.model_executor.layers.layernorm import RMSNorm
from vllm.model_executor.layers.linear import (
MergedColumnParallelLinear,