Clean PR for DeepSeek-V2 MTP support (#164)

* Add MTP support in eagle.py

Signed-off-by: wanghao129 <wanghao129@baidu.com>

* New PR for MTP

Signed-off-by: wanghao129 <wanghao129@baidu.com>

* Revert formatting changes in deepseek_v2.py

Signed-off-by: wanghao129 <wanghao129@baidu.com>

---------

Signed-off-by: wanghao129 <wanghao129@baidu.com>
Co-authored-by: wanghao129 <wanghao129@baidu.com>
Author: WANG HAO
Date: 2026-02-02 15:23:33 +08:00
Committed by: GitHub
Parent: 42a2d38f47
Commit: 6f30bc439d

3 changed files with 295 additions and 229 deletions

@@ -15,20 +15,22 @@
 # This file is a part of the vllm-ascend project.
 #
-# embedding
-import vllm_kunlun.ops.rotary_embedding
-import vllm_kunlun.ops.vocab_parallel_embedding
-# quantization
-import vllm_kunlun.ops.quantization.awq
-import vllm_kunlun.ops.quantization.gptq
-import vllm_kunlun.ops.quantization.moe_wna16
-import vllm_kunlun.ops.quantization.compressed_tensors.compressed_tensors
-import vllm_kunlun.ops.quantization.compressed_tensors.compressed_tensors_moe
-import vllm_kunlun.ops.quantization.kernels.kunlun_scale_mm
-import vllm_kunlun.ops.quantization.kernels.kunlun_exllama_linear
-import vllm_kunlun.ops.fused_moe.layer
+# base layers
+import vllm_kunlun.ops.layernorm
+import vllm_kunlun.ops.linear
+import vllm_kunlun.ops.fused_moe.layer
+# quantization
+import vllm_kunlun.ops.quantization.awq
+import vllm_kunlun.ops.quantization.compressed_tensors.compressed_tensors
+import vllm_kunlun.ops.quantization.compressed_tensors.compressed_tensors_moe
+import vllm_kunlun.ops.quantization.gptq
+import vllm_kunlun.ops.quantization.kernels.kunlun_exllama_linear
+import vllm_kunlun.ops.quantization.kernels.kunlun_scale_mm
+import vllm_kunlun.ops.quantization.moe_wna16
+# embedding
+import vllm_kunlun.ops.rotary_embedding
+import vllm_kunlun.ops.vocab_parallel_embedding
+import vllm_kunlun.v1.sample.spec_decode.eagle
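
Note on the diff above: none of these imports bind names that are used later in the file; each module is imported purely for its side effect of registering or monkey-patching a Kunlun implementation into vLLM, and the new final line pulls in the Eagle speculative-decoding patch that carries the MTP support. Below is a minimal, self-contained sketch of this import-for-side-effect pattern; every name in it (OP_REGISTRY, register_op, kunlun_rms_norm) is hypothetical and does not reproduce the actual vllm_kunlun internals.

    from typing import Callable

    # Global registry that plugin modules populate as they are imported.
    OP_REGISTRY: dict[str, Callable] = {}

    def register_op(name: str):
        """Decorator that records an implementation at import time."""
        def deco(fn: Callable) -> Callable:
            OP_REGISTRY[name] = fn
            return fn
        return deco

    # In a real layout this would live in its own module (say, ops/layernorm.py),
    # and a bare `import ops.layernorm` at package init would suffice to run it.
    @register_op("rms_norm")
    def kunlun_rms_norm(x: list[float], eps: float = 1e-6) -> list[float]:
        scale = (sum(v * v for v in x) / len(x) + eps) ** 0.5
        return [v / scale for v in x]

    if __name__ == "__main__":
        print(sorted(OP_REGISTRY))                  # -> ['rms_norm']
        print(OP_REGISTRY["rms_norm"]([3.0, 4.0]))  # -> [0.848..., 1.131...]

This is why the ordering churn in the diff is behavior-preserving: as long as every module is still imported once, the registrations happen regardless of order.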
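Since the point of the commit is MTP support in the Eagle speculative-decoding path, a hedged usage sketch follows, based on upstream vLLM V1's documented speculative_config interface and its "deepseek_mtp" method name; whether this Kunlun fork exposes the identical knobs is an assumption, and the model path is only a placeholder.

    from vllm import LLM, SamplingParams

    llm = LLM(
        model="deepseek-ai/DeepSeek-V3",   # placeholder; MTP weights ship with V3/R1-style checkpoints
        tensor_parallel_size=8,            # illustrative only
        speculative_config={
            "method": "deepseek_mtp",      # reuse the model's own MTP head as the drafter
            "num_speculative_tokens": 1,   # these checkpoints ship a single MTP layer
        },
    )
    out = llm.generate(["The capital of France is"], SamplingParams(max_tokens=32))
    print(out[0].outputs[0].text)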