port deepseekv2 and mtp to main branch (#429)
### What this PR does / why we need it? This PR ports all of the DeepSeek graph-mode code and MTP (multi-token prediction) code from v0.7.3 to the main branch. --------- Signed-off-by: SidaoY <1024863041@qq.com> Signed-off-by: linfeng-yuan <1102311262@qq.com> Signed-off-by: Yizhou Liu <liuyizhou5@h-partners.com> Signed-off-by: mengwei805 <mengwei25@huawei.com> Signed-off-by: libaokui <libaokui@huawei.com> Signed-off-by: q00832892 <qiaoyang19@huawei.com> Signed-off-by: ganyi <pleaplusone.gy@gmail.com> Co-authored-by: SidaoY <1024863041@qq.com> Co-authored-by: linfeng-yuan <1102311262@qq.com> Co-authored-by: Yizhou Liu <liuyizhou5@h-partners.com> Co-authored-by: mengwei805 <mengwei25@huawei.com> Co-authored-by: libaokui <libaokui@huawei.com>
This commit is contained in:
@@ -160,6 +160,8 @@ class NPUPlatform(Platform):
|
||||
@classmethod
|
||||
def get_attn_backend_cls(cls, selected_backend, head_size, dtype,
|
||||
kv_cache_dtype, block_size, use_v1, use_mla):
|
||||
if use_v1 and use_mla:
|
||||
return "vllm_ascend.attention.mla_v1.AscendMLABackend"
|
||||
if use_v1:
|
||||
return "vllm_ascend.attention.attention_v1.AscendAttentionBackend"
|
||||
if use_mla:
|
||||
@@ -191,3 +193,30 @@ class NPUPlatform(Platform):
|
||||
model configuration.
|
||||
"""
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
def destroy_platform_model_parallel(cls) -> None:
|
||||
from vllm_ascend.distributed.parallel_state import \
|
||||
destory_ascend_model_parallel
|
||||
destory_ascend_model_parallel()
|
||||
|
||||
@classmethod
|
||||
def platform_has_backend_register(cls) -> bool:
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
def platform_register_backend(cls, pg, prefix_store, group_rank,
|
||||
group_size, backend_options,
|
||||
timeout) -> None:
|
||||
from torch.distributed import ProcessGroup, is_hccl_available
|
||||
assert is_hccl_available()
|
||||
import torch_npu # noqa
|
||||
from torch_npu._C._distributed_c10d import ProcessGroupHCCL
|
||||
backend_options = ProcessGroupHCCL.Options()
|
||||
backend_options._timeout = timeout
|
||||
backend_class = ProcessGroupHCCL(prefix_store, group_rank, group_size,
|
||||
backend_options)
|
||||
device = torch.device("npu")
|
||||
backend_class._set_sequence_number_for_group()
|
||||
backend_type = ProcessGroup.BackendType.CUSTOM
|
||||
pg._register_backend(device, backend_type, backend_class)
|
||||
|
||||
Reference in New Issue
Block a user