port deepseekv2 and mtp to main branch (#429)
### What this PR does / why we need it? This PR ports all of the DeepSeek graph-mode code and MTP code from v0.7.3 to the main branch. --------- Signed-off-by: SidaoY <1024863041@qq.com> Signed-off-by: linfeng-yuan <1102311262@qq.com> Signed-off-by: Yizhou Liu <liuyizhou5@h-partners.com> Signed-off-by: mengwei805 <mengwei25@huawei.com> Signed-off-by: libaokui <libaokui@huawei.com> Signed-off-by: q00832892 <qiaoyang19@huawei.com> Signed-off-by: ganyi <pleaplusone.gy@gmail.com> Co-authored-by: SidaoY <1024863041@qq.com> Co-authored-by: linfeng-yuan <1102311262@qq.com> Co-authored-by: Yizhou Liu <liuyizhou5@h-partners.com> Co-authored-by: mengwei805 <mengwei25@huawei.com> Co-authored-by: libaokui <libaokui@huawei.com>
This commit is contained in:
@@ -462,4 +462,4 @@ class LLMDataDistConnector(KVConnectorBase):
|
||||
|
||||
def close(self, ):
|
||||
self.llm_datadist_engine.data_dist.unlink_clusters([self.cluster],
|
||||
5000)
|
||||
5000)
|
||||
|
||||
75
vllm_ascend/distributed/parallel_state.py
Normal file
75
vllm_ascend/distributed/parallel_state.py
Normal file
@@ -0,0 +1,75 @@
|
||||
from typing import Optional
|
||||
|
||||
import torch
|
||||
from vllm.distributed.parallel_state import (GroupCoordinator, get_world_group,
|
||||
init_model_parallel_group)
|
||||
|
||||
# vllm-ascend maintains its own expert parallel (EP) and expert tensor
# parallel (ETP) GroupCoordinators to customize its parallel solution.
# Each is None until init_ascend_model_parallel() runs, and is reset to
# None by destory_ascend_model_parallel().
_EP: Optional[GroupCoordinator] = None
# NOTE: a single coordinator, not a list -- init_ascend_model_parallel()
# assigns the result of one init_model_parallel_group() call here, and
# get_etp_group() returns it typed as GroupCoordinator.
_ETP: Optional[GroupCoordinator] = None
|
||||
|
||||
|
||||
def get_ep_group() -> GroupCoordinator:
    """Return the expert-parallel GroupCoordinator.

    Raises:
        AssertionError: if init_ascend_model_parallel() has not run yet.
    """
    group = _EP
    assert group is not None, ("expert model parallel group is not initialized")
    return group
|
||||
|
||||
|
||||
def get_etp_group() -> GroupCoordinator:
    """Return the expert-tensor-parallel GroupCoordinator.

    Raises:
        AssertionError: if init_ascend_model_parallel() has not run yet.
    """
    group = _ETP
    assert group is not None, (
        "expert tensor parallel group is not initialized")
    return group
|
||||
|
||||
|
||||
def init_ascend_model_parallel(
    tensor_model_parallel_size: int = 1,
    pipeline_model_parallel_size: int = 1,
    expert_tensor_parallel_size: int = 1,
    backend: Optional[str] = None,
):
    """Initialize vllm-ascend's expert parallel (EP) and expert tensor
    parallel (ETP) process groups.

    Args:
        tensor_model_parallel_size: accepted for signature compatibility;
            not used by this function.
        pipeline_model_parallel_size: accepted for signature compatibility;
            not used by this function.
        expert_tensor_parallel_size: number of ranks that jointly shard one
            expert's tensors. Must evenly divide the world size.
        backend: torch.distributed backend name; defaults to the backend of
            the world group's device group.

    Raises:
        AssertionError: if torch.distributed is not initialized, the world
            size is not divisible by ``expert_tensor_parallel_size``, or the
            EP/ETP groups are already initialized.
    """
    assert torch.distributed.is_initialized()
    world_size: int = torch.distributed.get_world_size()
    # Fail fast on an invalid layout; otherwise the range arithmetic below
    # silently builds rank groups that do not cover every rank.
    assert world_size % expert_tensor_parallel_size == 0, (
        "world size must be divisible by expert_tensor_parallel_size")
    backend = backend or torch.distributed.get_backend(
        get_world_group().device_group)
    # EP groups stride across ranks; ETP groups take contiguous rank slices.
    num_expert_parallel_groups: int = expert_tensor_parallel_size
    num_expert_tensor_parallel_groups: int = (world_size //
                                              expert_tensor_parallel_size)

    global _EP
    assert _EP is None, ("expert parallel group is already initialized")
    # EP group i = {i, i + stride, i + 2*stride, ...} where the stride is
    # expert_tensor_parallel_size.
    group_ranks = [
        list(range(i, world_size, num_expert_parallel_groups))
        for i in range(num_expert_parallel_groups)
    ]
    _EP = init_model_parallel_group(group_ranks,
                                    get_world_group().local_rank,
                                    backend,
                                    group_name="ep")

    global _ETP
    assert _ETP is None, (
        "expert tensor parallel group is already initialized")
    # ETP group i = the contiguous block [i * etp_size, (i + 1) * etp_size).
    group_ranks = [
        list(
            range(i * expert_tensor_parallel_size,
                  (i + 1) * expert_tensor_parallel_size))
        for i in range(num_expert_tensor_parallel_groups)
    ]
    _ETP = init_model_parallel_group(group_ranks,
                                     get_world_group().local_rank,
                                     backend,
                                     group_name="etp")
|
||||
|
||||
|
||||
def destory_ascend_model_parallel():
    """Destroy the EP and ETP groups and reset module state to
    uninitialized, so init_ascend_model_parallel() may be called again.

    (The function name's spelling is part of the public API and is kept.)
    """
    global _EP, _ETP
    ep, _EP = _EP, None
    if ep:
        ep.destroy()

    etp, _ETP = _ETP, None
    if etp:
        etp.destroy()
|
||||
Reference in New Issue
Block a user