init v0.11.0rc0
@@ -11,7 +11,7 @@ from vllm_ascend.ascend_config import get_ascend_config
 # Currently, mc2 ops need their own group coordinator.
 _MC2: Optional[GroupCoordinator] = None
 _MLP_TP: Optional[GroupCoordinator] = None
 
-
+_OTP: Optional[GroupCoordinator] = None
 _LMTP: Optional[GroupCoordinator] = None
 
@@ -20,6 +20,12 @@ def get_mc2_group() -> GroupCoordinator:
     return _MC2
 
 
+def get_otp_group() -> GroupCoordinator:
+    assert _OTP is not None, (
+        "output tensor parallel group is not initialized")
+    return _OTP
+
+
 def get_lmhead_tp_group() -> GroupCoordinator:
     assert _LMTP is not None, (
         "lm head tensor parallel group is not initialized")
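The new accessor follows the existing assert-then-return pattern over the module-level globals. A minimal usage sketch, assuming the file is vllm_ascend/distributed/parallel_state.py (the diff does not name it) and that init_ascend_model_parallel() has already created the groups; the tensor shape and the all_reduce collective are illustrative, not taken from this commit:

```python
import torch

# Assumed module path; the diff itself does not show the file name.
from vllm_ascend.distributed.parallel_state import get_otp_group

otp_group = get_otp_group()  # raises AssertionError if the group is missing

# vLLM's GroupCoordinator exposes collectives such as all_reduce; here each
# rank contributes an illustrative partial oproj output that is summed
# across the ranks of its otp group.
partial = torch.randn(16, 128)
reduced = otp_group.all_reduce(partial)
print(otp_group.rank_in_group, otp_group.world_size, reduced.shape)
```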
@@ -74,6 +80,20 @@ def init_ascend_model_parallel(parallel_config: ParallelConfig, ):
                                         backend,
                                         group_name="mlp_tp")
 
+    # If oproj tensor parallel size is set, we will create a group for it.
+    otp_size = get_ascend_config().oproj_tensor_parallel_size
+    if otp_size is not None:
+        group_ranks = []
+        global _OTP
+        num_oproj_tensor_parallel_groups: int = (world_size // otp_size)
+        for i in range(num_oproj_tensor_parallel_groups):
+            ranks = list(range(i * otp_size, (i + 1) * otp_size))
+            group_ranks.append(ranks)
+        _OTP = init_model_parallel_group(group_ranks,
+                                         get_world_group().local_rank,
+                                         backend,
+                                         group_name="otp")
+
     lmhead_tensor_parallel_size = get_ascend_config(
     ).lmhead_tensor_parallel_size
     if lmhead_tensor_parallel_size is not None:
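The group layout is plain contiguous blocking: world_size // otp_size consecutive blocks of otp_size ranks each. Every rank builds the full group_ranks list, and init_model_parallel_group then returns the coordinator for the group containing the current rank (vLLM's usual convention). A standalone sketch of the same partitioning, with illustrative sizes:

```python
# Illustrative sizes, not from the diff: 8 ranks split into otp groups of 2.
world_size, otp_size = 8, 2
num_groups = world_size // otp_size

# Same contiguous blocking as the loop in init_ascend_model_parallel.
group_ranks = [list(range(i * otp_size, (i + 1) * otp_size))
               for i in range(num_groups)]
print(group_ranks)  # [[0, 1], [2, 3], [4, 5], [6, 7]]
```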
@@ -117,3 +137,8 @@ def destroy_ascend_model_parallel():
     if _LMTP:
         _LMTP.destroy()
         _LMTP = None
+
+    global _OTP
+    if _OTP:
+        _OTP.destroy()
+        _OTP = None
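Teardown mirrors the other groups: each destroy is guarded by a truthiness check and the global is reset to None afterwards, so the function stays safe to call when the optional groups were never created. A hedged usage sketch (module path assumed as above):

```python
# Assumed module path, as in the earlier sketch.
from vllm_ascend.distributed.parallel_state import destroy_ascend_model_parallel

# Each group is destroyed only if it exists, and its global is reset to None,
# so repeated calls simply skip the already-destroyed groups.
destroy_ascend_model_parallel()
destroy_ascend_model_parallel()  # no-op for groups that are already None
```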