[long_seq] remove long_seq env (#4660)
### What this PR does / why we need it?

Remove the environment variable `VLLM_ASCEND_ENABLE_CONTEXT_PARALLEL`.

- vLLM version: v0.12.0

---------

Signed-off-by: LookAround <lixushi@huawei.com>
Signed-off-by: ZhangMingWei716 <2894054457@qq.com>
Co-authored-by: ZhangMingWei716 <2894054457@qq.com>
Co-authored-by: wangxiyuan <wangxiyuan1007@gmail.com>
@@ -9,8 +9,7 @@ from vllm.distributed.parallel_state import (GroupCoordinator, get_dp_group,
 import vllm_ascend.envs as envs_ascend
 from vllm_ascend.ascend_config import get_ascend_config
-from vllm_ascend.utils import (flashcomm2_enable,
-                               prefill_context_parallel_enable)
+from vllm_ascend.utils import flashcomm2_enable
 
 # Currently, mc2 op need their own group coordinator.
 _MC2: Optional[GroupCoordinator] = None
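The dropped `prefill_context_parallel_enable()` import presumably read the `VLLM_ASCEND_ENABLE_CONTEXT_PARALLEL` environment variable that this PR removes, so the helper is no longer needed here. A minimal sketch of such an env-gated flag, assuming a plain `os.getenv` check (the real vllm_ascend helper may go through `vllm_ascend.envs` instead; the body below is an illustration, not the project's implementation):

```python
import os

def prefill_context_parallel_enable() -> bool:
    # Hypothetical sketch of the env gate this PR removes: treat "1"/"true" as enabled.
    return os.getenv("VLLM_ASCEND_ENABLE_CONTEXT_PARALLEL", "0").lower() in ("1", "true")
```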
@@ -74,15 +73,10 @@ def init_ascend_model_parallel(parallel_config: ParallelConfig, ):
     # The layout of all ranks: ExternalDP * EP
     # ExternalDP is the data parallel group that is not part of the model,
     # every dp rank can generate independently (in verl integration).
-    if prefill_context_parallel_enable():
-        all_ranks = torch.arange(world_size).reshape(
-            -1, parallel_config.data_parallel_size *
-            parallel_config.prefill_context_parallel_size *
-            parallel_config.tensor_parallel_size)
-    else:
-        all_ranks = torch.arange(world_size).reshape(
-            -1, parallel_config.data_parallel_size *
-            parallel_config.tensor_parallel_size)
+    all_ranks = torch.arange(world_size).reshape(
+        -1, parallel_config.data_parallel_size *
+        parallel_config.prefill_context_parallel_size *
+        parallel_config.tensor_parallel_size)
 
     pd_tp_ratio = get_ascend_config().pd_tp_ratio
     pd_head_ratio = get_ascend_config().pd_head_ratio
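With the gate gone, the reshape always includes `parallel_config.prefill_context_parallel_size`, which collapses to the old `else` branch whenever that size is 1 (the usual default when context parallelism is unused). A small runnable sketch of the resulting rank layout, using illustrative sizes rather than a real `ParallelConfig`:

```python
import torch

# Illustrative values only; in vllm-ascend these come from vllm's ParallelConfig.
data_parallel_size = 2
prefill_context_parallel_size = 1  # assumed default when context parallel is unused
tensor_parallel_size = 4
world_size = data_parallel_size * prefill_context_parallel_size * tensor_parallel_size

# Same reshape as the retained code path: one row per external-DP replica,
# dp * pcp * tp ranks per row.
all_ranks = torch.arange(world_size).reshape(
    -1, data_parallel_size * prefill_context_parallel_size * tensor_parallel_size)
print(all_ranks)  # tensor([[0, 1, 2, 3, 4, 5, 6, 7]])
```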