### What this PR does / why we need it? - This PR proposes a P2P version of Disaggregated Prefill based on llm_datadist which manages data transfer. - This solution reconstructs previous offline single-node Disaggregated Prefill solution, and supports multi-node and online serveing now. - Currently this solution supports 1P1D situation of Deepseek hybrid parallelism (P: TP+EP, D: DP+EP). Note that xPyD situation is considered in the solution design, and will be supported soon within v1 engine. --------- Signed-off-by: hw_whx <wanghexiang7@huawei.com> Signed-off-by: ganyi <pleaplusone.gy@gmail.com> Co-authored-by: hw_whx <wanghexiang7@huawei.com> Co-authored-by: ganyi <pleaplusone.gy@gmail.com>
40 lines
1.5 KiB
Python
40 lines
1.5 KiB
Python
#
|
|
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
|
# This file is a part of the vllm-ascend project.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
import llm_datadist # type: ignore
|
|
import torch
|
|
|
|
TORCH_DTYPE_TO_NPU_DTYPE = {
|
|
torch.half: llm_datadist.DataType.DT_FLOAT16,
|
|
torch.float16: llm_datadist.DataType.DT_FLOAT16,
|
|
torch.bfloat16: llm_datadist.DataType.DT_BF16,
|
|
torch.float: llm_datadist.DataType.DT_FLOAT,
|
|
torch.float32: llm_datadist.DataType.DT_FLOAT,
|
|
torch.int8: llm_datadist.DataType.DT_INT8,
|
|
torch.int64: llm_datadist.DataType.DT_INT64,
|
|
torch.int32: llm_datadist.DataType.DT_INT32,
|
|
}
|
|
|
|
NPU_DTYPE_TO_TORCH_DTYPE = {
|
|
llm_datadist.DataType.DT_FLOAT16: torch.half,
|
|
llm_datadist.DataType.DT_FLOAT16: torch.float16,
|
|
llm_datadist.DataType.DT_BF16: torch.bfloat16,
|
|
llm_datadist.DataType.DT_FLOAT: torch.float,
|
|
llm_datadist.DataType.DT_FLOAT: torch.float32,
|
|
llm_datadist.DataType.DT_INT8: torch.int8,
|
|
llm_datadist.DataType.DT_INT64: torch.int64,
|
|
llm_datadist.DataType.DT_INT32: torch.int32,
|
|
} |