Files
xc-llm-ascend/tests/e2e/run_disagg_pd.sh

59 lines
2.0 KiB
Bash
Raw Normal View History

#!/bin/bash
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
#
# End-to-end driver for the prefill/decode (PD) disaggregated test: starts the
# proxy server, then a prefill instance, then a decode instance, and waits for
# each instance's completions URL before reporting that the system is ready.
#
# The helpers _info, run_proxy_server, run_prefill_instance,
# run_decode_instance and wait_url_ready are not defined in this file;
# presumably they come from the two scripts sourced below.

# Abort on the first failing command. (errexit used to be requested twice, via
# both "-e" and "-o errexit"; a single request is equivalent.)
set -o errexit

# Quote the sourced paths so a checkout directory containing spaces still works.
. "$(dirname "$0")/common.sh"
. "$(dirname "$0")/pd_disaggreate/setup_pd.sh"

#######################################
# Print the IP address hccn_tool reports for one device.
# Arguments: $1 - device index passed to "hccn_tool -i"
# Outputs:   the address (text after "ipaddr:") on stdout
# Returns:   1, with a message on stderr, when no address could be read
#######################################
_pd_device_ip() {
  local device_id=$1
  local ip
  # xargs is the last pipeline stage and only trims whitespace, so a failing
  # hccn_tool would otherwise go unnoticed; the emptiness check below catches it.
  ip=$(/usr/local/Ascend/driver/tools/hccn_tool -i "$device_id" -ip -g \
    | awk -F: '/ipaddr/ {print $2}' \
    | xargs)
  if [[ -z "$ip" ]]; then
    printf 'Failed to read the IP address of device %s from hccn_tool\n' \
      "$device_id" >&2
    return 1
  fi
  printf '%s\n' "$ip"
}

export VLLM_USE_MODELSCOPE="True"

MODEL_NAME="deepseek-ai/DeepSeek-V2-Lite"
# TODO: add a tensor-parallel (TP_SIZE > 1) case
TP_SIZE=1

# TODO: support multi-card; for now device 0 serves prefill and device 1 decode.
# Each device list is a JSON-style array holding a single quoted address.
prefill_ip=$(_pd_device_ip 0)
PREFILL_DEVICE_IPS="[\"$prefill_ip\"]"
decode_ip=$(_pd_device_ip 1)
DECODE_DEVICE_IPS="[\"$decode_ip\"]"

_info "====> Start pd disaggregated test"

REGISTER_PORT=10101
# NOTE(review): "PREOXY" looks like a typo for "PROXY"; the name is kept because
# sourced code outside this file may read the variable — confirm before renaming.
PREOXY_PORT=10102
run_proxy_server "$REGISTER_PORT" "$PREOXY_PORT"
_info "Started pd disaggregated proxy server"

PREFILL_PROC_NAME="Prefill-instance"
PREFILL_PORT=8001
# The device lists must be quoted: unquoted, the surrounding [ ] would be
# treated as a glob bracket expression and could be replaced by a matching
# single-character filename in the current directory.
run_prefill_instance "$MODEL_NAME" "$TP_SIZE" "$PREFILL_PORT" "$REGISTER_PORT" \
  "$PREFILL_DEVICE_IPS" "$DECODE_DEVICE_IPS"
_info "Starting prefill instance"
wait_url_ready "$PREFILL_PROC_NAME" "http://localhost:${PREFILL_PORT}/v1/completions"

DECODE_PROC_NAME="Decode-instance"
DECODE_PORT=8002
run_decode_instance "$MODEL_NAME" "$TP_SIZE" "$DECODE_PORT" "$REGISTER_PORT" \
  "$PREFILL_DEVICE_IPS" "$DECODE_DEVICE_IPS"
_info "Starting decode instance"
wait_url_ready "$DECODE_PROC_NAME" "http://localhost:${DECODE_PORT}/v1/completions"

_info "pd disaggregated system is ready for handling request"