[CI] Fix PD job (#1129)
Fix the e2e test for the PD job.

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
@@ -14,7 +14,7 @@ _err() { _red "Error: $*" && exit 1; }
|
|||||||
|
|
||||||
CURL_TIMEOUT=1
|
CURL_TIMEOUT=1
|
||||||
CURL_COOLDOWN=5
|
CURL_COOLDOWN=5
|
||||||
CURL_MAX_TRIES=120
|
CURL_MAX_TRIES=180
|
||||||
|
|
||||||
function wait_url_ready() {
|
function wait_url_ready() {
|
||||||
local serve_name="$1"
|
local serve_name="$1"
|
||||||
@@ -31,7 +31,7 @@ function wait_url_ready() {
|
|||||||
break
|
break
|
||||||
fi
|
fi
|
||||||
if [ "$i" -gt "$CURL_MAX_TRIES" ]; then
|
if [ "$i" -gt "$CURL_MAX_TRIES" ]; then
|
||||||
_info "===> \$CURL_MAX_TRIES exceeded waiting for ${serve_name} to be ready"
|
_info "===> ${CURL_MAX_TRIES}s exceeded waiting for ${serve_name} to be ready"
|
||||||
return 1
|
return 1
|
||||||
fi
|
fi
|
||||||
sleep "$CURL_COOLDOWN"
|
sleep "$CURL_COOLDOWN"
|
||||||
|
|||||||
@@ -66,7 +66,7 @@ function run_prefill_instance() {
|
|||||||
--served-model-name Deepseek \
|
--served-model-name Deepseek \
|
||||||
--max-model-len 2000 \
|
--max-model-len 2000 \
|
||||||
--trust-remote-code \
|
--trust-remote-code \
|
||||||
--kv-transfer-config "$KV_CONFIG" &
|
--kv-transfer-config "$KV_CONFIG"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -119,7 +119,7 @@ function run_decode_instance() {
|
|||||||
--max-num-batched-tokens 2000 \
|
--max-num-batched-tokens 2000 \
|
||||||
--trust-remote-code \
|
--trust-remote-code \
|
||||||
--gpu-memory-utilization 0.9 \
|
--gpu-memory-utilization 0.9 \
|
||||||
--kv-transfer-config "$KV_CONFIG" &
|
--kv-transfer-config "$KV_CONFIG"
|
||||||
}
|
}
|
||||||
|
|
||||||
function run_proxy_server() {
|
function run_proxy_server() {
|
||||||
|
|||||||
@@ -43,16 +43,16 @@ _info "Started pd disaggregated proxy server"
|
|||||||
|
|
||||||
PREFILL_PROC_NAME="Prefill-instance"
|
PREFILL_PROC_NAME="Prefill-instance"
|
||||||
PREFILL_PORT=8001
|
PREFILL_PORT=8001
|
||||||
run_prefill_instance $MODEL_NAME $TP_SIZE $PREFILL_PORT $REGISTER_PORT $PREFILL_DEVICE_IPS $DECODE_DEVICE_IPS
|
|
||||||
_info "Starting prefill instance"
|
_info "Starting prefill instance"
|
||||||
|
run_prefill_instance $MODEL_NAME $TP_SIZE $PREFILL_PORT $REGISTER_PORT $PREFILL_DEVICE_IPS $DECODE_DEVICE_IPS &
|
||||||
|
_info "Waiting for prefill instance ready"
|
||||||
wait_url_ready $PREFILL_PROC_NAME "http://localhost:${PREFILL_PORT}/v1/completions"
|
wait_url_ready $PREFILL_PROC_NAME "http://localhost:${PREFILL_PORT}/v1/completions"
|
||||||
|
|
||||||
DECODE_PROC_NAME="Decode-instance"
|
DECODE_PROC_NAME="Decode-instance"
|
||||||
DECODE_PORT=8002
|
DECODE_PORT=8002
|
||||||
run_decode_instance $MODEL_NAME $TP_SIZE $DECODE_PORT $REGISTER_PORT $PREFILL_DEVICE_IPS $DECODE_DEVICE_IPS
|
|
||||||
_info "Starting decode instance"
|
_info "Starting decode instance"
|
||||||
|
run_decode_instance $MODEL_NAME $TP_SIZE $DECODE_PORT $REGISTER_PORT $PREFILL_DEVICE_IPS $DECODE_DEVICE_IPS &
|
||||||
|
_info "Waiting for decode instance ready"
|
||||||
wait_url_ready $DECODE_PROC_NAME "http://localhost:${DECODE_PORT}/v1/completions"
|
wait_url_ready $DECODE_PROC_NAME "http://localhost:${DECODE_PORT}/v1/completions"
|
||||||
|
|
||||||
_info "pd disaggregated system is ready for handling request"
|
_info "pd disaggregated system is ready for handling request"
|
||||||
|
|||||||
Reference in New Issue
Block a user