[CI/UT] Fix disaggregated prefill ci (#1313)
### What this PR does / why we need it?
Use eager mode to run the disaggregated prefill CI.

### Does this PR introduce _any_ user-facing change?
N/A

### How was this patch tested?
CI passed with the existing test.

---------

Signed-off-by: MengqingCao <cmq0113@163.com>
This commit is contained in:
6
.github/workflows/vllm_ascend_test_pd.yaml
vendored
6
.github/workflows/vllm_ascend_test_pd.yaml
vendored
@@ -41,7 +41,11 @@ jobs:
|
||||
if: ${{ contains(github.event.pull_request.labels.*.name, 'pd-test') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') || github.event_name == 'schedule' }}
|
||||
strategy:
|
||||
matrix:
|
||||
vllm_verison: [main, v0.9.1]
|
||||
vllm_verison: [
|
||||
# revert me when V1 disaggregation prefill is merged in main
|
||||
# main,
|
||||
v0.9.1
|
||||
]
|
||||
name: vLLM Ascend prefilling decoding disaggregation test
|
||||
runs-on: linux-arm64-npu-static-8
|
||||
|
||||
|
||||
@@ -66,6 +66,7 @@ function run_prefill_instance() {
|
||||
--served-model-name Deepseek \
|
||||
--max-model-len 2000 \
|
||||
--trust-remote-code \
|
||||
--enforce-eager \
|
||||
--kv-transfer-config "$KV_CONFIG"
|
||||
}
|
||||
|
||||
@@ -119,6 +120,7 @@ function run_decode_instance() {
|
||||
--max-num-batched-tokens 2000 \
|
||||
--trust-remote-code \
|
||||
--gpu-memory-utilization 0.9 \
|
||||
--enforce-eager \
|
||||
--kv-transfer-config "$KV_CONFIG"
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user