diff --git a/.github/workflows/vllm_ascend_test_pd.yaml b/.github/workflows/vllm_ascend_test_pd.yaml index 932b3e5..a824f60 100644 --- a/.github/workflows/vllm_ascend_test_pd.yaml +++ b/.github/workflows/vllm_ascend_test_pd.yaml @@ -41,7 +41,11 @@ jobs: if: ${{ contains(github.event.pull_request.labels.*.name, 'pd-test') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') || github.event_name == 'schedule' }} strategy: matrix: - vllm_verison: [main, v0.9.1] + vllm_verison: [ + # revert me when V1 disaggregated prefill is merged into main + # main, + v0.9.1 + ] name: vLLM Ascend prefilling decoding disaggregation test runs-on: linux-arm64-npu-static-8 diff --git a/tests/e2e/pd_disaggreate/setup_pd.sh b/tests/e2e/pd_disaggreate/setup_pd.sh index 675bee4..c15f109 100644 --- a/tests/e2e/pd_disaggreate/setup_pd.sh +++ b/tests/e2e/pd_disaggreate/setup_pd.sh @@ -66,6 +66,7 @@ function run_prefill_instance() { --served-model-name Deepseek \ --max-model-len 2000 \ --trust-remote-code \ + --enforce-eager \ --kv-transfer-config "$KV_CONFIG" } @@ -119,6 +120,7 @@ function run_decode_instance() { --max-num-batched-tokens 2000 \ --trust-remote-code \ --gpu-memory-utilization 0.9 \ + --enforce-eager \ --kv-transfer-config "$KV_CONFIG" }