diff --git a/examples/offline_disaggregated_prefill_npu.py b/examples/offline_disaggregated_prefill_npu.py index f37b508..0bf69fc 100644 --- a/examples/offline_disaggregated_prefill_npu.py +++ b/examples/offline_disaggregated_prefill_npu.py @@ -79,7 +79,7 @@ def run_prefill(prefill_done, process_close): def run_decode(prefill_done): - os.environ['VLLM_LLMDD_RPC_PORT'] = '6634' + os.environ['VLLM_ASCEND_LLMDD_RPC_PORT'] = '6634' # ranktable.json needs to be generated using gen_ranktable.sh # from the examples/disaggregated_prefill_v1 module in the main branch. os.environ['DISAGGREGATED_PREFILL_RANK_TABLE_PATH'] = "./ranktable.json" diff --git a/tests/e2e/pd_disaggreate/run_edge_case_test.sh b/tests/e2e/pd_disaggreate/run_edge_case_test.sh index a086df0..49e06e5 100644 --- a/tests/e2e/pd_disaggreate/run_edge_case_test.sh +++ b/tests/e2e/pd_disaggreate/run_edge_case_test.sh @@ -70,7 +70,7 @@ run_tests_for_model() { # Start prefill instance PREFILL_PORT=8001 - BASE_CMD="ASCEND_RT_VISIBLE_DEVICES=0 VLLM_LLMDD_RPC_PORT=5559 vllm serve $model_name \ + BASE_CMD="ASCEND_RT_VISIBLE_DEVICES=0 VLLM_ASCEND_LLMDD_RPC_PORT=5559 vllm serve $model_name \ --port $PREFILL_PORT \ --seed 1024 \ --enforce-eager \ @@ -90,7 +90,7 @@ run_tests_for_model() { DECODE_PORT=8002 # Build the command with or without model-specific args - BASE_CMD="ASCEND_RT_VISIBLE_DEVICES=1 VLLM_LLMDD_RPC_PORT=6000 vllm serve $model_name \ + BASE_CMD="ASCEND_RT_VISIBLE_DEVICES=1 VLLM_ASCEND_LLMDD_RPC_PORT=6000 vllm serve $model_name \ --port $DECODE_PORT \ --seed 1024 \ --enforce-eager \