From 270c5cb8cd01c4bb852e4f43a5e15bd10ddb39f6 Mon Sep 17 00:00:00 2001
From: LoganJane <42287016+LoganJane@users.noreply.github.com>
Date: Thu, 19 Mar 2026 11:02:29 +0800
Subject: [PATCH] [CI] Add nightly CI test cases for the Kimi-K2.5 (#7416)

### What this PR does / why we need it?
Add nightly CI test cases for the Kimi-K2.5.

- vLLM version: v0.17.0
- vLLM main:
  https://github.com/vllm-project/vllm/commit/4497431df654e46fb1fb5e64bf8611e762ae5d87

---------

Signed-off-by: LoganJane
Signed-off-by: LoganJane <42287016+LoganJane@users.noreply.github.com>
---
 .github/workflows/schedule_nightly_test_a3.yaml    |  3 +
 .../single_node/models/configs/Kimi-K2.5.yaml      | 62 +++++++++++++++++++
 2 files changed, 65 insertions(+)
 create mode 100644 tests/e2e/nightly/single_node/models/configs/Kimi-K2.5.yaml

diff --git a/.github/workflows/schedule_nightly_test_a3.yaml b/.github/workflows/schedule_nightly_test_a3.yaml
index e584b196..5d9c2b7c 100644
--- a/.github/workflows/schedule_nightly_test_a3.yaml
+++ b/.github/workflows/schedule_nightly_test_a3.yaml
@@ -261,6 +261,9 @@ jobs:
         - name: kimi-k2-thinking
           os: linux-aarch64-a3-16
           config_file_path: Kimi-K2-Thinking.yaml
+        - name: kimi-k2.5
+          os: linux-aarch64-a3-16
+          config_file_path: Kimi-K2.5.yaml
         - name: minimax-m2-5
           os: linux-aarch64-a3-16
           config_file_path: MiniMax-M2.5-A3.yaml
diff --git a/tests/e2e/nightly/single_node/models/configs/Kimi-K2.5.yaml b/tests/e2e/nightly/single_node/models/configs/Kimi-K2.5.yaml
new file mode 100644
index 00000000..b6ef8fbb
--- /dev/null
+++ b/tests/e2e/nightly/single_node/models/configs/Kimi-K2.5.yaml
@@ -0,0 +1,62 @@
+# ==========================================
+# Shared Configurations
+# ==========================================
+
+_envs: &envs
+  HCCL_BUFFSIZE: "512"
+  SERVER_PORT: "DEFAULT_PORT"
+  HCCL_OP_EXPANSION_MODE: "AIV"
+  OMP_PROC_BIND: "false"
+  OMP_NUM_THREADS: "1"
+  PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
+  VLLM_ASCEND_BALANCE_SCHEDULING: "1"
+
+_server_cmd: &server_cmd
+  - "--enable-expert-parallel"
+  - "--tensor-parallel-size"
+  - "8"
+  - "--data-parallel-size"
+  - "2"
+  - "--port"
+  - "$SERVER_PORT"
+  - "--max-model-len"
+  - "8192"
+  - "--max-num-batched-tokens"
+  - "8192"
+  - "--max-num-seqs"
+  - "32"
+  - "--async-scheduling"
+  - "--quantization"
+  - "ascend"
+  - "--trust-remote-code"
+  - "--gpu-memory-utilization"
+  - "0.9"
+  - "--additional-config"
+  - '{"multistream_overlap_shared_expert":true}'
+
+_benchmarks: &benchmarks
+  acc:
+    case_type: accuracy
+    dataset_path: vllm-ascend/gsm8k-lite
+    request_conf: vllm_api_general_chat
+    dataset_conf: gsm8k/gsm8k_gen_0_shot_cot_chat_prompt
+    max_out_len: 4096
+    batch_size: 8
+    baseline: 95
+    threshold: 5
+
+# ==========================================
+# ACTUAL TEST CASES
+# ==========================================
+
+test_cases:
+  - name: "Kimi-K2.5-W4A8-TP8-DP2-Case"
+    model: "Eco-Tech/Kimi-K2.5-W4A8"
+    envs:
+      <<: *envs
+    server_cmd: *server_cmd
+    server_cmd_extra:
+      - "--compilation-config"
+      - '{"cudagraph_capture_sizes": [1,2,4,8,16,32], "cudagraph_mode": "FULL_DECODE_ONLY"}'
+    benchmarks:
+      <<: *benchmarks