[CI][Benchmark] Add new model and v1 test to perf benchmarks (#1099)

### What this PR does / why we need it?

- Add qwen2.5-7b-instruct test
- Add v1 test

---------

Signed-off-by: wangli <wangli858794774@gmail.com>
2025-06-12 10:46:41 +08:00
parent 2498d297ae
commit dd207cb261
4 changed files with 62 additions and 11 deletions
--- a/.github/workflows/nightly_benchmarks.yaml
+++ b/.github/workflows/nightly_benchmarks.yaml
@@ -45,13 +45,18 @@ jobs:
  test:
    if: ${{ contains(github.event.pull_request.labels.*.name, 'performance-test') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
-    name: Benchmarks/vLLM=${{ matrix.vllm_branch }}, vLLM-Ascend=${{ matrix.vllm_ascend_branch }}
+    name: Benchmarks/vLLM=${{ matrix.vllm_branch }}, vLLM-Ascend=${{ matrix.vllm_ascend_branch }}, use_v1=${{ matrix.vllm_use_v1 }}
    runs-on: 'linux-arm64-npu-static-8'
    strategy:
      matrix:
        include:
          - vllm_branch: v0.9.1
            vllm_ascend_branch: main
            vllm_use_v1: 0
          - vllm_branch: v0.9.0
            vllm_ascend_branch: main
            vllm_use_v1: 1
      max-parallel: 1
    container:
      image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
      volumes:
@@ -71,6 +76,7 @@ jobs:
        HF_TOKEN: ${{ secrets.HF_TOKEN }}
        ES_OM_DOMAIN: ${{ secrets.ES_OM_DOMAIN }}
        ES_OM_AUTHORIZATION: ${{ secrets.ES_OM_AUTHORIZATION }}
        VLLM_USE_V1: ${{ matrix.vllm_use_v1 }}
    steps:
      - name: Check npu and CANN info
        run: |
@@ -140,7 +146,7 @@ jobs:
      - name: Install elastic_tool
        if: github.event_name != 'pull_request'
        run: |
-          pip install escli-tool==0.2.1
+          pip install escli-tool==0.2.2
      - name: Collect pr info from vllm-project/vllm-ascend
        if: github.event_name != 'pull_request'
@@ -177,17 +183,17 @@ jobs:
            echo "vllm branch: ${{ matrix.vllm_branch }}"
            echo "vllm-ascend branch: ${{ matrix.vllm_ascend_branch }}"
            echo "------------------------"
            cd /github/home
            bash benchmarks/scripts/run-performance-benchmarks.sh
            # Send the results to es, then clear the results directory.
            # Skipped for pull_request events: the event name is spelled
            # `pull_request` (underscore), the same spelling used by the
            # `if:` guard on the "Install elastic_tool" step, so escli is
            # presumably not available on PR runs.
            if [[ "${{ github.event_name }}" != "pull_request" ]]; then
            escli add --vllm_branch ${{ matrix.vllm_branch }} \
            --vllm_ascend_branch ${{ matrix.vllm_ascend_branch }} \
            --commit_id $commit_id \
            --commit_title "$commit_title" \
            --created_at "$commit_time_no_tz" \
-              --res_dir ./benchmarks/results 
+            --res_dir ./benchmarks/results \
            --extra_feat '{"VLLM_USE_V1": "${{ matrix.vllm_use_v1 }}"}'
            rm -rf ./benchmarks/results
            fi
            cd -
          done < commit_log.txt
--- a/benchmarks/tests/latency-tests.json
+++ b/benchmarks/tests/latency-tests.json
@@ -9,5 +9,15 @@
      "num_iters_warmup": 5,
      "num_iters": 15
    }
  },
  {
    "test_name": "latency_qwen2_5_7B_tp1",
    "parameters": {
      "model": "Qwen/Qwen2.5-7B-Instruct",
      "tensor_parallel_size": 1,
      "load_format": "dummy",
      "num_iters_warmup": 5,
      "num_iters": 15
    }
  }
 ]
--- a/benchmarks/tests/serving-tests.json
+++ b/benchmarks/tests/serving-tests.json
@@ -49,5 +49,29 @@
      "dataset_path": "/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json",
      "num_prompts": 200
    }
  },
  {
    "test_name": "serving_qwen2_5_7B_tp1",
    "qps_list": [
      1,
      4,
      16,
      "inf"
    ],
    "server_parameters": {
      "model": "Qwen/Qwen2.5-7B-Instruct",
      "tensor_parallel_size": 1,
      "swap_space": 16,
      "disable_log_stats": "",
      "disable_log_requests": "",
      "load_format": "dummy"
    },
    "client_parameters": {
      "model": "Qwen/Qwen2.5-7B-Instruct",
      "backend": "vllm",
      "dataset_name": "sharegpt",
      "dataset_path": "/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json",
      "num_prompts": 200
    }
  }
 ]
--- a/benchmarks/tests/throughput-tests.json
+++ b/benchmarks/tests/throughput-tests.json
@@ -22,6 +22,17 @@
      "dataset_path": "lmarena-ai/vision-arena-bench-v0.1",
      "num_prompts": 200
    }
  },
  {
    "test_name": "throughput_qwen2_5_7B_tp1",
    "parameters": {
      "model": "Qwen/Qwen2.5-7B-Instruct",
      "tensor_parallel_size": 1,
      "load_format": "dummy",
      "dataset_path": "/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json",
      "num_prompts": 200,
      "backend": "vllm"
    }
  }
 ]