Nightly benchmarks: add a vllm_use_v1 matrix axis and Qwen2.5-7B test cases.

Summary of what this patch changes:
  * nightly_benchmarks.yaml
      - job name and matrix gain `vllm_use_v1`; a second matrix entry
        (vllm_branch v0.9.0, vllm_use_v1 1) is added and the strategy is
        limited with `max-parallel: 1`
      - `VLLM_USE_V1` is exported to the job env from the matrix value
      - escli-tool is bumped from 0.2.1 to 0.2.2
      - the benchmark loop now does `cd /github/home` before running the
        script, drops the shell-level `if [[ ... != "pull request" ]]` guard
        around `escli add`, and passes `--extra_feat` with the VLLM_USE_V1 value
  * latency-tests.json, serving-tests.json, throughput-tests.json
      - each gains one Qwen/Qwen2.5-7B-Instruct tp1 entry

NOTE(review): this patch was recovered from a copy whose newlines had been
collapsed. Line breaks and hunk line counts were rebuilt from the `@@` headers;
leading indentation and the position of blank context lines (written as a
single space) are reconstructed by convention - confirm against the target
files before applying, or apply with whitespace-tolerant options.

diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml
index 6ee1b45..27ddb65 100644
--- a/.github/workflows/nightly_benchmarks.yaml
+++ b/.github/workflows/nightly_benchmarks.yaml
@@ -45,13 +45,18 @@ jobs:
   test:
     if: ${{ contains(github.event.pull_request.labels.*.name, 'performance-test') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
 
-    name: Benchmarks/vLLM=${{ matrix.vllm_branch }}, vLLM-Ascend=${{ matrix.vllm_ascend_branch }}
+    name: Benchmarks/vLLM=${{ matrix.vllm_branch }}, vLLM-Ascend=${{ matrix.vllm_ascend_branch }}, use_v1=${{ matrix.vllm_use_v1 }}
     runs-on: 'linux-arm64-npu-static-8'
     strategy:
       matrix:
         include:
           - vllm_branch: v0.9.1
             vllm_ascend_branch: main
+            vllm_use_v1: 0
+          - vllm_branch: v0.9.0
+            vllm_ascend_branch: main
+            vllm_use_v1: 1
+      max-parallel: 1
     container:
       image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
       volumes:
@@ -71,6 +76,7 @@ jobs:
       HF_TOKEN: ${{ secrets.HF_TOKEN }}
       ES_OM_DOMAIN: ${{ secrets.ES_OM_DOMAIN }}
       ES_OM_AUTHORIZATION: ${{ secrets.ES_OM_AUTHORIZATION }}
+      VLLM_USE_V1: ${{ matrix.vllm_use_v1 }}
     steps:
       - name: Check npu and CANN info
         run: |
@@ -140,7 +146,7 @@ jobs:
       - name: Install elastic_tool
         if: github.event_name != 'pull_request'
         run: |
-          pip install escli-tool==0.2.1
+          pip install escli-tool==0.2.2
 
       - name: Collect pr info from vllm-project/vllm-ascend
         if: github.event_name != 'pull_request'
@@ -177,17 +183,17 @@ jobs:
             echo "vllm branch: ${{ matrix.vllm_branch }}"
             echo "vllm-ascend branch: ${{ matrix.vllm_ascend_branch }}"
             echo "------------------------"
+            cd /github/home
             bash benchmarks/scripts/run-performance-benchmarks.sh
             # send the result to es
-            if [[ "${{ github.event_name }}" != "pull request" ]]; then
-              escli add --vllm_branch ${{ matrix.vllm_branch }} \
-                --vllm_ascend_branch ${{ matrix.vllm_ascend_branch }} \
-                --commit_id $commit_id \
-                --commit_title "$commit_title" \
-                --created_at "$commit_time_no_tz" \
-                --res_dir ./benchmarks/results
-              rm -rf ./benchmarks/results
-            fi
+            escli add --vllm_branch ${{ matrix.vllm_branch }} \
+              --vllm_ascend_branch ${{ matrix.vllm_ascend_branch }} \
+              --commit_id $commit_id \
+              --commit_title "$commit_title" \
+              --created_at "$commit_time_no_tz" \
+              --res_dir ./benchmarks/results \
+              --extra_feat '{"VLLM_USE_V1": "${{ matrix.vllm_use_v1 }}"}'
+            rm -rf ./benchmarks/results
             cd -
           done < commit_log.txt
 
diff --git a/benchmarks/tests/latency-tests.json b/benchmarks/tests/latency-tests.json
index 576ced2..40cec4c 100644
--- a/benchmarks/tests/latency-tests.json
+++ b/benchmarks/tests/latency-tests.json
@@ -9,5 +9,15 @@
             "num_iters_warmup": 5,
             "num_iters": 15
         }
+    },
+    {
+        "test_name": "latency_qwen2_5_7B_tp1",
+        "parameters": {
+            "model": "Qwen/Qwen2.5-7B-Instruct",
+            "tensor_parallel_size": 1,
+            "load_format": "dummy",
+            "num_iters_warmup": 5,
+            "num_iters": 15
+        }
     }
 ]
diff --git a/benchmarks/tests/serving-tests.json b/benchmarks/tests/serving-tests.json
index d8ad2be..c8d5cda 100644
--- a/benchmarks/tests/serving-tests.json
+++ b/benchmarks/tests/serving-tests.json
@@ -49,5 +49,29 @@
             "dataset_path": "/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json",
             "num_prompts": 200
         }
+    },
+    {
+        "test_name": "serving_qwen2_5_7B_tp1",
+        "qps_list": [
+            1,
+            4,
+            16,
+            "inf"
+        ],
+        "server_parameters": {
+            "model": "Qwen/Qwen2.5-7B-Instruct",
+            "tensor_parallel_size": 1,
+            "swap_space": 16,
+            "disable_log_stats": "",
+            "disable_log_requests": "",
+            "load_format": "dummy"
+        },
+        "client_parameters": {
+            "model": "Qwen/Qwen2.5-7B-Instruct",
+            "backend": "vllm",
+            "dataset_name": "sharegpt",
+            "dataset_path": "/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json",
+            "num_prompts": 200
+        }
     }
 ]
diff --git a/benchmarks/tests/throughput-tests.json b/benchmarks/tests/throughput-tests.json
index 551d238..3698e69 100644
--- a/benchmarks/tests/throughput-tests.json
+++ b/benchmarks/tests/throughput-tests.json
@@ -22,6 +22,17 @@
             "dataset_path": "lmarena-ai/vision-arena-bench-v0.1",
             "num_prompts": 200
         }
+    },
+    {
+        "test_name": "throughput_qwen2_5_7B_tp1",
+        "parameters": {
+            "model": "Qwen/Qwen2.5-7B-Instruct",
+            "tensor_parallel_size": 1,
+            "load_format": "dummy",
+            "dataset_path": "/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json",
+            "num_prompts": 200,
+            "backend": "vllm"
+        }
     }
 ]
 