[CI][Benchmark] Add new model and v1 test to perf benchmarks (#1099)
### What this PR does / why we need it?

- Add qwen2.5-7b-instruct test
- Add v1 test

---------

Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
16
.github/workflows/nightly_benchmarks.yaml
vendored
16
.github/workflows/nightly_benchmarks.yaml
vendored
@@ -45,13 +45,18 @@ jobs:
|
|||||||
test:
|
test:
|
||||||
if: ${{ contains(github.event.pull_request.labels.*.name, 'performance-test') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
|
if: ${{ contains(github.event.pull_request.labels.*.name, 'performance-test') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
|
||||||
|
|
||||||
name: Benchmarks/vLLM=${{ matrix.vllm_branch }}, vLLM-Ascend=${{ matrix.vllm_ascend_branch }}
|
name: Benchmarks/vLLM=${{ matrix.vllm_branch }}, vLLM-Ascend=${{ matrix.vllm_ascend_branch }}, use_v1=${{ matrix.vllm_use_v1 }}
|
||||||
runs-on: 'linux-arm64-npu-static-8'
|
runs-on: 'linux-arm64-npu-static-8'
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
- vllm_branch: v0.9.1
|
- vllm_branch: v0.9.1
|
||||||
vllm_ascend_branch: main
|
vllm_ascend_branch: main
|
||||||
|
vllm_use_v1: 0
|
||||||
|
- vllm_branch: v0.9.0
|
||||||
|
vllm_ascend_branch: main
|
||||||
|
vllm_use_v1: 1
|
||||||
|
max-parallel: 1
|
||||||
container:
|
container:
|
||||||
image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
|
image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
|
||||||
volumes:
|
volumes:
|
||||||
@@ -71,6 +76,7 @@ jobs:
|
|||||||
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
||||||
ES_OM_DOMAIN: ${{ secrets.ES_OM_DOMAIN }}
|
ES_OM_DOMAIN: ${{ secrets.ES_OM_DOMAIN }}
|
||||||
ES_OM_AUTHORIZATION: ${{ secrets.ES_OM_AUTHORIZATION }}
|
ES_OM_AUTHORIZATION: ${{ secrets.ES_OM_AUTHORIZATION }}
|
||||||
|
VLLM_USE_V1: ${{ matrix.vllm_use_v1 }}
|
||||||
steps:
|
steps:
|
||||||
- name: Check npu and CANN info
|
- name: Check npu and CANN info
|
||||||
run: |
|
run: |
|
||||||
@@ -140,7 +146,7 @@ jobs:
|
|||||||
- name: Install elastic_tool
|
- name: Install elastic_tool
|
||||||
if: github.event_name != 'pull_request'
|
if: github.event_name != 'pull_request'
|
||||||
run: |
|
run: |
|
||||||
pip install escli-tool==0.2.1
|
pip install escli-tool==0.2.2
|
||||||
|
|
||||||
- name: Collect pr info from vllm-project/vllm-ascend
|
- name: Collect pr info from vllm-project/vllm-ascend
|
||||||
if: github.event_name != 'pull_request'
|
if: github.event_name != 'pull_request'
|
||||||
@@ -177,17 +183,17 @@ jobs:
|
|||||||
echo "vllm branch: ${{ matrix.vllm_branch }}"
|
echo "vllm branch: ${{ matrix.vllm_branch }}"
|
||||||
echo "vllm-ascend branch: ${{ matrix.vllm_ascend_branch }}"
|
echo "vllm-ascend branch: ${{ matrix.vllm_ascend_branch }}"
|
||||||
echo "------------------------"
|
echo "------------------------"
|
||||||
|
|
||||||
cd /github/home
|
cd /github/home
|
||||||
bash benchmarks/scripts/run-performance-benchmarks.sh
|
bash benchmarks/scripts/run-performance-benchmarks.sh
|
||||||
# send the result to es
|
# send the result to es
|
||||||
if [[ "${{ github.event_name }}" != "pull request" ]]; then
|
|
||||||
escli add --vllm_branch ${{ matrix.vllm_branch }} \
|
escli add --vllm_branch ${{ matrix.vllm_branch }} \
|
||||||
--vllm_ascend_branch ${{ matrix.vllm_ascend_branch }} \
|
--vllm_ascend_branch ${{ matrix.vllm_ascend_branch }} \
|
||||||
--commit_id $commit_id \
|
--commit_id $commit_id \
|
||||||
--commit_title "$commit_title" \
|
--commit_title "$commit_title" \
|
||||||
--created_at "$commit_time_no_tz" \
|
--created_at "$commit_time_no_tz" \
|
||||||
--res_dir ./benchmarks/results
|
--res_dir ./benchmarks/results \
|
||||||
|
--extra_feat '{"VLLM_USE_V1": "${{ matrix.vllm_use_v1 }}"}'
|
||||||
rm -rf ./benchmarks/results
|
rm -rf ./benchmarks/results
|
||||||
fi
|
|
||||||
cd -
|
cd -
|
||||||
done < commit_log.txt
|
done < commit_log.txt
|
||||||
|
|||||||
@@ -9,5 +9,15 @@
|
|||||||
"num_iters_warmup": 5,
|
"num_iters_warmup": 5,
|
||||||
"num_iters": 15
|
"num_iters": 15
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"test_name": "latency_qwen2_5_7B_tp1",
|
||||||
|
"parameters": {
|
||||||
|
"model": "Qwen/Qwen2.5-7B-Instruct",
|
||||||
|
"tensor_parallel_size": 1,
|
||||||
|
"load_format": "dummy",
|
||||||
|
"num_iters_warmup": 5,
|
||||||
|
"num_iters": 15
|
||||||
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -49,5 +49,29 @@
|
|||||||
"dataset_path": "/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json",
|
"dataset_path": "/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json",
|
||||||
"num_prompts": 200
|
"num_prompts": 200
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"test_name": "serving_qwen2_5_7B_tp1",
|
||||||
|
"qps_list": [
|
||||||
|
1,
|
||||||
|
4,
|
||||||
|
16,
|
||||||
|
"inf"
|
||||||
|
],
|
||||||
|
"server_parameters": {
|
||||||
|
"model": "Qwen/Qwen2.5-7B-Instruct",
|
||||||
|
"tensor_parallel_size": 1,
|
||||||
|
"swap_space": 16,
|
||||||
|
"disable_log_stats": "",
|
||||||
|
"disable_log_requests": "",
|
||||||
|
"load_format": "dummy"
|
||||||
|
},
|
||||||
|
"client_parameters": {
|
||||||
|
"model": "Qwen/Qwen2.5-7B-Instruct",
|
||||||
|
"backend": "vllm",
|
||||||
|
"dataset_name": "sharegpt",
|
||||||
|
"dataset_path": "/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json",
|
||||||
|
"num_prompts": 200
|
||||||
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -22,6 +22,17 @@
|
|||||||
"dataset_path": "lmarena-ai/vision-arena-bench-v0.1",
|
"dataset_path": "lmarena-ai/vision-arena-bench-v0.1",
|
||||||
"num_prompts": 200
|
"num_prompts": 200
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"test_name": "throughput_qwen2_5_7B_tp1",
|
||||||
|
"parameters": {
|
||||||
|
"model": "Qwen/Qwen2.5-7B-Instruct",
|
||||||
|
"tensor_parallel_size": 1,
|
||||||
|
"load_format": "dummy",
|
||||||
|
"dataset_path": "/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json",
|
||||||
|
"num_prompts": 200,
|
||||||
|
"backend": "vllm"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user