[Generative Scores API] add performance tests to CICD (#10830)

This commit is contained in:
Vedant V Jhaveri
2025-10-02 19:57:55 -07:00
committed by GitHub
parent 3c699772c9
commit 7e61737d3f
3 changed files with 250 additions and 0 deletions

View File

@@ -460,6 +460,39 @@ jobs:
cd test/srt
python3 -m unittest test_bench_serving.TestBenchServing.test_vlm_online_latency
performance-test-1-gpu-part-3:
needs: [check-changes, sgl-kernel-build-wheels]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
runs-on: 1-gpu-runner
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Download artifacts
if: needs.check-changes.outputs.sgl_kernel == 'true'
uses: actions/download-artifact@v4
with:
path: sgl-kernel/dist/
merge-multiple: true
pattern: wheel-python3.10-cuda12.9
- name: Install dependencies
run: |
CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
- name: Benchmark Scores online latency and throughput
timeout-minutes: 10
run: |
cd test/srt
python3 -m unittest test_bench_serving.TestBenchServing.test_score_api_latency_throughput
- name: Benchmark Scores online latency and throughput (batch size scaling)
timeout-minutes: 10
run: |
cd test/srt
python3 -m unittest test_bench_serving.TestBenchServing.test_score_api_batch_scaling
performance-test-2-gpu:
needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels]
if: always() && !failure() && !cancelled() &&