[main2main] upgrade vllm to 0308 (#7213)
### What this PR does / why we need it?
Update the main2main target to vLLM main as of 0308.
Upstream vLLM PRs that introduce breaking changes:
* https://github.com/vllm-project/vllm/pull/30681
* https://github.com/vllm-project/vllm/pull/35552 removes `self.cudagraph_batch_sizes`
* https://github.com/vllm-project/vllm/pull/35158 `clear_metadata` -> `defer_finalize`
* https://github.com/vllm-project/vllm/pull/36006 removes `CacheConfig.cpu_offload_gb`
* https://github.com/vllm-project/vllm/pull/35472
* https://github.com/vllm-project/vllm/pull/34552 attn_metadata_builder
* https://github.com/vllm-project/vllm/pull/30515 profile_seq_lens
* https://github.com/vllm-project/vllm/pull/28053
- vLLM version: v0.16.0
- vLLM main: 4034c3d32e
---------
Signed-off-by: MrZ20 <2609716663@qq.com>
Signed-off-by: menogrey <1299267905@qq.com>
Co-authored-by: MrZ20 <2609716663@qq.com>
This commit is contained in:
10
.github/workflows/_e2e_test.yaml
vendored
10
.github/workflows/_e2e_test.yaml
vendored
@@ -110,7 +110,7 @@ jobs:
|
||||
|
||||
- name: Upload timing data
|
||||
uses: actions/upload-artifact@v4
|
||||
if: ${{ inputs.continue_on_error == true }}
|
||||
if: ${{ inputs.continue_on_error == true && github.event_name != 'pull_request' }}
|
||||
with:
|
||||
name: timing-data-singlecard-light-part${{ matrix.part }}
|
||||
path: test_timing_data.json
|
||||
@@ -200,7 +200,7 @@ jobs:
|
||||
|
||||
- name: Upload timing data
|
||||
uses: actions/upload-artifact@v4
|
||||
if: ${{ inputs.continue_on_error == true }}
|
||||
if: ${{ inputs.continue_on_error == true && github.event_name != 'pull_request' }}
|
||||
with:
|
||||
name: timing-data-singlecard-full-part${{ matrix.part }}
|
||||
path: test_timing_data.json
|
||||
@@ -289,7 +289,7 @@ jobs:
|
||||
|
||||
- name: Upload timing data
|
||||
uses: actions/upload-artifact@v4
|
||||
if: ${{ inputs.continue_on_error == true }}
|
||||
if: ${{ inputs.continue_on_error == true && github.event_name != 'pull_request' }}
|
||||
with:
|
||||
name: timing-data-2card-light-part${{ matrix.part }}
|
||||
path: test_timing_data.json
|
||||
@@ -378,7 +378,7 @@ jobs:
|
||||
|
||||
- name: Upload timing data
|
||||
uses: actions/upload-artifact@v4
|
||||
if: ${{ inputs.continue_on_error == true }}
|
||||
if: ${{ inputs.continue_on_error == true && github.event_name != 'pull_request' }}
|
||||
with:
|
||||
name: timing-data-2card-full-part${{ matrix.part }}
|
||||
path: test_timing_data.json
|
||||
@@ -475,7 +475,7 @@ jobs:
|
||||
|
||||
- name: Upload timing data
|
||||
uses: actions/upload-artifact@v4
|
||||
if: ${{ inputs.continue_on_error == true }}
|
||||
if: ${{ inputs.continue_on_error == true && github.event_name != 'pull_request' }}
|
||||
with:
|
||||
name: timing-data-4card-full-part${{ matrix.part }}
|
||||
path: test_timing_data.json
|
||||
|
||||
2
.github/workflows/bot_pr_create.yaml
vendored
2
.github/workflows/bot_pr_create.yaml
vendored
@@ -37,7 +37,7 @@ jobs:
|
||||
steps:
|
||||
- name: Get vLLM version
|
||||
run: |
|
||||
VLLM_COMMIT=4034c3d32e30d01639459edd3ab486f56993876d
|
||||
VLLM_COMMIT=4497431df654e46fb1fb5e64bf8611e762ae5d87
|
||||
echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> "$GITHUB_ENV"
|
||||
|
||||
- name: Checkout repository
|
||||
|
||||
@@ -27,7 +27,7 @@ RUN apt-get update -y && \
|
||||
|
||||
ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
|
||||
# For lint purposes; we actually need to keep this commit matching main2main.
|
||||
ARG VLLM_COMMIT=4034c3d32e30d01639459edd3ab486f56993876d
|
||||
ARG VLLM_COMMIT=4497431df654e46fb1fb5e64bf8611e762ae5d87
|
||||
RUN git clone $VLLM_REPO /vllm-workspace/vllm && \
|
||||
cd /vllm-workspace/vllm && \
|
||||
git checkout $VLLM_COMMIT
|
||||
|
||||
2
.github/workflows/pr_test_full.yaml
vendored
2
.github/workflows/pr_test_full.yaml
vendored
@@ -75,7 +75,7 @@ jobs:
|
||||
name: e2e-full
|
||||
strategy:
|
||||
matrix:
|
||||
vllm_version: [4034c3d32e30d01639459edd3ab486f56993876d, v0.17.0]
|
||||
vllm_version: [4497431df654e46fb1fb5e64bf8611e762ae5d87, v0.17.0]
|
||||
needs: [changes]
|
||||
if: ${{ needs.changes.outputs.e2e_tracker == 'true' || needs.changes.outputs.e2e_tracker == true }}
|
||||
uses: ./.github/workflows/_e2e_test.yaml
|
||||
|
||||
6
.github/workflows/pr_test_light.yaml
vendored
6
.github/workflows/pr_test_light.yaml
vendored
@@ -41,7 +41,7 @@ jobs:
|
||||
lint:
|
||||
uses: ./.github/workflows/_pre_commit.yml
|
||||
with:
|
||||
vllm: 4034c3d32e30d01639459edd3ab486f56993876d
|
||||
vllm: 4497431df654e46fb1fb5e64bf8611e762ae5d87
|
||||
changes:
|
||||
runs-on: linux-aarch64-a2b3-0
|
||||
outputs:
|
||||
@@ -90,7 +90,7 @@ jobs:
|
||||
if: ${{ needs.lint.result == 'success' && (needs.changes.outputs.e2e_tracker == 'true' || needs.changes.outputs.ut_tracker == 'true') }}
|
||||
strategy:
|
||||
matrix:
|
||||
vllm_version: [4034c3d32e30d01639459edd3ab486f56993876d, v0.17.0]
|
||||
vllm_version: [4497431df654e46fb1fb5e64bf8611e762ae5d87, v0.17.0]
|
||||
uses: ./.github/workflows/_unit_test.yaml
|
||||
with:
|
||||
vllm: ${{ matrix.vllm_version }}
|
||||
@@ -102,7 +102,7 @@ jobs:
|
||||
name: e2e-light
|
||||
strategy:
|
||||
matrix:
|
||||
vllm_version: [4034c3d32e30d01639459edd3ab486f56993876d, v0.17.0]
|
||||
vllm_version: [4497431df654e46fb1fb5e64bf8611e762ae5d87, v0.17.0]
|
||||
# Note (yikun): If CI resources are limited, we can split the job into two chained jobs
|
||||
needs: [lint, changes]
|
||||
# only trigger e2e test after lint passed and the change is e2e related with pull request.
|
||||
|
||||
@@ -33,7 +33,7 @@ jobs:
|
||||
name: refresh codecov
|
||||
strategy:
|
||||
matrix:
|
||||
vllm_version: [4034c3d32e30d01639459edd3ab486f56993876d]
|
||||
vllm_version: [4497431df654e46fb1fb5e64bf8611e762ae5d87]
|
||||
uses: ./.github/workflows/_unit_test.yaml
|
||||
with:
|
||||
vllm: ${{ matrix.vllm_version }}
|
||||
|
||||
Reference in New Issue
Block a user