Support compressed tensors fp8w8a8 (#4743)

2025-03-27 04:21:25 +08:00
parent 45fdf1f7f3
commit 04e3ff6975
30 changed files with 2386 additions and 113 deletions
--- a/.github/workflows/vllm-dependency-test.yml
+++ b/.github/workflows/vllm-dependency-test.yml
@@ -0,0 +1,45 @@
+name: VLLM Dependency Test
+
+on:
+  push:
+    branches: [ main ]
+    paths:
+      - "python/pyproject.toml"
+      - "python/sglang/**"
+      - "test/**"
+      - "docs/**"
+      - "scripts/**"
+  pull_request:
+    branches: [ main ]
+    paths:
+      - "python/pyproject.toml"
+      - "python/sglang/**"
+      - "test/**"
+      - "docs/**"
+      - "scripts/**"
+
+concurrency:
+  group: vllm-dependency-test-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  vllm-dependency-test:
+    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
+        github.event.pull_request.draft == false
+    runs-on: 1-gpu-runner
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Install dependencies
+        env:
+          FLASHINFER_REPO: 'https://flashinfer.ai/whl/cu124/torch2.5/flashinfer-python'
+        run: |
+          bash scripts/ci_install_dependency.sh
+          pip install "vllm>=0.6.4.post1,<=0.7.2"
+
+      - name: Run VLLM dependency tests
+        timeout-minutes: 60
+        run: |
+          cd test/srt
+          python3 run_suite.py --suite vllm_dependency_test --timeout-per-file 3600