feat: add pr e2e test (#822)

2024-07-30 18:31:26 +10:00
parent 17af39c5dc
commit a30d5d75bf
1 changed files with 30 additions and 13 deletions
--- a/.github/workflows/pr-e2e-test.yml
+++ b/.github/workflows/pr-e2e-test.yml
@@ -12,19 +12,36 @@ jobs:
    runs-on: self-hosted
    env:
      CUDA_VISIBLE_DEVICES: 6
    steps:
-    - uses: actions/checkout@v2
+    - name: Checkout code
-    - name: Check GPU
+      uses: actions/checkout@v3
    - name: Install dependencies
      run: |
-        if ! command -v nvidia-smi &> /dev/null; then
+        pip install --upgrade pip
-          echo "nvidia-smi not found. Is CUDA installed?"
+        pip install -e "python[all]"
-          exit 1
+        pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/ --force-reinstall
-        fi
+        pip install --upgrade transformers
-        nvidia-smi || exit 1
+
-    - name: Environment Info
+    - name: Launch server and run benchmark
      run: |
-        echo "Working directory: $(pwd)"
+        python3 -m sglang.launch_server --model /home/lmzheng/zhyncs/Meta-Llama-3.1-8B-Instruct --port 8413 &
-        echo "CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES"
+
-    - name: Run Tests
+        echo "Waiting for server to start..."
-      run: |
+        for i in {1..60}; do
-        echo "Running tests..."
+          if curl -s http://127.0.0.1:8413/health; then
            echo "Server is up!"
            break
          fi
          if [ $i -eq 60 ]; then
            echo "Server failed to start within 60 seconds"
            exit 1
          fi
          sleep 1
        done
        python3 -m sglang.bench_serving --backend sglang --port 8413
        echo "Stopping server..."
        kill -9 $(ps aux | grep sglang | grep Meta-Llama-3.1-8B-Instruct | grep -v grep | awk '{print $2}')