Update CI workflows (#1210)

2024-08-25 16:43:07 -07:00
parent 308d024092
commit 15f1a49d2d
8 changed files with 43 additions and 48 deletions
--- a/.github/workflows/accuracy-test.yml
+++ b/.github/workflows/accuracy-test.yml
@@ -20,7 +20,7 @@ concurrency:
 jobs:
  accuracy-test:
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
-    runs-on: accuracy
+    runs-on: accuracy-test

    steps:
    - name: Checkout code
@@ -28,9 +28,6 @@ jobs:

    - name: Install dependencies
      run: |
-        source $HOME/venv/bin/activate
-        echo "$HOME/venv/bin" >> $GITHUB_PATH
-
        pip install --upgrade pip
        pip install -e "python[all]"
        pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
@@ -40,7 +37,7 @@ jobs:
        pip install -e .

    - name: Evaluate Accuracy
+      timeout-minutes: 20
      run: |
        cd test/srt
        python3 test_eval_accuracy_large.py
-      timeout-minutes: 20
--- a/.github/workflows/e2e-test.yml
+++ b/.github/workflows/e2e-test.yml
@@ -20,7 +20,7 @@ concurrency:
 jobs:
  e2e-test:
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
-    runs-on: e2e
+    runs-on: e2e-test

    steps:
    - name: Checkout code
@@ -28,27 +28,24 @@ jobs:

    - name: Install dependencies
      run: |
-        source $HOME/venv/bin/activate
-        echo "$HOME/venv/bin" >> $GITHUB_PATH
-
        pip install --upgrade pip
        pip install -e "python[all]"
        pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall

    - name: Benchmark Serving Throughput
+      timeout-minutes: 10
      run: |
        cd test/srt
        python3 -m unittest test_serving_throughput.TestServingThroughput.test_default
-      timeout-minutes: 10

    - name: Benchmark Serving Throughput (w/o RadixAttention)
+      timeout-minutes: 10
      run: |
        cd test/srt
        python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_radix_cache
-      timeout-minutes: 10

    - name: Benchmark Serving Throughput (w/o ChunkedPrefill)
+      timeout-minutes: 10
      run: |
        cd test/srt
        python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_chunked_prefill
-      timeout-minutes: 10
--- a/.github/workflows/moe-test.yml
+++ b/.github/workflows/moe-test.yml
@@ -18,30 +18,28 @@ concurrency:
  cancel-in-progress: true

 jobs:
-    moe-test:
-        if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
-        runs-on: accuracy
-    
-        steps:
-            - name: Checkout code
-              uses: actions/checkout@v3
-        
-            - name: Install dependencies
-              run: |
-                source $HOME/venv/bin/activate
-                echo "$HOME/venv/bin" >> $GITHUB_PATH
-        
-                pip install --upgrade pip
-                pip install -e "python[all]"
-                pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
+  moe-test:
+    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
+    runs-on: moe-test

-            - name: Benchmark MOE Serving Throughput
-              uses: nick-fields/retry@v3
-              with:
-                timeout_minutes: 15
-                max_attempts: 2
-                retry_on: error
-                command: |
-                  cd test/srt
-                  python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default
-                  python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v3
+
+    - name: Install dependencies
+      run: |
+        pip install --upgrade pip
+        pip install -e "python[all]"
+        pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
+
+    - name: Benchmark MoE Serving Throughput
+      timeout-minutes: 10  # step-level key is hyphenated; `timeout_minutes` was the retry action's input
+      run: |
+        cd test/srt
+        python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default
+
+    - name: Benchmark MoE Serving Throughput (w/o RadixAttention)
+      timeout-minutes: 10  # same per-step limit as the default-config benchmark above
+      run: |
+        cd test/srt
+        python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache
--- a/.github/workflows/unit-test.yml
+++ b/.github/workflows/unit-test.yml
@@ -20,7 +20,7 @@ concurrency:
 jobs:
  unit-test:
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
-    runs-on: unit
+    runs-on: unit-test

    steps:
    - name: Checkout code
@@ -28,9 +28,6 @@ jobs:

    - name: Install dependencies
      run: |
-        source $HOME/venv/bin/activate
-        echo "$HOME/venv/bin" >> $GITHUB_PATH
-
        pip install --upgrade pip
        pip install -e "python[all]"
        pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
@@ -38,13 +35,13 @@ jobs:
        pip install sentence_transformers

    - name: Test Backend Runtime
+      timeout-minutes: 20
      run: |
        cd test/srt
        python3 run_suite.py --suite minimal
-      timeout-minutes: 20

    - name: Test Frontend Language
+      timeout-minutes: 10
      run: |
        cd test/lang
        python3 run_suite.py --suite minimal
-      timeout-minutes: 10
--- a/python/sglang/test/runners.py
+++ b/python/sglang/test/runners.py
@@ -24,6 +24,7 @@ import torch.nn.functional as F
 from transformers import AutoModelForCausalLM, AutoTokenizer

 from sglang.srt.server import Runtime
+from sglang.test.test_utils import DEFAULT_PORT_FOR_SRT_TEST_RUNNER

 DEFAULT_PROMPTS = [
    # the output of gemma-2-2b from SRT is unstable on the commented prompt
@@ -171,7 +172,7 @@ class SRTRunner:
        torch_dtype,
        is_generation,
        tp_size=1,
-        port=5157,
+        port=DEFAULT_PORT_FOR_SRT_TEST_RUNNER,
    ):
        self.is_generation = is_generation
        self.runtime = Runtime(
--- a/python/sglang/test/test_utils.py
+++ b/python/sglang/test/test_utils.py
@@ -25,6 +25,7 @@ DEFAULT_MODEL_NAME_FOR_TEST = "meta-llama/Meta-Llama-3.1-8B-Instruct"
 DEFAULT_MOE_MODEL_NAME_FOR_TEST = "mistralai/Mixtral-8x7B-Instruct-v0.1"

 if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
+    DEFAULT_PORT_FOR_SRT_TEST_RUNNER = 5157
    DEFAULT_URL_FOR_MOE_TEST = "http://127.0.0.1:6157"
    DEFAULT_URL_FOR_ACCURACY_TEST = "http://127.0.0.1:7157"
    DEFAULT_URL_FOR_UNIT_TEST = "http://127.0.0.1:8157"
--- a/test/srt/sampling/penaltylib/test_srt_endpoint_with_penalizers.py
+++ b/test/srt/sampling/penaltylib/test_srt_endpoint_with_penalizers.py
@@ -5,7 +5,11 @@ from multiprocessing import Process
 import requests

 from sglang.srt.utils import kill_child_process
-from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, popen_launch_server
+from sglang.test.test_utils import (
+    DEFAULT_MODEL_NAME_FOR_TEST,
+    DEFAULT_URL_FOR_UNIT_TEST,
+    popen_launch_server,
+)


 class TestBatchPenalizerE2E(unittest.TestCase):
@@ -13,7 +17,7 @@ class TestBatchPenalizerE2E(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_MODEL_NAME_FOR_TEST
-        cls.base_url = f"http://127.0.0.1:{8157}"
+        cls.base_url = DEFAULT_URL_FOR_UNIT_TEST
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
--- a/test/srt/test_vision_openai_server.py
+++ b/test/srt/test_vision_openai_server.py
@@ -67,7 +67,7 @@ class TestOpenAIVisionServer(unittest.TestCase):
        assert response.choices[0].message.role == "assistant"
        text = response.choices[0].message.content
        assert isinstance(text, str)
-        assert "car" in text or "taxi" in text, text
+        assert "logo" in text, text
        assert response.id
        assert response.created
        assert response.usage.prompt_tokens > 0