Update CI workflows (#1210)

This commit is contained in:
Lianmin Zheng
2024-08-25 16:43:07 -07:00
committed by GitHub
parent 308d024092
commit 15f1a49d2d
8 changed files with 43 additions and 48 deletions

View File

@@ -20,7 +20,7 @@ concurrency:
 jobs:
   accuracy-test:
     if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
-    runs-on: accuracy
+    runs-on: accuracy-test
     steps:
       - name: Checkout code
@@ -28,9 +28,6 @@ jobs:
       - name: Install dependencies
         run: |
-          source $HOME/venv/bin/activate
-          echo "$HOME/venv/bin" >> $GITHUB_PATH
           pip install --upgrade pip
           pip install -e "python[all]"
           pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
@@ -40,7 +37,7 @@ jobs:
           pip install -e .
       - name: Evaluate Accuracy
+        timeout-minutes: 20
         run: |
           cd test/srt
           python3 test_eval_accuracy_large.py
-        timeout-minutes: 20

View File

@@ -20,7 +20,7 @@ concurrency:
 jobs:
   e2e-test:
     if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
-    runs-on: e2e
+    runs-on: e2e-test
     steps:
       - name: Checkout code
@@ -28,27 +28,24 @@ jobs:
       - name: Install dependencies
         run: |
-          source $HOME/venv/bin/activate
-          echo "$HOME/venv/bin" >> $GITHUB_PATH
           pip install --upgrade pip
           pip install -e "python[all]"
           pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
       - name: Benchmark Serving Throughput
+        timeout-minutes: 10
         run: |
           cd test/srt
           python3 -m unittest test_serving_throughput.TestServingThroughput.test_default
-        timeout-minutes: 10
       - name: Benchmark Serving Throughput (w/o RadixAttention)
+        timeout-minutes: 10
         run: |
           cd test/srt
           python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_radix_cache
-        timeout-minutes: 10
       - name: Benchmark Serving Throughput (w/o ChunkedPrefill)
+        timeout-minutes: 10
         run: |
           cd test/srt
           python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_chunked_prefill
-        timeout-minutes: 10

View File

@@ -18,30 +18,28 @@ concurrency:
   cancel-in-progress: true
 jobs:
   moe-test:
     if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
-    runs-on: accuracy
+    runs-on: moe-test
     steps:
       - name: Checkout code
         uses: actions/checkout@v3
       - name: Install dependencies
         run: |
-          source $HOME/venv/bin/activate
-          echo "$HOME/venv/bin" >> $GITHUB_PATH
           pip install --upgrade pip
           pip install -e "python[all]"
           pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
-      - name: Benchmark MOE Serving Throughput
-        uses: nick-fields/retry@v3
-        with:
-          timeout_minutes: 15
-          max_attempts: 2
-          retry_on: error
-          command: |
-            cd test/srt
-            python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default
-            python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache
+      - name: Benchmark MoE Serving Throughput
+        timeout_minutes: 10
+        run: |
+          cd test/srt
+          python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default
+      - name: Benchmark MoE Serving Throughput (w/o RadixAttention)
+        timeout_minutes: 10
+        run: |
+          cd test/srt
+          python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache

View File

@@ -20,7 +20,7 @@ concurrency:
 jobs:
   unit-test:
     if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
-    runs-on: unit
+    runs-on: unit-test
     steps:
       - name: Checkout code
@@ -28,9 +28,6 @@ jobs:
      - name: Install dependencies
         run: |
-          source $HOME/venv/bin/activate
-          echo "$HOME/venv/bin" >> $GITHUB_PATH
           pip install --upgrade pip
           pip install -e "python[all]"
           pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
@@ -38,13 +35,13 @@ jobs:
           pip install sentence_transformers
       - name: Test Backend Runtime
+        timeout-minutes: 20
         run: |
           cd test/srt
           python3 run_suite.py --suite minimal
-        timeout-minutes: 20
       - name: Test Frontend Language
+        timeout-minutes: 10
         run: |
           cd test/lang
           python3 run_suite.py --suite minimal
-        timeout-minutes: 10

View File

@@ -24,6 +24,7 @@ import torch.nn.functional as F
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from sglang.srt.server import Runtime
+from sglang.test.test_utils import DEFAULT_PORT_FOR_SRT_TEST_RUNNER
 DEFAULT_PROMPTS = [
     # the output of gemma-2-2b from SRT is unstable on the commented prompt
@@ -171,7 +172,7 @@ class SRTRunner:
         torch_dtype,
         is_generation,
         tp_size=1,
-        port=5157,
+        port=DEFAULT_PORT_FOR_SRT_TEST_RUNNER,
     ):
         self.is_generation = is_generation
         self.runtime = Runtime(

View File

@@ -25,6 +25,7 @@ DEFAULT_MODEL_NAME_FOR_TEST = "meta-llama/Meta-Llama-3.1-8B-Instruct"
 DEFAULT_MOE_MODEL_NAME_FOR_TEST = "mistralai/Mixtral-8x7B-Instruct-v0.1"
 if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
+    DEFAULT_PORT_FOR_SRT_TEST_RUNNER = 5157
     DEFAULT_URL_FOR_MOE_TEST = "http://127.0.0.1:6157"
     DEFAULT_URL_FOR_ACCURACY_TEST = "http://127.0.0.1:7157"
     DEFAULT_URL_FOR_UNIT_TEST = "http://127.0.0.1:8157"

View File

@@ -5,7 +5,11 @@ from multiprocessing import Process
 import requests
 from sglang.srt.utils import kill_child_process
-from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, popen_launch_server
+from sglang.test.test_utils import (
+    DEFAULT_MODEL_NAME_FOR_TEST,
+    DEFAULT_URL_FOR_UNIT_TEST,
+    popen_launch_server,
+)
 class TestBatchPenalizerE2E(unittest.TestCase):
@@ -13,7 +17,7 @@ class TestBatchPenalizerE2E(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         cls.model = DEFAULT_MODEL_NAME_FOR_TEST
-        cls.base_url = f"http://127.0.0.1:{8157}"
+        cls.base_url = DEFAULT_URL_FOR_UNIT_TEST
         cls.process = popen_launch_server(
             cls.model,
             cls.base_url,

View File

@@ -67,7 +67,7 @@ class TestOpenAIVisionServer(unittest.TestCase):
         assert response.choices[0].message.role == "assistant"
         text = response.choices[0].message.content
         assert isinstance(text, str)
-        assert "car" in text or "taxi" in text, text
+        assert "logo" in text, text
         assert response.id
         assert response.created
         assert response.usage.prompt_tokens > 0