diff --git a/.github/workflows/accuracy-test.yml b/.github/workflows/accuracy-test.yml index 16bb584f4..b60a9c6d4 100644 --- a/.github/workflows/accuracy-test.yml +++ b/.github/workflows/accuracy-test.yml @@ -20,7 +20,7 @@ concurrency: jobs: accuracy-test: if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' - runs-on: accuracy + runs-on: accuracy-test steps: - name: Checkout code @@ -28,9 +28,6 @@ jobs: - name: Install dependencies run: | - source $HOME/venv/bin/activate - echo "$HOME/venv/bin" >> $GITHUB_PATH - pip install --upgrade pip pip install -e "python[all]" pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall @@ -40,7 +37,7 @@ jobs: pip install -e . - name: Evaluate Accuracy + timeout-minutes: 20 run: | cd test/srt python3 test_eval_accuracy_large.py - timeout-minutes: 20 diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml index ad271c37e..8d3387041 100644 --- a/.github/workflows/e2e-test.yml +++ b/.github/workflows/e2e-test.yml @@ -20,7 +20,7 @@ concurrency: jobs: e2e-test: if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' - runs-on: e2e + runs-on: e2e-test steps: - name: Checkout code @@ -28,27 +28,24 @@ jobs: - name: Install dependencies run: | - source $HOME/venv/bin/activate - echo "$HOME/venv/bin" >> $GITHUB_PATH - pip install --upgrade pip pip install -e "python[all]" pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall - name: Benchmark Serving Throughput + timeout-minutes: 10 run: | cd test/srt python3 -m unittest test_serving_throughput.TestServingThroughput.test_default - timeout-minutes: 10 - name: Benchmark Serving Throughput (w/o RadixAttention) + timeout-minutes: 10 run: | cd test/srt python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_radix_cache - timeout-minutes: 10 - name: Benchmark Serving Throughput (w/o ChunkedPrefill) + timeout-minutes: 10 
run: | cd test/srt python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_chunked_prefill - timeout-minutes: 10 diff --git a/.github/workflows/moe-test.yml b/.github/workflows/moe-test.yml index 51f7d0226..dd5665a3f 100644 --- a/.github/workflows/moe-test.yml +++ b/.github/workflows/moe-test.yml @@ -18,30 +18,28 @@ concurrency: cancel-in-progress: true jobs: - moe-test: - if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' - runs-on: accuracy - - steps: - - name: Checkout code - uses: actions/checkout@v3 - - - name: Install dependencies - run: | - source $HOME/venv/bin/activate - echo "$HOME/venv/bin" >> $GITHUB_PATH - - pip install --upgrade pip - pip install -e "python[all]" - pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + moe-test: + if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' + runs-on: moe-test - - name: Benchmark MOE Serving Throughput - uses: nick-fields/retry@v3 - with: - timeout_minutes: 15 - max_attempts: 2 - retry_on: error - command: | - cd test/srt - python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default - python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Install dependencies + run: | + pip install --upgrade pip + pip install -e "python[all]" + pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + + - name: Benchmark MoE Serving Throughput + timeout-minutes: 10 + run: | + cd test/srt + python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default + + - name: Benchmark MoE Serving Throughput (w/o RadixAttention) + timeout-minutes: 10 + run: | + cd test/srt + python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache diff --git a/.github/workflows/unit-test.yml 
b/.github/workflows/unit-test.yml index 607cb865d..e2d7951be 100644 --- a/.github/workflows/unit-test.yml +++ b/.github/workflows/unit-test.yml @@ -20,7 +20,7 @@ concurrency: jobs: unit-test: if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' - runs-on: unit + runs-on: unit-test steps: - name: Checkout code @@ -28,9 +28,6 @@ jobs: - name: Install dependencies run: | - source $HOME/venv/bin/activate - echo "$HOME/venv/bin" >> $GITHUB_PATH - pip install --upgrade pip pip install -e "python[all]" pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall @@ -38,13 +35,13 @@ jobs: pip install sentence_transformers - name: Test Backend Runtime + timeout-minutes: 20 run: | cd test/srt python3 run_suite.py --suite minimal - timeout-minutes: 20 - name: Test Frontend Language + timeout-minutes: 10 run: | cd test/lang python3 run_suite.py --suite minimal - timeout-minutes: 10 diff --git a/python/sglang/test/runners.py b/python/sglang/test/runners.py index 9a5bd4fd5..37ed2cf9a 100644 --- a/python/sglang/test/runners.py +++ b/python/sglang/test/runners.py @@ -24,6 +24,7 @@ import torch.nn.functional as F from transformers import AutoModelForCausalLM, AutoTokenizer from sglang.srt.server import Runtime +from sglang.test.test_utils import DEFAULT_PORT_FOR_SRT_TEST_RUNNER DEFAULT_PROMPTS = [ # the output of gemma-2-2b from SRT is unstable on the commented prompt @@ -171,7 +172,7 @@ class SRTRunner: torch_dtype, is_generation, tp_size=1, - port=5157, + port=DEFAULT_PORT_FOR_SRT_TEST_RUNNER, ): self.is_generation = is_generation self.runtime = Runtime( diff --git a/python/sglang/test/test_utils.py b/python/sglang/test/test_utils.py index ac19d9370..3389e619c 100644 --- a/python/sglang/test/test_utils.py +++ b/python/sglang/test/test_utils.py @@ -25,6 +25,7 @@ DEFAULT_MODEL_NAME_FOR_TEST = "meta-llama/Meta-Llama-3.1-8B-Instruct" DEFAULT_MOE_MODEL_NAME_FOR_TEST = "mistralai/Mixtral-8x7B-Instruct-v0.1" if 
os.getenv("SGLANG_IS_IN_CI", "false") == "true": + DEFAULT_PORT_FOR_SRT_TEST_RUNNER = 5157 DEFAULT_URL_FOR_MOE_TEST = "http://127.0.0.1:6157" DEFAULT_URL_FOR_ACCURACY_TEST = "http://127.0.0.1:7157" DEFAULT_URL_FOR_UNIT_TEST = "http://127.0.0.1:8157" diff --git a/test/srt/sampling/penaltylib/test_srt_endpoint_with_penalizers.py b/test/srt/sampling/penaltylib/test_srt_endpoint_with_penalizers.py index e72dc30f9..4e91f7235 100644 --- a/test/srt/sampling/penaltylib/test_srt_endpoint_with_penalizers.py +++ b/test/srt/sampling/penaltylib/test_srt_endpoint_with_penalizers.py @@ -5,7 +5,11 @@ from multiprocessing import Process import requests from sglang.srt.utils import kill_child_process -from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, popen_launch_server +from sglang.test.test_utils import ( + DEFAULT_MODEL_NAME_FOR_TEST, + DEFAULT_URL_FOR_UNIT_TEST, + popen_launch_server, +) class TestBatchPenalizerE2E(unittest.TestCase): @@ -13,7 +17,7 @@ class TestBatchPenalizerE2E(unittest.TestCase): @classmethod def setUpClass(cls): cls.model = DEFAULT_MODEL_NAME_FOR_TEST - cls.base_url = f"http://127.0.0.1:{8157}" + cls.base_url = DEFAULT_URL_FOR_UNIT_TEST cls.process = popen_launch_server( cls.model, cls.base_url, diff --git a/test/srt/test_vision_openai_server.py b/test/srt/test_vision_openai_server.py index 0f136fe6e..48157b8db 100644 --- a/test/srt/test_vision_openai_server.py +++ b/test/srt/test_vision_openai_server.py @@ -67,7 +67,7 @@ class TestOpenAIVisionServer(unittest.TestCase): assert response.choices[0].message.role == "assistant" text = response.choices[0].message.content assert isinstance(text, str) - assert "car" in text or "taxi" in text, text + assert "logo" in text, text assert response.id assert response.created assert response.usage.prompt_tokens > 0