diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml
index c08e3e25c..aea02c5e5 100644
--- a/.github/workflows/pr-test.yml
+++ b/.github/workflows/pr-test.yml
@@ -92,7 +92,7 @@ jobs:
   unittest-test-backend-8-gpu:
     if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
         github.event.pull_request.draft == false
-    needs: [unit-test-frontend, unit-test-backend-1-gpu, unit-test-backend-2-gpu]
+    needs: [unit-test-frontend, unit-test-backend-2-gpu]
     runs-on: 8-gpu-runner
     steps:
       - name: Checkout code
@@ -271,24 +271,6 @@ jobs:
           cd test/srt
           python3 test_moe_eval_accuracy_large.py
 
-  unit-test-backend-pd:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-        github.event.pull_request.draft == false
-    runs-on: 8-gpu-runner
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: Install dependencies
-        run: |
-          bash scripts/ci_install_dependency.sh
-
-      - name: Run test
-        timeout-minutes: 10
-        run: |
-          cd test/srt
-          python3 -m unittest test_disaggregation.TestDisaggregationMooncake.test_gsm8k
-
   finish:
     if: always()
     needs: [
diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py
index a6ff1a888..030355e0c 100644
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -305,6 +305,12 @@ class ServerArgs:
         if self.grammar_backend is None:
             self.grammar_backend = "xgrammar"
 
+        if self.pp_size > 1:
+            self.disable_overlap_schedule = True
+            logger.warning(
+                "Overlap scheduler is disabled because of using pipeline parallelism."
+            )
+
         # Data parallelism attention
         if self.enable_dp_attention:
             self.schedule_conservativeness = self.schedule_conservativeness * 0.3
diff --git a/scripts/ci_install_dependency.sh b/scripts/ci_install_dependency.sh
index 4e50cbbdb..7344d1f86 100755
--- a/scripts/ci_install_dependency.sh
+++ b/scripts/ci_install_dependency.sh
@@ -5,25 +5,22 @@ set -euxo pipefail
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 bash "${SCRIPT_DIR}/killall_sglang.sh"
 
+# Update pip
+pip install --upgrade pip
+
 # Clean up existing installations
-pip uninstall -y flashinfer flashinfer_python sgl-kernel sglang vllm || true
+pip uninstall -y flashinfer flashinfer_python sgl-kernel sglang vllm || true  # best-effort cleanup: must not abort the script under set -e
 pip cache purge
 rm -rf /root/.cache/flashinfer
 rm -rf /usr/local/lib/python3.10/dist-packages/flashinfer*
 rm -rf /usr/local/lib/python3.10/dist-packages/sgl_kernel*
 
-# Update pip
-pip install --upgrade pip
-
-# Install sgl-kernel
-pip install sgl-kernel==0.1.2.post1 --no-cache-dir
-
 # Install the main package
 pip install -e "python[all]"
 
 # Install additional dependencies
 pip install torch_memory_saver
-pip install transformers==4.51.0 sentence_transformers accelerate peft pandas datasets timm torchaudio==2.6.0
+pip install transformers==4.51.0 timm torchaudio==2.6.0 sentence_transformers accelerate peft pandas datasets mooncake-transfer-engine
 
 # For compiling xgrammar kernels
 pip install cuda-python nvidia-cuda-nvrtc-cu12
diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py
index 50cc0d9aa..a70679f50 100644
--- a/test/srt/run_suite.py
+++ b/test/srt/run_suite.py
@@ -85,9 +85,6 @@ suites = {
         TestFile("test_w8a8_quantization.py", 46),
         TestFile("models/lora/test_lora_cuda_graph.py", 250),
     ],
-    "per-commit-pd": [
-        TestFile("test_disaggregation.py", 90),
-    ],
     "per-commit-2-gpu": [
         TestFile("models/lora/test_lora_tp.py", 116),
         TestFile("test_data_parallelism.py", 73),
@@ -105,6 +102,7 @@ suites = {
         # TestFile("test_deepep_low_latency.py", 50),
         # TestFile("test_moe_deepep_eval_accuracy_large.py", 250),
         TestFile("test_local_attn.py", 250),
+        TestFile("test_disaggregation.py", 90),
         TestFile("test_full_deepseek_v3.py", 250),
         TestFile("test_pp_single_node.py", 150),
     ],
diff --git a/test/srt/test_disaggregation.py b/test/srt/test_disaggregation.py
index ee8bca0b3..e3008598a 100644
--- a/test/srt/test_disaggregation.py
+++ b/test/srt/test_disaggregation.py
@@ -1,11 +1,9 @@
 import subprocess
-import threading
 import time
 import unittest
 from types import SimpleNamespace
 
 import requests
-import torch
 
 from sglang.srt.utils import kill_process_tree
 from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
diff --git a/test/srt/test_pp_single_node.py b/test/srt/test_pp_single_node.py
index b69659403..4588c1326 100644
--- a/test/srt/test_pp_single_node.py
+++ b/test/srt/test_pp_single_node.py
@@ -9,13 +9,10 @@ import time
 import unittest
 from types import SimpleNamespace
 
-import requests
-
 from sglang.bench_one_batch_server import BenchArgs as OneBatchBenchArgs
 from sglang.srt.server_args import ServerArgs
 from sglang.srt.utils import kill_process_tree
 from sglang.test.few_shot_gsm8k import run_eval
-from sglang.test.runners import DEFAULT_PROMPTS
 from sglang.test.test_utils import (
     DEFAULT_MODEL_NAME_FOR_TEST,
     DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
@@ -28,17 +25,16 @@ from sglang.test.test_utils import (
 class TestPPAccuracy(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        # These config helps find a leak.
-        os.environ["SGLANG_IS_IN_CI"] = "1"
         cls.base_url = "http://127.0.0.1:23333"
         cls.process = popen_launch_server(
             DEFAULT_MODEL_NAME_FOR_TEST,
             cls.base_url,
             timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
             other_args=[
+                "--tp-size",
+                2,
                 "--pp-size",
                 4,
-                "--disable-overlap-schedule",
                 "--chunked-prefill-size",
                 256,
             ],
@@ -66,49 +62,6 @@ class TestPPAccuracy(unittest.TestCase):
         time.sleep(5)
 
 
-# class TestPPAccuracyFlashInfer(unittest.TestCase):
-#     @classmethod
-#     def setUpClass(cls):
-#         # These config helps find a leak.
-#         os.environ["SGLANG_IS_IN_CI"] = "1"
-#         cls.base_url = "http://127.0.0.1:23333"
-#         cls.process = popen_launch_server(
-#             DEFAULT_MODEL_NAME_FOR_TEST,
-#             cls.base_url,
-#             timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
-#             other_args=[
-#                 "--pp-size",
-#                 4,
-#                 "--disable-overlap-schedule",
-#                 "--attention-backend",
-#                 "flashinfer",
-#                 "--chunked-prefill-size",
-#                 256,
-#             ],
-#         )
-#
-#     @classmethod
-#     def tearDownClass(cls):
-#         kill_process_tree(cls.process.pid)
-#
-#     def test_gsm8k(self):
-#         args = SimpleNamespace(
-#             num_shots=5,
-#             data_path=None,
-#             num_questions=200,
-#             max_new_tokens=512,
-#             parallel=128,
-#             host="http://127.0.0.1",
-#             port=int(self.base_url.split(":")[-1]),
-#         )
-#         metrics = run_eval(args)
-#         print(f"{metrics=}")
-#
-#         self.assertGreater(metrics["accuracy"], 0.75)
-#         # Wait a little bit so that the memory check happens.
-#         time.sleep(5)
-
-
 class TestFixedBugs(unittest.TestCase):
     def test_chunked_prefill_with_small_bs(self):
         model = DEFAULT_MODEL_NAME_FOR_TEST
@@ -124,7 +77,6 @@ class TestFixedBugs(unittest.TestCase):
             2,
             "--pp-size",
             2,
-            "--disable-overlap-schedule",
             "--chunked-prefill",
             256,
             "--max-running-requests",