Fix data parallel perf regression (#6183)

This commit is contained in:
Lianmin Zheng
2025-05-10 19:18:35 -07:00
committed by GitHub
parent 03dd785cd0
commit 4319978c73
2 changed files with 4 additions and 7 deletions

View File

@@ -22,11 +22,6 @@ jobs:
- name: Checkout code
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.9'
- name: Install dependencies
run: |
bash scripts/ci_install_dependency.sh
@@ -35,6 +30,8 @@ jobs:
apt-get install -y pandoc
apt-get update && apt-get install -y parallel retry
ln -sf "$(which python3)" /usr/bin/python
- name: Setup Jupyter Kernel
run: |
python -m ipykernel install --user --name python3 --display-name "Python 3"

View File

@@ -17,13 +17,13 @@ import logging
import multiprocessing as mp
import signal
import threading
import time
from enum import Enum, auto
import psutil
import setproctitle
import zmq
from sglang.srt.disaggregation.utils import DisaggregationMode
from sglang.srt.layers.dp_attention import compute_dp_attention_world_info
from sglang.srt.managers.io_struct import (
TokenizedEmbeddingReqInput,
@@ -158,7 +158,7 @@ class DataParallelController:
# This thread cannot be closed because otherwise the `kill_itself_when_parent_died`
# function in scheduler.py will kill the scheduler.
while True:
pass
time.sleep(30 * 24 * 3600)
def launch_dp_attention_schedulers(self, server_args, port_args):
self.launch_tensor_parallel_group(server_args, port_args, 0, None)