Simplify Router arguments passing and build it in docker image (#9964)

This commit is contained in:
Liangsheng Yin
2025-09-05 12:13:55 +08:00
committed by GitHub
parent 0e9387a95d
commit 6e95f5e5bd
24 changed files with 1157 additions and 1587 deletions

View File

@@ -1,6 +1,5 @@
import json
import os
import subprocess
import time
import unittest
from types import SimpleNamespace
@@ -18,6 +17,7 @@ from sglang.test.test_utils import (
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_pd_server,
popen_with_error_check,
)
@@ -47,7 +47,9 @@ class TestDisaggregationAccuracy(CustomTestCase):
lb_command = [
"python3",
"-m",
"sglang.srt.disaggregation.mini_lb",
"sglang_router.launch_router",
"--pd-disaggregation",
"--mini-lb", # FIXME: remove this
"--prefill",
cls.prefill_url,
"--decode",
@@ -59,9 +61,7 @@ class TestDisaggregationAccuracy(CustomTestCase):
]
print("Starting load balancer:", " ".join(lb_command))
cls.process_lb = subprocess.Popen(
lb_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
cls.process_lb = popen_with_error_check(lb_command)
cls.wait_server_ready(cls.lb_url + "/health")
@classmethod
@@ -228,7 +228,9 @@ class TestDisaggregationMooncakeFailure(CustomTestCase):
lb_command = [
"python3",
"-m",
"sglang.srt.disaggregation.mini_lb",
"sglang_router.launch_router",
"--pd-disaggregation",
"--mini-lb", # FIXME: remove this
"--prefill",
cls.prefill_url,
"--decode",
@@ -240,9 +242,7 @@ class TestDisaggregationMooncakeFailure(CustomTestCase):
]
print("Starting load balancer:", " ".join(lb_command))
cls.process_lb = subprocess.Popen(
lb_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
cls.process_lb = popen_with_error_check(lb_command)
cls.wait_server_ready(cls.lb_url + "/health")
@classmethod
@@ -383,7 +383,9 @@ class TestDisaggregationMooncakeSpec(CustomTestCase):
lb_command = [
"python3",
"-m",
"sglang.srt.disaggregation.mini_lb",
"sglang_router.launch_router",
"--pd-disaggregation",
"--mini-lb", # FIXME: remove this
"--prefill",
cls.prefill_url,
"--decode",
@@ -395,9 +397,7 @@ class TestDisaggregationMooncakeSpec(CustomTestCase):
]
print("Starting load balancer:", " ".join(lb_command))
cls.process_lb = subprocess.Popen(
lb_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
cls.process_lb = popen_with_error_check(lb_command)
cls.wait_server_ready(cls.lb_url + "/health")
@classmethod
@@ -509,7 +509,9 @@ class TestDisaggregationSimulatedRetract(CustomTestCase):
lb_command = [
"python3",
"-m",
"sglang.srt.disaggregation.mini_lb",
"sglang_router.launch_router",
"--pd-disaggregation",
"--mini-lb", # FIXME: remove this
"--prefill",
cls.prefill_url,
"--decode",
@@ -521,9 +523,7 @@ class TestDisaggregationSimulatedRetract(CustomTestCase):
]
print("Starting load balancer:", " ".join(lb_command))
cls.process_lb = subprocess.Popen(
lb_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
cls.process_lb = popen_with_error_check(lb_command)
cls.wait_server_ready(cls.lb_url + "/health")
@classmethod

View File

@@ -15,7 +15,7 @@ from sglang.test.test_utils import (
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_pd_server,
run_with_timeout,
popen_with_error_check,
)
@@ -49,7 +49,9 @@ class TestDisaggregationMooncakePrefillLargerTP(CustomTestCase):
lb_command = [
"python3",
"-m",
"sglang.srt.disaggregation.mini_lb",
"sglang_router.launch_router",
"--pd-disaggregation",
"--mini-lb", # FIXME: remove this
"--prefill",
cls.prefill_url,
"--decode",
@@ -61,9 +63,7 @@ class TestDisaggregationMooncakePrefillLargerTP(CustomTestCase):
]
print("Starting load balancer:", " ".join(lb_command))
cls.process_lb = subprocess.Popen(
lb_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
cls.process_lb = popen_with_error_check(lb_command)
cls.wait_server_ready(cls.lb_url + "/health")
@classmethod
@@ -183,7 +183,9 @@ class TestDisaggregationMooncakeDecodeLargerTP(CustomTestCase):
lb_command = [
"python3",
"-m",
"sglang.srt.disaggregation.mini_lb",
"sglang_router.launch_router",
"--pd-disaggregation",
"--mini-lb", # FIXME: remove this
"--prefill",
cls.prefill_url,
"--decode",
@@ -195,9 +197,7 @@ class TestDisaggregationMooncakeDecodeLargerTP(CustomTestCase):
]
print("Starting load balancer:", " ".join(lb_command))
cls.process_lb = subprocess.Popen(
lb_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
cls.process_lb = popen_with_error_check(lb_command)
cls.wait_server_ready(cls.lb_url + "/health")
@classmethod

View File

@@ -49,7 +49,9 @@ class TestPDPPAccuracy(unittest.TestCase):
lb_command = [
"python3",
"-m",
"sglang.srt.disaggregation.mini_lb",
"sglang_router.launch_router",
"--pd-disaggregation",
"--mini-lb", # FIXME: remove this
"--prefill",
cls.prefill_url,
"--decode",