[PP] Add pipeline parallelism (#5724)

This commit is contained in:
Ying Sheng
2025-04-30 18:18:07 -07:00
committed by GitHub
parent e97e57e699
commit 11383cec3c
25 changed files with 1150 additions and 308 deletions

View File

@@ -770,6 +770,34 @@ def run_bench_offline_throughput(model, other_args):
return output_throughput
def run_bench_one_batch_server(
    model,
    base_url,
    server_args,
    bench_args,
    other_server_args,
    simulate_spec_acc_lens=None,
):
    """Launch a server, run the one-batch server benchmark, then stop the server.

    Args:
        model: Forwarded as the first positional argument to
            ``popen_launch_server``.
        base_url: Forwarded as the second positional argument to
            ``popen_launch_server``.
        server_args: Passed to ``run_benchmark`` as ``server_args``.
        bench_args: Passed to ``run_benchmark`` as ``bench_args``.
        other_server_args: Passed to ``popen_launch_server`` as ``other_args``.
        simulate_spec_acc_lens: When not ``None``, the launched server receives
            a copy of the current environment with ``SIMULATE_ACC_LEN`` set to
            ``str(simulate_spec_acc_lens)``. When ``None``, ``env=None`` is
            passed to ``popen_launch_server`` unchanged.

    Returns:
        None. The result of ``run_benchmark`` is not propagated.
    """
    # Imported lazily, inside the function, exactly as the original did.
    from sglang.bench_one_batch_server import run_benchmark

    # Only build a custom environment when the override is requested;
    # otherwise hand ``None`` through to the launcher.
    launch_env = None
    if simulate_spec_acc_lens is not None:
        launch_env = dict(os.environ)
        launch_env["SIMULATE_ACC_LEN"] = str(simulate_spec_acc_lens)

    server = popen_launch_server(
        model,
        base_url,
        timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
        other_args=other_server_args,
        env=launch_env,
    )

    # The server process tree is torn down whether or not the benchmark raises.
    try:
        run_benchmark(server_args=server_args, bench_args=bench_args)
    finally:
        kill_process_tree(server.pid)
def lcs(X, Y):
m = len(X)
n = len(Y)