44 lines
1.1 KiB
Python
44 lines
1.1 KiB
Python
import os
|
|
import subprocess
|
|
import unittest
|
|
|
|
from sglang.srt.utils import kill_child_process
|
|
from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST
|
|
|
|
|
|
class TestServingLatency(unittest.TestCase):
    """Exercise the ``sglang.bench_latency`` command-line module end to end."""

    def test_default(self):
        """Run ``sglang.bench_latency`` once and check the figure it reports.

        The benchmark is launched as a subprocess with batch size 1, input
        length 128 and output length 8. Its stdout and stderr are echoed for
        debugging, a number is parsed from stdout, and when the environment
        variable ``SGLANG_IS_IN_CI`` equals ``"true"`` that number must be
        greater than 130.

        Raises:
            IndexError, ValueError: if stdout does not have the expected
                shape (too few lines, or a non-numeric token).
            AssertionError: if the parsed value is not above the threshold
                while running in CI.
        """
        command = [
            "python3",
            "-m",
            "sglang.bench_latency",
            "--model",
            DEFAULT_MODEL_NAME_FOR_TEST,
            "--batch-size",
            "1",
            "--input",
            "128",
            "--output",
            "8",
        ]
        process = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
        try:
            # communicate() drains both pipes and waits for the process to exit.
            stdout, stderr = process.communicate()
            output = stdout.decode()
            error = stderr.decode()
            print(f"Output: {output}")
            print(f"Error: {error}")

            # NOTE(review): positional parsing -- takes the third-from-last
            # line of stdout and its second-to-last space-separated token.
            # Presumably this is the benchmark's final summary figure with a
            # trailing unit; confirm against sglang.bench_latency's output.
            lastline = output.split("\n")[-3]
            value = float(lastline.split(" ")[-2])

            # The threshold is only enforced in CI, where hardware is known.
            if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
                # assertGreater is not stripped under ``python -O`` and
                # reports both operands on failure, unlike a bare assert.
                self.assertGreater(value, 130)
        finally:
            # Always clean up, even when parsing or the assertion fails, so a
            # failing test does not leave benchmark processes behind.
            kill_child_process(process.pid)
|
|
|
|
|
|
if __name__ == "__main__":
    # Executed directly as a script: discover and run this module's tests.
    unittest.main()
|