Support penalty in overlap mode; return logprob with chunked prefill; improve benchmark scripts (#3988)
Co-authored-by: SangBin Cho <rkooo567@gmail.com> Co-authored-by: dhou-xai <dhou@x.ai> Co-authored-by: Hanming Lu <hanming_lu@berkeley.edu>
This commit is contained in:
27
test/srt/test_health_check.py
Normal file
27
test/srt/test_health_check.py
Normal file
@@ -0,0 +1,27 @@
|
||||
import unittest
|
||||
|
||||
from sglang.test.test_utils import (
|
||||
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
||||
DEFAULT_URL_FOR_TEST,
|
||||
popen_launch_server,
|
||||
)
|
||||
|
||||
|
||||
class TestHealthCheck(unittest.TestCase):
    """Checks that a server launch with an overridden architecture times out."""

    def test_health_check(self):
        """Expect ``popen_launch_server`` to raise ``TimeoutError``.

        The server is launched for ``DEFAULT_SMALL_MODEL_NAME_FOR_TEST`` at
        ``DEFAULT_URL_FOR_TEST`` with ``timeout=60``, passing
        ``--disable-cuda-graph`` and a ``--json-model-override-args`` value
        that sets the architecture to ``LlamaForCausalLMForHealthTest``.
        The test passes only if the launch call raises ``TimeoutError``.
        """
        # NOTE(review): presumably "LlamaForCausalLMForHealthTest" is a
        # test-only architecture name that keeps the server from ever
        # reporting healthy -- confirm against the server implementation.
        # NOTE(review): if the launch unexpectedly succeeds, the return value
        # is discarded here; confirm whether a started server would need
        # explicit teardown in that failure case.
        with self.assertRaises(TimeoutError):
            popen_launch_server(
                DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
                DEFAULT_URL_FOR_TEST,
                timeout=60,
                other_args=[
                    "--disable-cuda-graph",
                    "--json-model-override-args",
                    '{"architectures": ["LlamaForCausalLMForHealthTest"]}',
                ],
            )
|
||||
|
||||
|
||||
# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user