From 06008bc295e5e83761c2e332642b783f334e8a3c Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Mon, 12 Feb 2024 04:43:14 -0800 Subject: [PATCH] Fix server launch for jupyter notebook (#186) --- python/sglang/srt/server.py | 84 +++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 40 deletions(-) diff --git a/python/sglang/srt/server.py b/python/sglang/srt/server.py index e72359374..d132f5814 100644 --- a/python/sglang/srt/server.py +++ b/python/sglang/srt/server.py @@ -464,7 +464,6 @@ def launch_server(server_args, pipe_finish_writer): assert proc_router.is_alive() and proc_detoken.is_alive() def _launch_server(): - # Launch api server uvicorn.run( app, host=server_args.host, @@ -474,49 +473,54 @@ def launch_server(server_args, pipe_finish_writer): loop="uvloop", ) - t = threading.Thread(target=_launch_server) - t.start() + def _wait_and_warmup(): + url = server_args.url() + for _ in range(60): + time.sleep(1) + try: + requests.get(url + "/get_model_info", timeout=5) + break + except requests.exceptions.RequestException as e: + pass + else: + if pipe_finish_writer is not None: + pipe_finish_writer.send(str(e)) + else: + print(e, flush=True) + return - url = server_args.url() - for _ in range(60): - time.sleep(1) + # Warmup try: - requests.get(url + "/get_model_info", timeout=5) - break - except requests.exceptions.RequestException as e: - pass - else: - if pipe_finish_writer is not None: - pipe_finish_writer.send(str(e)) - else: - print(e, flush=True) - return - - # Warmup - try: - # print("Warmup...", flush=True) - res = requests.post( - url + "/generate", - json={ - "text": "Say this is a warmup request.", - "sampling_params": { - "temperature": 0, - "max_new_tokens": 16, + # print("Warmup...", flush=True) + res = requests.post( + url + "/generate", + json={ + "text": "Say this is a warmup request.", + "sampling_params": { + "temperature": 0, + "max_new_tokens": 16, + }, }, - }, - timeout=60, - ) - # print(f"Warmup done. model response: {res.json()['text']}") - # print("=" * 20, "Server is ready", "=" * 20, flush=True) - except requests.exceptions.RequestException as e: - if pipe_finish_writer is not None: - pipe_finish_writer.send(str(e)) - else: - print(e, flush=True) - return + timeout=60, + ) + # print(f"Warmup done. model response: {res.json()['text']}") + # print("=" * 20, "Server is ready", "=" * 20, flush=True) + except requests.exceptions.RequestException as e: + if pipe_finish_writer is not None: + pipe_finish_writer.send(str(e)) + else: + print(e, flush=True) + return - if pipe_finish_writer is not None: - pipe_finish_writer.send("init ok") + if pipe_finish_writer is not None: + pipe_finish_writer.send("init ok") + + t = threading.Thread(target=_wait_and_warmup) + t.start() + try: + _launch_server() + finally: + t.join() class Runtime: