Fix server launch for jupyter notebook (#186)
This commit is contained in:
@@ -464,7 +464,6 @@ def launch_server(server_args, pipe_finish_writer):
|
|||||||
assert proc_router.is_alive() and proc_detoken.is_alive()
|
assert proc_router.is_alive() and proc_detoken.is_alive()
|
||||||
|
|
||||||
def _launch_server():
|
def _launch_server():
|
||||||
# Launch api server
|
|
||||||
uvicorn.run(
|
uvicorn.run(
|
||||||
app,
|
app,
|
||||||
host=server_args.host,
|
host=server_args.host,
|
||||||
@@ -474,49 +473,54 @@ def launch_server(server_args, pipe_finish_writer):
|
|||||||
loop="uvloop",
|
loop="uvloop",
|
||||||
)
|
)
|
||||||
|
|
||||||
t = threading.Thread(target=_launch_server)
|
def _wait_and_warmup():
|
||||||
t.start()
|
url = server_args.url()
|
||||||
|
for _ in range(60):
|
||||||
|
time.sleep(1)
|
||||||
|
try:
|
||||||
|
requests.get(url + "/get_model_info", timeout=5)
|
||||||
|
break
|
||||||
|
except requests.exceptions.RequestException as e:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
if pipe_finish_writer is not None:
|
||||||
|
pipe_finish_writer.send(str(e))
|
||||||
|
else:
|
||||||
|
print(e, flush=True)
|
||||||
|
return
|
||||||
|
|
||||||
url = server_args.url()
|
# Warmup
|
||||||
for _ in range(60):
|
|
||||||
time.sleep(1)
|
|
||||||
try:
|
try:
|
||||||
requests.get(url + "/get_model_info", timeout=5)
|
# print("Warmup...", flush=True)
|
||||||
break
|
res = requests.post(
|
||||||
except requests.exceptions.RequestException as e:
|
url + "/generate",
|
||||||
pass
|
json={
|
||||||
else:
|
"text": "Say this is a warmup request.",
|
||||||
if pipe_finish_writer is not None:
|
"sampling_params": {
|
||||||
pipe_finish_writer.send(str(e))
|
"temperature": 0,
|
||||||
else:
|
"max_new_tokens": 16,
|
||||||
print(e, flush=True)
|
},
|
||||||
return
|
|
||||||
|
|
||||||
# Warmup
|
|
||||||
try:
|
|
||||||
# print("Warmup...", flush=True)
|
|
||||||
res = requests.post(
|
|
||||||
url + "/generate",
|
|
||||||
json={
|
|
||||||
"text": "Say this is a warmup request.",
|
|
||||||
"sampling_params": {
|
|
||||||
"temperature": 0,
|
|
||||||
"max_new_tokens": 16,
|
|
||||||
},
|
},
|
||||||
},
|
timeout=60,
|
||||||
timeout=60,
|
)
|
||||||
)
|
# print(f"Warmup done. model response: {res.json()['text']}")
|
||||||
# print(f"Warmup done. model response: {res.json()['text']}")
|
# print("=" * 20, "Server is ready", "=" * 20, flush=True)
|
||||||
# print("=" * 20, "Server is ready", "=" * 20, flush=True)
|
except requests.exceptions.RequestException as e:
|
||||||
except requests.exceptions.RequestException as e:
|
if pipe_finish_writer is not None:
|
||||||
if pipe_finish_writer is not None:
|
pipe_finish_writer.send(str(e))
|
||||||
pipe_finish_writer.send(str(e))
|
else:
|
||||||
else:
|
print(e, flush=True)
|
||||||
print(e, flush=True)
|
return
|
||||||
return
|
|
||||||
|
|
||||||
if pipe_finish_writer is not None:
|
if pipe_finish_writer is not None:
|
||||||
pipe_finish_writer.send("init ok")
|
pipe_finish_writer.send("init ok")
|
||||||
|
|
||||||
|
t = threading.Thread(target=_wait_and_warmup)
|
||||||
|
t.start()
|
||||||
|
try:
|
||||||
|
_launch_server()
|
||||||
|
finally:
|
||||||
|
t.join()
|
||||||
|
|
||||||
|
|
||||||
class Runtime:
|
class Runtime:
|
||||||
|
|||||||
Reference in New Issue
Block a user