Mixed style of chunked prefill (#1013)
This commit is contained in:
@@ -445,15 +445,6 @@ def _wait_and_warmup(server_args, pipe_finish_writer):
|
||||
print(f"Initialization failed. warmup error: {last_traceback}", flush=True)
|
||||
sys.exit(1)
|
||||
|
||||
# Print warnings here
|
||||
if server_args.disable_radix_cache and server_args.chunked_prefill_size is not None:
|
||||
logger.warning(
|
||||
"You set both `--disable-radix-cache` and `--chunked-prefill-size`. "
|
||||
"This combination is an experimental feature and we noticed it can lead to "
|
||||
"wrong generation results. If you want to use chunked prefill, it is recommended "
|
||||
"not using `--disable-radix-cache`."
|
||||
)
|
||||
|
||||
logger.info("The server is fired up and ready to roll!")
|
||||
if pipe_finish_writer is not None:
|
||||
pipe_finish_writer.send("init ok")
|
||||
|
||||
Reference in New Issue
Block a user