Mixed style of chunked prefill (#1013)

This commit is contained in:
Liangsheng Yin
2024-08-16 02:13:00 -07:00
committed by GitHub
parent 5a261bd055
commit 3694f8f996
14 changed files with 195 additions and 59 deletions

View File

@@ -445,15 +445,6 @@ def _wait_and_warmup(server_args, pipe_finish_writer):
print(f"Initialization failed. warmup error: {last_traceback}", flush=True)
sys.exit(1)
# Print warnings here
if server_args.disable_radix_cache and server_args.chunked_prefill_size is not None:
logger.warning(
"You set both `--disable-radix-cache` and `--chunked-prefill-size`. "
"This combination is an experimental feature and we noticed it can lead to "
"wrong generation results. If you want to use chunked prefill, it is recommended "
"not using `--disable-radix-cache`."
)
logger.info("The server is fired up and ready to roll!")
if pipe_finish_writer is not None:
pipe_finish_writer.send("init ok")