Re-organize CI tests (#1052)

This commit is contained in:
Lianmin Zheng
2024-08-12 03:39:01 -07:00
committed by GitHub
parent 0c1c72a0b4
commit c877292cc1
9 changed files with 148 additions and 22 deletions

View File

@@ -54,7 +54,7 @@ class BaseToolCache:
return val
def init_value(self, key):
raise NotImplementedError
raise NotImplementedError()
def get_cache_hit_rate(self):
if self.metrics["total"] == 0:

View File

@@ -410,13 +410,16 @@ class ModelTpServer:
# Print stats
if self.tp_rank == 0:
self.tree_cache_metrics["total"] += (
adder.log_input_tokens + adder.log_hit_tokens
) / 10**9
self.tree_cache_metrics["hit"] += (adder.log_hit_tokens) / 10**9
tree_cache_hit_rate = (
self.tree_cache_metrics["hit"] / self.tree_cache_metrics["total"]
)
if isinstance(self.tree_cache, RadixCache):
self.tree_cache_metrics["total"] += (
adder.log_input_tokens + adder.log_hit_tokens
) / 10**9
self.tree_cache_metrics["hit"] += (adder.log_hit_tokens) / 10**9
tree_cache_hit_rate = (
self.tree_cache_metrics["hit"] / self.tree_cache_metrics["total"]
)
else:
tree_cache_hit_rate = 0.0
logger.info(
f"[gpu={self.gpu_id}] Prefill batch. "
f"#new-seq: {len(can_run_list)}, "

View File

@@ -68,7 +68,7 @@ class ChunkCache(BasePrefixCache):
req.last_node = entry
def insert(self):
raise NotImplementedError
raise NotImplementedError()
def evict(self, num_tokens: int, evict_callback: Callable):
pass

View File

@@ -447,6 +447,15 @@ def _wait_and_warmup(server_args, pipe_finish_writer):
print(f"Initialization failed. warmup error: {last_traceback}", flush=True)
sys.exit(1)
# Print warnings here
if server_args.disable_radix_cache and server_args.chunked_prefill_size is not None:
logger.warning(
"You set both `--disable-radix-cache` and `--chunked-prefill-size`. "
"This combination is an experimental feature and we noticed it can lead to "
"wrong generation results. If you want to use chunked prefill, it is recommended "
"not using `--disable-radix-cache`."
)
logger.info("The server is fired up and ready to roll!")
if pipe_finish_writer is not None:
pipe_finish_writer.send("init ok")