Re-organize CI tests (#1052)
This commit is contained in:
@@ -54,7 +54,7 @@ class BaseToolCache:
|
||||
return val
|
||||
|
||||
def init_value(self, key):
|
||||
raise NotImplementedError
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_cache_hit_rate(self):
|
||||
if self.metrics["total"] == 0:
|
||||
|
||||
@@ -410,13 +410,16 @@ class ModelTpServer:
|
||||
|
||||
# Print stats
|
||||
if self.tp_rank == 0:
|
||||
self.tree_cache_metrics["total"] += (
|
||||
adder.log_input_tokens + adder.log_hit_tokens
|
||||
) / 10**9
|
||||
self.tree_cache_metrics["hit"] += (adder.log_hit_tokens) / 10**9
|
||||
tree_cache_hit_rate = (
|
||||
self.tree_cache_metrics["hit"] / self.tree_cache_metrics["total"]
|
||||
)
|
||||
if isinstance(self.tree_cache, RadixCache):
|
||||
self.tree_cache_metrics["total"] += (
|
||||
adder.log_input_tokens + adder.log_hit_tokens
|
||||
) / 10**9
|
||||
self.tree_cache_metrics["hit"] += (adder.log_hit_tokens) / 10**9
|
||||
tree_cache_hit_rate = (
|
||||
self.tree_cache_metrics["hit"] / self.tree_cache_metrics["total"]
|
||||
)
|
||||
else:
|
||||
tree_cache_hit_rate = 0.0
|
||||
logger.info(
|
||||
f"[gpu={self.gpu_id}] Prefill batch. "
|
||||
f"#new-seq: {len(can_run_list)}, "
|
||||
|
||||
@@ -68,7 +68,7 @@ class ChunkCache(BasePrefixCache):
|
||||
req.last_node = entry
|
||||
|
||||
def insert(self):
|
||||
raise NotImplementedError
|
||||
raise NotImplementedError()
|
||||
|
||||
def evict(self, num_tokens: int, evict_callback: Callable):
|
||||
pass
|
||||
|
||||
@@ -447,6 +447,15 @@ def _wait_and_warmup(server_args, pipe_finish_writer):
|
||||
print(f"Initialization failed. warmup error: {last_traceback}", flush=True)
|
||||
sys.exit(1)
|
||||
|
||||
# Print warnings here
|
||||
if server_args.disable_radix_cache and server_args.chunked_prefill_size is not None:
|
||||
logger.warning(
|
||||
"You set both `--disable-radix-cache` and `--chunked-prefill-size`. "
|
||||
"This combination is an experimental feature and we noticed it can lead to "
|
||||
"wrong generation results. If you want to use chunked prefill, it is recommended "
|
||||
"not using `--disable-radix-cache`."
|
||||
)
|
||||
|
||||
logger.info("The server is fired up and ready to roll!")
|
||||
if pipe_finish_writer is not None:
|
||||
pipe_finish_writer.send("init ok")
|
||||
|
||||
Reference in New Issue
Block a user