Warmup cublas (#566)

This commit is contained in:
Lianmin Zheng
2024-06-25 12:46:00 -07:00
committed by GitHub
parent eb1ae6ae0c
commit a385ee27bd
6 changed files with 17 additions and 4 deletions

View File

@@ -410,7 +410,7 @@ class ModelTpServer:
self.tree_cache_metrics["hit"] / self.tree_cache_metrics["total"]
)
logger.info(
-f"[gpu_id={self.gpu_id}] Prefil batch. "
+f"[gpu_id={self.gpu_id}] Prefill batch. "
f"#new-seq: {len(can_run_list)}, "
f"#new-token: {new_batch_input_tokens}, "
f"#cached-token: {hit_tokens}, "