clean up step function (#635)
This commit is contained in:
@@ -228,6 +228,18 @@ class ModelTpServer:
|
|||||||
|
|
||||||
# Print stats
|
# Print stats
|
||||||
if self.tp_rank == 0 and self.decode_forward_ct % 40 == 0:
|
if self.tp_rank == 0 and self.decode_forward_ct % 40 == 0:
|
||||||
|
self.print_stats()
|
||||||
|
|
||||||
|
if self.running_batch.is_empty():
|
||||||
|
self.running_batch = None
|
||||||
|
break
|
||||||
|
|
||||||
|
if self.out_pyobjs and self.running_batch.has_stream():
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
self.check_memory()
|
||||||
|
|
||||||
|
def print_stats(self):
|
||||||
num_used = self.max_total_num_tokens - (
|
num_used = self.max_total_num_tokens - (
|
||||||
self.token_to_kv_pool.available_size()
|
self.token_to_kv_pool.available_size()
|
||||||
+ self.tree_cache.evictable_size()
|
+ self.tree_cache.evictable_size()
|
||||||
@@ -246,14 +258,7 @@ class ModelTpServer:
|
|||||||
f"#queue-req: {len(self.forward_queue)}"
|
f"#queue-req: {len(self.forward_queue)}"
|
||||||
)
|
)
|
||||||
|
|
||||||
if self.running_batch.is_empty():
|
def check_memory(self):
|
||||||
self.running_batch = None
|
|
||||||
break
|
|
||||||
|
|
||||||
if self.out_pyobjs and self.running_batch.has_stream():
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
# Check the available size
|
|
||||||
available_size = (
|
available_size = (
|
||||||
self.token_to_kv_pool.available_size()
|
self.token_to_kv_pool.available_size()
|
||||||
+ self.tree_cache.evictable_size()
|
+ self.tree_cache.evictable_size()
|
||||||
|
|||||||
Reference in New Issue
Block a user