clean up step function (#635)
This commit is contained in:
@@ -228,6 +228,18 @@ class ModelTpServer:
|
||||
|
||||
# Print stats
|
||||
if self.tp_rank == 0 and self.decode_forward_ct % 40 == 0:
|
||||
self.print_stats()
|
||||
|
||||
if self.running_batch.is_empty():
|
||||
self.running_batch = None
|
||||
break
|
||||
|
||||
if self.out_pyobjs and self.running_batch.has_stream():
|
||||
break
|
||||
else:
|
||||
self.check_memory()
|
||||
|
||||
def print_stats(self):
|
||||
num_used = self.max_total_num_tokens - (
|
||||
self.token_to_kv_pool.available_size()
|
||||
+ self.tree_cache.evictable_size()
|
||||
@@ -246,14 +258,7 @@ class ModelTpServer:
|
||||
f"#queue-req: {len(self.forward_queue)}"
|
||||
)
|
||||
|
||||
if self.running_batch.is_empty():
|
||||
self.running_batch = None
|
||||
break
|
||||
|
||||
if self.out_pyobjs and self.running_batch.has_stream():
|
||||
break
|
||||
else:
|
||||
# Check the available size
|
||||
def check_memory(self):
|
||||
available_size = (
|
||||
self.token_to_kv_pool.available_size()
|
||||
+ self.tree_cache.evictable_size()
|
||||
|
||||
Reference in New Issue
Block a user