Make some warnings crash the process when running in CI (#1009)
This commit is contained in:
@@ -17,6 +17,7 @@ limitations under the License.
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
import multiprocessing
|
import multiprocessing
|
||||||
|
import os
|
||||||
import pickle
|
import pickle
|
||||||
import time
|
import time
|
||||||
import warnings
|
import warnings
|
||||||
@@ -285,6 +286,7 @@ class ModelTpServer:
|
|||||||
)
|
)
|
||||||
|
|
||||||
def check_memory(self):
|
def check_memory(self):
|
||||||
|
crash = os.getenv("CI", "false") == "true"
|
||||||
available_size = (
|
available_size = (
|
||||||
self.token_to_kv_pool.available_size() + self.tree_cache.evictable_size()
|
self.token_to_kv_pool.available_size() + self.tree_cache.evictable_size()
|
||||||
)
|
)
|
||||||
@@ -294,6 +296,7 @@ class ModelTpServer:
|
|||||||
f"available_size={available_size}, max_total_num_tokens={self.max_total_num_tokens}\n"
|
f"available_size={available_size}, max_total_num_tokens={self.max_total_num_tokens}\n"
|
||||||
"KV cache pool leak detected!"
|
"KV cache pool leak detected!"
|
||||||
)
|
)
|
||||||
|
exit(1) if crash else None
|
||||||
|
|
||||||
if len(self.req_to_token_pool.free_slots) != self.req_to_token_pool.size:
|
if len(self.req_to_token_pool.free_slots) != self.req_to_token_pool.size:
|
||||||
warnings.warn(
|
warnings.warn(
|
||||||
@@ -302,6 +305,7 @@ class ModelTpServer:
|
|||||||
f"total slots={self.req_to_token_pool.size}\n"
|
f"total slots={self.req_to_token_pool.size}\n"
|
||||||
"Memory pool leak detected!"
|
"Memory pool leak detected!"
|
||||||
)
|
)
|
||||||
|
exit(1) if crash else None
|
||||||
|
|
||||||
def handle_generate_request(
|
def handle_generate_request(
|
||||||
self,
|
self,
|
||||||
|
|||||||
Reference in New Issue
Block a user