Add storage read/write bandwidth logs to monitor kvcache performance (#9965)
Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
This commit is contained in:
@@ -20,6 +20,7 @@ from sglang.srt.mem_cache.memory_pool_host import (
|
||||
MLATokenToKVPoolHost,
|
||||
)
|
||||
from sglang.srt.mem_cache.radix_cache import RadixCache, TreeNode
|
||||
from sglang.srt.metrics.collector import StorageMetricsCollector
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -37,6 +38,7 @@ class HiRadixCache(RadixCache):
|
||||
hicache_write_policy: str,
|
||||
hicache_io_backend: str,
|
||||
hicache_mem_layout: str,
|
||||
enable_metrics: bool,
|
||||
hicache_storage_backend: Optional[str] = None,
|
||||
hicache_storage_prefetch_policy: Optional[str] = "best_effort",
|
||||
model_name: Optional[str] = None,
|
||||
@@ -73,6 +75,8 @@ class HiRadixCache(RadixCache):
|
||||
self.tp_group = tp_cache_group
|
||||
self.tp_world_size = torch.distributed.get_world_size(group=self.tp_group)
|
||||
self.enable_storage = hicache_storage_backend is not None
|
||||
self.enable_storage_metrics = self.enable_storage and enable_metrics
|
||||
|
||||
# todo: customizable storage prefetch threshold and timeout
|
||||
self.prefetch_threshold = 256
|
||||
self.prefetch_timeout = 3 # seconds
|
||||
@@ -92,6 +96,14 @@ class HiRadixCache(RadixCache):
|
||||
model_name=model_name,
|
||||
storage_backend_extra_config=storage_backend_extra_config,
|
||||
)
|
||||
if self.enable_storage_metrics:
|
||||
# TODO: support pp
|
||||
labels = {
|
||||
"storage_backend": hicache_storage_backend,
|
||||
"tp_rank": self.cache_controller.tp_rank,
|
||||
"dp_rank": self.cache_controller.dp_rank,
|
||||
}
|
||||
self.metrics_collector = StorageMetricsCollector(labels=labels)
|
||||
|
||||
# record the nodes with ongoing write through
|
||||
self.ongoing_write_through = {}
|
||||
@@ -379,6 +391,10 @@ class HiRadixCache(RadixCache):
|
||||
self.loading_check()
|
||||
if self.enable_storage:
|
||||
self.drain_storage_control_queues()
|
||||
if self.enable_storage_metrics:
|
||||
self.metrics_collector.log_storage_metrics(
|
||||
self.cache_controller.storage_backend.get_stats()
|
||||
)
|
||||
|
||||
def drain_storage_control_queues(self):
|
||||
"""
|
||||
@@ -414,10 +430,13 @@ class HiRadixCache(RadixCache):
|
||||
|
||||
# process backup acks
|
||||
for _ in range(n_backup):
|
||||
ack_id = cc.ack_backup_queue.get()
|
||||
operation = cc.ack_backup_queue.get()
|
||||
ack_id = operation.id
|
||||
entry = self.ongoing_backup.pop(ack_id, None)
|
||||
if entry is not None:
|
||||
entry.release_host()
|
||||
if self.enable_storage_metrics:
|
||||
self.metrics_collector.log_backuped_tokens(operation.completed_tokens)
|
||||
|
||||
# release host memory
|
||||
host_indices_list = []
|
||||
@@ -515,6 +534,11 @@ class HiRadixCache(RadixCache):
|
||||
del self.ongoing_prefetch[req_id]
|
||||
self.cache_controller.prefetch_tokens_occupied -= len(token_ids)
|
||||
|
||||
if self.enable_storage_metrics:
|
||||
self.metrics_collector.log_prefetched_tokens(
|
||||
min_completed_tokens - matched_length
|
||||
)
|
||||
|
||||
return True
|
||||
|
||||
def match_prefix(self, key: List[int], **kwargs):
|
||||
|
||||
Reference in New Issue
Block a user