[PD] Optimize time out logic and add env var doc for mooncake (#6761)
Signed-off-by: Shangming Cai <caishangming@linux.alibaba.com>
This commit is contained in:
@@ -677,14 +677,15 @@ class MooncakeKVSender(BaseKVSender):
|
||||
self.kv_mgr.update_status(bootstrap_room, KVPoll.Bootstrapping)
|
||||
self.aux_index = None
|
||||
self.bootstrap_server_url = bootstrap_addr
|
||||
self.init_time = time.time()
|
||||
self.conclude_state = None
|
||||
self.init_time = None
|
||||
# inner state
|
||||
self.curr_idx = 0
|
||||
|
||||
def init(self, num_kv_indices: int, aux_index: Optional[int] = None):
|
||||
self.num_kv_indices = num_kv_indices
|
||||
self.aux_index = aux_index
|
||||
self.init_time = time.time()
|
||||
|
||||
def send(
|
||||
self,
|
||||
@@ -713,15 +714,16 @@ class MooncakeKVSender(BaseKVSender):
|
||||
if status in (KVPoll.Success, KVPoll.Failed):
|
||||
self.conclude_state = status
|
||||
elif status == KVPoll.Bootstrapping:
|
||||
now = time.time()
|
||||
elapsed = now - self.init_time
|
||||
if elapsed >= self.kv_mgr.bootstrap_time_out:
|
||||
self.kv_mgr.record_failure(
|
||||
self.bootstrap_room,
|
||||
f"Request {self.bootstrap_room} timed out after {elapsed:.1f}s in KVPoll.Bootstrapping",
|
||||
)
|
||||
self.conclude_state = KVPoll.Failed
|
||||
return KVPoll.Failed
|
||||
if self.init_time is not None:
|
||||
now = time.time()
|
||||
elapsed = now - self.init_time
|
||||
if elapsed >= self.kv_mgr.bootstrap_time_out:
|
||||
self.kv_mgr.record_failure(
|
||||
self.bootstrap_room,
|
||||
f"Request {self.bootstrap_room} timed out after {elapsed:.1f}s in KVPoll.Bootstrapping",
|
||||
)
|
||||
self.conclude_state = KVPoll.Failed
|
||||
return KVPoll.Failed
|
||||
|
||||
return status
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user