[Bugfix][PD] Set conclude state before clear when failure happens (#7362)
Signed-off-by: Shangming Cai <caishangming@linux.alibaba.com>
This commit is contained in:
@@ -742,12 +742,12 @@ class MooncakeKVSender(BaseKVSender):
|
||||
self.kv_mgr.request_status.pop(self.bootstrap_room)
|
||||
|
||||
def failure_exception(self):
|
||||
self.clear()
|
||||
|
||||
# Explicitly set the status to failure since this request has failed in another rank
|
||||
if self.conclude_state is None:
|
||||
self.conclude_state = KVPoll.Failed
|
||||
|
||||
self.clear()
|
||||
|
||||
with self.kv_mgr.failure_lock:
|
||||
failure_reason = self.kv_mgr.failure_records.pop(
|
||||
self.bootstrap_room, "Failed due to an unknown reason from another rank"
|
||||
@@ -1003,12 +1003,12 @@ class MooncakeKVReceiver(BaseKVReceiver):
|
||||
self.kv_mgr.request_status.pop(self.bootstrap_room)
|
||||
|
||||
def failure_exception(self):
|
||||
self.clear()
|
||||
|
||||
# Explicitly set the status to failure since this request has failed in another rank
|
||||
if self.conclude_state is None:
|
||||
self.conclude_state = KVPoll.Failed
|
||||
|
||||
self.clear()
|
||||
|
||||
with self.kv_mgr.failure_lock:
|
||||
failure_reason = self.kv_mgr.failure_records.pop(
|
||||
self.bootstrap_room, "Failed due to an unknown reason from another rank"
|
||||
|
||||
Reference in New Issue
Block a user