From f88e70853e7fecce825c4a864dce44a729ddf5d9 Mon Sep 17 00:00:00 2001 From: Shangming Cai Date: Fri, 20 Jun 2025 02:26:53 +0800 Subject: [PATCH] [Bugfix][PD] Set conclude state before clear when failure happens (#7362) Signed-off-by: Shangming Cai --- python/sglang/srt/disaggregation/mooncake/conn.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/sglang/srt/disaggregation/mooncake/conn.py b/python/sglang/srt/disaggregation/mooncake/conn.py index 8f9d527dd..29e861e9f 100644 --- a/python/sglang/srt/disaggregation/mooncake/conn.py +++ b/python/sglang/srt/disaggregation/mooncake/conn.py @@ -742,12 +742,12 @@ class MooncakeKVSender(BaseKVSender): self.kv_mgr.request_status.pop(self.bootstrap_room) def failure_exception(self): - self.clear() - # Explicitly set the status to failure since this request has failed in another rank if self.conclude_state is None: self.conclude_state = KVPoll.Failed + self.clear() + with self.kv_mgr.failure_lock: failure_reason = self.kv_mgr.failure_records.pop( self.bootstrap_room, "Failed due to an unknown reason from another rank" @@ -1003,12 +1003,12 @@ class MooncakeKVReceiver(BaseKVReceiver): self.kv_mgr.request_status.pop(self.bootstrap_room) def failure_exception(self): - self.clear() - # Explicitly set the status to failure since this request has failed in another rank if self.conclude_state is None: self.conclude_state = KVPoll.Failed + self.clear() + with self.kv_mgr.failure_lock: failure_reason = self.kv_mgr.failure_records.pop( self.bootstrap_room, "Failed due to an unknown reason from another rank"