From 97710ccd1a0d65191a44505f0d24e8ddf30052da Mon Sep 17 00:00:00 2001 From: Liangsheng Yin Date: Tue, 21 Oct 2025 23:01:16 +0800 Subject: [PATCH] Fix flush cache API for spec v2 (#11918) --- python/sglang/srt/speculative/base_spec_worker.py | 5 +++++ python/sglang/srt/speculative/eagle_worker.py | 4 ++-- python/sglang/srt/speculative/eagle_worker_v2.py | 4 ++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/python/sglang/srt/speculative/base_spec_worker.py b/python/sglang/srt/speculative/base_spec_worker.py index c77d9b86b..aab993191 100644 --- a/python/sglang/srt/speculative/base_spec_worker.py +++ b/python/sglang/srt/speculative/base_spec_worker.py @@ -27,3 +27,8 @@ class BaseSpecWorker(ABC): @abstractmethod def draft_worker(self) -> BaseDraftWorker: pass + + @abstractmethod + def clear_cache_pool(self): + # TODO: move this abstract method to BaseTpWorker and call through self.model_runner + pass diff --git a/python/sglang/srt/speculative/eagle_worker.py b/python/sglang/srt/speculative/eagle_worker.py index cb59b31f8..4c94cbac9 100644 --- a/python/sglang/srt/speculative/eagle_worker.py +++ b/python/sglang/srt/speculative/eagle_worker.py @@ -613,8 +613,8 @@ class EAGLEWorker(TpModelWorker): return parent_list, top_scores_index, draft_tokens def clear_cache_pool(self): - self.model_runner.req_to_token_pool.clear() - self.model_runner.token_to_kv_pool_allocator.clear() + # allocator and kv cache pool are shared with target worker + pass def verify(self, batch: ScheduleBatch, spec_info: EagleVerifyInput): spec_info.prepare_for_verify(batch, self.page_size) diff --git a/python/sglang/srt/speculative/eagle_worker_v2.py b/python/sglang/srt/speculative/eagle_worker_v2.py index 832f6b5a8..5bf48ed55 100644 --- a/python/sglang/srt/speculative/eagle_worker_v2.py +++ b/python/sglang/srt/speculative/eagle_worker_v2.py @@ -539,6 +539,10 @@ class EAGLEWorkerV2(BaseSpecWorker): def draft_worker(self): return self._draft_worker + def clear_cache_pool(self): + # allocator and kv cache pool are shared with target worker, which are cleared in scheduler + pass + def forward_batch_generation(self, model_worker_batch: ModelWorkerBatch): if model_worker_batch.forward_mode.is_decode(): draft_input: EagleDraftInput = model_worker_batch.spec_info