diff --git a/python/sglang/srt/speculative/base_spec_worker.py b/python/sglang/srt/speculative/base_spec_worker.py
index c77d9b86b..aab993191 100644
--- a/python/sglang/srt/speculative/base_spec_worker.py
+++ b/python/sglang/srt/speculative/base_spec_worker.py
@@ -27,3 +27,8 @@ class BaseSpecWorker(ABC):
     @abstractmethod
     def draft_worker(self) -> BaseDraftWorker:
         pass
+
+    @abstractmethod
+    def clear_cache_pool(self):
+        # TODO: move this abstract method to BaseTpWorker and call it through self.model_runner.
+        pass
diff --git a/python/sglang/srt/speculative/eagle_worker.py b/python/sglang/srt/speculative/eagle_worker.py
index cb59b31f8..4c94cbac9 100644
--- a/python/sglang/srt/speculative/eagle_worker.py
+++ b/python/sglang/srt/speculative/eagle_worker.py
@@ -613,8 +613,8 @@ class EAGLEWorker(TpModelWorker):
         return parent_list, top_scores_index, draft_tokens
 
     def clear_cache_pool(self):
-        self.model_runner.req_to_token_pool.clear()
-        self.model_runner.token_to_kv_pool_allocator.clear()
+        # No-op: the allocator and KV cache pool are shared with the target worker.
+        pass
 
     def verify(self, batch: ScheduleBatch, spec_info: EagleVerifyInput):
         spec_info.prepare_for_verify(batch, self.page_size)
diff --git a/python/sglang/srt/speculative/eagle_worker_v2.py b/python/sglang/srt/speculative/eagle_worker_v2.py
index 832f6b5a8..5bf48ed55 100644
--- a/python/sglang/srt/speculative/eagle_worker_v2.py
+++ b/python/sglang/srt/speculative/eagle_worker_v2.py
@@ -539,6 +539,10 @@ class EAGLEWorkerV2(BaseSpecWorker):
     def draft_worker(self):
         return self._draft_worker
 
+    def clear_cache_pool(self):
+        # No-op: the allocator and KV cache pool are shared with the target worker and are cleared by the scheduler.
+        pass
+
     def forward_batch_generation(self, model_worker_batch: ModelWorkerBatch):
         if model_worker_batch.forward_mode.is_decode():
             draft_input: EagleDraftInput = model_worker_batch.spec_info