diff --git a/python/sglang/srt/model_executor/cuda_graph_runner.py b/python/sglang/srt/model_executor/cuda_graph_runner.py index 14da84e42..8cab1b69f 100644 --- a/python/sglang/srt/model_executor/cuda_graph_runner.py +++ b/python/sglang/srt/model_executor/cuda_graph_runner.py @@ -100,6 +100,7 @@ def freeze_gc(enable_cudagraph_gc: bool): finally: if should_freeze: gc.unfreeze() + gc.collect() def _to_torch(model: torch.nn.Module, reverse: bool, num_tokens: int):