[Fix] use torch.inference_mode() instead of torch.no_grad() (#4372)

2025-03-17 13:54:16 +08:00
parent 8cc300f536
commit 0212d2e288
4 changed files with 120 additions and 4 deletions
--- a/python/sglang/srt/managers/scheduler.py
+++ b/python/sglang/srt/managers/scheduler.py
@@ -101,6 +101,7 @@ from sglang.srt.server_args import PortArgs, ServerArgs
 from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
 from sglang.srt.torch_memory_saver_adapter import TorchMemorySaverAdapter
 from sglang.srt.utils import (
+    DynamicGradMode,
    broadcast_pyobj,
    configure_logger,
    crash_on_warnings,
@@ -487,7 +488,7 @@ class Scheduler(SchedulerOutputProcessorMixin):
                },
            )

-    @torch.no_grad()
+    @DynamicGradMode()
    def event_loop_normal(self):
        """A normal scheduler loop."""
        while True:
@@ -507,7 +508,7 @@ class Scheduler(SchedulerOutputProcessorMixin):

            self.last_batch = batch

-    @torch.no_grad()
+    @DynamicGradMode()
    def event_loop_overlap(self):
        """A scheduler loop that overlaps the CPU processing and GPU computation."""
        self.result_queue = deque()