[Fix] use torch.inference_mode() instead of torch.no_grad() (#4372)

This commit is contained in:
JieXin Liang
2025-03-17 13:54:16 +08:00
committed by GitHub
parent 8cc300f536
commit 0212d2e288
4 changed files with 120 additions and 4 deletions

View File

@@ -101,6 +101,7 @@ from sglang.srt.server_args import PortArgs, ServerArgs
from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
from sglang.srt.torch_memory_saver_adapter import TorchMemorySaverAdapter
from sglang.srt.utils import (
+DynamicGradMode,
broadcast_pyobj,
configure_logger,
crash_on_warnings,
@@ -487,7 +488,7 @@ class Scheduler(SchedulerOutputProcessorMixin):
},
)
-@torch.no_grad()
+@DynamicGradMode()
def event_loop_normal(self):
"""A normal scheduler loop."""
while True:
@@ -507,7 +508,7 @@ class Scheduler(SchedulerOutputProcessorMixin):
self.last_batch = batch
-@torch.no_grad()
+@DynamicGradMode()
def event_loop_overlap(self):
"""A scheduler loop that overlaps the CPU processing and GPU computation."""
self.result_queue = deque()