[Fix] use torch.inference_mode() instead of torch.no_grad() (#4372)
This commit is contained in:
@@ -101,6 +101,7 @@ from sglang.srt.server_args import PortArgs, ServerArgs
|
||||
from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
|
||||
from sglang.srt.torch_memory_saver_adapter import TorchMemorySaverAdapter
|
||||
from sglang.srt.utils import (
|
||||
+DynamicGradMode,
|
||||
broadcast_pyobj,
|
||||
configure_logger,
|
||||
crash_on_warnings,
|
||||
@@ -487,7 +488,7 @@ class Scheduler(SchedulerOutputProcessorMixin):
|
||||
},
|
||||
)
|
||||
|
||||
-@torch.no_grad()
|
||||
+@DynamicGradMode()
|
||||
def event_loop_normal(self):
|
||||
"""A normal scheduler loop."""
|
||||
while True:
|
||||
@@ -507,7 +508,7 @@ class Scheduler(SchedulerOutputProcessorMixin):
|
||||
|
||||
self.last_batch = batch
|
||||
|
||||
-@torch.no_grad()
|
||||
+@DynamicGradMode()
|
||||
def event_loop_overlap(self):
|
||||
"""A scheduler loop that overlaps the CPU processing and GPU computation."""
|
||||
self.result_queue = deque()
|
||||
|
||||
Reference in New Issue
Block a user