Support setting use_thread in run_program for easier debugging. (#1823)

Co-authored-by: Byron Hsu <byronhsu1230@gmail.com>
This commit is contained in:
Yanyi Liu
2024-10-29 14:51:47 +08:00
committed by GitHub
parent 680cad2023
commit 5e6c32657e
2 changed files with 19 additions and 2 deletions

View File

@@ -54,7 +54,14 @@ def run_internal(state, program, func_args, func_kwargs, sync):
def run_program(
program, backend, func_args, func_kwargs, default_sampling_para, stream, sync=False
program,
backend,
func_args,
func_kwargs,
default_sampling_para,
stream,
sync=False,
use_thread=True,
):
if hasattr(backend, "endpoint"):
backend = backend.endpoint
@@ -67,6 +74,7 @@ def run_program(
chat_template=None,
stream=stream,
num_api_spec_tokens=program.num_api_spec_tokens,
use_thread=use_thread,
)
state = ProgramState(stream_executor)

View File

@@ -168,6 +168,7 @@ class SglFunction:
return_text_in_logprobs: Optional[bool] = None,
stream: bool = False,
backend=None,
use_thread: bool = True,
**kwargs,
):
from sglang.lang.interpreter import run_program
@@ -195,7 +196,15 @@ class SglFunction:
return_text_in_logprobs=return_text_in_logprobs,
)
backend = backend or global_config.default_backend
return run_program(self, backend, args, kwargs, default_sampling_para, stream)
return run_program(
self,
backend,
args,
kwargs,
default_sampling_para,
stream,
use_thread=use_thread,
)
def run_batch(
self,