Fix env vars in bench_latency (#1472)
This commit is contained in:
@@ -64,8 +64,13 @@ from sglang.srt.hf_transformers_utils import get_tokenizer
|
||||
from sglang.srt.managers.schedule_batch import Req, ScheduleBatch
|
||||
from sglang.srt.model_executor.model_runner import ModelRunner
|
||||
from sglang.srt.sampling.sampling_params import SamplingParams
|
||||
from sglang.srt.server import _set_envs_and_config
|
||||
from sglang.srt.server_args import ServerArgs
|
||||
from sglang.srt.utils import kill_child_process, suppress_other_loggers
|
||||
from sglang.srt.utils import (
|
||||
configure_logger,
|
||||
kill_child_process,
|
||||
suppress_other_loggers,
|
||||
)
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
@@ -341,6 +346,8 @@ def latency_test(
|
||||
bench_args,
|
||||
tp_rank,
|
||||
):
|
||||
configure_logger(server_args, prefix=f" TP{tp_rank}")
|
||||
_set_envs_and_config(server_args)
|
||||
rank_print = print if tp_rank == 0 else lambda *args, **kwargs: None
|
||||
|
||||
# Load the model
|
||||
|
||||
Reference in New Issue
Block a user