Fix env vars in bench_latency (#1472)

This commit is contained in:
Lianmin Zheng
2024-09-19 03:19:26 -07:00
committed by GitHub
parent 5ce55aee15
commit 2cd7e181dd
2 changed files with 37 additions and 1 deletion

View File

@@ -64,8 +64,13 @@ from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.managers.schedule_batch import Req, ScheduleBatch
from sglang.srt.model_executor.model_runner import ModelRunner
from sglang.srt.sampling.sampling_params import SamplingParams
from sglang.srt.server import _set_envs_and_config
from sglang.srt.server_args import ServerArgs
from sglang.srt.utils import kill_child_process, suppress_other_loggers
from sglang.srt.utils import (
configure_logger,
kill_child_process,
suppress_other_loggers,
)
@dataclasses.dataclass
@@ -341,6 +346,8 @@ def latency_test(
bench_args,
tp_rank,
):
configure_logger(server_args, prefix=f" TP{tp_rank}")
_set_envs_and_config(server_args)
rank_print = print if tp_rank == 0 else lambda *args, **kwargs: None
# Load the model