Simplify tokenizer manager (#2254)

This commit is contained in:
Lianmin Zheng
2024-11-29 02:18:51 -08:00
committed by GitHub
parent 8b48496aaf
commit fe97a2d40f
7 changed files with 34 additions and 103 deletions

View File

@@ -21,14 +21,13 @@ from typing import Dict, List, Optional, Tuple
import numpy as np
-from sglang.api import Engine
from sglang.bench_serving import (
get_dataset,
get_tokenizer,
sample_random_requests,
set_ulimit,
)
-from sglang.srt.server import Runtime, start_profile, stop_profile
+from sglang.srt.server import Engine, Runtime
from sglang.srt.server_args import ServerArgs
@@ -204,12 +203,12 @@ def throughput_test_once(
st = time.perf_counter()
if profile:
-start_profile()
+backend.start_profile()
gen_out = backend.generate(prompt=prompt, sampling_params=sampling_params)
if profile:
-stop_profile()
+backend.stop_profile()
monitor_trace_file(os.getenv("SGLANG_TORCH_PROFILER_DIR"))
latency = time.perf_counter() - st