Simplify tokenizer manager (#2254)

2024-11-29 02:18:51 -08:00
parent 8b48496aaf
commit fe97a2d40f
7 changed files with 34 additions and 103 deletions
--- a/python/sglang/bench_offline_throughput.py
+++ b/python/sglang/bench_offline_throughput.py
@@ -21,14 +21,13 @@ from typing import Dict, List, Optional, Tuple

 import numpy as np

-from sglang.api import Engine
 from sglang.bench_serving import (
    get_dataset,
    get_tokenizer,
    sample_random_requests,
    set_ulimit,
 )
-from sglang.srt.server import Runtime, start_profile, stop_profile
+from sglang.srt.server import Engine, Runtime
 from sglang.srt.server_args import ServerArgs


@@ -204,12 +203,12 @@ def throughput_test_once(

    st = time.perf_counter()
    if profile:
-        start_profile()
+        backend.start_profile()

    gen_out = backend.generate(prompt=prompt, sampling_params=sampling_params)

    if profile:
-        stop_profile()
+        backend.stop_profile()
        monitor_trace_file(os.getenv("SGLANG_TORCH_PROFILER_DIR"))

    latency = time.perf_counter() - st