Use int64 as indices for set_kv_buffer (#3039)
This commit is contained in:
@@ -99,10 +99,7 @@ class BenchArgs:
|
||||
parser.add_argument("--correctness-test", action="store_true")
|
||||
parser.add_argument("--cut-len", type=int, default=BenchArgs.cut_len)
|
||||
parser.add_argument(
|
||||
"--profile",
|
||||
action="store_true",
|
||||
help="Use Torch Profiler. The endpoint must be launched with "
|
||||
"SGLANG_TORCH_PROFILER_DIR to enable profiler.",
|
||||
"--profile", action="store_true", help="Use Torch Profiler."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--profile-filename-prefix",
|
||||
@@ -381,6 +378,7 @@ def latency_test_run_once(
|
||||
parent_dir = os.path.dirname(os.path.abspath(profile_filename))
|
||||
os.makedirs(parent_dir, exist_ok=True)
|
||||
profiler.export_chrome_trace(profile_filename)
|
||||
rank_print(f"torch profiler chrome trace saved to {profile_filename}")
|
||||
|
||||
# Record decode timing from 2nd output
|
||||
if output_len > 1:
|
||||
@@ -451,7 +449,7 @@ def latency_test(
|
||||
il,
|
||||
ol,
|
||||
server_args.device,
|
||||
bench_args.profile,
|
||||
bench_args.profile if tp_rank == 0 else None,
|
||||
bench_args.profile_filename_prefix,
|
||||
)
|
||||
if ret is not None:
|
||||
|
||||
Reference in New Issue
Block a user