def is_file_valid_json(path):
    """Return True if ``path`` is an existing regular file that parses as JSON.

    Used to decide whether a cached/downloaded dataset file can be reused.
    A path that is not a regular file (missing, empty string, directory)
    yields False without any message. A file that exists but cannot be
    decoded as text or parsed as JSON yields False after printing a
    diagnostic, so the caller can fall back to re-downloading instead of
    crashing.

    Args:
        path: Filesystem path of the candidate JSON file.

    Returns:
        True when the file was read and parsed successfully, False otherwise.
    """
    if not os.path.isfile(path):
        return False

    # TODO can fuse into the real file open later
    try:
        with open(path) as f:
            json.load(f)
    except (JSONDecodeError, UnicodeDecodeError) as e:
        # A download cut off in the middle of a multi-byte character (or a
        # file containing stray bytes) fails while decoding the text, before
        # the JSON parser ever runs; that is the same "corrupted cache" case
        # and must not propagate as an unhandled exception.
        print(
            f"{path} exists but json loading fails ({e=}), thus treat as invalid file"
        )
        return False
    return True
@@ -853,7 +870,7 @@ def sample_random_requests( # Sample token ids from ShareGPT and repeat/truncate them to satisfy the input_lens # Download sharegpt if necessary - if not os.path.isfile(dataset_path): + if not is_file_valid_json(dataset_path): dataset_path = download_and_cache_file(SHAREGPT_URL) # Load the dataset.