fix(hicahce-long-bench): adjust context workload generator to use full query set (#9847)
Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
This commit is contained in:
@@ -31,9 +31,10 @@ class ContextWorkloadGenerator(WorkloadGenerator):
|
||||
self.completed_requests = 0
|
||||
|
||||
self.dataset = json.load(open(args.dataset_path))
|
||||
num_requests = min(args.num_clients, len(self.dataset["queries"]))
|
||||
|
||||
init_requests = []
|
||||
for i in range(min(args.num_clients, len(self.dataset["queries"]))):
|
||||
for i in range(num_requests):
|
||||
context_id = self.dataset["queries"][i]["context"]
|
||||
init_requests.append(
|
||||
(
|
||||
@@ -52,13 +53,14 @@ class ContextWorkloadGenerator(WorkloadGenerator):
|
||||
self.ready_queue = ReadyQueue(init_requests=init_requests)
|
||||
|
||||
self.response_queue = queue.Queue()
|
||||
self.pbar = tqdm(total=args.num_clients * args.num_rounds)
|
||||
self.pbar = tqdm(total=num_requests)
|
||||
self.performance_metrics = {
|
||||
"ttft": [],
|
||||
"latency": [],
|
||||
"itl": [],
|
||||
"prompt_len": [],
|
||||
"cached_tokens": [],
|
||||
"generated_len": [],
|
||||
}
|
||||
|
||||
self.max_parallel = args.max_parallel
|
||||
@@ -75,6 +77,9 @@ class ContextWorkloadGenerator(WorkloadGenerator):
|
||||
self.performance_metrics["ttft"].append(response.ttft)
|
||||
self.performance_metrics["itl"].extend(response.itl)
|
||||
self.performance_metrics["latency"].append(response.latency)
|
||||
self.performance_metrics["prompt_len"].append(response.prompt_len)
|
||||
self.performance_metrics["cached_tokens"].append(response.cached_tokens)
|
||||
self.performance_metrics["generated_len"].append(response.generated_len)
|
||||
self.completed_requests += 1
|
||||
|
||||
except queue.Empty:
|
||||
@@ -85,7 +90,7 @@ class ContextWorkloadGenerator(WorkloadGenerator):
|
||||
if __name__ == "__main__":
|
||||
args = parse_args()
|
||||
args.num_rounds = 1
|
||||
args.max_parallel = 128
|
||||
args.max_parallel = 24
|
||||
flush_cache_url = f"http://{args.host}:{args.port}/flush_cache"
|
||||
|
||||
for request_rate in [24, 16, 12, 8, 4, 2, 1]:
|
||||
|
||||
Reference in New Issue
Block a user