support non-streaming benchmark (#682)

This commit is contained in:
Lianmin Zheng
2024-07-20 18:36:42 -07:00
committed by GitHub
parent caaad53b52
commit 77e592e8e0
3 changed files with 16 additions and 4 deletions

View File

@@ -28,11 +28,16 @@ class ScheduleHeuristic:
# longest prefix match
forward_queue.sort(key=lambda x: -len(x.prefix_indices))
return forward_queue
elif self.schedule_heuristic == "fcfs":
# first come, first served
return forward_queue
elif self.schedule_heuristic == "lof":
# longest output first
forward_queue.sort(key=lambda x: -x.sampling_params.max_new_tokens)
return forward_queue
elif self.schedule_heuristic == "random":
random.shuffle(forward_queue)
return forward_queue
elif self.schedule_heuristic == "fcfs":
return forward_queue
elif self.schedule_heuristic == "dfs-weight":
last_node_to_reqs = defaultdict(list)
for req in forward_queue: