Support random dataset in bench_serving.py (#669)

This commit is contained in:
Lianmin Zheng
2024-07-20 01:06:43 -07:00
committed by GitHub
parent 8f4b1559e7
commit 35759efa91
4 changed files with 82 additions and 15 deletions

View File

@@ -233,7 +233,7 @@ class ModelRunner:
return
logger.info(f"[gpu_id={self.gpu_id}] Capture cuda graph begin.")
-batch_size_list = [1, 2, 4] + [i * 8 for i in range(1, 16)]
+batch_size_list = [1, 2, 4] + [i * 8 for i in range(1, 17)]
self.cuda_graph_runner = CudaGraphRunner(
self, max_batch_size_to_capture=max(batch_size_list)
)

View File

@@ -40,7 +40,7 @@ class GenerateReqInput:
self.text is not None and self.input_ids is not None
):
raise ValueError("Either text or input_ids should be provided.")
-if "n" in self.sampling_params and self.sampling_params["n"] != 1:
+if self.sampling_params.get("n", 1) != 1:
is_single = False
else:
if self.text is not None:

View File

@@ -196,14 +196,14 @@ class TokenizerManager:
event = asyncio.Event()
state = ReqState([], False, event)
self.rid_to_state[rid] = state
-if is_prefill == False:
+if is_prefill:
+await self._wait_for_prefill_response(event, state, obj, request, rid)
+yield input_ids
+else:
async for response in self._wait_for_response(
event, state, obj, rid, request
):
yield response
-else:
-await self._wait_for_prefill_response(event, state, obj, request, rid)
-yield input_ids
async def _handle_batch_request(self, obj, request):
batch_size = obj.batch_size