Support random dataset in bench_serving.py (#669)
This commit is contained in:
@@ -233,7 +233,7 @@ class ModelRunner:
|
||||
return
|
||||
|
||||
logger.info(f"[gpu_id={self.gpu_id}] Capture cuda graph begin.")
|
||||
batch_size_list = [1, 2, 4] + [i * 8 for i in range(1, 16)]
|
||||
batch_size_list = [1, 2, 4] + [i * 8 for i in range(1, 17)]
|
||||
self.cuda_graph_runner = CudaGraphRunner(
|
||||
self, max_batch_size_to_capture=max(batch_size_list)
|
||||
)
|
||||
|
||||
@@ -40,7 +40,7 @@ class GenerateReqInput:
|
||||
self.text is not None and self.input_ids is not None
|
||||
):
|
||||
raise ValueError("Either text or input_ids should be provided.")
|
||||
if "n" in self.sampling_params and self.sampling_params["n"] != 1:
|
||||
if self.sampling_params.get("n", 1) != 1:
|
||||
is_single = False
|
||||
else:
|
||||
if self.text is not None:
|
||||
|
||||
@@ -196,14 +196,14 @@ class TokenizerManager:
|
||||
event = asyncio.Event()
|
||||
state = ReqState([], False, event)
|
||||
self.rid_to_state[rid] = state
|
||||
if is_prefill == False:
|
||||
if is_prefill:
|
||||
await self._wait_for_prefill_response(event, state, obj, request, rid)
|
||||
yield input_ids
|
||||
else:
|
||||
async for response in self._wait_for_response(
|
||||
event, state, obj, rid, request
|
||||
):
|
||||
yield response
|
||||
else:
|
||||
await self._wait_for_prefill_response(event, state, obj, request, rid)
|
||||
yield input_ids
|
||||
|
||||
async def _handle_batch_request(self, obj, request):
|
||||
batch_size = obj.batch_size
|
||||
|
||||
Reference in New Issue
Block a user