Fix failed CI tests on long prompts; better error messages for embedding models (#1700)
This commit is contained in:
@@ -56,6 +56,9 @@ class GenerateReqInput:
|
||||
# LoRA related
|
||||
lora_path: Optional[Union[List[Optional[str]], Optional[str]]] = None
|
||||
|
||||
# Whether it is a single request or a batch request
|
||||
is_single: bool = True
|
||||
|
||||
def post_init(self):
|
||||
if (self.text is None and self.input_ids is None) or (
|
||||
self.text is not None and self.input_ids is not None
|
||||
|
||||
@@ -150,9 +150,13 @@ class TokenizerManager:
|
||||
while self.model_update_lock.locked():
|
||||
await asyncio.sleep(0.001)
|
||||
|
||||
if isinstance(obj, EmbeddingReqInput) and self.is_generation:
|
||||
raise ValueError(
|
||||
"This model does not appear to be an embedding model by default. Please add `--is-embedding` when launching the server or try another model."
|
||||
)
|
||||
|
||||
obj.post_init()
|
||||
is_single = obj.is_single
|
||||
|
||||
if is_single:
|
||||
async for response in self._handle_single_request(obj, request):
|
||||
yield response
|
||||
|
||||
@@ -542,8 +542,6 @@ def _wait_and_warmup(server_args, pipe_finish_writer, pid):
|
||||
kill_child_process(pid, including_parent=False)
|
||||
return
|
||||
|
||||
print(f"{res.json()=}")
|
||||
|
||||
logger.info("The server is fired up and ready to roll!")
|
||||
if pipe_finish_writer is not None:
|
||||
pipe_finish_writer.send("ready")
|
||||
|
||||
Reference in New Issue
Block a user