Fix failed CI tests on long prompts; better error messages for embedding models (#1700)
This commit is contained in:
@@ -56,6 +56,9 @@ class GenerateReqInput:
|
||||
# LoRA related
|
||||
lora_path: Optional[Union[List[Optional[str]], Optional[str]]] = None
|
||||
|
||||
# Whether it is a single request or a batch request
|
||||
is_single: bool = True
|
||||
|
||||
def post_init(self):
|
||||
if (self.text is None and self.input_ids is None) or (
|
||||
self.text is not None and self.input_ids is not None
|
||||
|
||||
@@ -150,9 +150,13 @@ class TokenizerManager:
|
||||
while self.model_update_lock.locked():
|
||||
await asyncio.sleep(0.001)
|
||||
|
||||
if isinstance(obj, EmbeddingReqInput) and self.is_generation:
|
||||
raise ValueError(
|
||||
"This model does not appear to be an embedding model by default. Please add `--is-embedding` when launching the server or try another model."
|
||||
)
|
||||
|
||||
obj.post_init()
|
||||
is_single = obj.is_single
|
||||
|
||||
if is_single:
|
||||
async for response in self._handle_single_request(obj, request):
|
||||
yield response
|
||||
|
||||
@@ -542,8 +542,6 @@ def _wait_and_warmup(server_args, pipe_finish_writer, pid):
|
||||
kill_child_process(pid, including_parent=False)
|
||||
return
|
||||
|
||||
print(f"{res.json()=}")
|
||||
|
||||
logger.info("The server is fired up and ready to roll!")
|
||||
if pipe_finish_writer is not None:
|
||||
pipe_finish_writer.send("ready")
|
||||
|
||||
Reference in New Issue
Block a user