Cleanup readme, llava examples, usage examples and nccl init (#1194)

2024-08-24 08:02:23 -07:00
parent c9064e6fd9
commit f6af3a6561
65 changed files with 174 additions and 317 deletions
--- a/examples/frontend_language/quick_start/local_example_llava_next.py
+++ b/examples/frontend_language/quick_start/local_example_llava_next.py
@@ -0,0 +1,83 @@
+"""
+Usage: python3 local_example_llava_next.py
+"""
+
+from PIL import ImageFile
+
+import sglang as sgl
+from sglang.lang.chat_template import get_chat_template
+from sglang.srt.utils import load_image
+
+ImageFile.LOAD_TRUNCATED_IMAGES = True  # Allow loading of truncated images
+
+
+@sgl.function
+def image_qa(s, image_path, question):
+    s += sgl.user(sgl.image(image_path) + question)
+    s += sgl.assistant(sgl.gen("answer"))
+
+
+def single():
+    state = image_qa.run(
+        image_path="images/cat.jpeg", question="What is this?", max_new_tokens=128
+    )
+    print(state["answer"], "\n")
+
+
+def stream():
+    state = image_qa.run(
+        image_path="images/cat.jpeg",
+        question="What is this?",
+        max_new_tokens=64,
+        stream=True,
+    )
+
+    for out in state.text_iter("answer"):
+        print(out, end="", flush=True)
+    print()
+
+
+def batch():
+    states = image_qa.run_batch(
+        [
+            {"image_path": "images/cat.jpeg", "question": "What is this?"},
+            {"image_path": "images/dog.jpeg", "question": "What is this?"},
+        ],
+        max_new_tokens=128,
+    )
+    for s in states:
+        print(s["answer"], "\n")
+
+
+if __name__ == "__main__":
+    import multiprocessing as mp
+
+    mp.set_start_method("spawn", force=True)
+
+    runtime = sgl.Runtime(model_path="lmms-lab/llama3-llava-next-8b")
+    runtime.endpoint.chat_template = get_chat_template("llama-3-instruct")
+
+    # Or you can use the 72B model
+    # runtime = sgl.Runtime(model_path="lmms-lab/llava-next-72b", tp_size=8)
+    # runtime.endpoint.chat_template = get_chat_template("chatml-llava")
+
+    sgl.set_default_backend(runtime)
+    print(f"chat template: {runtime.endpoint.chat_template.name}")
+
+    # Or you can use API models
+    # sgl.set_default_backend(sgl.OpenAI("gpt-4-vision-preview"))
+    # sgl.set_default_backend(sgl.VertexAI("gemini-pro-vision"))
+
+    # Run a single request
+    print("\n========== single ==========\n")
+    single()
+
+    # Stream output
+    print("\n========== stream ==========\n")
+    stream()
+
+    # Run a batch of requests
+    print("\n========== batch ==========\n")
+    batch()
+
+    runtime.shutdown()