Cleanup readme, llava examples, usage examples and nccl init (#1194)
This commit is contained in:
@@ -0,0 +1,83 @@
|
||||
"""
|
||||
Usage: python3 local_example_llava_next.py
|
||||
"""
|
||||
|
||||
from PIL import ImageFile
|
||||
|
||||
import sglang as sgl
|
||||
from sglang.lang.chat_template import get_chat_template
|
||||
from sglang.srt.utils import load_image
|
||||
|
||||
ImageFile.LOAD_TRUNCATED_IMAGES = True # Allow loading of truncated images
|
||||
|
||||
|
||||
@sgl.function
|
||||
def image_qa(s, image_path, question):
|
||||
s += sgl.user(sgl.image(image_path) + question)
|
||||
s += sgl.assistant(sgl.gen("answer"))
|
||||
|
||||
|
||||
def single():
|
||||
state = image_qa.run(
|
||||
image_path="images/cat.jpeg", question="What is this?", max_new_tokens=128
|
||||
)
|
||||
print(state["answer"], "\n")
|
||||
|
||||
|
||||
def stream():
|
||||
state = image_qa.run(
|
||||
image_path="images/cat.jpeg",
|
||||
question="What is this?",
|
||||
max_new_tokens=64,
|
||||
stream=True,
|
||||
)
|
||||
|
||||
for out in state.text_iter("answer"):
|
||||
print(out, end="", flush=True)
|
||||
print()
|
||||
|
||||
|
||||
def batch():
|
||||
states = image_qa.run_batch(
|
||||
[
|
||||
{"image_path": "images/cat.jpeg", "question": "What is this?"},
|
||||
{"image_path": "images/dog.jpeg", "question": "What is this?"},
|
||||
],
|
||||
max_new_tokens=128,
|
||||
)
|
||||
for s in states:
|
||||
print(s["answer"], "\n")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import multiprocessing as mp
|
||||
|
||||
mp.set_start_method("spawn", force=True)
|
||||
|
||||
runtime = sgl.Runtime(model_path="lmms-lab/llama3-llava-next-8b")
|
||||
runtime.endpoint.chat_template = get_chat_template("llama-3-instruct")
|
||||
|
||||
# Or you can use the 72B model
|
||||
# runtime = sgl.Runtime(model_path="lmms-lab/llava-next-72b", tp_size=8)
|
||||
# runtime.endpoint.chat_template = get_chat_template("chatml-llava")
|
||||
|
||||
sgl.set_default_backend(runtime)
|
||||
print(f"chat template: {runtime.endpoint.chat_template.name}")
|
||||
|
||||
# Or you can use API models
|
||||
# sgl.set_default_backend(sgl.OpenAI("gpt-4-vision-preview"))
|
||||
# sgl.set_default_backend(sgl.VertexAI("gemini-pro-vision"))
|
||||
|
||||
# Run a single request
|
||||
print("\n========== single ==========\n")
|
||||
single()
|
||||
|
||||
# Stream output
|
||||
print("\n========== stream ==========\n")
|
||||
stream()
|
||||
|
||||
# Run a batch of requests
|
||||
print("\n========== batch ==========\n")
|
||||
batch()
|
||||
|
||||
runtime.shutdown()
|
||||
Reference in New Issue
Block a user