Cleanup readme, llava examples, usage examples and nccl init (#1194)
This commit is contained in:
|
Before Width: | Height: | Size: 337 KiB After Width: | Height: | Size: 337 KiB |
|
Before Width: | Height: | Size: 407 KiB After Width: | Height: | Size: 407 KiB |
@@ -1,6 +1,6 @@
|
||||
"""
|
||||
Usage:
|
||||
python3 srt_example_chat.py
|
||||
python3 local_example_chat.py
|
||||
"""
|
||||
|
||||
import sglang as sgl
|
||||
@@ -1,6 +1,6 @@
|
||||
"""
|
||||
Usage:
|
||||
python3 srt_example_complete.py
|
||||
python3 local_example_complete.py
|
||||
"""
|
||||
|
||||
import sglang as sgl
|
||||
@@ -1,8 +1,14 @@
|
||||
"""
|
||||
Usage: python3 srt_example_llava.py
|
||||
Usage: python3 local_example_llava_next.py
|
||||
"""
|
||||
|
||||
from PIL import ImageFile
|
||||
|
||||
import sglang as sgl
|
||||
from sglang.lang.chat_template import get_chat_template
|
||||
from sglang.srt.utils import load_image
|
||||
|
||||
ImageFile.LOAD_TRUNCATED_IMAGES = True # Allow loading of truncated images
|
||||
|
||||
|
||||
@sgl.function
|
||||
@@ -44,10 +50,17 @@ def batch():
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
runtime = sgl.Runtime(
|
||||
model_path="liuhaotian/llava-v1.6-vicuna-7b",
|
||||
tokenizer_path="llava-hf/llava-1.5-7b-hf",
|
||||
)
|
||||
import multiprocessing as mp
|
||||
|
||||
mp.set_start_method("spawn", force=True)
|
||||
|
||||
runtime = sgl.Runtime(model_path="lmms-lab/llama3-llava-next-8b")
|
||||
runtime.endpoint.chat_template = get_chat_template("llama-3-instruct")
|
||||
|
||||
# Or you can use the 72B model
|
||||
# runtime = sgl.Runtime(model_path="lmms-lab/llava-next-72b", tp_size=8)
|
||||
# runtime.endpoint.chat_template = get_chat_template("chatml-llava")
|
||||
|
||||
sgl.set_default_backend(runtime)
|
||||
print(f"chat template: {runtime.endpoint.chat_template.name}")
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
"""
|
||||
Usage:
|
||||
pip install opencv-python-headless
|
||||
python3 srt_example_llava.py
|
||||
|
||||
python3 srt_example_llava_v.py
|
||||
"""
|
||||
|
||||
import argparse
|
||||
@@ -9,6 +10,8 @@ import csv
|
||||
import os
|
||||
import time
|
||||
|
||||
import requests
|
||||
|
||||
import sglang as sgl
|
||||
|
||||
|
||||
@@ -1,70 +0,0 @@
|
||||
"""
|
||||
Usage: python3 srt_example_yi_vl.py
|
||||
|
||||
Requirements: transformers==4.38
|
||||
"""
|
||||
|
||||
import sglang as sgl
|
||||
|
||||
|
||||
@sgl.function
|
||||
def image_qa(s, image_path, question):
|
||||
s += sgl.user(sgl.image(image_path) + question)
|
||||
s += sgl.assistant(sgl.gen("answer"))
|
||||
|
||||
|
||||
def single():
|
||||
state = image_qa.run(
|
||||
image_path="images/cat.jpeg",
|
||||
question="What is this?",
|
||||
max_new_tokens=64,
|
||||
stop="###",
|
||||
)
|
||||
print(state["answer"], "\n")
|
||||
|
||||
|
||||
def stream():
|
||||
state = image_qa.run(
|
||||
image_path="images/cat.jpeg",
|
||||
question="What is this?",
|
||||
max_new_tokens=64,
|
||||
stream=True,
|
||||
stop="###",
|
||||
)
|
||||
|
||||
for out in state.text_iter("answer"):
|
||||
print(out, end="", flush=True)
|
||||
print()
|
||||
|
||||
|
||||
def batch():
|
||||
states = image_qa.run_batch(
|
||||
[
|
||||
{"image_path": "images/cat.jpeg", "question": "What is this?"},
|
||||
{"image_path": "images/dog.jpeg", "question": "What is this?"},
|
||||
],
|
||||
max_new_tokens=64,
|
||||
stop="###",
|
||||
)
|
||||
for s in states:
|
||||
print(s["answer"], "\n")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
runtime = sgl.Runtime(model_path="BabyChou/Yi-VL-6B")
|
||||
# runtime = sgl.Runtime(model_path="BabyChou/Yi-VL-34B")
|
||||
sgl.set_default_backend(runtime)
|
||||
|
||||
# Run a single request
|
||||
print("\n========== single ==========\n")
|
||||
single()
|
||||
|
||||
# Stream output
|
||||
print("\n========== stream ==========\n")
|
||||
stream()
|
||||
|
||||
# Run a batch of requests
|
||||
print("\n========== batch ==========\n")
|
||||
batch()
|
||||
|
||||
runtime.shutdown()
|
||||
@@ -4,7 +4,7 @@ Usage:
|
||||
# Installing latest sglang.
|
||||
|
||||
# Endpoint Service CLI:
|
||||
# python -m sglang.launch_server --model-path lmms-lab/llama3-llava-next-8b --tokenizer-path lmms-lab/llama3-llava-next-8b-tokenizer --port=30000 --host="127.0.0.1" --tp-size=4
|
||||
python -m sglang.launch_server --model-path lmms-lab/llama3-llava-next-8b --port=30000
|
||||
|
||||
python3 http_llama3_llava_test.py
|
||||
|
||||
@@ -16,7 +16,6 @@ import argparse
|
||||
import asyncio
|
||||
import copy
|
||||
import json
|
||||
import time
|
||||
|
||||
import aiohttp
|
||||
import requests
|
||||
@@ -1,3 +1,11 @@
|
||||
"""
|
||||
Usage:
|
||||
|
||||
python3 -m sglang.launch_server --model-path lmms-lab/llava-onevision-qwen2-72b-ov --port=30000 --tp-size=8 --chat-template=chatml-llava --chunked-prefill-size=16384
|
||||
|
||||
python3 http_llava_onevision_test.py
|
||||
"""
|
||||
|
||||
import base64
|
||||
import io
|
||||
import os
|
||||
@@ -74,7 +82,6 @@ def video_stream_request_test(client, video_path):
|
||||
print("------------------------Video Stream Request Test----------------------")
|
||||
messages = prepare_video_messages(video_path)
|
||||
|
||||
start_time = time.time()
|
||||
video_request = client.chat.completions.create(
|
||||
model="default",
|
||||
messages=messages,
|
||||
@@ -4,7 +4,7 @@ Usage:
|
||||
# Installing latest sglang.
|
||||
|
||||
# Endpoint Service CLI:
|
||||
# python -m sglang.launch_server --model-path lmms-lab/llava-next-72b --tokenizer-path lmms-lab/llavanext-qwen-tokenizer --port=30000 --host="127.0.0.1" --tp-size=4
|
||||
python -m sglang.launch_server --model-path lmms-lab/llava-next-72b --port=30000 --tp-size=8
|
||||
|
||||
python3 http_qwen_llava_test.py
|
||||
|
||||
@@ -16,7 +16,6 @@ import argparse
|
||||
import asyncio
|
||||
import copy
|
||||
import json
|
||||
import time
|
||||
|
||||
import aiohttp
|
||||
import requests
|
||||
@@ -1,90 +0,0 @@
|
||||
"""
|
||||
Usage: python3 srt_example_llava.py
|
||||
"""
|
||||
|
||||
from PIL import ImageFile
|
||||
|
||||
import sglang as sgl
|
||||
from sglang.lang.chat_template import get_chat_template
|
||||
from sglang.srt.utils import load_image
|
||||
|
||||
ImageFile.LOAD_TRUNCATED_IMAGES = True # Allow loading of truncated images
|
||||
|
||||
|
||||
@sgl.function
|
||||
def image_qa(s, image, question):
|
||||
s += sgl.user(sgl.image(image) + question)
|
||||
s += sgl.assistant(sgl.gen("answer"))
|
||||
|
||||
|
||||
def single():
|
||||
image_url = "https://farm4.staticflickr.com/3175/2653711032_804ff86d81_z.jpg"
|
||||
pil_image, _ = load_image(image_url)
|
||||
state = image_qa.run(image=pil_image, question="What is this?", max_new_tokens=512)
|
||||
print(state["answer"], "\n")
|
||||
|
||||
|
||||
def stream():
|
||||
image_url = "https://farm4.staticflickr.com/3175/2653711032_804ff86d81_z.jpg"
|
||||
pil_image, _ = load_image(image_url)
|
||||
state = image_qa.run(
|
||||
image=pil_image,
|
||||
question="Please generate short caption for this image.",
|
||||
max_new_tokens=512,
|
||||
temperature=0,
|
||||
stream=True,
|
||||
)
|
||||
|
||||
for out in state.text_iter("answer"):
|
||||
print(out, end="", flush=True)
|
||||
print()
|
||||
|
||||
|
||||
def batch():
|
||||
image_url = "https://farm4.staticflickr.com/3175/2653711032_804ff86d81_z.jpg"
|
||||
pil_image, _ = load_image(image_url)
|
||||
states = image_qa.run_batch(
|
||||
[
|
||||
{"image": pil_image, "question": "What is this?"},
|
||||
{"image": pil_image, "question": "What is this?"},
|
||||
],
|
||||
max_new_tokens=512,
|
||||
)
|
||||
for s in states:
|
||||
print(s["answer"], "\n")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import multiprocessing as mp
|
||||
|
||||
mp.set_start_method("spawn", force=True)
|
||||
runtime = sgl.Runtime(
|
||||
model_path="lmms-lab/llama3-llava-next-8b",
|
||||
tokenizer_path="lmms-lab/llama3-llava-next-8b-tokenizer",
|
||||
)
|
||||
runtime.endpoint.chat_template = get_chat_template("llama-3-instruct")
|
||||
# runtime = sgl.Runtime(
|
||||
# model_path="lmms-lab/llava-next-72b",
|
||||
# tokenizer_path="lmms-lab/llavanext-qwen-tokenizer",
|
||||
# )
|
||||
# runtime.endpoint.chat_template = get_chat_template("chatml-llava")
|
||||
sgl.set_default_backend(runtime)
|
||||
print(f"chat template: {runtime.endpoint.chat_template.name}")
|
||||
|
||||
# Or you can use API models
|
||||
# sgl.set_default_backend(sgl.OpenAI("gpt-4-vision-preview"))
|
||||
# sgl.set_default_backend(sgl.VertexAI("gemini-pro-vision"))
|
||||
|
||||
# Run a single request
|
||||
print("\n========== single ==========\n")
|
||||
single()
|
||||
|
||||
# Stream output
|
||||
print("\n========== stream ==========\n")
|
||||
stream()
|
||||
|
||||
# Run a batch of requests
|
||||
print("\n========== batch ==========\n")
|
||||
batch()
|
||||
|
||||
runtime.shutdown()
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 132 KiB |
Reference in New Issue
Block a user