diff --git a/README.md b/README.md index b21e677c4..2667000cf 100644 --- a/README.md +++ b/README.md @@ -357,9 +357,11 @@ python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port - Llama - Mistral - Mixtral +- Qwen / Qwen 2 - LLaVA - `python3 -m sglang.launch_server --model-path liuhaotian/llava-v1.5-7b --tokenizer-path llava-hf/llava-1.5-7b-hf --chat-template vicuna_v1.1 --port 30000` -- Qwen / Qwen 2 +- Yi-VL + - see [srt_example_yi_vl.py](examples/quick_start/srt_example_yi_vl.py). - AWQ quantization ## Benchmark And Performance diff --git a/examples/quick_start/srt_example_yi_vl.py b/examples/quick_start/srt_example_yi_vl.py index a19336924..e4f6ef16d 100644 --- a/examples/quick_start/srt_example_yi_vl.py +++ b/examples/quick_start/srt_example_yi_vl.py @@ -46,12 +46,9 @@ def batch(): if __name__ == "__main__": - runtime = sgl.Runtime(model_path="BabyChou/Yi-VL-6B", - tokenizer_path="BabyChou/Yi-VL-6B") + runtime = sgl.Runtime(model_path="BabyChou/Yi-VL-6B") + # runtime = sgl.Runtime(model_path="BabyChou/Yi-VL-34B") sgl.set_default_backend(runtime) - # Or you can use API models - # sgl.set_default_backend(sgl.OpenAI("gpt-4-vision-preview")) - # sgl.set_default_backend(sgl.VertexAI("gemini-pro-vision")) # Run a single request print("\n========== single ==========\n") @@ -65,4 +62,4 @@ if __name__ == "__main__": print("\n========== batch ==========\n") batch() - runtime.shutdown() \ No newline at end of file + runtime.shutdown()