Support BNB quantization for llama/mllama (#5038)

Co-authored-by: Yuhao Yang <yyh073@foxmail.com>
This commit is contained in:
ryang
2025-04-16 09:00:31 +08:00
committed by GitHub
parent 3efc8e2d2a
commit bc24205b32
3 changed files with 60 additions and 11 deletions

View File

@@ -1,7 +1,7 @@
"""
Usage:
python3 -m unittest test_vision_openai_server.TestOpenAIVisionServer.test_mixed_batch
python3 -m unittest test_vision_openai_server.TestOpenAIVisionServer.test_multi_images_chat_completion
python3 -m unittest test_bnb.TestVisionModel.test_vlm
python3 -m unittest test_bnb.TestLanguageModel.test_mmlu
"""
import base64
@@ -31,10 +31,13 @@ from sglang.test.test_utils import (
# Vision-language checkpoints, one (model id, tag) pair per entry.
# NOTE(review): the second element is presumably the chat-template name
# passed to the server launcher -- confirm against the code that consumes it.
VISION_MODELS = [
    (
        "unsloth/Qwen2.5-VL-7B-Instruct-bnb-4bit",
        "qwen2-vl",
    ),
    (
        "unsloth/Qwen2-VL-7B-Instruct-bnb-4bit",
        "qwen2-vl",
    ),
    (
        "unsloth/Llama-3.2-11B-Vision-Instruct-bnb-4bit",
        "llama_3_vision",
    ),
    (
        "unsloth/Llama-3.2-11B-Vision-bnb-4bit",
        "llama_3_vision",
    ),
]
# Text-only checkpoints, listed by model id.
LANGUAGE_MODELS = [
    "unsloth/Qwen2.5-7B-Instruct-bnb-4bit",
    "unsloth/Qwen2-7B-Instruct-bnb-4bit",
    "unsloth/Llama-3.2-3B-Instruct-bnb-4bit",
]
# image