Implement SigLIP vision model and support BNB quantization for gemma3-mm (#5339)

This commit is contained in:
Jiajun Li
2025-05-20 23:53:46 -07:00
committed by GitHub
parent 5c0b38f369
commit 4024e1d2a8
4 changed files with 353 additions and 29 deletions

View File

@@ -33,11 +33,14 @@ VISION_MODELS = [
"unsloth/Qwen2-VL-7B-Instruct-bnb-4bit",
"unsloth/Llama-3.2-11B-Vision-Instruct-bnb-4bit",
"unsloth/Llama-3.2-11B-Vision-bnb-4bit",
"unsloth/gemma-3-4b-it-bnb-4bit",
"unsloth/gemma-3-4b-it-unsloth-bnb-4bit",
]
LANGUAGE_MODELS = [
"unsloth/Qwen2.5-7B-Instruct-bnb-4bit",
"unsloth/Qwen2-7B-Instruct-bnb-4bit",
"unsloth/Llama-3.2-3B-Instruct-bnb-4bit",
"unsloth/gemma-3-1b-it-bnb-4bit",
]
# image
@@ -256,6 +259,7 @@ class TestVisionModel(CustomTestCase):
"0.6",
"--load-format",
"bitsandbytes",
"--enable-multimodal",
]
try:
process = popen_launch_server_wrapper(