Support BNB quantization for llama/mllama (#5038)

Co-authored-by: Yuhao Yang <yyh073@foxmail.com>
2025-04-16 09:00:31 +08:00
parent 3efc8e2d2a
commit bc24205b32
3 changed files with 60 additions and 11 deletions
--- a/test/srt/test_bnb.py
+++ b/test/srt/test_bnb.py
@@ -1,7 +1,7 @@
 """
 Usage:
-python3 -m unittest test_vision_openai_server.TestOpenAIVisionServer.test_mixed_batch
-python3 -m unittest test_vision_openai_server.TestOpenAIVisionServer.test_multi_images_chat_completion
+python3 -m unittest test_bnb.TestVisionModel.test_vlm
+python3 -m unittest test_bnb.TestLanguageModel.test_mmlu
 """

 import base64
@@ -31,10 +31,13 @@ from sglang.test.test_utils import (
 VISION_MODELS = [
    ("unsloth/Qwen2.5-VL-7B-Instruct-bnb-4bit", "qwen2-vl"),
    ("unsloth/Qwen2-VL-7B-Instruct-bnb-4bit", "qwen2-vl"),
+    ("unsloth/Llama-3.2-11B-Vision-Instruct-bnb-4bit", "llama_3_vision"),
+    ("unsloth/Llama-3.2-11B-Vision-bnb-4bit", "llama_3_vision"),
 ]
 LANGUAGE_MODELS = [
    "unsloth/Qwen2.5-7B-Instruct-bnb-4bit",
    "unsloth/Qwen2-7B-Instruct-bnb-4bit",
+    "unsloth/Llama-3.2-3B-Instruct-bnb-4bit",
 ]

 # image