Fix chat template handling for OpenAI serving (#8635)
Signed-off-by: Xinyuan Tong <justinning0323@outlook.com> Signed-off-by: Xinyuan Tong <xinyuantong.cs@gmail.com>
This commit is contained in:
@@ -954,20 +954,6 @@ register_conv_template(
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
register_conv_template(
|
|
||||||
Conversation(
|
|
||||||
name="mimo-vl",
|
|
||||||
system_message="You are MiMo, an AI assistant developed by Xiaomi.",
|
|
||||||
system_template="<|im_start|>system\n{system_message}",
|
|
||||||
roles=("<|im_start|>user", "<|im_start|>assistant"),
|
|
||||||
sep="<|im_end|>\n",
|
|
||||||
sep_style=SeparatorStyle.ADD_NEW_LINE_SINGLE,
|
|
||||||
stop_str=["<|im_end|>"],
|
|
||||||
image_token="<|vision_start|><|image_pad|><|vision_end|>",
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
register_conv_template(
|
register_conv_template(
|
||||||
Conversation(
|
Conversation(
|
||||||
name="qwen2-audio",
|
name="qwen2-audio",
|
||||||
@@ -981,51 +967,11 @@ register_conv_template(
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
register_conv_template(
|
|
||||||
Conversation(
|
|
||||||
name="llama_4_vision",
|
|
||||||
system_message="You are a helpful language and vision assistant. You are able to understand the visual content that the user provides, and assist the user with a variety of tasks using natural language.",
|
|
||||||
system_template="<|header_start|>system<|header_end|>\n\n{system_message}<|eot|>",
|
|
||||||
roles=("user", "assistant"),
|
|
||||||
sep_style=SeparatorStyle.LLAMA4,
|
|
||||||
sep="",
|
|
||||||
stop_str="<|eot|>",
|
|
||||||
image_token="<|image|>",
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
register_conv_template(
|
|
||||||
Conversation(
|
|
||||||
name="step3-vl",
|
|
||||||
system_message="<|begin▁of▁sentence|>You are a helpful assistant",
|
|
||||||
system_template="{system_message}\n",
|
|
||||||
roles=(
|
|
||||||
"<|BOT|>user\n",
|
|
||||||
"<|BOT|>assistant\n<think>\n",
|
|
||||||
),
|
|
||||||
sep="<|EOT|>",
|
|
||||||
sep_style=SeparatorStyle.NO_COLON_SINGLE,
|
|
||||||
stop_str="<|EOT|>",
|
|
||||||
image_token="<im_patch>",
|
|
||||||
# add_bos=True,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@register_conv_template_matching_function
|
@register_conv_template_matching_function
|
||||||
def match_internvl(model_path: str):
|
def match_internvl(model_path: str):
|
||||||
if re.search(r"internvl", model_path, re.IGNORECASE):
|
if re.search(r"internvl", model_path, re.IGNORECASE):
|
||||||
return "internvl-2-5"
|
return "internvl-2-5"
|
||||||
if re.search(r"intern.*s1", model_path, re.IGNORECASE):
|
|
||||||
return "interns1"
|
|
||||||
|
|
||||||
|
|
||||||
@register_conv_template_matching_function
|
|
||||||
def match_llama_vision(model_path: str):
|
|
||||||
if re.search(r"llama.*3\.2.*vision", model_path, re.IGNORECASE):
|
|
||||||
return "llama_3_vision"
|
|
||||||
if re.search(r"llama.*4.*", model_path, re.IGNORECASE):
|
|
||||||
return "llama_4_vision"
|
|
||||||
|
|
||||||
|
|
||||||
@register_conv_template_matching_function
|
@register_conv_template_matching_function
|
||||||
@@ -1040,22 +986,6 @@ def match_vicuna(model_path: str):
|
|||||||
return "vicuna_v1.1"
|
return "vicuna_v1.1"
|
||||||
|
|
||||||
|
|
||||||
@register_conv_template_matching_function
|
|
||||||
def match_llama2_chat(model_path: str):
|
|
||||||
if re.search(
|
|
||||||
r"llama-2.*chat|codellama.*instruct",
|
|
||||||
model_path,
|
|
||||||
re.IGNORECASE,
|
|
||||||
):
|
|
||||||
return "llama-2"
|
|
||||||
|
|
||||||
|
|
||||||
@register_conv_template_matching_function
|
|
||||||
def match_mistral(model_path: str):
|
|
||||||
if re.search(r"pixtral|(mistral|mixtral).*instruct", model_path, re.IGNORECASE):
|
|
||||||
return "mistral"
|
|
||||||
|
|
||||||
|
|
||||||
@register_conv_template_matching_function
|
@register_conv_template_matching_function
|
||||||
def match_deepseek_vl(model_path: str):
|
def match_deepseek_vl(model_path: str):
|
||||||
if re.search(r"deepseek.*vl2", model_path, re.IGNORECASE):
|
if re.search(r"deepseek.*vl2", model_path, re.IGNORECASE):
|
||||||
@@ -1064,12 +994,6 @@ def match_deepseek_vl(model_path: str):
|
|||||||
|
|
||||||
@register_conv_template_matching_function
|
@register_conv_template_matching_function
|
||||||
def match_qwen_chat_ml(model_path: str):
|
def match_qwen_chat_ml(model_path: str):
|
||||||
if re.search(r"gme.*qwen.*vl", model_path, re.IGNORECASE):
|
|
||||||
return "gme-qwen2-vl"
|
|
||||||
if re.search(r"qwen.*vl", model_path, re.IGNORECASE):
|
|
||||||
return "qwen2-vl"
|
|
||||||
if re.search(r"qwen.*audio", model_path, re.IGNORECASE):
|
|
||||||
return "qwen2-audio"
|
|
||||||
if re.search(
|
if re.search(
|
||||||
r"llava-v1\.6-34b|llava-v1\.6-yi-34b|llava-next-video-34b|llava-onevision-qwen2",
|
r"llava-v1\.6-34b|llava-v1\.6-yi-34b|llava-next-video-34b|llava-onevision-qwen2",
|
||||||
model_path,
|
model_path,
|
||||||
@@ -1078,12 +1002,6 @@ def match_qwen_chat_ml(model_path: str):
|
|||||||
return "chatml-llava"
|
return "chatml-llava"
|
||||||
|
|
||||||
|
|
||||||
@register_conv_template_matching_function
|
|
||||||
def match_gemma3_instruct(model_path: str):
|
|
||||||
if re.search(r"gemma-3.*it", model_path, re.IGNORECASE):
|
|
||||||
return "gemma-it"
|
|
||||||
|
|
||||||
|
|
||||||
@register_conv_template_matching_function
|
@register_conv_template_matching_function
|
||||||
def match_openbmb_minicpm(model_path: str):
|
def match_openbmb_minicpm(model_path: str):
|
||||||
if re.search(r"minicpm-v", model_path, re.IGNORECASE):
|
if re.search(r"minicpm-v", model_path, re.IGNORECASE):
|
||||||
@@ -1092,37 +1010,7 @@ def match_openbmb_minicpm(model_path: str):
|
|||||||
return "minicpmo"
|
return "minicpmo"
|
||||||
|
|
||||||
|
|
||||||
@register_conv_template_matching_function
|
|
||||||
def match_moonshot_kimivl(model_path: str):
|
|
||||||
if re.search(r"kimi.*vl", model_path, re.IGNORECASE):
|
|
||||||
return "kimi-vl"
|
|
||||||
|
|
||||||
|
|
||||||
@register_conv_template_matching_function
|
|
||||||
def match_devstral(model_path: str):
|
|
||||||
if re.search(r"devstral", model_path, re.IGNORECASE):
|
|
||||||
return "devstral"
|
|
||||||
|
|
||||||
|
|
||||||
@register_conv_template_matching_function
|
@register_conv_template_matching_function
|
||||||
def match_phi_4_mm(model_path: str):
|
def match_phi_4_mm(model_path: str):
|
||||||
if "phi-4-multimodal" in model_path.lower():
|
if "phi-4-multimodal" in model_path.lower():
|
||||||
return "phi-4-mm"
|
return "phi-4-mm"
|
||||||
|
|
||||||
|
|
||||||
@register_conv_template_matching_function
|
|
||||||
def match_vila(model_path: str):
|
|
||||||
if re.search(r"vila", model_path, re.IGNORECASE):
|
|
||||||
return "chatml"
|
|
||||||
|
|
||||||
|
|
||||||
@register_conv_template_matching_function
|
|
||||||
def match_mimo_vl(model_path: str):
|
|
||||||
if re.search(r"mimo.*vl", model_path, re.IGNORECASE):
|
|
||||||
return "mimo-vl"
|
|
||||||
|
|
||||||
|
|
||||||
# @register_conv_template_matching_function
|
|
||||||
# def match_step3(model_path: str):
|
|
||||||
# if re.search(r"step3", model_path, re.IGNORECASE):
|
|
||||||
# return "step3-vl"
|
|
||||||
|
|||||||
@@ -84,26 +84,27 @@ class TemplateManager:
|
|||||||
if chat_template_arg:
|
if chat_template_arg:
|
||||||
self._load_explicit_chat_template(tokenizer_manager, chat_template_arg)
|
self._load_explicit_chat_template(tokenizer_manager, chat_template_arg)
|
||||||
else:
|
else:
|
||||||
# Try HuggingFace template first
|
# Guess chat template from model path
|
||||||
hf_template = self._resolve_hf_chat_template(tokenizer_manager)
|
|
||||||
if hf_template:
|
|
||||||
self._jinja_template_content_format = (
|
|
||||||
detect_jinja_template_content_format(hf_template)
|
|
||||||
)
|
|
||||||
logger.info(
|
|
||||||
f"Using default HuggingFace chat template with detected content format: {self._jinja_template_content_format}"
|
|
||||||
)
|
|
||||||
return
|
|
||||||
|
|
||||||
# Fallback to SGLang template guessing
|
|
||||||
self.guess_chat_template_from_model_path(model_path)
|
self.guess_chat_template_from_model_path(model_path)
|
||||||
|
|
||||||
# Set default format if no template was found
|
# If no pre-defined template was found, fallback to HuggingFace template
|
||||||
if self._chat_template_name is None:
|
if self._chat_template_name is None:
|
||||||
self._jinja_template_content_format = "string"
|
# Try HuggingFace template first
|
||||||
logger.info(
|
hf_template = self._resolve_hf_chat_template(tokenizer_manager)
|
||||||
"No chat template found, defaulting to 'string' content format"
|
if hf_template:
|
||||||
)
|
# override the chat template
|
||||||
|
tokenizer_manager.tokenizer.chat_template = hf_template
|
||||||
|
self._jinja_template_content_format = (
|
||||||
|
detect_jinja_template_content_format(hf_template)
|
||||||
|
)
|
||||||
|
logger.info(
|
||||||
|
f"Using default HuggingFace chat template with detected content format: {self._jinja_template_content_format}"
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
|
# Default to string content format if no template was found
|
||||||
|
self._jinja_template_content_format = "string"
|
||||||
|
logger.info("No chat template found, defaulting to 'string' content format")
|
||||||
|
|
||||||
def _load_explicit_chat_template(
|
def _load_explicit_chat_template(
|
||||||
self, tokenizer_manager, chat_template_arg: str
|
self, tokenizer_manager, chat_template_arg: str
|
||||||
@@ -257,13 +258,15 @@ class TemplateManager:
|
|||||||
|
|
||||||
Returns the chat template string if found, None otherwise.
|
Returns the chat template string if found, None otherwise.
|
||||||
"""
|
"""
|
||||||
tokenizer = tokenizer_manager.tokenizer
|
|
||||||
|
|
||||||
# Try to get AutoTokenizer chat template
|
|
||||||
try:
|
try:
|
||||||
return tokenizer.get_chat_template()
|
if processor := tokenizer_manager.processor:
|
||||||
|
if hasattr(processor, "chat_template") and processor.chat_template:
|
||||||
|
return processor.chat_template
|
||||||
|
if tokenizer := tokenizer_manager.tokenizer:
|
||||||
|
if hasattr(tokenizer, "chat_template") and tokenizer.chat_template:
|
||||||
|
return tokenizer.chat_template
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug(f"Error getting chat template via get_chat_template(): {e}")
|
logger.debug(f"Error getting chat template: {e}")
|
||||||
|
|
||||||
logger.debug("No HuggingFace chat template found")
|
logger.debug("No HuggingFace chat template found")
|
||||||
return None
|
return None
|
||||||
|
|||||||
@@ -225,10 +225,10 @@ class TokenizerManager:
|
|||||||
self.tokenizer = get_tokenizer_from_processor(self.processor)
|
self.tokenizer = get_tokenizer_from_processor(self.processor)
|
||||||
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
||||||
else:
|
else:
|
||||||
self.mm_processor = None
|
self.mm_processor = self.processor = None
|
||||||
|
|
||||||
if server_args.skip_tokenizer_init:
|
if server_args.skip_tokenizer_init:
|
||||||
self.tokenizer = self.processor = None
|
self.tokenizer = None
|
||||||
else:
|
else:
|
||||||
self.tokenizer = get_tokenizer(
|
self.tokenizer = get_tokenizer(
|
||||||
server_args.tokenizer_path,
|
server_args.tokenizer_path,
|
||||||
|
|||||||
@@ -12,7 +12,6 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
|
||||||
import re
|
|
||||||
from typing import Dict, List, Optional, Union
|
from typing import Dict, List, Optional, Union
|
||||||
|
|
||||||
from sglang.srt.managers.multimodal_processor import (
|
from sglang.srt.managers.multimodal_processor import (
|
||||||
@@ -38,14 +37,8 @@ class Gemma3nSGLangProcessor(SGLangBaseProcessor):
|
|||||||
self.mm_tokens = MultimodalSpecialTokens(
|
self.mm_tokens = MultimodalSpecialTokens(
|
||||||
image_token="<image_soft_token>",
|
image_token="<image_soft_token>",
|
||||||
image_token_id=hf_config.image_token_id,
|
image_token_id=hf_config.image_token_id,
|
||||||
image_token_regex=re.compile(
|
|
||||||
r"<start_of_image>(?:(?:<image_soft_token>)*<end_of_image>)?"
|
|
||||||
),
|
|
||||||
audio_token="<audio_soft_token>",
|
audio_token="<audio_soft_token>",
|
||||||
audio_token_id=hf_config.audio_token_id,
|
audio_token_id=hf_config.audio_token_id,
|
||||||
audio_token_regex=re.compile(
|
|
||||||
r"<start_of_audio>(?:(?:<audio_soft_token>)*<end_of_audio>)?"
|
|
||||||
),
|
|
||||||
).build(_processor)
|
).build(_processor)
|
||||||
|
|
||||||
async def process_mm_data_async(
|
async def process_mm_data_async(
|
||||||
|
|||||||
@@ -31,6 +31,8 @@ class TestQwen2VLServer(TestOpenAIVisionServer):
|
|||||||
other_args=[
|
other_args=[
|
||||||
"--mem-fraction-static",
|
"--mem-fraction-static",
|
||||||
"0.35",
|
"0.35",
|
||||||
|
"--cuda-graph-max-bs",
|
||||||
|
"4",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
cls.base_url += "/v1"
|
cls.base_url += "/v1"
|
||||||
@@ -53,6 +55,8 @@ class TestQwen2_5_VLServer(TestOpenAIVisionServer):
|
|||||||
other_args=[
|
other_args=[
|
||||||
"--mem-fraction-static",
|
"--mem-fraction-static",
|
||||||
"0.35",
|
"0.35",
|
||||||
|
"--cuda-graph-max-bs",
|
||||||
|
"4",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
cls.base_url += "/v1"
|
cls.base_url += "/v1"
|
||||||
@@ -76,6 +80,8 @@ class TestVLMContextLengthIssue(CustomTestCase):
|
|||||||
"--context-length",
|
"--context-length",
|
||||||
"300",
|
"300",
|
||||||
"--mem-fraction-static=0.75",
|
"--mem-fraction-static=0.75",
|
||||||
|
"--cuda-graph-max-bs",
|
||||||
|
"4",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
cls.base_url += "/v1"
|
cls.base_url += "/v1"
|
||||||
@@ -149,6 +155,8 @@ class TestMinicpmvServer(TestOpenAIVisionServer):
|
|||||||
"--trust-remote-code",
|
"--trust-remote-code",
|
||||||
"--mem-fraction-static",
|
"--mem-fraction-static",
|
||||||
"0.35",
|
"0.35",
|
||||||
|
"--cuda-graph-max-bs",
|
||||||
|
"4",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
cls.base_url += "/v1"
|
cls.base_url += "/v1"
|
||||||
@@ -164,7 +172,11 @@ class TestInternVL2_5Server(TestOpenAIVisionServer):
|
|||||||
cls.model,
|
cls.model,
|
||||||
cls.base_url,
|
cls.base_url,
|
||||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
other_args=["--trust-remote-code"],
|
other_args=[
|
||||||
|
"--trust-remote-code",
|
||||||
|
"--cuda-graph-max-bs",
|
||||||
|
"4",
|
||||||
|
],
|
||||||
)
|
)
|
||||||
cls.base_url += "/v1"
|
cls.base_url += "/v1"
|
||||||
|
|
||||||
@@ -183,6 +195,8 @@ class TestMinicpmoServer(TestOpenAIVisionServer):
|
|||||||
"--trust-remote-code",
|
"--trust-remote-code",
|
||||||
"--mem-fraction-static",
|
"--mem-fraction-static",
|
||||||
"0.65",
|
"0.65",
|
||||||
|
"--cuda-graph-max-bs",
|
||||||
|
"4",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
cls.base_url += "/v1"
|
cls.base_url += "/v1"
|
||||||
@@ -207,10 +221,13 @@ class TestMimoVLServer(TestOpenAIVisionServer):
|
|||||||
"--trust-remote-code",
|
"--trust-remote-code",
|
||||||
"--mem-fraction-static",
|
"--mem-fraction-static",
|
||||||
"0.6",
|
"0.6",
|
||||||
|
"--cuda-graph-max-bs",
|
||||||
|
"4",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
cls.base_url += "/v1"
|
cls.base_url += "/v1"
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
del TestOpenAIVisionServer
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
@@ -23,6 +23,8 @@ class TestPixtralServer(TestOpenAIVisionServer):
|
|||||||
"--trust-remote-code",
|
"--trust-remote-code",
|
||||||
"--mem-fraction-static",
|
"--mem-fraction-static",
|
||||||
"0.70",
|
"0.70",
|
||||||
|
"--cuda-graph-max-bs",
|
||||||
|
"4",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
cls.base_url += "/v1"
|
cls.base_url += "/v1"
|
||||||
@@ -45,6 +47,8 @@ class TestMistral3_1Server(TestOpenAIVisionServer):
|
|||||||
"--trust-remote-code",
|
"--trust-remote-code",
|
||||||
"--mem-fraction-static",
|
"--mem-fraction-static",
|
||||||
"0.75",
|
"0.75",
|
||||||
|
"--cuda-graph-max-bs",
|
||||||
|
"4",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
cls.base_url += "/v1"
|
cls.base_url += "/v1"
|
||||||
@@ -67,7 +71,8 @@ class TestDeepseekVL2Server(TestOpenAIVisionServer):
|
|||||||
"--trust-remote-code",
|
"--trust-remote-code",
|
||||||
"--context-length",
|
"--context-length",
|
||||||
"4096",
|
"4096",
|
||||||
"--disable-cuda-graph",
|
"--cuda-graph-max-bs",
|
||||||
|
"4",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
cls.base_url += "/v1"
|
cls.base_url += "/v1"
|
||||||
@@ -90,6 +95,8 @@ class TestJanusProServer(TestOpenAIVisionServer):
|
|||||||
"--trust-remote-code",
|
"--trust-remote-code",
|
||||||
"--mem-fraction-static",
|
"--mem-fraction-static",
|
||||||
"0.35",
|
"0.35",
|
||||||
|
"--cuda-graph-max-bs",
|
||||||
|
"4",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
cls.base_url += "/v1"
|
cls.base_url += "/v1"
|
||||||
@@ -120,6 +127,10 @@ class TestJanusProServer(TestOpenAIVisionServer):
|
|||||||
# "0.8",
|
# "0.8",
|
||||||
# "--tp-size=8",
|
# "--tp-size=8",
|
||||||
# "--context-length=8192",
|
# "--context-length=8192",
|
||||||
|
# "--mm-attention-backend",
|
||||||
|
# "fa3",
|
||||||
|
# "--cuda-graph-max-bs",
|
||||||
|
# "4",
|
||||||
# ],
|
# ],
|
||||||
# )
|
# )
|
||||||
# cls.base_url += "/v1"
|
# cls.base_url += "/v1"
|
||||||
@@ -143,6 +154,8 @@ class TestGemma3itServer(TestOpenAIVisionServer):
|
|||||||
"--mem-fraction-static",
|
"--mem-fraction-static",
|
||||||
"0.70",
|
"0.70",
|
||||||
"--enable-multimodal",
|
"--enable-multimodal",
|
||||||
|
"--cuda-graph-max-bs",
|
||||||
|
"4",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
cls.base_url += "/v1"
|
cls.base_url += "/v1"
|
||||||
@@ -154,7 +167,7 @@ class TestGemma3itServer(TestOpenAIVisionServer):
|
|||||||
class TestGemma3nServer(TestOpenAIVisionServer):
|
class TestGemma3nServer(TestOpenAIVisionServer):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = "google/gemma-3n-E2B-it"
|
cls.model = "google/gemma-3n-E4B-it"
|
||||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||||
cls.api_key = "sk-123456"
|
cls.api_key = "sk-123456"
|
||||||
cls.process = popen_launch_server(
|
cls.process = popen_launch_server(
|
||||||
@@ -166,7 +179,7 @@ class TestGemma3nServer(TestOpenAIVisionServer):
|
|||||||
"--mem-fraction-static",
|
"--mem-fraction-static",
|
||||||
"0.70",
|
"0.70",
|
||||||
"--cuda-graph-max-bs",
|
"--cuda-graph-max-bs",
|
||||||
"1",
|
"4",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
cls.base_url += "/v1"
|
cls.base_url += "/v1"
|
||||||
@@ -193,6 +206,8 @@ class TestKimiVLServer(TestOpenAIVisionServer):
|
|||||||
"4096",
|
"4096",
|
||||||
"--dtype",
|
"--dtype",
|
||||||
"bfloat16",
|
"bfloat16",
|
||||||
|
"--cuda-graph-max-bs",
|
||||||
|
"4",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
cls.base_url += "/v1"
|
cls.base_url += "/v1"
|
||||||
@@ -233,6 +248,8 @@ class TestPhi4MMServer(TestOpenAIVisionServer):
|
|||||||
"--lora-paths",
|
"--lora-paths",
|
||||||
f"vision={constants.HF_HUB_CACHE}/models--microsoft--Phi-4-multimodal-instruct/snapshots/{revision}/vision-lora",
|
f"vision={constants.HF_HUB_CACHE}/models--microsoft--Phi-4-multimodal-instruct/snapshots/{revision}/vision-lora",
|
||||||
f"speech={constants.HF_HUB_CACHE}/models--microsoft--Phi-4-multimodal-instruct/snapshots/{revision}/speech-lora",
|
f"speech={constants.HF_HUB_CACHE}/models--microsoft--Phi-4-multimodal-instruct/snapshots/{revision}/speech-lora",
|
||||||
|
"--cuda-graph-max-bs",
|
||||||
|
"4",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
cls.base_url += "/v1"
|
cls.base_url += "/v1"
|
||||||
@@ -277,10 +294,13 @@ class TestVILAServer(TestOpenAIVisionServer):
|
|||||||
"--trust-remote-code",
|
"--trust-remote-code",
|
||||||
"--context-length=65536",
|
"--context-length=65536",
|
||||||
f"--revision={cls.revision}",
|
f"--revision={cls.revision}",
|
||||||
|
"--cuda-graph-max-bs",
|
||||||
|
"4",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
cls.base_url += "/v1"
|
cls.base_url += "/v1"
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
del TestOpenAIVisionServer
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
@@ -71,7 +71,7 @@ class TestOpenAIVisionServer(CustomTestCase):
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "text",
|
"type": "text",
|
||||||
"text": "Describe this image in a very short sentence.",
|
"text": "Describe this image in a sentence.",
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
@@ -119,7 +119,7 @@ class TestOpenAIVisionServer(CustomTestCase):
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "text",
|
"type": "text",
|
||||||
"text": "Describe this image in a very short sentence.",
|
"text": "Describe this image in a sentence.",
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
@@ -455,7 +455,7 @@ class TestOpenAIVisionServer(CustomTestCase):
|
|||||||
content.append(
|
content.append(
|
||||||
{
|
{
|
||||||
"type": "text",
|
"type": "text",
|
||||||
"text": "Describe this image in a very short sentence.",
|
"text": "Describe this image in a sentence.",
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -528,14 +528,20 @@ class TestOpenAIVisionServer(CustomTestCase):
|
|||||||
# a fragment of Trump's speech
|
# a fragment of Trump's speech
|
||||||
audio_response = self.get_audio_response(
|
audio_response = self.get_audio_response(
|
||||||
AUDIO_TRUMP_SPEECH_URL,
|
AUDIO_TRUMP_SPEECH_URL,
|
||||||
"I have an audio sample. Please repeat the person's words",
|
"Listen to this audio and write down the audio transcription in English.",
|
||||||
category="speech",
|
category="speech",
|
||||||
)
|
)
|
||||||
assert "thank you" in audio_response
|
check_list = [
|
||||||
assert "it's a privilege to be here" in audio_response
|
"thank you",
|
||||||
assert "leader" in audio_response
|
"it's a privilege to be here",
|
||||||
assert "science" in audio_response
|
"leader",
|
||||||
assert "art" in audio_response
|
"science",
|
||||||
|
"art",
|
||||||
|
]
|
||||||
|
for check_word in check_list:
|
||||||
|
assert (
|
||||||
|
check_word in audio_response
|
||||||
|
), f"audio_response: |{audio_response}| should contain |{check_word}|"
|
||||||
|
|
||||||
def _test_audio_ambient_completion(self):
|
def _test_audio_ambient_completion(self):
|
||||||
# bird song
|
# bird song
|
||||||
|
|||||||
Reference in New Issue
Block a user