Llama3.2 vision model support (#1551)
This commit is contained in:
1004
python/sglang/srt/models/mllama.py
Normal file
1004
python/sglang/srt/models/mllama.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -605,7 +605,11 @@ class Qwen2VLForConditionalGeneration(nn.Module, SupportsMultiModal):
|
||||
]
|
||||
|
||||
positions = forward_batch.mrope_positions
|
||||
if image_inputs is None or len(image_inputs) == 0:
|
||||
if (
|
||||
forward_batch.forward_mode.is_decode()
|
||||
or image_inputs is None
|
||||
or len(image_inputs) == 0
|
||||
):
|
||||
inputs_embeds = self.model.embed_tokens(input_ids)
|
||||
else:
|
||||
if getattr(self.config, "rope_scaling", {}).get("type", None) == "mrope":
|
||||
|
||||
Reference in New Issue
Block a user