[FA3 Feature] Support multimodal Llama-3.2-11B-Vision-Instruct (#5103)

2025-04-07 22:58:08 -07:00
parent 5a144a8ab9
commit a7c3f74bec
3 changed files with 113 additions and 9 deletions
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -886,7 +886,7 @@ class ModelRunner:
                "Please use `--attention-backend flashinfer`."
            )
            logger.warning(
-                "FlashAttention v3 Backend is in Beta. Multimodal, FP8, and Speculative Decoding are not supported."
+                "FlashAttention v3 Backend is in Beta. FP8 is not supported."
            )
            from sglang.srt.layers.attention.flashattention_backend import (
                FlashAttentionBackend,