[FA3 Feature] Support multi modal Llama-3.2-11B-Vision-Instruct (#5103)

This commit is contained in:
Chunan Zeng
2025-04-07 22:58:08 -07:00
committed by GitHub
parent 5a144a8ab9
commit a7c3f74bec
3 changed files with 113 additions and 9 deletions

View File

@@ -886,7 +886,7 @@ class ModelRunner:
"Please use `--attention-backend flashinfer`."
)
logger.warning(
-"FlashAttention v3 Backend is in Beta. Multimodal, FP8, and Speculative Decoding are not supported."
+"FlashAttention v3 Backend is in Beta. FP8 is not supported."
)
from sglang.srt.layers.attention.flashattention_backend import (
FlashAttentionBackend,