[FA3 Feature] Support multi modal Llama-3.2-11B-Vision-Instruct (#5103)
This commit is contained in:
@@ -886,7 +886,7 @@ class ModelRunner:
                     "Please use `--attention-backend flashinfer`."
                 )
             logger.warning(
-                "FlashAttention v3 Backend is in Beta. Multimodal, FP8, and Speculative Decoding are not supported."
+                "FlashAttention v3 Backend is in Beta. FP8 is not supported."
             )
             from sglang.srt.layers.attention.flashattention_backend import (
                 FlashAttentionBackend,
Reference in New Issue
Block a user