forked from EngineX-MetaX/enginex-c_series-vllm
[qwen3-omni] Add Qwen3-Omni MoE thinker
This commit is contained in:
@@ -54,6 +54,14 @@ def check_xformers_availability():
|
||||
return USE_XFORMERS_OPS
|
||||
|
||||
|
||||
def check_upstream_fa_availability(dtype: torch.dtype):
    """Return True if upstream (transformers) FlashAttention-2 is usable.

    Requires a half-precision dtype (fp16/bf16), a CUDA platform with
    device capability >= 8.0 (Ampere or newer), and the
    ``flash_attn`` package detectable by transformers.
    """
    # Guard: FA2 only supports half-precision dtypes.
    if dtype not in (torch.float16, torch.bfloat16):
        return False
    # Guard: FA2 kernels need CUDA with SM 8.0+.
    if not current_platform.is_cuda():
        return False
    if not current_platform.has_device_capability(80):
        return False
    # Deferred import keeps transformers optional at module load time.
    from transformers.utils import is_flash_attn_2_available
    return is_flash_attn_2_available()
|
||||
|
||||
|
||||
class Attention(nn.Module):
|
||||
"""Attention layer.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user