Fix CI break caused by express-laned PRs. (#11499)

This commit is contained in:
Liangsheng Yin
2025-10-12 21:06:06 +08:00
committed by GitHub
parent 99a0704a36
commit 01e59e8247
2 changed files with 6 additions and 2 deletions

View File

@@ -1,7 +1,7 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import TYPE_CHECKING, Optional, Union
from typing import TYPE_CHECKING, Optional
import numpy as np
import torch
@@ -10,6 +10,7 @@ import triton.language as tl
from sglang.srt.configs.model_config import AttentionArch
from sglang.srt.layers.attention.base_attn_backend import AttentionBackend
from sglang.srt.layers.radix_attention import AttentionType
from sglang.srt.managers.schedule_batch import global_server_args_dict
from sglang.srt.model_executor.forward_batch_info import ForwardBatch, ForwardMode
from sglang.srt.speculative.spec_info import SpecInput

View File

@@ -728,7 +728,10 @@ class FlashInferAttnBackend(AttentionBackend):
)
else:
causal = True
if layer.is_cross_attention or layer.attn_type == AttentionType.ENCODER_ONLY:
if (
layer.is_cross_attention
or layer.attn_type == AttentionType.ENCODER_ONLY
):
causal = False
if save_kv_cache and layer.attn_type == AttentionType.ENCODER_ONLY:
save_kv_cache = False