remove _grouped_size_compiled_for_decode_kernels (#3453)

This commit is contained in:
Yineng Zhang
2025-02-10 13:01:21 +08:00
committed by GitHub
parent 52a492a16e
commit 27c4c9cf52
2 changed files with 1 addition and 16 deletions

View File

@@ -1077,21 +1077,6 @@ def should_use_tensor_core(
if env_override is not None:
return env_override.lower() == "true"
# Try to use _grouped_size_compiled_for_decode_kernels if available
# This is for flashinfer <=0.1.6. Otherwise, there is an accuracy bug
try:
from flashinfer.decode import _grouped_size_compiled_for_decode_kernels
if not _grouped_size_compiled_for_decode_kernels(
num_attention_heads,
num_kv_heads,
):
return True
else:
return False
except (ImportError, AttributeError):
pass
# Calculate GQA group size
gqa_group_size = num_attention_heads // num_kv_heads