diff --git a/benchmark/kernels/minmax-text-01-lighting_attention/benchmark_lighting_attention_decode.py b/benchmark/kernels/minmax-text-01-lightning_attention/benchmark_lightning_attention_decode.py similarity index 99% rename from benchmark/kernels/minmax-text-01-lighting_attention/benchmark_lighting_attention_decode.py rename to benchmark/kernels/minmax-text-01-lightning_attention/benchmark_lightning_attention_decode.py index 4ce7f2b49..a2d1e10f6 100644 --- a/benchmark/kernels/minmax-text-01-lighting_attention/benchmark_lighting_attention_decode.py +++ b/benchmark/kernels/minmax-text-01-lightning_attention/benchmark_lightning_attention_decode.py @@ -26,7 +26,6 @@ def _decode_kernel( d_original: tl.constexpr, e: tl.constexpr, e_original: tl.constexpr, - BLOCK_SIZE: tl.constexpr = 32, ): off_bh = tl.program_id(0) off_h = off_bh % h diff --git a/benchmark/kernels/minmax-text-01-lighting_attention/benchmark_lighting_attention_prefill.py b/benchmark/kernels/minmax-text-01-lightning_attention/benchmark_lightning_attention_prefill.py similarity index 99% rename from benchmark/kernels/minmax-text-01-lighting_attention/benchmark_lighting_attention_prefill.py rename to benchmark/kernels/minmax-text-01-lightning_attention/benchmark_lightning_attention_prefill.py index 3db4694c7..cd298487b 100644 --- a/benchmark/kernels/minmax-text-01-lighting_attention/benchmark_lighting_attention_prefill.py +++ b/benchmark/kernels/minmax-text-01-lightning_attention/benchmark_lightning_attention_prefill.py @@ -493,6 +493,8 @@ def test_lightning_attention_implementations(model_params): msg="Lightning attention implementations produce different results", ) + print("✅ Two implementations match") + def get_benchmark(): batch_size_range = [2**i for i in range(0, 7)] # max 64