diff --git a/python/pyproject.toml b/python/pyproject.toml
index 7471f84bf..c8a8ffc4a 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -49,7 +49,7 @@ runtime_common = [
 
 srt = [
     "sglang[runtime_common]",
-    "sgl-kernel==0.1.8.post1",
+    "sgl-kernel==0.1.8.post2",
     "flashinfer_python==0.2.6.post1",
     "torch==2.7.1",
     "torchaudio==2.7.1",
diff --git a/python/sglang/srt/entrypoints/engine.py b/python/sglang/srt/entrypoints/engine.py
index 357146469..e53ad1a3b 100644
--- a/python/sglang/srt/entrypoints/engine.py
+++ b/python/sglang/srt/entrypoints/engine.py
@@ -605,7 +605,7 @@ def _set_envs_and_config(server_args: ServerArgs):
     if _is_cuda:
         assert_pkg_version(
             "sgl-kernel",
-            "0.1.8.post1",
+            "0.1.8.post2",
             "Please reinstall the latest version with `pip install sgl-kernel --force-reinstall`",
         )
 
diff --git a/python/sglang/srt/layers/attention/cutlass_mla_backend.py b/python/sglang/srt/layers/attention/cutlass_mla_backend.py
index 416eff724..8b3d18602 100644
--- a/python/sglang/srt/layers/attention/cutlass_mla_backend.py
+++ b/python/sglang/srt/layers/attention/cutlass_mla_backend.py
@@ -280,6 +280,7 @@ class CutlassMLABackend(FlashInferMLAAttnBackend):
             seq_lens=forward_batch.seq_lens.to(torch.int32),
             page_table=self.forward_metadata.block_kv_indices,
             workspace=self.forward_metadata.workspace,
+            sm_scale=layer.scaling,
             num_kv_splits=1,
         )
 
diff --git a/python/sglang/srt/layers/quantization/deep_gemm_wrapper/entrypoint.py b/python/sglang/srt/layers/quantization/deep_gemm_wrapper/entrypoint.py
index e407a3598..582fcc9b4 100644
--- a/python/sglang/srt/layers/quantization/deep_gemm_wrapper/entrypoint.py
+++ b/python/sglang/srt/layers/quantization/deep_gemm_wrapper/entrypoint.py
@@ -52,7 +52,11 @@ def grouped_gemm_nt_f8f8bf16_masked(
         expected_m, n, k, num_groups, kernel_type
     ):
         _grouped_gemm_nt_f8f8bf16_masked_raw(
-            lhs, rhs, out, masked_m, expected_m,
+            lhs,
+            rhs,
+            out,
+            masked_m,
+            expected_m,
             **({"recipe": recipe} if DEEPGEMM_V202506 else {})
         )
 