From 1eb8eade2bf6f69bf38c7d2706242775842131c5 Mon Sep 17 00:00:00 2001
From: yizhang2077 <1109276519@qq.com>
Date: Thu, 20 Feb 2025 16:10:35 +0800
Subject: [PATCH] add env-var control (SUPPORT_CUTLASS_BLOCK_FP8) for cutlass fp8 blockwise gemm (#3727)

---
 python/sglang/srt/layers/quantization/fp8_utils.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/python/sglang/srt/layers/quantization/fp8_utils.py b/python/sglang/srt/layers/quantization/fp8_utils.py
index a613f8a38..2a87f70d1 100644
--- a/python/sglang/srt/layers/quantization/fp8_utils.py
+++ b/python/sglang/srt/layers/quantization/fp8_utils.py
@@ -1,3 +1,4 @@
+import os
 from typing import List, Optional, Tuple
 
 import torch
@@ -40,6 +41,8 @@ def normalize_e4m3fn_to_e4m3fnuz(
 
 
 def cutlass_block_fp8_supported() -> bool:
+    if os.environ.get("SUPPORT_CUTLASS_BLOCK_FP8") is None:
+        return False
     if _is_cuda:
         major, minor = torch.cuda.get_device_capability()
         sm_version = major * 10 + minor