From 3e34e9004f2a086e652deff2d57819434862ec84 Mon Sep 17 00:00:00 2001
From: narutolhy <582909902@qq.com>
Date: Mon, 30 Jun 2025 21:51:01 -0700
Subject: [PATCH] Fix: sync prepare_fp8_layer_for_marlin with latest vllm
 changes (#7648)

---
 .../compressed_tensors/schemes/compressed_tensors_w8a16_fp8.py  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/sglang/srt/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a16_fp8.py b/python/sglang/srt/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a16_fp8.py
index fa7d77f28..af4f1a0e0 100644
--- a/python/sglang/srt/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a16_fp8.py
+++ b/python/sglang/srt/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a16_fp8.py
@@ -76,7 +76,7 @@ class CompressedTensorsW8A16Fp8(CompressedTensorsScheme):
             layer.input_scale = torch.nn.Parameter(
                 layer.input_scale.data, requires_grad=False
             )
-        prepare_fp8_layer_for_marlin(layer, strategy="channel")
+        prepare_fp8_layer_for_marlin(layer, size_k_first=True)
 
     def create_weights(
         self,