Add A100 tuning configs for DeepSeek R1/V3 channel-wise INT8 (#4287)

Co-authored-by: HandH1998 <1335248067@qq.com>
This commit is contained in:
yych0745
2025-03-11 15:49:06 +08:00
committed by GitHub
parent 3a08f54638
commit 6a02b32d07
2 changed files with 152 additions and 1 deletion

View File

@@ -88,7 +88,12 @@ def benchmark_config(
)
w2_scale = torch.randn((hidden_size, num_experts), dtype=torch.float32)
if use_fp8_w8a8 or use_int8_w8a8:
-if block_shape is None:
+if use_int8_w8a8 and block_shape is None:
+w1_scale = torch.randn(
+num_experts, shard_intermediate_size, dtype=torch.float32
+)
+w2_scale = torch.randn(num_experts, hidden_size, dtype=torch.float32)
+elif block_shape is None:
w1_scale = torch.randn(num_experts, dtype=torch.float32)
w2_scale = torch.randn(num_experts, dtype=torch.float32)
a1_scale = torch.randn(1, dtype=torch.float32)