Fix AWQ Dequant and Weight Loading of deepseek v2 (#6842)

This commit is contained in:
AniZpZ
2025-06-18 04:45:10 +08:00
committed by GitHub
parent e726131523
commit 3eb4a800e8
3 changed files with 18 additions and 11 deletions

View File

@@ -67,8 +67,8 @@ def sglang_awq_dequantize(
"qweight_row,qweight_col,is_bf16_act",
list(
itertools.product(
-[3584, 18944, 128, 256, 512, 1024],
-[448, 576, 4736, 16, 32, 64, 128],
+[3584, 18944, 128, 256, 512, 1024, 1536],
+[448, 576, 4736, 16, 32, 64, 128, 72],
[True, False],
)
),
@@ -77,7 +77,6 @@ def test_awq_dequant_compare_implementations(
qweight_row: int, qweight_col: int, is_bf16_act: bool
):
device = torch.device("cuda")
qweight = torch.randint(
0,
torch.iinfo(torch.int32).max,