Support offloading in fp8 (#9948)

This commit is contained in:
fzyzcjy
2025-09-14 16:14:28 +08:00
committed by GitHub
parent b047b553c2
commit fa46e2bd40
4 changed files with 95 additions and 17 deletions

View File

@@ -2244,8 +2244,15 @@ class DeepseekV2Model(nn.Module):
 [
 "w13_weight",
 "w2_weight",
-"w13_blockscale_swizzled",
-"w2_blockscale_swizzled",
+# only for nvfp4
+*(
+[
+"w13_blockscale_swizzled",
+"w2_blockscale_swizzled",
+]
+if hasattr(module, "w13_blockscale_swizzled")
+else []
+),
 ]
 if isinstance(module, FusedMoE)
 else []