Support offloading in fp8 (#9948)

2025-09-14 16:14:28 +08:00
parent b047b553c2
commit fa46e2bd40
4 changed files with 95 additions and 17 deletions
--- a/python/sglang/srt/models/deepseek_v2.py
+++ b/python/sglang/srt/models/deepseek_v2.py
@@ -2244,8 +2244,15 @@ class DeepseekV2Model(nn.Module):
                    [
                        "w13_weight",
                        "w2_weight",
-                        "w13_blockscale_swizzled",
-                        "w2_blockscale_swizzled",
+                        # only for nvfp4
+                        *(
+                            [
+                                "w13_blockscale_swizzled",
+                                "w2_blockscale_swizzled",
+                            ]
+                            if hasattr(module, "w13_blockscale_swizzled")
+                            else []
+                        ),
                    ]
                    if isinstance(module, FusedMoE)
                    else []