From 4c54f4420217dcbed3aa66d94b5865665c4bac26 Mon Sep 17 00:00:00 2001
From: Cheng Wan <54331508+ch-wan@users.noreply.github.com>
Date: Fri, 4 Apr 2025 18:08:30 -0400
Subject: [PATCH] [deepep] fix: shared experts are not initialized when shared
 experts fusion is enabled (#5072)

---
 python/sglang/srt/server_args.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py
index 54b532b4a..1ed3d6880 100644
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -183,7 +183,7 @@ class ServerArgs:
     enable_flashmla: bool = False
     flashinfer_mla_disable_ragged: bool = False
     warmups: Optional[str] = None
-    n_share_experts_fusion: Optional[int] = None
+    n_share_experts_fusion: int = 0
     disable_shared_experts_fusion: bool = False
 
     # Debug tensor dumps
@@ -1110,7 +1110,7 @@ class ServerArgs:
         parser.add_argument(
             "--n-share-experts-fusion",
             type=int,
-            default=None,
+            default=0,
             help="The number of shared_experts need to be replica to fuse with normal experts in deepseek v3/r1 "
             "we use tp_size by default.",
         )