model: Support Hybrid Mamba2 NemotronHForCausalLM (nvidia/NVIDIA-Nemotron-Nano-9B-v2) (#10909)

Signed-off-by: Netanel Haber <nhaber@nvidia.com>
This commit is contained in:
Netanel Haber
2025-10-08 19:37:38 +03:00
committed by GitHub
parent c882b5ae75
commit d6837aea4d
35 changed files with 3280 additions and 854 deletions

View File

@@ -518,6 +518,24 @@ def make_layers(
return modules, start_layer, end_layer
def make_layers_non_pp(
    num_hidden_layers: int,
    layer_fn: LayerFn,
    prefix: str = "",
) -> torch.nn.ModuleList:
    """Build all decoder layers as a single ModuleList (no pipeline-parallel split).

    Unlike ``make_layers``, every layer lives on this rank; each constructed
    module is passed through the offloader so weights may be wrapped for
    CPU offloading.

    Args:
        num_hidden_layers: Total number of layers to construct.
        layer_fn: Factory called as ``layer_fn(idx=..., prefix=...)`` per layer.
        prefix: Name prefix combined with the layer index via ``add_prefix``.

    Returns:
        A ``torch.nn.ModuleList`` of the (possibly offloader-wrapped) layers.
    """
    # Imported lazily to avoid a circular import at module load time.
    from sglang.srt.offloader import get_offloader

    layer_iter = (
        layer_fn(idx=idx, prefix=add_prefix(idx, prefix))
        for idx in range(num_hidden_layers)
    )
    return torch.nn.ModuleList(get_offloader().wrap_modules(layer_iter))
cmo_stream = None