model : add GroveMoE support (#15510)

* add GroveMoE support

* remove constexpr that fails on certain compilers

* revert crude scalar div implementation, use cast

* build_attn_inp_kv_unified -> build_attn_inp_kv

* fix build_attn

* re-apply ffn_exps regex changes
This commit is contained in:
Sigbjørn Skjæret
2025-09-25 19:50:28 +02:00
committed by GitHub
parent b05a9d650f
commit 835b2b915c
11 changed files with 451 additions and 4 deletions

View File

@@ -670,6 +670,9 @@ class GGUFWriter:
def add_expert_shared_feed_forward_length(self, length: int) -> None:
self.add_uint32(Keys.LLM.EXPERT_SHARED_FEED_FORWARD_LENGTH.format(arch=self.arch), length)
def add_expert_chunk_feed_forward_length(self, length: int) -> None:
self.add_uint32(Keys.LLM.EXPERT_CHUNK_FEED_FORWARD_LENGTH.format(arch=self.arch), length)
def add_parallel_residual(self, use: bool) -> None:
self.add_bool(Keys.LLM.USE_PARALLEL_RESIDUAL.format(arch=self.arch), use)
@@ -757,6 +760,12 @@ class GGUFWriter:
def add_expert_gating_func(self, value: ExpertGatingFuncType) -> None:
self.add_uint32(Keys.LLM.EXPERT_GATING_FUNC.format(arch=self.arch), value.value)
def add_expert_group_scale(self, value: float) -> None:
self.add_float32(Keys.LLM.EXPERT_GROUP_SCALE.format(arch=self.arch), value)
def add_experts_per_group(self, count: int) -> None:
self.add_uint32(Keys.LLM.EXPERTS_PER_GROUP.format(arch=self.arch), count)
def add_moe_every_n_layers(self, value: int) -> None:
self.add_uint32(Keys.LLM.MOE_EVERY_N_LAYERS.format(arch=self.arch), value)