Files
KrakenSakura-Maelstrom-12B-v1/mergekit_config.yml

32 lines
807 B
YAML
Raw Normal View History

# Merge recipe: three intermediate 12B merges are combined on top of
# Mistral-Nemo-Instruct-2407 using the `delerp_della` merge method.
# NOTE(review): nesting was reconstructed from the flattened original using
# the mergekit config layout (per-model `parameters`, global `parameters`,
# `tokenizer.source`) — confirm against the consuming tool.
architecture: MistralForCausalLM

models:
  # First entry is the same path as `base_model` and carries no per-model
  # parameters.
  - model: 'A:\LLM\.cache\12B\models--mistralai--Mistral-Nemo-Instruct-2407'
  # Each remaining entry sets its own weight/density/epsilon.
  # Presumably weight = contribution, density = fraction of delta kept,
  # epsilon = spread of drop probabilities — verify for this merge method.
  - model: 'B:\12B\38-dare_linear'
    parameters:
      weight: 0.8
      density: 0.9
      epsilon: 0.09
  - model: 'B:\12B\39-delerp'
    parameters:
      weight: 0.6
      density: 0.9
      epsilon: 0.09
  - model: 'B:\12B\40-cvs'
    parameters:
      weight: 0.6
      density: 0.9
      epsilon: 0.09

# NOTE(review): `delerp_della` does not look like a stock mergekit method
# name; assumed to be provided by a custom build — confirm.
merge_method: delerp_della
base_model: 'A:\LLM\.cache\12B\models--mistralai--Mistral-Nemo-Instruct-2407'

# Global parameters for the merge method.
parameters:
  normalize: false
  int8_mask: false
  rescale: true
  lambda: 1.0  # Global lambda
  # NOTE(review): `lmbda` is kept as written; it may be a distinct key read by
  # the custom method or a misspelling of `lambda` — confirm before renaming.
  lmbda: 1.0  # Individual lambda

tokenizer:
  source: union
chat_template: auto

# Presumably: merge math in float32, written out as bfloat16 — confirm.
dtype: float32
out_dtype: bfloat16

name: '🦑 KrakenSakura-Maelström-12B-v1'