---
# Merge recipe: combines three 12B checkpoints on top of the
# Mistral-Nemo-Instruct-2407 base using the `delerp_della` merge method.
#
# NOTE(review): this file was reconstructed from a single-line (flattened)
# form. Key nesting below follows the token order of that line; confirm the
# placement of `lambda`/`lmbda` and `chat_template` against the consuming
# tool's schema.

architecture: MistralForCausalLM

models:
  # Same path as `base_model`; listed without per-model parameters.
  - model: 'A:\LLM\.cache\12B\models--mistralai--Mistral-Nemo-Instruct-2407'
  - model: 'B:\12B\38-dare_linear'
    parameters:
      weight: 0.8
      density: 0.9
      epsilon: 0.09
  - model: 'B:\12B\39-delerp'
    parameters:
      weight: 0.6
      density: 0.9
      epsilon: 0.09
  - model: 'B:\12B\40-cvs'
    parameters:
      weight: 0.6
      density: 0.9
      epsilon: 0.09

merge_method: delerp_della
base_model: 'A:\LLM\.cache\12B\models--mistralai--Mistral-Nemo-Instruct-2407'

# Settings applied to the merge as a whole.
parameters:
  normalize: false
  int8_mask: false
  rescale: true
  lambda: 1.0  # Global lambda
  lmbda: 1.0  # Individual lambda

tokenizer:
  source: union
chat_template: auto

# Work in float32, write the result as bfloat16.
dtype: float32
out_dtype: bfloat16

name: '🦑 KrakenSakura-Maelström-12B-v1'