---
# Model-merge configuration: three fine-tuned 12B checkpoints are combined on
# top of the Mistral-Nemo-Instruct-2407 base using the `delerp_della` method.
#
# NOTE(review): the nesting below was reconstructed from a flattened copy of
# this file (indentation had been lost). Key order and values are unchanged;
# confirm the structure against the merge tool's schema before running.

# NOTE(review): `architecture` is not a stock mergekit key as far as I know —
# presumably consumed by a custom fork; verify.
architecture: MistralForCausalLM

models:
  # Base model is listed without per-model parameters.
  - model: 'A:\LLM\.cache\12B\models--mistralai--Mistral-Nemo-Instruct-2407'
  - model: 'B:\12B\38-dare_linear'
    parameters:
      weight: 0.8
      density: 0.9
      epsilon: 0.09
  - model: 'B:\12B\39-delerp'
    parameters:
      weight: 0.6
      density: 0.9
      epsilon: 0.09
  - model: 'B:\12B\40-cvs'
    parameters:
      weight: 0.6
      density: 0.9
      epsilon: 0.09

merge_method: delerp_della
# Same path as the first entry in `models`.
base_model: 'A:\LLM\.cache\12B\models--mistralai--Mistral-Nemo-Instruct-2407'

# Global (merge-wide) parameters.
parameters:
  normalize: false
  int8_mask: false
  rescale: true
  # NOTE(review): both spellings are present in the original; presumably the
  # merge method reads them as two distinct settings — confirm neither is a typo.
  lambda: 1.0  # Global lambda
  lmbda: 1.0  # Individual lambda

tokenizer:
  source: union

# NOTE(review): placed at top level (not under `tokenizer`) — confirm.
chat_template: auto
dtype: float32
out_dtype: bfloat16
name: '🦑 KrakenSakura-Maelström-12B-v1'