---
# One-shot compression recipe: a single stage (`test_stage`) holding one
# modifier group (`obcq_modifiers`) with three modifiers.
#
# NOTE(review): this file was recovered from a listing that had lost all
# indentation. The nesting below is reconstructed from the key names and the
# sequence markers; confirm it against the consuming tool's recipe schema.
test_stage:
  obcq_modifiers:
    SmoothQuantModifier:
      smoothing_strength: 0.8
      # Each mapping is a two-element list: a list of regex patterns, then a
      # single regex pattern.
      # Presumably [layers to balance, layer to smooth] - TODO confirm.
      mappings:
        - - ['re:.*q_proj', 're:.*k_proj', 're:.*v_proj']
          - 're:.*input_layernorm'
        - - ['re:.*gate_proj', 're:.*up_proj']
          - 're:.*post_attention_layernorm'
        - - ['re:.*down_proj']
          - 're:.*up_proj'

    QuantizationModifier:
      # Entries mix class-style names and dotted module paths.
      ignore:
        - LlamaRotaryEmbedding
        - LlamaRMSNorm
        - SiLUActivation
        - model.layers.1.mlp.down_proj
        - model.layers.30.mlp.down_proj
        - model.layers.0.mlp.down_proj
      post_oneshot_calibration: true
      # Per-type overrides of the quantization scheme.
      scheme_overrides:
        Linear:
          weights:
            num_bits: 8
            symmetric: true
            strategy: channel
        MatMulLeftInput_QK:
          input_activations:
            num_bits: 8
            symmetric: true
        Embedding:
          input_activations: null
          weights:
            num_bits: 8
            symmetric: false

    SparseGPTModifier:
      sparsity: 0.0
      block_size: 128
      sequential_update: false
      quantize: true
      percdamp: 0.01
      # Quoted so that no YAML 1.1 loader can read the digits-and-colon
      # value as anything other than the string "0:0".
      mask_structure: '0:0'
      targets:
        - model.layers.0
        - model.layers.1
        - model.layers.2
        - model.layers.3
        - model.layers.4
        - model.layers.5
        - model.layers.6
        - model.layers.7
        - model.layers.8
        - model.layers.9
        - model.layers.10
        - model.layers.11
        - model.layers.12
        - model.layers.13
        - model.layers.14
        - model.layers.15
        - model.layers.16
        - model.layers.17
        - model.layers.18
        - model.layers.19
        - model.layers.20
        - model.layers.21
        - model.layers.22
        - model.layers.23
        - model.layers.24
        - model.layers.25
        - model.layers.26
        - model.layers.27
        - model.layers.28
        - model.layers.29
        - model.layers.30
        - model.layers.31
        - lm_head