初始化项目,由ModelHub XC社区提供模型
Model: neuralmagic/Llama-2-7b-evolcodealpaca Source: Original Platform
This commit is contained in:
79
recipe.yaml
Normal file
79
recipe.yaml
Normal file
@@ -0,0 +1,79 @@
# Recipe with one stage (`test_stage`) whose `obcq_modifiers` group declares
# three modifiers: SmoothQuantModifier, QuantizationModifier and
# SparseGPTModifier.
# NOTE(review): the indentation below was reconstructed from a flattened copy;
# confirm the nesting against the upstream recipe.yaml.
test_stage:
  obcq_modifiers:
    SmoothQuantModifier:
      smoothing_strength: 0.8
      # Each entry is restored as a pair: [[layer regexes...], layer regex].
      # NOTE(review): the inner/outer list split is inferred from the
      # "- - -" markers and the usual SmoothQuant mapping shape - confirm.
      mappings:
        - - - re:.*q_proj
            - re:.*k_proj
            - re:.*v_proj
          - re:.*input_layernorm
        - - - re:.*gate_proj
            - re:.*up_proj
          - re:.*post_attention_layernorm
        - - - re:.*down_proj
          - re:.*up_proj

    QuantizationModifier:
      # Three class-like names followed by three specific down_proj layers
      # (layers 1, 30 and 0); presumably excluded from quantization - confirm.
      ignore:
        - LlamaRotaryEmbedding
        - LlamaRMSNorm
        - SiLUActivation
        - model.layers.1.mlp.down_proj
        - model.layers.30.mlp.down_proj
        - model.layers.0.mlp.down_proj
      post_oneshot_calibration: true
      # Per-type overrides, keyed by Linear, MatMulLeftInput_QK and Embedding.
      scheme_overrides:
        Linear:
          weights:
            num_bits: 8
            symmetric: true
            strategy: channel
        MatMulLeftInput_QK:
          input_activations:
            num_bits: 8
            symmetric: true
        Embedding:
          # Explicit null rather than an omitted key.
          input_activations: null
          weights:
            num_bits: 8
            symmetric: false

    SparseGPTModifier:
      # sparsity is 0.0 while quantize is true; presumably this pass is used
      # for quantization only - confirm.
      sparsity: 0.0
      block_size: 128
      sequential_update: false
      quantize: true
      percdamp: 0.01
      # Quoted so this digits-and-colon value is always loaded as a string.
      mask_structure: '0:0'
      # model.layers.0 through model.layers.31, then lm_head.
      targets:
        - model.layers.0
        - model.layers.1
        - model.layers.2
        - model.layers.3
        - model.layers.4
        - model.layers.5
        - model.layers.6
        - model.layers.7
        - model.layers.8
        - model.layers.9
        - model.layers.10
        - model.layers.11
        - model.layers.12
        - model.layers.13
        - model.layers.14
        - model.layers.15
        - model.layers.16
        - model.layers.17
        - model.layers.18
        - model.layers.19
        - model.layers.20
        - model.layers.21
        - model.layers.22
        - model.layers.23
        - model.layers.24
        - model.layers.25
        - model.layers.26
        - model.layers.27
        - model.layers.28
        - model.layers.29
        - model.layers.30
        - model.layers.31
        - lm_head
Reference in New Issue
Block a user