# File-viewer metadata (not part of the recipe): 19 lines, 488 B, YAML
---
# Quantization recipe with a single stage that configures two modifiers.
# NOTE(review): this copy arrived with all indentation stripped and stray `|`
# separator lines between entries; the nesting below was rebuilt from the key
# order. Confirm it against the consuming tool's recipe schema.
quant_stage:
  quant_modifiers:
    SmoothQuantModifier:
      smoothing_strength: 0.8
      # Each entry is a two-item pair: a list of patterns, then one pattern.
      # The `re:` prefix presumably marks a regular expression - verify with
      # the consumer. Patterns are quoted so `:` and `*` stay literal text.
      mappings:
        - - ['re:.*q_proj', 're:.*k_proj', 're:.*v_proj']
          - 're:.*input_layernorm'
        - - ['re:.*gate_proj', 're:.*up_proj']
          - 're:.*post_attention_layernorm'
        - - ['re:.*down_proj']
          - 're:.*up_proj'
    GPTQModifier:
      sequential_update: false
      dampening_frac: 0.01
      ignore: [lm_head]
      scheme: W8A8
      targets: Linear
      observer: mse