---
# Recipe with a single stage (`default_stage`) that configures one modifier,
# `AWQModifier`.
# NOTE(review): the nesting below was restored from a listing whose
# indentation had been lost -- confirm it against the consumer's schema.
default_stage:
  default_modifiers:
    AWQModifier:
      targets: [Linear]
      # Entries prefixed with `re:` appear to be regular expressions; the
      # others appear to be literal names.
      # NOTE(review): '*whisper*' has no `re:` prefix and reads like a glob --
      # confirm the consumer matches it as intended.
      ignore:
        - lm_head
        - 're:.*mlp.gate$'
        - 're:.*mlp.shared_expert_gate$'
        - '*whisper*'
      scheme: W4A16_ASYM
      # Each entry pairs one `smooth_layer` pattern with the `balance_layers`
      # patterns listed alongside it.
      mappings:
        - smooth_layer: 're:.*input_layernorm$'
          balance_layers: ['re:.*q_proj$', 're:.*k_proj$', 're:.*v_proj$']
        - smooth_layer: 're:.*v_proj$'
          balance_layers: ['re:.*o_proj$']
        - smooth_layer: 're:.*post_attention_layernorm$'
          balance_layers: ['re:.*gate_proj$', 're:.*up_proj$']
        - smooth_layer: 're:.*up_proj$'
          balance_layers: ['re:.*down_proj$']
      # NOTE(review): kept as a sibling of `mappings` (a modifier-level
      # option) rather than a key of the last mapping entry -- confirm.
      duo_scaling: true