47 lines
1.2 KiB
YAML
47 lines
1.2 KiB
YAML
|
|
quant_stage:
|
||
|
|
quant_modifiers:
|
||
|
|
SmoothQuantModifier:
|
||
|
|
smoothing_strength: 0.7
|
||
|
|
mappings:
|
||
|
|
- - ['re:.*q_proj', 're:.*k_proj', 're:.*v_proj']
|
||
|
|
- re:.*input_layernorm
|
||
|
|
ignore: []
|
||
|
|
GPTQModifier:
|
||
|
|
config_groups:
|
||
|
|
group_0:
|
||
|
|
targets: [Linear]
|
||
|
|
weights:
|
||
|
|
num_bits: 8
|
||
|
|
type: int
|
||
|
|
symmetric: true
|
||
|
|
group_size: null
|
||
|
|
strategy: channel
|
||
|
|
block_structure: null
|
||
|
|
dynamic: false
|
||
|
|
actorder: null
|
||
|
|
scale_dtype: null
|
||
|
|
zp_dtype: null
|
||
|
|
observer: mse
|
||
|
|
observer_kwargs: {}
|
||
|
|
input_activations:
|
||
|
|
num_bits: 8
|
||
|
|
type: int
|
||
|
|
symmetric: true
|
||
|
|
group_size: null
|
||
|
|
strategy: token
|
||
|
|
block_structure: null
|
||
|
|
dynamic: true
|
||
|
|
actorder: null
|
||
|
|
scale_dtype: null
|
||
|
|
zp_dtype: null
|
||
|
|
observer: null
|
||
|
|
observer_kwargs: {}
|
||
|
|
output_activations: null
|
||
|
|
format: null
|
||
|
|
targets: [Linear]
|
||
|
|
ignore: [lm_head, 're:.*mlp.gate$', 're:.*mlp.shared_expert_gate$']
|
||
|
|
block_size: 128
|
||
|
|
dampening_frac: 0.1
|
||
|
|
actorder: static
|
||
|
|
offload_hessians: false
|