From 9be86ac7e8672007830352c4a04f155ff57848aa Mon Sep 17 00:00:00 2001
From: Tom Jobbins
Date: Thu, 9 Nov 2023 18:20:20 +0000
Subject: [PATCH] Update for Transformers AWQ support

---
 config.json | 51 +++++++++++++++++++++++++++++----------------------
 1 file changed, 29 insertions(+), 22 deletions(-)

diff --git a/config.json b/config.json
index bb21882..da097dd 100644
--- a/config.json
+++ b/config.json
@@ -1,24 +1,31 @@
 {
-  "_name_or_path": "TheBloke/Llama-2-7B-fp16",
-  "architectures": [
-    "LlamaForCausalLM"
-  ],
-  "bos_token_id": 1,
-  "eos_token_id": 2,
-  "hidden_act": "silu",
-  "hidden_size": 4096,
-  "initializer_range": 0.02,
-  "intermediate_size": 11008,
-  "max_position_embeddings": 2048,
-  "model_type": "llama",
-  "num_attention_heads": 32,
-  "num_hidden_layers": 32,
-  "pad_token_id": 0,
-  "rms_norm_eps": 1e-05,
-  "tie_word_embeddings": false,
-  "torch_dtype": "float32",
-  "transformers_version": "4.30.2",
-  "use_cache": true,
-  "vocab_size": 32000,
-  "pretraining_tp": 1
+    "_name_or_path": "TheBloke/Llama-2-7B-fp16",
+    "architectures": [
+        "LlamaForCausalLM"
+    ],
+    "bos_token_id": 1,
+    "eos_token_id": 2,
+    "hidden_act": "silu",
+    "hidden_size": 4096,
+    "initializer_range": 0.02,
+    "intermediate_size": 11008,
+    "max_position_embeddings": 2048,
+    "model_type": "llama",
+    "num_attention_heads": 32,
+    "num_hidden_layers": 32,
+    "pad_token_id": 0,
+    "rms_norm_eps": 1e-05,
+    "tie_word_embeddings": false,
+    "torch_dtype": "float32",
+    "transformers_version": "4.30.2",
+    "use_cache": true,
+    "vocab_size": 32000,
+    "pretraining_tp": 1,
+    "quantization_config": {
+        "quant_method": "awq",
+        "zero_point": true,
+        "group_size": 128,
+        "bits": 4,
+        "version": "gemm"
+    }
 }
\ No newline at end of file