diff --git a/config.json b/config.json
index bb21882..da097dd 100644
--- a/config.json
+++ b/config.json
@@ -1,24 +1,31 @@
 {
-  "_name_or_path": "TheBloke/Llama-2-7B-fp16",
-  "architectures": [
-    "LlamaForCausalLM"
-  ],
-  "bos_token_id": 1,
-  "eos_token_id": 2,
-  "hidden_act": "silu",
-  "hidden_size": 4096,
-  "initializer_range": 0.02,
-  "intermediate_size": 11008,
-  "max_position_embeddings": 2048,
-  "model_type": "llama",
-  "num_attention_heads": 32,
-  "num_hidden_layers": 32,
-  "pad_token_id": 0,
-  "rms_norm_eps": 1e-05,
-  "tie_word_embeddings": false,
-  "torch_dtype": "float32",
-  "transformers_version": "4.30.2",
-  "use_cache": true,
-  "vocab_size": 32000,
-  "pretraining_tp": 1
+  "_name_or_path": "TheBloke/Llama-2-7B-fp16",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 11008,
+  "max_position_embeddings": 2048,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "pad_token_id": 0,
+  "rms_norm_eps": 1e-05,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.30.2",
+  "use_cache": true,
+  "vocab_size": 32000,
+  "pretraining_tp": 1,
+  "quantization_config": {
+    "quant_method": "awq",
+    "zero_point": true,
+    "group_size": 128,
+    "bits": 4,
+    "version": "gemm"
+  }
 }
\ No newline at end of file
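
The hunk above adds a "quantization_config" block declaring 4-bit AWQ weights (zero-point enabled, group size 128, "gemm" kernel). A minimal sketch of how such a config is typically consumed follows; it assumes a hypothetical local checkpoint directory "./Llama-2-7B-AWQ" containing this config.json plus matching AWQ weights, and a transformers release with AWQ loading support. It is not part of the diff itself.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hypothetical path to the quantized checkpoint that ships this config.json.
model_path = "./Llama-2-7B-AWQ"

tokenizer = AutoTokenizer.from_pretrained(model_path)

# from_pretrained reads the "quantization_config" entry from config.json and
# loads the 4-bit AWQ weights accordingly, so no extra quantization arguments
# are passed here; fp16 activations match the AWQ "gemm" kernels.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
    device_map="auto",
)

prompt = "Explain activation-aware weight quantization in one sentence."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output[0], skip_special_tokens=True))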