Update config.json
Refactor: Remove `_name_or_path` and increase `max_position_embeddings` to 4096 - Removed the `_name_or_path` parameter as it's no longer necessary. - Updated `max_position_embeddings` from 2048 to 4096 for better model capacity.
This commit is contained in:
@@ -1,5 +1,4 @@
|
|||||||
{
|
{
|
||||||
"_name_or_path": "./models/llama3-8b_sft_full_1e-5_bs8000_3_trainv59_a/checkpoint-1596",
|
|
||||||
"architectures": [
|
"architectures": [
|
||||||
"LlamaForCausalLM"
|
"LlamaForCausalLM"
|
||||||
],
|
],
|
||||||
@@ -11,7 +10,7 @@
|
|||||||
"hidden_size": 4096,
|
"hidden_size": 4096,
|
||||||
"initializer_range": 0.02,
|
"initializer_range": 0.02,
|
||||||
"intermediate_size": 14336,
|
"intermediate_size": 14336,
|
||||||
"max_position_embeddings": 2048,
|
"max_position_embeddings": 4096,
|
||||||
"model_type": "llama",
|
"model_type": "llama",
|
||||||
"num_attention_heads": 32,
|
"num_attention_heads": 32,
|
||||||
"num_hidden_layers": 32,
|
"num_hidden_layers": 32,
|
||||||
|
|||||||
Reference in New Issue
Block a user