初始化项目,由ModelHub XC社区提供模型

Model: Raymond-dev-546730/MaterialsAnalyst-AI-7B
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-06-21 10:56:56 +08:00
commit 599c2caa7b
48 changed files with 304443 additions and 0 deletions

59
.gitattributes vendored Normal file
View File

@@ -0,0 +1,59 @@
# Every rule below sets filter, diff and merge to "lfs" and unsets "text",
# so matching paths are stored through Git LFS and handled as binary content.

# Pattern-based rules (by extension, directory glob, or name fragment).
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text

# Explicit per-file rules: the .jsonl, tokenizer.json and .gguf paths below are
# not matched by any pattern above, so each file is listed individually.
Data/Train-Ready.jsonl filter=lfs diff=lfs merge=lfs -text
Model_Weights/LoRA_adapter/tokenizer.json filter=lfs diff=lfs merge=lfs -text
Model_Weights/safetensors/tokenizer.json filter=lfs diff=lfs merge=lfs -text
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-f16.gguf filter=lfs diff=lfs merge=lfs -text
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q5_K.gguf filter=lfs diff=lfs merge=lfs -text
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q5_1.gguf filter=lfs diff=lfs merge=lfs -text
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q5_0.gguf filter=lfs diff=lfs merge=lfs -text
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q4_K.gguf filter=lfs diff=lfs merge=lfs -text
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q4_1.gguf filter=lfs diff=lfs merge=lfs -text
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q4_0.gguf filter=lfs diff=lfs merge=lfs -text
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q3_K_S.gguf filter=lfs diff=lfs merge=lfs -text
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q3_K.gguf filter=lfs diff=lfs merge=lfs -text
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-IQ3_XS.gguf filter=lfs diff=lfs merge=lfs -text
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q2_K.gguf filter=lfs diff=lfs merge=lfs -text
Data/Dataset.jsonl filter=lfs diff=lfs merge=lfs -text
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-IQ4_NL.gguf filter=lfs diff=lfs merge=lfs -text
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-IQ4_XS.gguf filter=lfs diff=lfs merge=lfs -text

3
Data/Dataset.jsonl Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c853a7dfbfb768d14b47bf45a458aa5d516274707af788e7c1476f43b2027635
size 28585142

BIN
Model_Logo.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 74 KiB

View File

@@ -0,0 +1,40 @@
{
"alpha_pattern": {},
"auto_mapping": null,
"base_model_name_or_path": "Qwen/Qwen2.5-7B-Instruct",
"bias": "none",
"corda_config": null,
"eva_config": null,
"exclude_modules": null,
"fan_in_fan_out": false,
"inference_mode": true,
"init_lora_weights": true,
"layer_replication": null,
"layers_pattern": null,
"layers_to_transform": null,
"loftq_config": {},
"lora_alpha": 64,
"lora_bias": false,
"lora_dropout": 0.1,
"megatron_config": null,
"megatron_core": "megatron.core",
"modules_to_save": null,
"peft_type": "LORA",
"r": 32,
"rank_pattern": {},
"revision": null,
"target_modules": [
"q_proj",
"down_proj",
"k_proj",
"lm_head",
"v_proj",
"gate_proj",
"o_proj",
"up_proj"
],
"task_type": "CAUSAL_LM",
"trainable_token_indices": null,
"use_dora": false,
"use_rslora": false
}

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1e5643f7e70f81b24b0fb369804a434ddaf21cb0b4985ecb6f364cbb1a108207
size 1432932264

View File

@@ -0,0 +1,24 @@
{
"</tool_call>": 151658,
"<tool_call>": 151657,
"<|box_end|>": 151649,
"<|box_start|>": 151648,
"<|endoftext|>": 151643,
"<|file_sep|>": 151664,
"<|fim_middle|>": 151660,
"<|fim_pad|>": 151662,
"<|fim_prefix|>": 151659,
"<|fim_suffix|>": 151661,
"<|im_end|>": 151645,
"<|im_start|>": 151644,
"<|image_pad|>": 151655,
"<|object_ref_end|>": 151647,
"<|object_ref_start|>": 151646,
"<|quad_end|>": 151651,
"<|quad_start|>": 151650,
"<|repo_name|>": 151663,
"<|video_pad|>": 151656,
"<|vision_end|>": 151653,
"<|vision_pad|>": 151654,
"<|vision_start|>": 151652
}

View File

@@ -0,0 +1,54 @@
{#- ChatML-style chat template.
    Inputs: `messages` (each with a role and content), optional `tools`, and
    `add_generation_prompt`.
    Output: a system turn (the first message when it is a system message, otherwise
    the built-in default), followed by one rendered turn per remaining message, and
    finally an opened assistant turn when `add_generation_prompt` is set. -#}
{%- if tools %}
{{- '<|im_start|>system\n' }}
{%- if messages[0]['role'] == 'system' %}
{{- messages[0]['content'] }}
{%- else %}
{{- 'You are MaterialsAnalyst-AI-7B. You are a helpful assistant with expertise in materials science.' }}
{%- endif %}
{#- Tool signatures are appended to the system turn, one JSON object per line. -#}
{{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
{%- for tool in tools %}
{{- "\n" }}
{{- tool | tojson }}
{%- endfor %}
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
{%- else %}
{%- if messages[0]['role'] == 'system' %}
{{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
{%- else %}
{{- '<|im_start|>system\nYou are MaterialsAnalyst-AI-7B. You are a helpful assistant with expertise in materials science.<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{%- for message in messages %}
{#- A leading system message was already rendered above, so it is skipped here (`not loop.first`). -#}
{%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
{%- elif message.role == "assistant" %}
{#- Assistant turn carrying tool calls: optional text content, then one <tool_call> block per call. -#}
{{- '<|im_start|>' + message.role }}
{%- if message.content %}
{{- '\n' + message.content }}
{%- endif %}
{%- for tool_call in message.tool_calls %}
{#- Accept both the nested `{function: {...}}` shape and a flat `{name, arguments}` shape. -#}
{%- if tool_call.function is defined %}
{%- set tool_call = tool_call.function %}
{%- endif %}
{{- '\n<tool_call>\n{"name": "' }}
{{- tool_call.name }}
{{- '", "arguments": ' }}
{#- Arguments that arrive as an already-serialized JSON string are emitted verbatim;
    piping them through tojson again would wrap them in quotes and escape them. -#}
{%- if tool_call.arguments is string %}
{{- tool_call.arguments }}
{%- else %}
{{- tool_call.arguments | tojson }}
{%- endif %}
{{- '}\n</tool_call>' }}
{%- endfor %}
{{- '<|im_end|>\n' }}
{%- elif message.role == "tool" %}
{#- Consecutive tool messages share one user turn: open it before the first, close it after the last. -#}
{%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
{{- '<|im_start|>user' }}
{%- endif %}
{{- '\n<tool_response>\n' }}
{{- message.content }}
{{- '\n</tool_response>' }}
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
{{- '<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
{{- '<|im_start|>assistant\n' }}
{%- endif %}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,31 @@
{
"additional_special_tokens": [
"<|im_start|>",
"<|im_end|>",
"<|object_ref_start|>",
"<|object_ref_end|>",
"<|box_start|>",
"<|box_end|>",
"<|quad_start|>",
"<|quad_end|>",
"<|vision_start|>",
"<|vision_end|>",
"<|vision_pad|>",
"<|image_pad|>",
"<|video_pad|>"
],
"eos_token": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:694f1174c5bdf94e2fc50796c0f1733a5a3945ff110b0dfa40ea0701cc9c9c42
size 11422176

View File

@@ -0,0 +1,207 @@
{
"add_bos_token": false,
"add_prefix_space": false,
"added_tokens_decoder": {
"151643": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151644": {
"content": "<|im_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151645": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151646": {
"content": "<|object_ref_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151647": {
"content": "<|object_ref_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151648": {
"content": "<|box_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151649": {
"content": "<|box_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151650": {
"content": "<|quad_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151651": {
"content": "<|quad_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151652": {
"content": "<|vision_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151653": {
"content": "<|vision_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151654": {
"content": "<|vision_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151655": {
"content": "<|image_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151656": {
"content": "<|video_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151657": {
"content": "<tool_call>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151658": {
"content": "</tool_call>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151659": {
"content": "<|fim_prefix|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151660": {
"content": "<|fim_middle|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151661": {
"content": "<|fim_suffix|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151662": {
"content": "<|fim_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151663": {
"content": "<|repo_name|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151664": {
"content": "<|file_sep|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
}
},
"additional_special_tokens": [
"<|im_start|>",
"<|im_end|>",
"<|object_ref_start|>",
"<|object_ref_end|>",
"<|box_start|>",
"<|box_end|>",
"<|quad_start|>",
"<|quad_end|>",
"<|vision_start|>",
"<|vision_end|>",
"<|vision_pad|>",
"<|image_pad|>",
"<|video_pad|>"
],
"bos_token": null,
"clean_up_tokenization_spaces": false,
"eos_token": "<|im_end|>",
"errors": "replace",
"extra_special_tokens": {},
"model_max_length": 131072,
"pad_token": "<|endoftext|>",
"split_special_tokens": false,
"tokenizer_class": "Qwen2Tokenizer",
"unk_token": null
}

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9b4da075007398d121f9e440325f72e7152df8bf03e751f6315e1d2c100b9348
size 3346253248

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:df602ce82f4631acf3cc3843fb70a2deade9f8da0b685481acc3eeec68611292
size 4463271360

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a5418d385cd002eca9ed77dbcf153908e24da1747981138e4a7aa69f260073a7
size 4250295744

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:39bc1dd076b896e63d55314d4748e31f64187d4d0eff5f3734dc903043d5d6da
size 3015937472

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4c96411a6d918ce2bd25d9564f7717e4d86550f8724844aab1044a6d02d65f79
size 3808388544

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:34efd7373c761354d808462a4011d77c476f93c503f353e68bcc76c6dd2718e8
size 3492365760

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6b03a8c6d9d749cfc82c71d9330725f7fb3fb91fc8e568341ef02c30dbb43ea5
size 4431388096

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c13a3390601d63b785dabfa18765ec6f02f4866d13a4c08404c0eeea3beaf88b
size 4873280960

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:692e14d891038a02e0cd85a2519ac51e636e13c1e524354196c89dcdfb150222
size 4683070912

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6e5f540d8a7202afa2310e76b4062f902a2c7d5883eefed527bb37cb726ecf93
size 4457766336

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:eccbf47537fca23a1fc24e151d703858334e2b7ab7bfddc7f8423a63dfc763a5
size 5315173824

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4c051833d407c8b7eed550c16e464ed4347b1ea184ba285bb735a964dc2bb43b
size 5757066688

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:80f1c5c480f6203583246881e3af08db383714478ef98f6d8a573b61638a4faa
size 5444828608

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4a30fe86b09fa3c62d6211e7f5f80bf272cc0bd96f58c2d90ebf8af89ae3eca2
size 5315173824

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fcfbecd774f0f94c10be45d1e93451d56a1ef8ba026e4040e8865e21b14873e0
size 6254196160

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:546116b6b810420dbef5de2067440f4f24d451cbda855ac1ebf5b032b272e101
size 8098522560

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6774ccf0edc76a3e498590e0ec801620d9aecbde5c1089cc8ce1b8ba245282d8
size 15237850560

View File

@@ -0,0 +1,24 @@
{
"</tool_call>": 151658,
"<tool_call>": 151657,
"<|box_end|>": 151649,
"<|box_start|>": 151648,
"<|endoftext|>": 151643,
"<|file_sep|>": 151664,
"<|fim_middle|>": 151660,
"<|fim_pad|>": 151662,
"<|fim_prefix|>": 151659,
"<|fim_suffix|>": 151661,
"<|im_end|>": 151645,
"<|im_start|>": 151644,
"<|image_pad|>": 151655,
"<|object_ref_end|>": 151647,
"<|object_ref_start|>": 151646,
"<|quad_end|>": 151651,
"<|quad_start|>": 151650,
"<|repo_name|>": 151663,
"<|video_pad|>": 151656,
"<|vision_end|>": 151653,
"<|vision_pad|>": 151654,
"<|vision_start|>": 151652
}

View File

@@ -0,0 +1,54 @@
{#- ChatML-style chat template.
    Inputs: `messages` (each with a role and content), optional `tools`, and
    `add_generation_prompt`.
    Output: a system turn (the first message when it is a system message, otherwise
    the built-in default), followed by one rendered turn per remaining message, and
    finally an opened assistant turn when `add_generation_prompt` is set. -#}
{%- if tools %}
{{- '<|im_start|>system\n' }}
{%- if messages[0]['role'] == 'system' %}
{{- messages[0]['content'] }}
{%- else %}
{{- 'You are MaterialsAnalyst-AI-7B. You are a helpful assistant with expertise in materials science.' }}
{%- endif %}
{#- Tool signatures are appended to the system turn, one JSON object per line. -#}
{{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
{%- for tool in tools %}
{{- "\n" }}
{{- tool | tojson }}
{%- endfor %}
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
{%- else %}
{%- if messages[0]['role'] == 'system' %}
{{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
{%- else %}
{{- '<|im_start|>system\nYou are MaterialsAnalyst-AI-7B. You are a helpful assistant with expertise in materials science.<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{%- for message in messages %}
{#- A leading system message was already rendered above, so it is skipped here (`not loop.first`). -#}
{%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
{%- elif message.role == "assistant" %}
{#- Assistant turn carrying tool calls: optional text content, then one <tool_call> block per call. -#}
{{- '<|im_start|>' + message.role }}
{%- if message.content %}
{{- '\n' + message.content }}
{%- endif %}
{%- for tool_call in message.tool_calls %}
{#- Accept both the nested `{function: {...}}` shape and a flat `{name, arguments}` shape. -#}
{%- if tool_call.function is defined %}
{%- set tool_call = tool_call.function %}
{%- endif %}
{{- '\n<tool_call>\n{"name": "' }}
{{- tool_call.name }}
{{- '", "arguments": ' }}
{#- Arguments that arrive as an already-serialized JSON string are emitted verbatim;
    piping them through tojson again would wrap them in quotes and escape them. -#}
{%- if tool_call.arguments is string %}
{{- tool_call.arguments }}
{%- else %}
{{- tool_call.arguments | tojson }}
{%- endif %}
{{- '}\n</tool_call>' }}
{%- endfor %}
{{- '<|im_end|>\n' }}
{%- elif message.role == "tool" %}
{#- Consecutive tool messages share one user turn: open it before the first, close it after the last. -#}
{%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
{{- '<|im_start|>user' }}
{%- endif %}
{{- '\n<tool_response>\n' }}
{{- message.content }}
{{- '\n</tool_response>' }}
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
{{- '<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
{{- '<|im_start|>assistant\n' }}
{%- endif %}

View File

@@ -0,0 +1,28 @@
{
"architectures": [
"Qwen2ForCausalLM"
],
"attention_dropout": 0.0,
"bos_token_id": 151643,
"eos_token_id": 151645,
"hidden_act": "silu",
"hidden_size": 3584,
"initializer_range": 0.02,
"intermediate_size": 18944,
"max_position_embeddings": 32768,
"max_window_layers": 28,
"model_type": "qwen2",
"num_attention_heads": 28,
"num_hidden_layers": 28,
"num_key_value_heads": 4,
"rms_norm_eps": 1e-06,
"rope_scaling": null,
"rope_theta": 1000000.0,
"sliding_window": 131072,
"tie_word_embeddings": false,
"torch_dtype": "float16",
"transformers_version": "4.52.3",
"use_cache": true,
"use_sliding_window": false,
"vocab_size": 152064
}

View File

@@ -0,0 +1,14 @@
{
"bos_token_id": 151643,
"do_sample": true,
"eos_token_id": [
151645,
151643
],
"pad_token_id": 151643,
"repetition_penalty": 1.05,
"temperature": 0.7,
"top_k": 20,
"top_p": 0.8,
"transformers_version": "4.52.3"
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d04233782f31a3a17f60355f213cf1370d89b0c6591c99612859b9f2d686f34f
size 4877660672

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bec13e53a0d25e9aef9e98e49c36b6274846476f180c1d2e2808b990d4c639a1
size 4932750888

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7944c3af5812335838b66c504f8e710e31b07d365b51637558ef87e1435eceff
size 4330865088

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:294ad4d0c516b67e1edd95e135094f23db0e9c09b0494f3cc928535ea46c8673
size 1089994880

View File

@@ -0,0 +1,346 @@
{
"metadata": {
"total_size": 15231233024
},
"weight_map": {
"lm_head.weight": "model-00004-of-00004.safetensors",
"model.embed_tokens.weight": "model-00001-of-00004.safetensors",
"model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.0.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.0.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.0.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.1.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.1.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.1.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.10.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.10.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.10.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.11.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.11.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.11.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.12.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.12.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.12.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.13.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.13.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.13.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.14.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.14.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.14.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.15.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.15.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.15.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.16.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.16.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.16.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.17.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.17.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.17.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.18.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.18.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.18.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.18.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.18.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.18.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.19.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.19.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.19.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.19.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.19.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.19.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.2.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.2.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.2.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.20.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.24.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.24.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.24.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.25.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.25.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.25.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.26.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.26.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.26.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.27.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.27.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.27.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.3.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.3.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.3.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.4.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.4.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.4.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.5.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.5.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.5.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.6.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.6.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.6.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.7.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.7.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.7.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.8.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.8.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.8.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.8.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.8.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.8.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.8.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.8.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.9.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.9.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.9.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.norm.weight": "model-00003-of-00004.safetensors"
}
}

View File

@@ -0,0 +1,31 @@
{
"additional_special_tokens": [
"<|im_start|>",
"<|im_end|>",
"<|object_ref_start|>",
"<|object_ref_end|>",
"<|box_start|>",
"<|box_end|>",
"<|quad_start|>",
"<|quad_end|>",
"<|vision_start|>",
"<|vision_end|>",
"<|vision_pad|>",
"<|image_pad|>",
"<|video_pad|>"
],
"eos_token": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
size 11421896

View File

@@ -0,0 +1,207 @@
{
"add_bos_token": false,
"add_prefix_space": false,
"added_tokens_decoder": {
"151643": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151644": {
"content": "<|im_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151645": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151646": {
"content": "<|object_ref_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151647": {
"content": "<|object_ref_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151648": {
"content": "<|box_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151649": {
"content": "<|box_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151650": {
"content": "<|quad_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151651": {
"content": "<|quad_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151652": {
"content": "<|vision_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151653": {
"content": "<|vision_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151654": {
"content": "<|vision_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151655": {
"content": "<|image_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151656": {
"content": "<|video_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151657": {
"content": "<tool_call>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151658": {
"content": "</tool_call>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151659": {
"content": "<|fim_prefix|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151660": {
"content": "<|fim_middle|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151661": {
"content": "<|fim_suffix|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151662": {
"content": "<|fim_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151663": {
"content": "<|repo_name|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151664": {
"content": "<|file_sep|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
}
},
"additional_special_tokens": [
"<|im_start|>",
"<|im_end|>",
"<|object_ref_start|>",
"<|object_ref_end|>",
"<|box_start|>",
"<|box_end|>",
"<|quad_start|>",
"<|quad_end|>",
"<|vision_start|>",
"<|vision_end|>",
"<|vision_pad|>",
"<|image_pad|>",
"<|video_pad|>"
],
"bos_token": null,
"clean_up_tokenization_spaces": false,
"eos_token": "<|im_end|>",
"errors": "replace",
"extra_special_tokens": {},
"model_max_length": 131072,
"pad_token": "<|endoftext|>",
"split_special_tokens": false,
"tokenizer_class": "Qwen2Tokenizer",
"unk_token": null
}

File diff suppressed because one or more lines are too long

156
README.md Normal file
View File

@@ -0,0 +1,156 @@
---
license: apache-2.0
tags:
- materialsanalyst-ai-7b
- MaterialsAnalyst-AI-7B
- materials-science
- computational-materials
- materials-analysis
- chain-of-thought
- reasoning-model
- property-prediction
- materials-discovery
- crystal-structure
- materials-informatics
- scientific-ai
- 7b
- text-generation
- gguf
- quantized
- fine-tuned
- lora
- peft
- json-mode
- structured-output
- materials-engineering
- band-gap-prediction
- computational-chemistry
- materials-characterization
base_model:
- Qwen/Qwen2.5-7B-Instruct
language:
- en
---
![MaterialsAnalyst-AI Logo](Model_Logo.png)
MaterialsAnalyst-AI-7B transforms raw materials data into actionable insights. This **open-source** model delivers transparent, step-by-step reasoning followed by structured analysis and practical application recommendations.
## Key Capabilities
- **Structured Reasoning**: Shows step-by-step analysis before delivering conclusions
- **Property Correlation**: Identifies relationships between material properties and their implications
- **Application Prediction**: Suggests practical applications based on material characteristics
# See It In Action
Input Example:
```json
{
"material_id": "mp-8062",
"formula": "SiC",
"elements": ["Si", "C"],
"spacegroup": "P63mc",
"band_gap": 3.26,
"formation_energy_per_atom": -0.73,
"density": 3.21,
"volume": 41.2,
"nsites": 8,
"is_stable": true,
"elastic_modulus": 448,
"bulk_modulus": 220,
"thermal_expansion": 4.2e-06,
"electron_affinity": 4.0,
"ionization_energy": 6.7,
"crystal_system": "Hexagonal",
"magnetic_property": "Non-magnetic",
"thermal_conductivity": 490,
"specific_heat": 0.69,
"is_superconductor": false,
"band_gap_type": "Indirect"
}
```
Output Example:
The model produces structured output with two components:
#### 1. Step-by-Step Reasoning
~~~
<think>
I need to think through how to interpret this SiC entry and assess its characteristics.
First, I should examine the composition and structure. This material contains silicon (Si) and carbon (C) in a hexagonal crystal structure with space group P63mc. This structure suggests high symmetry and anisotropic properties, typical for wide-bandgap materials like SiC.
Next, I should analyze the electronic properties. With a band gap of 3.26 eV that is indirect, this material would be classified as a wide-bandgap semiconductor. This suggests applications in optoelectronics, high-power electronics, and transparent conductive oxides.
For thermodynamic stability, the formation energy per atom of -0.73 eV indicates strong bonding and thermodynamic favorability. The material is stable according to the data, which means it can be synthesized and used under practical conditions without decomposition.
The mechanical properties show a high elastic modulus (448 GPa) and bulk modulus (220 GPa). This suggests the material would be very stiff and resistant to deformation when subjected to stress or pressure.
Regarding thermal behavior, the thermal conductivity of 490 W/m·K and expansion coefficient of 4.2e-6 K^-1 indicate excellent heat dissipation and dimensional stability under temperature changes. This would make it suitable for high-temperature applications like thermal management in electronics or coatings.
The magnetic properties are listed as non-magnetic, which means it won't interact significantly with magnetic fields. This would affect applications in magneto-electronics but is generally beneficial for general semiconductor uses.
Additionally, the material is marked as not superconducting, which is significant because it rules out cryogenic or quantum computing applications where superconductivity is required.
Considering all these properties together, this material would be particularly valuable for high-power electronics, optoelectronics, and thermal management due to its combination of wide bandgap, high thermal conductivity, and mechanical robustness. It might compete with other materials like GaN or AlN but offers advantages in cost (Si-based) and thermal stability.
</think>
~~~
#### 2. Comprehensive Analysis
~~~
<answer>
**Summary of SiC (ID: mp-8062):**
1. **Material Type & Composition**
- Silicon carbide (SiC), a widely studied wide-bandgap semiconductor compound.
2. **Crystal Structure**
- Hexagonal structure with space group P63mc, indicating anisotropic properties and high symmetry.
3. **Electronic Properties**
- Indirect bandgap of 3.26 eV, suitable for optoelectronics and high-power devices.
4. **Thermodynamic Stability**
- Highly stable (formation energy: -0.73 eV/atom), making it practical for synthesis and use.
5. **Mechanical Properties**
- Exceptional stiffness (elastic modulus: 448 GPa) and resistance to compression (bulk modulus: 220 GPa).
6. **Thermal Properties**
- Outstanding thermal conductivity (490 W/m·K) and low expansion coefficient (4.2e-6 K^-1), ideal for heat dissipation.
7. **Magnetic & Conductive Properties**
- Non-magnetic and non-superconducting, limiting use in magnetic applications but beneficial for general electronics.
8. **Applications**
- High-power electronics, optoelectronics, thermal management systems, and abrasion-resistant coatings.
**Verdict**: SiC is a high-performance material with exceptional thermal, mechanical, and electronic properties, making it ideal for demanding applications like power devices and high-temperature environments. Its stability and robustness give it an edge over competing wide-bandgap materials.
</answer>
~~~
# Getting Started
## 1. Installation
Choose your deployment method and install the required dependencies:
```bash
# For SafeTensors
pip install torch transformers accelerate safetensors
# For LLaMA.cpp
pip install llama-cpp-python
```
## 2. Configuration
<u>Download</u> and edit your chosen inference script to customize the analysis:
- **Input data**: Update the `JSON_INPUT` variable with your materials data
- **Model location**: Set the `model_path` variable to your downloaded model directory (SafeTensors script) or to the path of the `.gguf` file you want to load (LLaMA.cpp script)
## 3. Running Analysis
Run your script and the analysis results will appear in the terminal:
```bash
# For SafeTensors
python Inference_safetensors.py
# For LLaMA.cpp
python Inference_llama.cpp.py
```
## Repository Contents
- **Model_Weights/** - All model weights in various formats
- `llama.cpp/` - LLaMA.cpp compatible weights with various quantization options available
- `safetensors/` - SafeTensors format models
- `LoRA_adapter/` - LoRA adapter weights
- **Scripts/** - <u>Ready-to-use inference scripts</u>
- `Inference_llama.cpp.py` - For LLaMA.cpp deployment
- `Inference_safetensors.py` - For SafeTensors deployment
- **Data/** - Training data
- `Train-Ready.jsonl` - Complete JSONL training dataset
- **Training/** - Training documentation and logs
- `Training_Logs.txt` - Complete terminal logs from the training process
- `Training_Documentation.txt` - Detailed training specifications and parameters
## Attribution
MaterialsAnalyst-AI-7B was developed by *Raymond Lee*. If you use this model in your work, please include a reference to this repository. As of June 1st, 2026, this repository has reached **10,267** lifetime downloads. Thank you for your support!

View File

@@ -0,0 +1,60 @@
from llama_cpp import Llama
# INSTRUCTIONS: Replace the JSON below with your material's properties.
# Common data sources: materialsproject.org, DFT calculations, experimental databases.
# This script never parses the text: it is stripped of surrounding whitespace and
# embedded verbatim in the prompt, so nothing here checks that it is valid JSON.
JSON_INPUT = """
{
"material_id": "mp-8062",
"formula": "SiC",
"elements": [
"Si",
"C"
],
"spacegroup": "P63mc",
"band_gap": 3.26,
"formation_energy_per_atom": -0.73,
"density": 3.21,
"volume": 41.2,
"nsites": 8,
"is_stable": true,
"elastic_modulus": 448,
"bulk_modulus": 220,
"thermal_expansion": 4.2e-06,
"electron_affinity": 4.0,
"ionization_energy": 6.7,
"crystal_system": "Hexagonal",
"magnetic_property": "Non-magnetic",
"thermal_conductivity": 490,
"specific_heat": 0.69,
"is_superconductor": false,
"band_gap_type": "Indirect"
}
"""
import os

# Path to a GGUF weight file, or to a directory that contains exactly one.
model_path = "./"

# Llama(model_path=...) expects the path of a GGUF model file, not a directory.
# Resolve a directory to the single .gguf file inside it, and fail with a clear
# message when the choice is missing or ambiguous (e.g. several quantizations).
if os.path.isdir(model_path):
    gguf_files = sorted(
        entry for entry in os.listdir(model_path) if entry.lower().endswith(".gguf")
    )
    if len(gguf_files) != 1:
        raise FileNotFoundError(
            f"Expected exactly one .gguf file in {model_path!r}, found {gguf_files}. "
            "Set model_path to the GGUF file you want to load."
        )
    model_path = os.path.join(model_path, gguf_files[0])

llm = Llama(
    model_path=model_path,
    n_gpu_layers=29,
    n_ctx=10000,
    n_threads=4,
)

# The raw JSON text is wrapped in a single USER/ASSISTANT turn.
topic = JSON_INPUT.strip()
prompt = f"USER: {topic}\nASSISTANT:"

output = llm(
    prompt,
    max_tokens=3000,
    temperature=0.7,
    top_p=0.9,
    repeat_penalty=1.1,
)

# Fall back to an empty result when "choices" is missing or an empty list,
# instead of raising IndexError on the [0] lookup.
choices = output.get("choices") or [{}]
result = choices[0].get("text", "").strip()
print(result)

View File

@@ -0,0 +1,83 @@
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# INSTRUCTIONS: Replace the JSON below with your material's properties.
# Common data sources: materialsproject.org, DFT calculations, experimental databases.
# This script never parses the text: generate_response() strips surrounding
# whitespace and embeds it verbatim in the prompt, so nothing here checks that it
# is valid JSON.
JSON_INPUT = """
{
"material_id": "mp-8062",
"formula": "SiC",
"elements": [
"Si",
"C"
],
"spacegroup": "P63mc",
"band_gap": 3.26,
"formation_energy_per_atom": -0.73,
"density": 3.21,
"volume": 41.2,
"nsites": 8,
"is_stable": true,
"elastic_modulus": 448,
"bulk_modulus": 220,
"thermal_expansion": 4.2e-06,
"electron_affinity": 4.0,
"ionization_energy": 6.7,
"crystal_system": "Hexagonal",
"magnetic_property": "Non-magnetic",
"thermal_conductivity": 490,
"specific_heat": 0.69,
"is_superconductor": false,
"band_gap_type": "Indirect"
}
"""
def load_model(model_path):
    """Load the causal language model and its tokenizer from ``model_path``.

    The model is requested in float16 with ``device_map="auto"``. Both loaders
    are called with ``trust_remote_code=True``.

    Args:
        model_path: Location passed unchanged to both ``from_pretrained`` calls.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    # NOTE(review): trust_remote_code=True permits custom code shipped alongside
    # the weights to run at load time — confirm this checkpoint actually needs it.
    model_options = {
        "torch_dtype": torch.float16,
        "device_map": "auto",
        "trust_remote_code": True,
    }
    causal_lm = AutoModelForCausalLM.from_pretrained(model_path, **model_options)
    text_tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    return causal_lm, text_tokenizer
def generate_response(model, tokenizer, topic):
    """Generate the model's analysis for one material description.

    Args:
        model: Object exposing ``device`` and ``generate(**inputs, ...)``.
        tokenizer: Callable that encodes the prompt and offers ``decode``.
        topic: Material data as text; surrounding whitespace is stripped before
            it is placed in the prompt.

    Returns:
        The newly generated text only, with surrounding whitespace stripped.
    """
    topic = topic.strip()
    prompt = f"USER: {topic}\nASSISTANT:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=3000,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.1,
        do_sample=True,
    )
    # The returned sequence starts with the prompt tokens. Drop them by position
    # rather than splitting the decoded text on "ASSISTANT:", which would cut off
    # any reply that happens to contain that marker itself.
    prompt_length = inputs["input_ids"].shape[-1]
    generated_ids = outputs[0][prompt_length:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
def run():
    """Load the model from ``model_path``, analyse ``JSON_INPUT`` and print the result."""
    model_path = "./"  # Path to the directory containing your model weight files
    loaded = load_model(model_path)
    # load_model returns (model, tokenizer), which are the first two arguments here.
    print(generate_response(*loaded, JSON_INPUT))


if __name__ == "__main__":
    run()

View File

@@ -0,0 +1,63 @@
MaterialsAnalyst-AI-7B Training Documentation
================================================
Model Training Details
---------------------
Base Model: Qwen2.5-7B-Instruct
Fine-tuning Method: LoRA (Low-Rank Adaptation)
Training Infrastructure: Single NVIDIA A100 SXM4 GPU
Training Duration: Approximately 5.4 hours
Training Dataset: Custom curated dataset for materials analysis
Dataset Specifications
---------------------
Total Token Count: 6,292,692
Total Sample Count: 6,000
Average Tokens/Sample: 1048.78
Max Token Count: 1,289
Min Token Count: 922
Tokens Counted Using: tiktoken (cl100k_base encoding)
Dataset Creation: Generated using the DeepSeek-V3 API
Training Configuration
---------------------
LoRA Parameters:
- Rank: 32
- Alpha: 64
- Dropout: 0.1
- Target Modules: q_proj, k_proj, v_proj, o_proj, gate_proj, up_proj, down_proj, lm_head
Training Hyperparameters:
- Learning Rate: 5e-5
- Batch Size: 4
- Gradient Accumulation: 5
- Effective Batch Size: 20
- Max Sequence Length: 2048
- Epochs: 3
- Warmup Ratio: 0.01
- Weight Decay: 0.01
- Max Grad Norm: 1.0
- LR Scheduler: Cosine
Hardware & Environment
---------------------
GPU: NVIDIA A100 SXM4 (40GB)
Operating System: Ubuntu
CUDA Version: 11.8
PyTorch Version: 2.7.0
Compute Capability: 8.0
Optimization: FP16, Gradient Checkpointing
Training Performance
---------------------
Training Runtime: 5.37 hours (19,348 seconds)
Train Samples/Second: 0.884
Train Steps/Second: 0.044
Training Loss (Final): 0.170
Validation Loss (Final): 0.136
Total Training Steps: 855

109
Training/Training_Logs.txt Normal file
View File

@@ -0,0 +1,109 @@
Loading tokenizer...
Loading dataset from ./Dataset.jsonl
Loaded 6000 samples
Training on 5700 samples, validating on 300 samples
Loading model...
Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:04<00:00, 1.23s/it]
Trainable parameters: 85,721,088 (1.11% of 7,701,337,600)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Starting training...
{'loss': 1.0399, 'grad_norm': 0.704595148563385, 'learning_rate': 5e-05, 'epoch': 0.04}
{'loss': 0.5935, 'grad_norm': 0.47508421540260315, 'learning_rate': 4.998276468898823e-05, 'epoch': 0.07}
{'loss': 0.3596, 'grad_norm': 0.313719779253006, 'learning_rate': 4.993108252042854e-05, 'epoch': 0.11}
{'loss': 0.2979, 'grad_norm': 0.31504514813423157, 'learning_rate': 4.9845024754980876e-05, 'epoch': 0.14}
{'loss': 0.2571, 'grad_norm': 0.3241384029388428, 'learning_rate': 4.97247100512334e-05, 'epoch': 0.18}
{'loss': 0.2425, 'grad_norm': 0.31259262561798096, 'learning_rate': 4.9570304302093216e-05, 'epoch': 0.21}
{'loss': 0.2251, 'grad_norm': 0.34634077548980713, 'learning_rate': 4.938202040604898e-05, 'epoch': 0.25}
{'loss': 0.2125, 'grad_norm': 0.33605319261550903, 'learning_rate': 4.916011797362123e-05, 'epoch': 0.28}
{'loss': 0.2091, 'grad_norm': 0.3671645522117615, 'learning_rate': 4.890490296940496e-05, 'epoch': 0.32}
{'loss': 0.1996, 'grad_norm': 0.3266342580318451, 'learning_rate': 4.861672729019797e-05, 'epoch': 0.35}
{'loss': 0.1901, 'grad_norm': 0.37788864970207214, 'learning_rate': 4.829598827979682e-05, 'epoch': 0.39}
{'loss': 0.183, 'grad_norm': 0.3393491208553314, 'learning_rate': 4.794312818112935e-05, 'epoch': 0.42}
{'loss': 0.1823, 'grad_norm': 0.34580835700035095, 'learning_rate': 4.755863352647909e-05, 'epoch': 0.46}
{'loss': 0.183, 'grad_norm': 0.33839499950408936, 'learning_rate': 4.7143034466642464e-05, 'epoch': 0.49}
{'loss': 0.1837, 'grad_norm': 0.3656191825866699, 'learning_rate': 4.669690403994367e-05, 'epoch': 0.53}
{'loss': 0.1711, 'grad_norm': 0.3499873876571655, 'learning_rate': 4.622085738211518e-05, 'epoch': 0.56}
{'loss': 0.1738, 'grad_norm': 0.34751611948013306, 'learning_rate': 4.57155508781333e-05, 'epoch': 0.6}
{'loss': 0.1716, 'grad_norm': 0.3428999185562134, 'learning_rate': 4.518168125717824e-05, 'epoch': 0.63}
{'loss': 0.174, 'grad_norm': 0.37544649839401245, 'learning_rate': 4.4619984631966524e-05, 'epoch': 0.67}
{'loss': 0.1673, 'grad_norm': 0.32618045806884766, 'learning_rate': 4.403123548378055e-05, 'epoch': 0.7}
{'loss': 0.164, 'grad_norm': 0.3500118851661682, 'learning_rate': 4.341624559459447e-05, 'epoch': 0.74}
{'loss': 0.1656, 'grad_norm': 0.35224637389183044, 'learning_rate': 4.2775862927769025e-05, 'epoch': 0.77}
{'loss': 0.1641, 'grad_norm': 0.3303431570529938, 'learning_rate': 4.2110970458858546e-05, 'epoch': 0.81}
{'loss': 0.1628, 'grad_norm': 0.3580450117588043, 'learning_rate': 4.1422484958142326e-05, 'epoch': 0.84}
{'loss': 0.1637, 'grad_norm': 0.3214457035064697, 'learning_rate': 4.071135572655892e-05, 'epoch': 0.88}
{'loss': 0.1626, 'grad_norm': 0.3646449148654938, 'learning_rate': 3.99785632867864e-05, 'epoch': 0.91}
{'loss': 0.1629, 'grad_norm': 0.32008472084999084, 'learning_rate': 3.922511803127329e-05, 'epoch': 0.95}
{'loss': 0.1563, 'grad_norm': 0.33964064717292786, 'learning_rate': 3.845205882908432e-05, 'epoch': 0.98}
{'eval_loss': 0.15612632036209106, 'eval_runtime': 106.8808, 'eval_samples_per_second': 2.807, 'eval_steps_per_second': 0.356, 'epoch': 1.0}
33%|█████████████████████████████████████████▎ | 285/855 [1:47:02<3:31:58, 22.31s/it/venv/main/lib/python3.10/site-packages/peft/utils/save_and_load.py:220: UserWarning: Setting `save_embedding_layers` to `True` as embedding layers found in `target_modules`.
warnings.warn("Setting `save_embedding_layers` to `True` as embedding layers found in `target_modules`.")
{'loss': 0.1529, 'grad_norm': 0.33749136328697205, 'learning_rate': 3.766045159348191e-05, 'epoch': 1.02}
{'loss': 0.1499, 'grad_norm': 0.36011260747909546, 'learning_rate': 3.685138781221844e-05, 'epoch': 1.05}
{'loss': 0.1473, 'grad_norm': 0.35543763637542725, 'learning_rate': 3.6025983042565795e-05, 'epoch': 1.09}
{'loss': 0.1463, 'grad_norm': 0.34852930903434753, 'learning_rate': 3.51853753731572e-05, 'epoch': 1.12}
{'loss': 0.1479, 'grad_norm': 0.35828718543052673, 'learning_rate': 3.433072385476237e-05, 'epoch': 1.16}
{'loss': 0.1468, 'grad_norm': 0.33792921900749207, 'learning_rate': 3.3463206902159395e-05, 'epoch': 1.19}
{'loss': 0.1489, 'grad_norm': 0.34330445528030396, 'learning_rate': 3.2584020669307146e-05, 'epoch': 1.23}
{'loss': 0.1501, 'grad_norm': 0.33124926686286926, 'learning_rate': 3.169437740005849e-05, 'epoch': 1.26}
{'loss': 0.1434, 'grad_norm': 0.33036795258522034, 'learning_rate': 3.079550375668821e-05, 'epoch': 1.3}
{'loss': 0.1468, 'grad_norm': 0.33003684878349304, 'learning_rate': 2.9888639128540615e-05, 'epoch': 1.33}
{'loss': 0.1455, 'grad_norm': 0.3746543824672699, 'learning_rate': 2.8975033923128642e-05, 'epoch': 1.37}
{'loss': 0.1489, 'grad_norm': 0.3552297353744507, 'learning_rate': 2.8055947842040862e-05, 'epoch': 1.4}
{'loss': 0.1436, 'grad_norm': 0.35433629155158997, 'learning_rate': 2.713264814403362e-05, 'epoch': 1.44}
{'loss': 0.1477, 'grad_norm': 0.36622723937034607, 'learning_rate': 2.6206407897703095e-05, 'epoch': 1.47}
{'loss': 0.1428, 'grad_norm': 0.32289159297943115, 'learning_rate': 2.5278504226146636e-05, 'epoch': 1.51}
{'loss': 0.143, 'grad_norm': 0.3453490734100342, 'learning_rate': 2.4350216546033738e-05, 'epoch': 1.54}
{'loss': 0.1398, 'grad_norm': 0.35188964009284973, 'learning_rate': 2.3422824803514384e-05, 'epoch': 1.58}
{'loss': 0.1415, 'grad_norm': 0.36733925342559814, 'learning_rate': 2.2497607709397543e-05, 'epoch': 1.61}
{'loss': 0.1424, 'grad_norm': 0.34543827176094055, 'learning_rate': 2.1575840976032867e-05, 'epoch': 1.65}
{'loss': 0.1401, 'grad_norm': 0.34187060594558716, 'learning_rate': 2.0658795558326743e-05, 'epoch': 1.68}
{'loss': 0.1366, 'grad_norm': 0.386068731546402, 'learning_rate': 1.974773590131805e-05, 'epoch': 1.72}
{'loss': 0.1404, 'grad_norm': 0.34624332189559937, 'learning_rate': 1.884391819672991e-05, 'epoch': 1.75}
{'loss': 0.1405, 'grad_norm': 0.32291245460510254, 'learning_rate': 1.794858865090123e-05, 'epoch': 1.79}
{'loss': 0.1422, 'grad_norm': 0.3514968454837799, 'learning_rate': 1.7062981766486437e-05, 'epoch': 1.82}
{'loss': 0.1389, 'grad_norm': 0.3431772291660309, 'learning_rate': 1.618831864029251e-05, 'epoch': 1.86}
{'loss': 0.1363, 'grad_norm': 0.3377828896045685, 'learning_rate': 1.5325805279600286e-05, 'epoch': 1.89}
{'loss': 0.1373, 'grad_norm': 0.3381596803665161, 'learning_rate': 1.447663093929163e-05, 'epoch': 1.93}
{'loss': 0.1387, 'grad_norm': 0.3349764049053192, 'learning_rate': 1.3641966482075208e-05, 'epoch': 1.96}
{'loss': 0.1376, 'grad_norm': 0.3536580801010132, 'learning_rate': 1.282296276407189e-05, 'epoch': 2.0}
{'eval_loss': 0.13975860178470612, 'eval_runtime': 106.9279, 'eval_samples_per_second': 2.806, 'eval_steps_per_second': 0.355, 'epoch': 2.0}
67%|██████████████████████████████████████████████████████████████████████████████████▋ | 570/855 [3:35:09<1:46:05, 22.33s/it/venv/main/lib/python3.10/site-packages/peft/utils/save_and_load.py:220: UserWarning: Setting `save_embedding_layers` to `True` as embedding layers found in `target_modules`.
warnings.warn("Setting `save_embedding_layers` to `True` as embedding layers found in `target_modules`.")
{'loss': 0.1307, 'grad_norm': 0.34761759638786316, 'learning_rate': 1.2020749047985627e-05, 'epoch': 2.04}
{'loss': 0.1275, 'grad_norm': 0.3375226557254791, 'learning_rate': 1.1236431446047985e-05, 'epoch': 2.07}
{'loss': 0.1263, 'grad_norm': 0.382931113243103, 'learning_rate': 1.0471091394883086e-05, 'epoch': 2.11}
{'loss': 0.1299, 'grad_norm': 0.3302248418331146, 'learning_rate': 9.72578416439587e-06, 'epoch': 2.14}
{'loss': 0.1316, 'grad_norm': 0.3511696457862854, 'learning_rate': 9.001537402739656e-06, 'epoch': 2.18}
{'loss': 0.1333, 'grad_norm': 0.3526867926120758, 'learning_rate': 8.29934971936938e-06, 'epoch': 2.21}
{'loss': 0.1284, 'grad_norm': 0.3544050455093384, 'learning_rate': 7.620189308133943e-06, 'epoch': 2.25}
{'loss': 0.1278, 'grad_norm': 0.34632644057273865, 'learning_rate': 6.964992612306526e-06, 'epoch': 2.28}
{'loss': 0.13, 'grad_norm': 0.3530580997467041, 'learning_rate': 6.334663033393229e-06, 'epoch': 2.32}
{'loss': 0.1307, 'grad_norm': 0.4017309546470642, 'learning_rate': 5.730069685500669e-06, 'epoch': 2.35}
{'loss': 0.1356, 'grad_norm': 0.3461137115955353, 'learning_rate': 5.1520461969797565e-06, 'epoch': 2.39}
{'loss': 0.1276, 'grad_norm': 0.34974604845046997, 'learning_rate': 4.60138956099824e-06, 'epoch': 2.42}
{'loss': 0.1284, 'grad_norm': 0.3364820182323456, 'learning_rate': 4.078859036626676e-06, 'epoch': 2.46}
{'loss': 0.1252, 'grad_norm': 0.33579540252685547, 'learning_rate': 3.5851751019531088e-06, 'epoch': 2.49}
{'loss': 0.1257, 'grad_norm': 0.3575705587863922, 'learning_rate': 3.121018460669986e-06, 'epoch': 2.53}
{'loss': 0.1283, 'grad_norm': 0.34064194560050964, 'learning_rate': 2.687029103502972e-06, 'epoch': 2.56}
{'loss': 0.1279, 'grad_norm': 0.337855726480484, 'learning_rate': 2.283805425775784e-06, 'epoch': 2.6}
{'loss': 0.1285, 'grad_norm': 0.35349616408348083, 'learning_rate': 1.9119034023278637e-06, 'epoch': 2.63}
{'loss': 0.1319, 'grad_norm': 0.34553083777427673, 'learning_rate': 1.5718358209224153e-06, 'epoch': 2.67}
{'loss': 0.1283, 'grad_norm': 0.3534978926181793, 'learning_rate': 1.2640715752018778e-06, 'epoch': 2.7}
{'loss': 0.1294, 'grad_norm': 0.3369212746620178, 'learning_rate': 9.890350181657126e-07, 'epoch': 2.74}
{'loss': 0.1242, 'grad_norm': 0.3364659249782562, 'learning_rate': 7.471053770619352e-07, 'epoch': 2.77}
{'loss': 0.128, 'grad_norm': 0.33932891488075256, 'learning_rate': 5.386162304991394e-07, 'epoch': 2.81}
{'loss': 0.1295, 'grad_norm': 0.36040279269218445, 'learning_rate': 3.638550485000031e-07, 'epoch': 2.84}
{'loss': 0.128, 'grad_norm': 0.3814302086830139, 'learning_rate': 2.230627961304993e-07, 'epoch': 2.88}
{'loss': 0.1271, 'grad_norm': 0.3205372095108032, 'learning_rate': 1.1643360125123126e-07, 'epoch': 2.91}
{'loss': 0.131, 'grad_norm': 0.37936437129974365, 'learning_rate': 4.411448684913666e-08, 'epoch': 2.95}
{'loss': 0.1306, 'grad_norm': 0.3545021116733551, 'learning_rate': 6.205168318523802e-09, 'epoch': 2.98}
{'eval_loss': 0.1355518102645874, 'eval_runtime': 105.9093, 'eval_samples_per_second': 2.833, 'eval_steps_per_second': 0.359, 'epoch': 3.0}
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 855/855 [5:22:24<00:00, 22.17s/it/venv/main/lib/python3.10/site-packages/peft/utils/save_and_load.py:220: UserWarning: Setting `save_embedding_layers` to `True` as embedding layers found in `target_modules`.
warnings.warn("Setting `save_embedding_layers` to `True` as embedding layers found in `target_modules`.")
{'train_runtime': 19348.838, 'train_samples_per_second': 0.884, 'train_steps_per_second': 0.044, 'train_loss': 0.17026110399536223, 'epoch': 3.0}
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 855/855 [5:22:29<00:00, 22.63s/it]
Saving model...
/venv/main/lib/python3.10/site-packages/peft/utils/save_and_load.py:220: UserWarning: Setting `save_embedding_layers` to `True` as embedding layers found in `target_modules`.
warnings.warn("Setting `save_embedding_layers` to `True` as embedding layers found in `target_modules`.")
Model saved to ./MaterialsAnalyst-AI-7B_LoRA_adapter