初始化项目,由ModelHub XC社区提供模型
Model: Raymond-dev-546730/MaterialsAnalyst-AI-7B Source: Original Platform
This commit is contained in:
59
.gitattributes
vendored
Normal file
59
.gitattributes
vendored
Normal file
@@ -0,0 +1,59 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
Data/Train-Ready.jsonl filter=lfs diff=lfs merge=lfs -text
|
||||
Model_Weights/LoRA_adapter/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
Model_Weights/safetensors/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-f16.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q5_K.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q5_1.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q5_0.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q4_K.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q4_1.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q4_0.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q3_K_S.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q3_K.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-IQ3_XS.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q2_K.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Data/Dataset.jsonl filter=lfs diff=lfs merge=lfs -text
|
||||
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-IQ4_NL.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-IQ4_XS.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
3
Data/Dataset.jsonl
Normal file
3
Data/Dataset.jsonl
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c853a7dfbfb768d14b47bf45a458aa5d516274707af788e7c1476f43b2027635
|
||||
size 28585142
|
||||
BIN
Model_Logo.png
Normal file
BIN
Model_Logo.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 74 KiB |
40
Model_Weights/LoRA_adapter/adapter_config.json
Normal file
40
Model_Weights/LoRA_adapter/adapter_config.json
Normal file
@@ -0,0 +1,40 @@
|
||||
{
|
||||
"alpha_pattern": {},
|
||||
"auto_mapping": null,
|
||||
"base_model_name_or_path": "Qwen/Qwen2.5-7B-Instruct",
|
||||
"bias": "none",
|
||||
"corda_config": null,
|
||||
"eva_config": null,
|
||||
"exclude_modules": null,
|
||||
"fan_in_fan_out": false,
|
||||
"inference_mode": true,
|
||||
"init_lora_weights": true,
|
||||
"layer_replication": null,
|
||||
"layers_pattern": null,
|
||||
"layers_to_transform": null,
|
||||
"loftq_config": {},
|
||||
"lora_alpha": 64,
|
||||
"lora_bias": false,
|
||||
"lora_dropout": 0.1,
|
||||
"megatron_config": null,
|
||||
"megatron_core": "megatron.core",
|
||||
"modules_to_save": null,
|
||||
"peft_type": "LORA",
|
||||
"r": 32,
|
||||
"rank_pattern": {},
|
||||
"revision": null,
|
||||
"target_modules": [
|
||||
"q_proj",
|
||||
"down_proj",
|
||||
"k_proj",
|
||||
"lm_head",
|
||||
"v_proj",
|
||||
"gate_proj",
|
||||
"o_proj",
|
||||
"up_proj"
|
||||
],
|
||||
"task_type": "CAUSAL_LM",
|
||||
"trainable_token_indices": null,
|
||||
"use_dora": false,
|
||||
"use_rslora": false
|
||||
}
|
||||
3
Model_Weights/LoRA_adapter/adapter_model.safetensors
Normal file
3
Model_Weights/LoRA_adapter/adapter_model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1e5643f7e70f81b24b0fb369804a434ddaf21cb0b4985ecb6f364cbb1a108207
|
||||
size 1432932264
|
||||
24
Model_Weights/LoRA_adapter/added_tokens.json
Normal file
24
Model_Weights/LoRA_adapter/added_tokens.json
Normal file
@@ -0,0 +1,24 @@
|
||||
{
|
||||
"</tool_call>": 151658,
|
||||
"<tool_call>": 151657,
|
||||
"<|box_end|>": 151649,
|
||||
"<|box_start|>": 151648,
|
||||
"<|endoftext|>": 151643,
|
||||
"<|file_sep|>": 151664,
|
||||
"<|fim_middle|>": 151660,
|
||||
"<|fim_pad|>": 151662,
|
||||
"<|fim_prefix|>": 151659,
|
||||
"<|fim_suffix|>": 151661,
|
||||
"<|im_end|>": 151645,
|
||||
"<|im_start|>": 151644,
|
||||
"<|image_pad|>": 151655,
|
||||
"<|object_ref_end|>": 151647,
|
||||
"<|object_ref_start|>": 151646,
|
||||
"<|quad_end|>": 151651,
|
||||
"<|quad_start|>": 151650,
|
||||
"<|repo_name|>": 151663,
|
||||
"<|video_pad|>": 151656,
|
||||
"<|vision_end|>": 151653,
|
||||
"<|vision_pad|>": 151654,
|
||||
"<|vision_start|>": 151652
|
||||
}
|
||||
54
Model_Weights/LoRA_adapter/chat_template.jinja
Normal file
54
Model_Weights/LoRA_adapter/chat_template.jinja
Normal file
@@ -0,0 +1,54 @@
|
||||
{%- if tools %}
|
||||
{{- '<|im_start|>system\n' }}
|
||||
{%- if messages[0]['role'] == 'system' %}
|
||||
{{- messages[0]['content'] }}
|
||||
{%- else %}
|
||||
{{- 'You are MaterialsAnalyst-AI-7B. You are a helpful assistant with expertise in materials science.' }}
|
||||
{%- endif %}
|
||||
{{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
||||
{%- for tool in tools %}
|
||||
{{- "\n" }}
|
||||
{{- tool | tojson }}
|
||||
{%- endfor %}
|
||||
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
||||
{%- else %}
|
||||
{%- if messages[0]['role'] == 'system' %}
|
||||
{{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>system\nYou are MaterialsAnalyst-AI-7B. You are a helpful assistant with expertise in materials science.<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- for message in messages %}
|
||||
{%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
|
||||
{%- elif message.role == "assistant" %}
|
||||
{{- '<|im_start|>' + message.role }}
|
||||
{%- if message.content %}
|
||||
{{- '\n' + message.content }}
|
||||
{%- endif %}
|
||||
{%- for tool_call in message.tool_calls %}
|
||||
{%- if tool_call.function is defined %}
|
||||
{%- set tool_call = tool_call.function %}
|
||||
{%- endif %}
|
||||
{{- '\n<tool_call>\n{"name": "' }}
|
||||
{{- tool_call.name }}
|
||||
{{- '", "arguments": ' }}
|
||||
{{- tool_call.arguments | tojson }}
|
||||
{{- '}\n</tool_call>' }}
|
||||
{%- endfor %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- elif message.role == "tool" %}
|
||||
{%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
|
||||
{{- '<|im_start|>user' }}
|
||||
{%- endif %}
|
||||
{{- '\n<tool_response>\n' }}
|
||||
{{- message.content }}
|
||||
{{- '\n</tool_response>' }}
|
||||
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- if add_generation_prompt %}
|
||||
{{- '<|im_start|>assistant\n' }}
|
||||
{%- endif %}
|
||||
151388
Model_Weights/LoRA_adapter/merges.txt
Normal file
151388
Model_Weights/LoRA_adapter/merges.txt
Normal file
File diff suppressed because it is too large
Load Diff
31
Model_Weights/LoRA_adapter/special_tokens_map.json
Normal file
31
Model_Weights/LoRA_adapter/special_tokens_map.json
Normal file
@@ -0,0 +1,31 @@
|
||||
{
|
||||
"additional_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"eos_token": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"pad_token": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
||||
3
Model_Weights/LoRA_adapter/tokenizer.json
Normal file
3
Model_Weights/LoRA_adapter/tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:694f1174c5bdf94e2fc50796c0f1733a5a3945ff110b0dfa40ea0701cc9c9c42
|
||||
size 11422176
|
||||
207
Model_Weights/LoRA_adapter/tokenizer_config.json
Normal file
207
Model_Weights/LoRA_adapter/tokenizer_config.json
Normal file
@@ -0,0 +1,207 @@
|
||||
{
|
||||
"add_bos_token": false,
|
||||
"add_prefix_space": false,
|
||||
"added_tokens_decoder": {
|
||||
"151643": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151644": {
|
||||
"content": "<|im_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151645": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151646": {
|
||||
"content": "<|object_ref_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151647": {
|
||||
"content": "<|object_ref_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151648": {
|
||||
"content": "<|box_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151649": {
|
||||
"content": "<|box_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151650": {
|
||||
"content": "<|quad_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151651": {
|
||||
"content": "<|quad_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151652": {
|
||||
"content": "<|vision_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151653": {
|
||||
"content": "<|vision_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151654": {
|
||||
"content": "<|vision_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151655": {
|
||||
"content": "<|image_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151656": {
|
||||
"content": "<|video_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151657": {
|
||||
"content": "<tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151658": {
|
||||
"content": "</tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151659": {
|
||||
"content": "<|fim_prefix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151660": {
|
||||
"content": "<|fim_middle|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151661": {
|
||||
"content": "<|fim_suffix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151662": {
|
||||
"content": "<|fim_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151663": {
|
||||
"content": "<|repo_name|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151664": {
|
||||
"content": "<|file_sep|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
}
|
||||
},
|
||||
"additional_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"bos_token": null,
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "<|im_end|>",
|
||||
"errors": "replace",
|
||||
"extra_special_tokens": {},
|
||||
"model_max_length": 131072,
|
||||
"pad_token": "<|endoftext|>",
|
||||
"split_special_tokens": false,
|
||||
"tokenizer_class": "Qwen2Tokenizer",
|
||||
"unk_token": null
|
||||
}
|
||||
1
Model_Weights/LoRA_adapter/vocab.json
Normal file
1
Model_Weights/LoRA_adapter/vocab.json
Normal file
File diff suppressed because one or more lines are too long
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9b4da075007398d121f9e440325f72e7152df8bf03e751f6315e1d2c100b9348
|
||||
size 3346253248
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:df602ce82f4631acf3cc3843fb70a2deade9f8da0b685481acc3eeec68611292
|
||||
size 4463271360
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a5418d385cd002eca9ed77dbcf153908e24da1747981138e4a7aa69f260073a7
|
||||
size 4250295744
|
||||
3
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q2_K.gguf
Normal file
3
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q2_K.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:39bc1dd076b896e63d55314d4748e31f64187d4d0eff5f3734dc903043d5d6da
|
||||
size 3015937472
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4c96411a6d918ce2bd25d9564f7717e4d86550f8724844aab1044a6d02d65f79
|
||||
size 3808388544
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:34efd7373c761354d808462a4011d77c476f93c503f353e68bcc76c6dd2718e8
|
||||
size 3492365760
|
||||
3
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q4_0.gguf
Normal file
3
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q4_0.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6b03a8c6d9d749cfc82c71d9330725f7fb3fb91fc8e568341ef02c30dbb43ea5
|
||||
size 4431388096
|
||||
3
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q4_1.gguf
Normal file
3
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q4_1.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c13a3390601d63b785dabfa18765ec6f02f4866d13a4c08404c0eeea3beaf88b
|
||||
size 4873280960
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:692e14d891038a02e0cd85a2519ac51e636e13c1e524354196c89dcdfb150222
|
||||
size 4683070912
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6e5f540d8a7202afa2310e76b4062f902a2c7d5883eefed527bb37cb726ecf93
|
||||
size 4457766336
|
||||
3
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q5_0.gguf
Normal file
3
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q5_0.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:eccbf47537fca23a1fc24e151d703858334e2b7ab7bfddc7f8423a63dfc763a5
|
||||
size 5315173824
|
||||
3
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q5_1.gguf
Normal file
3
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q5_1.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4c051833d407c8b7eed550c16e464ed4347b1ea184ba285bb735a964dc2bb43b
|
||||
size 5757066688
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:80f1c5c480f6203583246881e3af08db383714478ef98f6d8a573b61638a4faa
|
||||
size 5444828608
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4a30fe86b09fa3c62d6211e7f5f80bf272cc0bd96f58c2d90ebf8af89ae3eca2
|
||||
size 5315173824
|
||||
3
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q6_K.gguf
Normal file
3
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q6_K.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:fcfbecd774f0f94c10be45d1e93451d56a1ef8ba026e4040e8865e21b14873e0
|
||||
size 6254196160
|
||||
3
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q8_0.gguf
Normal file
3
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-Q8_0.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:546116b6b810420dbef5de2067440f4f24d451cbda855ac1ebf5b032b272e101
|
||||
size 8098522560
|
||||
3
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-f16.gguf
Normal file
3
Model_Weights/llama.cpp/MaterialsAnalyst-AI-7B-f16.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6774ccf0edc76a3e498590e0ec801620d9aecbde5c1089cc8ce1b8ba245282d8
|
||||
size 15237850560
|
||||
24
Model_Weights/safetensors/added_tokens.json
Normal file
24
Model_Weights/safetensors/added_tokens.json
Normal file
@@ -0,0 +1,24 @@
|
||||
{
|
||||
"</tool_call>": 151658,
|
||||
"<tool_call>": 151657,
|
||||
"<|box_end|>": 151649,
|
||||
"<|box_start|>": 151648,
|
||||
"<|endoftext|>": 151643,
|
||||
"<|file_sep|>": 151664,
|
||||
"<|fim_middle|>": 151660,
|
||||
"<|fim_pad|>": 151662,
|
||||
"<|fim_prefix|>": 151659,
|
||||
"<|fim_suffix|>": 151661,
|
||||
"<|im_end|>": 151645,
|
||||
"<|im_start|>": 151644,
|
||||
"<|image_pad|>": 151655,
|
||||
"<|object_ref_end|>": 151647,
|
||||
"<|object_ref_start|>": 151646,
|
||||
"<|quad_end|>": 151651,
|
||||
"<|quad_start|>": 151650,
|
||||
"<|repo_name|>": 151663,
|
||||
"<|video_pad|>": 151656,
|
||||
"<|vision_end|>": 151653,
|
||||
"<|vision_pad|>": 151654,
|
||||
"<|vision_start|>": 151652
|
||||
}
|
||||
54
Model_Weights/safetensors/chat_template.jinja
Normal file
54
Model_Weights/safetensors/chat_template.jinja
Normal file
@@ -0,0 +1,54 @@
|
||||
{%- if tools %}
|
||||
{{- '<|im_start|>system\n' }}
|
||||
{%- if messages[0]['role'] == 'system' %}
|
||||
{{- messages[0]['content'] }}
|
||||
{%- else %}
|
||||
{{- 'You are MaterialsAnalyst-AI-7B. You are a helpful assistant with expertise in materials science.' }}
|
||||
{%- endif %}
|
||||
{{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
||||
{%- for tool in tools %}
|
||||
{{- "\n" }}
|
||||
{{- tool | tojson }}
|
||||
{%- endfor %}
|
||||
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
||||
{%- else %}
|
||||
{%- if messages[0]['role'] == 'system' %}
|
||||
{{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>system\nYou are MaterialsAnalyst-AI-7B. You are a helpful assistant with expertise in materials science.<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- for message in messages %}
|
||||
{%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
|
||||
{%- elif message.role == "assistant" %}
|
||||
{{- '<|im_start|>' + message.role }}
|
||||
{%- if message.content %}
|
||||
{{- '\n' + message.content }}
|
||||
{%- endif %}
|
||||
{%- for tool_call in message.tool_calls %}
|
||||
{%- if tool_call.function is defined %}
|
||||
{%- set tool_call = tool_call.function %}
|
||||
{%- endif %}
|
||||
{{- '\n<tool_call>\n{"name": "' }}
|
||||
{{- tool_call.name }}
|
||||
{{- '", "arguments": ' }}
|
||||
{{- tool_call.arguments | tojson }}
|
||||
{{- '}\n</tool_call>' }}
|
||||
{%- endfor %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- elif message.role == "tool" %}
|
||||
{%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
|
||||
{{- '<|im_start|>user' }}
|
||||
{%- endif %}
|
||||
{{- '\n<tool_response>\n' }}
|
||||
{{- message.content }}
|
||||
{{- '\n</tool_response>' }}
|
||||
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- if add_generation_prompt %}
|
||||
{{- '<|im_start|>assistant\n' }}
|
||||
{%- endif %}
|
||||
28
Model_Weights/safetensors/config.json
Normal file
28
Model_Weights/safetensors/config.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"architectures": [
|
||||
"Qwen2ForCausalLM"
|
||||
],
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": 151643,
|
||||
"eos_token_id": 151645,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 3584,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 18944,
|
||||
"max_position_embeddings": 32768,
|
||||
"max_window_layers": 28,
|
||||
"model_type": "qwen2",
|
||||
"num_attention_heads": 28,
|
||||
"num_hidden_layers": 28,
|
||||
"num_key_value_heads": 4,
|
||||
"rms_norm_eps": 1e-06,
|
||||
"rope_scaling": null,
|
||||
"rope_theta": 1000000.0,
|
||||
"sliding_window": 131072,
|
||||
"tie_word_embeddings": false,
|
||||
"torch_dtype": "float16",
|
||||
"transformers_version": "4.52.3",
|
||||
"use_cache": true,
|
||||
"use_sliding_window": false,
|
||||
"vocab_size": 152064
|
||||
}
|
||||
14
Model_Weights/safetensors/generation_config.json
Normal file
14
Model_Weights/safetensors/generation_config.json
Normal file
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"bos_token_id": 151643,
|
||||
"do_sample": true,
|
||||
"eos_token_id": [
|
||||
151645,
|
||||
151643
|
||||
],
|
||||
"pad_token_id": 151643,
|
||||
"repetition_penalty": 1.05,
|
||||
"temperature": 0.7,
|
||||
"top_k": 20,
|
||||
"top_p": 0.8,
|
||||
"transformers_version": "4.52.3"
|
||||
}
|
||||
151388
Model_Weights/safetensors/merges.txt
Normal file
151388
Model_Weights/safetensors/merges.txt
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d04233782f31a3a17f60355f213cf1370d89b0c6591c99612859b9f2d686f34f
|
||||
size 4877660672
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:bec13e53a0d25e9aef9e98e49c36b6274846476f180c1d2e2808b990d4c639a1
|
||||
size 4932750888
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:7944c3af5812335838b66c504f8e710e31b07d365b51637558ef87e1435eceff
|
||||
size 4330865088
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:294ad4d0c516b67e1edd95e135094f23db0e9c09b0494f3cc928535ea46c8673
|
||||
size 1089994880
|
||||
346
Model_Weights/safetensors/model.safetensors.index.json
Normal file
346
Model_Weights/safetensors/model.safetensors.index.json
Normal file
@@ -0,0 +1,346 @@
|
||||
{
|
||||
"metadata": {
|
||||
"total_size": 15231233024
|
||||
},
|
||||
"weight_map": {
|
||||
"lm_head.weight": "model-00004-of-00004.safetensors",
|
||||
"model.embed_tokens.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.18.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.19.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.19.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.19.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.19.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.8.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.8.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.8.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.8.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.norm.weight": "model-00003-of-00004.safetensors"
|
||||
}
|
||||
}
|
||||
31
Model_Weights/safetensors/special_tokens_map.json
Normal file
31
Model_Weights/safetensors/special_tokens_map.json
Normal file
@@ -0,0 +1,31 @@
|
||||
{
|
||||
"additional_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"eos_token": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"pad_token": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
||||
3
Model_Weights/safetensors/tokenizer.json
Normal file
3
Model_Weights/safetensors/tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
|
||||
size 11421896
|
||||
207
Model_Weights/safetensors/tokenizer_config.json
Normal file
207
Model_Weights/safetensors/tokenizer_config.json
Normal file
@@ -0,0 +1,207 @@
|
||||
{
|
||||
"add_bos_token": false,
|
||||
"add_prefix_space": false,
|
||||
"added_tokens_decoder": {
|
||||
"151643": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151644": {
|
||||
"content": "<|im_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151645": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151646": {
|
||||
"content": "<|object_ref_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151647": {
|
||||
"content": "<|object_ref_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151648": {
|
||||
"content": "<|box_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151649": {
|
||||
"content": "<|box_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151650": {
|
||||
"content": "<|quad_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151651": {
|
||||
"content": "<|quad_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151652": {
|
||||
"content": "<|vision_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151653": {
|
||||
"content": "<|vision_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151654": {
|
||||
"content": "<|vision_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151655": {
|
||||
"content": "<|image_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151656": {
|
||||
"content": "<|video_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151657": {
|
||||
"content": "<tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151658": {
|
||||
"content": "</tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151659": {
|
||||
"content": "<|fim_prefix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151660": {
|
||||
"content": "<|fim_middle|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151661": {
|
||||
"content": "<|fim_suffix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151662": {
|
||||
"content": "<|fim_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151663": {
|
||||
"content": "<|repo_name|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151664": {
|
||||
"content": "<|file_sep|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
}
|
||||
},
|
||||
"additional_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"bos_token": null,
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "<|im_end|>",
|
||||
"errors": "replace",
|
||||
"extra_special_tokens": {},
|
||||
"model_max_length": 131072,
|
||||
"pad_token": "<|endoftext|>",
|
||||
"split_special_tokens": false,
|
||||
"tokenizer_class": "Qwen2Tokenizer",
|
||||
"unk_token": null
|
||||
}
|
||||
1
Model_Weights/safetensors/vocab.json
Normal file
1
Model_Weights/safetensors/vocab.json
Normal file
File diff suppressed because one or more lines are too long
156
README.md
Normal file
156
README.md
Normal file
@@ -0,0 +1,156 @@
|
||||
---
|
||||
license: apache-2.0
|
||||
tags:
|
||||
- materialsanalyst-ai-7b
|
||||
- MaterialsAnalyst-AI-7B
|
||||
- materials-science
|
||||
- computational-materials
|
||||
- materials-analysis
|
||||
- chain-of-thought
|
||||
- reasoning-model
|
||||
- property-prediction
|
||||
- materials-discovery
|
||||
- crystal-structure
|
||||
- materials-informatics
|
||||
- scientific-ai
|
||||
- 7b
|
||||
- text-generation
|
||||
- gguf
|
||||
- quantized
|
||||
- fine-tuned
|
||||
- lora
|
||||
- peft
|
||||
- json-mode
|
||||
- structured-output
|
||||
- materials-engineering
|
||||
- band-gap-prediction
|
||||
- computational-chemistry
|
||||
- materials-characterization
|
||||
base_model:
|
||||
- Qwen/Qwen2.5-7B-Instruct
|
||||
language:
|
||||
- en
|
||||
---
|
||||
|
||||

|
||||
MaterialsAnalyst-AI-7B transforms raw materials data into actionable insights. This **open source** model delivers transparent, step-by-step reasoning followed by structured analysis and practical application recommendations.
|
||||
|
||||
## Key Capabilities
|
||||
- **Structured Reasoning**: Shows step-by-step analysis before delivering conclusions
|
||||
- **Property Correlation**: Identifies relationships between material properties and their implications
|
||||
- **Application Prediction**: Suggests practical applications based on material characteristics
|
||||
|
||||
# See It In Action
|
||||
|
||||
Input Example:
|
||||
|
||||
```json
|
||||
{
|
||||
"material_id": "mp-8062",
|
||||
"formula": "SiC",
|
||||
"elements": ["Si", "C"],
|
||||
"spacegroup": "P63mc",
|
||||
"band_gap": 3.26,
|
||||
"formation_energy_per_atom": -0.73,
|
||||
"density": 3.21,
|
||||
"volume": 41.2,
|
||||
"nsites": 8,
|
||||
"is_stable": true,
|
||||
"elastic_modulus": 448,
|
||||
"bulk_modulus": 220,
|
||||
"thermal_expansion": 4.2e-06,
|
||||
"electron_affinity": 4.0,
|
||||
"ionization_energy": 6.7,
|
||||
"crystal_system": "Hexagonal",
|
||||
"magnetic_property": "Non-magnetic",
|
||||
"thermal_conductivity": 490,
|
||||
"specific_heat": 0.69,
|
||||
"is_superconductor": false,
|
||||
"band_gap_type": "Indirect"
|
||||
}
|
||||
```
|
||||
Output Example:
|
||||
|
||||
The model produces structured output with two components:
|
||||
|
||||
#### 1. Step-by-Step Reasoning
|
||||
~~~
|
||||
<think>
|
||||
I need to think through how to interpret this SiC entry and assess its characteristics.
|
||||
First, I should examine the composition and structure. This material contains silicon (Si) and carbon (C) in a hexagonal crystal structure with space group P63mc. This structure suggests high symmetry and anisotropic properties, typical for wide-bandgap materials like SiC.
|
||||
Next, I should analyze the electronic properties. With a band gap of 3.26 eV that is indirect, this material would be classified as a wide-bandgap semiconductor. This suggests applications in optoelectronics, high-power electronics, and transparent conductive oxides.
|
||||
For thermodynamic stability, the formation energy per atom of -0.73 eV indicates strong bonding and thermodynamic favorability. The material is stable according to the data, which means it can be synthesized and used under practical conditions without decomposition.
|
||||
The mechanical properties show a high elastic modulus (448 GPa) and bulk modulus (220 GPa). This suggests the material would be very stiff and resistant to deformation when subjected to stress or pressure.
|
||||
Regarding thermal behavior, the thermal conductivity of 490 W/m·K and expansion coefficient of 4.2e-6 K^-1 indicate excellent heat dissipation and dimensional stability under temperature changes. This would make it suitable for high-temperature applications like thermal management in electronics or coatings.
|
||||
The magnetic properties are listed as non-magnetic, which means it won't interact significantly with magnetic fields. This would affect applications in magneto-electronics but is generally beneficial for general semiconductor uses.
|
||||
Additionally, the material is marked as not superconducting, which is significant because it rules out cryogenic or quantum computing applications where superconductivity is required.
|
||||
Considering all these properties together, this material would be particularly valuable for high-power electronics, optoelectronics, and thermal management due to its combination of wide bandgap, high thermal conductivity, and mechanical robustness. It might compete with other materials like GaN or AlN but offers advantages in cost (Si-based) and thermal stability.
|
||||
</think>
|
||||
~~~
|
||||
|
||||
#### 2. Comprehensive Analysis
|
||||
~~~
|
||||
<answer>
|
||||
**Summary of SiC (ID: mp-8062):**
|
||||
1. **Material Type & Composition**
|
||||
- Silicon carbide (SiC), a widely studied wide-bandgap semiconductor compound.
|
||||
2. **Crystal Structure**
|
||||
- Hexagonal structure with space group P63mc, indicating anisotropic properties and high symmetry.
|
||||
3. **Electronic Properties**
|
||||
- Indirect bandgap of 3.26 eV, suitable for optoelectronics and high-power devices.
|
||||
4. **Thermodynamic Stability**
|
||||
- Highly stable (formation energy: -0.73 eV/atom), making it practical for synthesis and use.
|
||||
5. **Mechanical Properties**
|
||||
- Exceptional stiffness (elastic modulus: 448 GPa) and resistance to compression (bulk modulus: 220 GPa).
|
||||
6. **Thermal Properties**
|
||||
- Outstanding thermal conductivity (490 W/m·K) and low expansion coefficient (4.2e-6 K^-1), ideal for heat dissipation.
|
||||
7. **Magnetic & Conductive Properties**
|
||||
- Non-magnetic and non-superconducting, limiting use in magnetic applications but beneficial for general electronics.
|
||||
8. **Applications**
|
||||
- High-power electronics, optoelectronics, thermal management systems, and abrasion-resistant coatings.
|
||||
**Verdict**: SiC is a high-performance material with exceptional thermal, mechanical, and electronic properties, making it ideal for demanding applications like power devices and high-temperature environments. Its stability and robustness give it an edge over competing wide-bandgap materials.
|
||||
</answer>
|
||||
~~~
|
||||
|
||||
# Getting Started
|
||||
|
||||
## 1. Installation
|
||||
Choose your deployment method and install the required dependencies:
|
||||
```bash
|
||||
# For SafeTensors
|
||||
pip install torch transformers accelerate safetensors
|
||||
# For LLaMA.cpp
|
||||
pip install llama-cpp-python
|
||||
```
|
||||
|
||||
## 2. Configuration
|
||||
<u>Download</u> and edit your chosen inference script to customize the analysis:
|
||||
- **Input data**: Update the `JSON_INPUT` variable with your materials data
|
||||
- **Model location**: Set the `model_path` variable to your downloaded model: the model directory for SafeTensors, or the path to the `.gguf` file itself for LLaMA.cpp
|
||||
|
||||
## 3. Running Analysis
|
||||
Run your script and the analysis results will appear in the terminal:
|
||||
```bash
|
||||
# For SafeTensors
|
||||
python Inference_safetensors.py
|
||||
# For LLaMA.cpp
|
||||
python Inference_llama.cpp.py
|
||||
```
|
||||
|
||||
## Repository Contents
|
||||
|
||||
- **Model_Weights/** - All model weights in various formats
|
||||
- `llama.cpp/` - LLaMA.cpp compatible weights with various quantization options available
|
||||
- `safetensors/` - SafeTensors format models
|
||||
- `LoRA_adapter/` - LoRA adapter weights
|
||||
- **Scripts/** - <u>Ready-to-use inference scripts</u>
|
||||
- `Inference_llama.cpp.py` - For LLaMA.cpp deployment
|
||||
- `Inference_safetensors.py` - For SafeTensors deployment
|
||||
- **Data/** - Training data
|
||||
- `Train-Ready.jsonl` - Complete JSONL training dataset
|
||||
- **Training/** - Training documentation and logs
|
||||
- `Training_Logs.txt` - Complete terminal logs from the training process
|
||||
- `Training_Documentation.txt` - Detailed training specifications and parameters
|
||||
|
||||
## Attribution
|
||||
MaterialsAnalyst-AI-7B was developed by *Raymond Lee*. If you use this model in your work, please include a reference to this repository. As of June 1st, 2026, this repository has reached **10,267** lifetime downloads. Thank you for your support!
|
||||
60
Scripts/Inference_llama.cpp.py
Normal file
60
Scripts/Inference_llama.cpp.py
Normal file
@@ -0,0 +1,60 @@
|
||||
from llama_cpp import Llama
|
||||
|
||||
# INSTRUCTIONS: Replace the JSON below with your material's properties
|
||||
# Common data sources: materialsproject.org, DFT calculations, experimental databases
|
||||
|
||||
JSON_INPUT = """
|
||||
{
|
||||
"material_id": "mp-8062",
|
||||
"formula": "SiC",
|
||||
"elements": [
|
||||
"Si",
|
||||
"C"
|
||||
],
|
||||
"spacegroup": "P63mc",
|
||||
"band_gap": 3.26,
|
||||
"formation_energy_per_atom": -0.73,
|
||||
"density": 3.21,
|
||||
"volume": 41.2,
|
||||
"nsites": 8,
|
||||
"is_stable": true,
|
||||
"elastic_modulus": 448,
|
||||
"bulk_modulus": 220,
|
||||
"thermal_expansion": 4.2e-06,
|
||||
"electron_affinity": 4.0,
|
||||
"ionization_energy": 6.7,
|
||||
"crystal_system": "Hexagonal",
|
||||
"magnetic_property": "Non-magnetic",
|
||||
"thermal_conductivity": 490,
|
||||
"specific_heat": 0.69,
|
||||
"is_superconductor": false,
|
||||
"band_gap_type": "Indirect"
|
||||
}
|
||||
"""
|
||||
|
||||
from pathlib import Path

# Path to a GGUF weights file, or to a directory containing exactly one
# ``.gguf`` file. Llama() expects the path of a single model file, so a
# directory is resolved to the file inside it before the model is created.
model_path = "./"

_model_location = Path(model_path)
if _model_location.is_dir():
    _gguf_files = sorted(_model_location.glob("*.gguf"))
    if len(_gguf_files) != 1:
        # Fail early with an actionable message instead of an opaque loader error.
        raise FileNotFoundError(
            f"Expected exactly one .gguf file in {model_path!r}, found "
            f"{len(_gguf_files)}; set model_path to the .gguf file to load."
        )
    model_path = str(_gguf_files[0])

llm = Llama(
    model_path=model_path,
    n_gpu_layers=29,
    n_ctx=10000,
    n_threads=4,
)

# Wrap the materials record in a single-turn USER/ASSISTANT prompt.
topic = JSON_INPUT.strip()
prompt = f"USER: {topic}\nASSISTANT:"

output = llm(
    prompt,
    max_tokens=3000,
    temperature=0.7,
    top_p=0.9,
    repeat_penalty=1.1,
)

# Fall back to an empty choice when "choices" is missing or empty, so the
# script prints an empty string instead of raising IndexError.
choices = output.get("choices") or [{}]
result = choices[0].get("text", "").strip()

print(result)
|
||||
|
||||
|
||||
83
Scripts/Inference_safetensors.py
Normal file
83
Scripts/Inference_safetensors.py
Normal file
@@ -0,0 +1,83 @@
|
||||
import torch
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
|
||||
# INSTRUCTIONS: Replace the JSON below with your material's properties
|
||||
# Common data sources: materialsproject.org, DFT calculations, experimental databases
|
||||
|
||||
JSON_INPUT = """
|
||||
{
|
||||
"material_id": "mp-8062",
|
||||
"formula": "SiC",
|
||||
"elements": [
|
||||
"Si",
|
||||
"C"
|
||||
],
|
||||
"spacegroup": "P63mc",
|
||||
"band_gap": 3.26,
|
||||
"formation_energy_per_atom": -0.73,
|
||||
"density": 3.21,
|
||||
"volume": 41.2,
|
||||
"nsites": 8,
|
||||
"is_stable": true,
|
||||
"elastic_modulus": 448,
|
||||
"bulk_modulus": 220,
|
||||
"thermal_expansion": 4.2e-06,
|
||||
"electron_affinity": 4.0,
|
||||
"ionization_energy": 6.7,
|
||||
"crystal_system": "Hexagonal",
|
||||
"magnetic_property": "Non-magnetic",
|
||||
"thermal_conductivity": 490,
|
||||
"specific_heat": 0.69,
|
||||
"is_superconductor": false,
|
||||
"band_gap_type": "Indirect"
|
||||
}
|
||||
"""
|
||||
|
||||
def load_model(model_path):
    """Load the causal LM and its tokenizer from ``model_path``.

    The model is requested in float16 with ``device_map="auto"``; both loaders
    are called with ``trust_remote_code=True``.

    Args:
        model_path: Location passed unchanged to both ``from_pretrained`` calls.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    shared_options = {"trust_remote_code": True}

    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype=torch.float16,
        device_map="auto",
        **shared_options,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_path, **shared_options)

    return model, tokenizer
|
||||
|
||||
def generate_response(model, tokenizer, topic):
    """Generate the model's analysis for one materials record.

    Args:
        model: Object exposing ``device`` and ``generate(**inputs, ...)``.
        tokenizer: Callable that encodes the prompt (its result must support
            ``.to(device)``, ``**`` unpacking and ``["input_ids"]``) and
            provides ``decode``.
        topic: The materials record as text; surrounding whitespace is ignored.

    Returns:
        The decoded continuation only, stripped of surrounding whitespace.
    """
    prompt = f"USER: {topic.strip()}\nASSISTANT:"

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=3000,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.1,
        do_sample=True,
    )

    # The returned sequence starts with the prompt tokens. Slice them off by
    # length rather than splitting the decoded text on "ASSISTANT:", which
    # truncated any reply that itself contained that marker.
    prompt_length = inputs["input_ids"].shape[1]
    completion_ids = outputs[0][prompt_length:]

    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()
|
||||
|
||||
def run():
    """Load the model, analyse ``JSON_INPUT`` and print the generated text."""
    model_path = "./"  # Path to the directory containing your model weight files

    print(generate_response(*load_model(model_path), JSON_INPUT))


if __name__ == "__main__":
    run()
|
||||
|
||||
|
||||
63
Training/Training_Documentation.txt
Normal file
63
Training/Training_Documentation.txt
Normal file
@@ -0,0 +1,63 @@
|
||||
MaterialsAnalyst-AI-7B Training Documentation
|
||||
================================================
|
||||
|
||||
Model Training Details
|
||||
---------------------
|
||||
|
||||
Base Model: Qwen2.5-7B-Instruct (Qwen/Qwen2.5-7B-Instruct)
|
||||
Fine-tuning Method: LoRA (Low-Rank Adaptation)
|
||||
Training Infrastructure: Single NVIDIA A100 SXM4 GPU
|
||||
Training Duration: Approximately 5.4 hours
|
||||
Training Dataset: Custom curated dataset for materials analysis
|
||||
|
||||
Dataset Specifications
|
||||
---------------------
|
||||
|
||||
Total Token Count: 6,292,692
|
||||
Total Sample Count: 6,000
|
||||
Average Tokens/Sample: 1048.78
|
||||
Max Token Count: 1,289
|
||||
Min Token Count: 922
|
||||
Tokens Counted Using: tiktoken (cl100k_base encoding)
|
||||
Dataset Creation: Generated using DeepSeekV3 API
|
||||
|
||||
Training Configuration
|
||||
---------------------
|
||||
|
||||
LoRA Parameters:
|
||||
- Rank: 32
|
||||
- Alpha: 64
|
||||
- Dropout: 0.1
|
||||
- Target Modules: q_proj, k_proj, v_proj, o_proj, gate_proj, up_proj, down_proj, lm_head
|
||||
|
||||
Training Hyperparameters:
|
||||
- Learning Rate: 5e-5
|
||||
- Batch Size: 4
|
||||
- Gradient Accumulation: 5
|
||||
- Effective Batch Size: 20
|
||||
- Max Sequence Length: 2048
|
||||
- Epochs: 3
|
||||
- Warmup Ratio: 0.01
|
||||
- Weight Decay: 0.01
|
||||
- Max Grad Norm: 1.0
|
||||
- LR Scheduler: Cosine
|
||||
|
||||
Hardware & Environment
|
||||
---------------------
|
||||
|
||||
GPU: NVIDIA A100 SXM4 (40GB)
|
||||
Operating System: Ubuntu
|
||||
CUDA Version: 11.8
|
||||
PyTorch Version: 2.7.0
|
||||
Compute Capability: 8.0
|
||||
Optimization: FP16, Gradient Checkpointing
|
||||
|
||||
Training Performance
|
||||
---------------------
|
||||
|
||||
Training Runtime: 5.37 hours (19,348 seconds)
|
||||
Train Samples/Second: 0.884
|
||||
Train Steps/Second: 0.044
|
||||
Training Loss (Final): 0.170
|
||||
Validation Loss (Final): 0.136
|
||||
Total Training Steps: 855
|
||||
109
Training/Training_Logs.txt
Normal file
109
Training/Training_Logs.txt
Normal file
@@ -0,0 +1,109 @@
|
||||
Loading tokenizer...
|
||||
Loading dataset from ./Dataset.jsonl
|
||||
Loaded 6000 samples
|
||||
Training on 5700 samples, validating on 300 samples
|
||||
Loading model...
|
||||
Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:04<00:00, 1.23s/it]
|
||||
Trainable parameters: 85,721,088 (1.11% of 7,701,337,600)
|
||||
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
|
||||
Starting training...
|
||||
{'loss': 1.0399, 'grad_norm': 0.704595148563385, 'learning_rate': 5e-05, 'epoch': 0.04}
|
||||
{'loss': 0.5935, 'grad_norm': 0.47508421540260315, 'learning_rate': 4.998276468898823e-05, 'epoch': 0.07}
|
||||
{'loss': 0.3596, 'grad_norm': 0.313719779253006, 'learning_rate': 4.993108252042854e-05, 'epoch': 0.11}
|
||||
{'loss': 0.2979, 'grad_norm': 0.31504514813423157, 'learning_rate': 4.9845024754980876e-05, 'epoch': 0.14}
|
||||
{'loss': 0.2571, 'grad_norm': 0.3241384029388428, 'learning_rate': 4.97247100512334e-05, 'epoch': 0.18}
|
||||
{'loss': 0.2425, 'grad_norm': 0.31259262561798096, 'learning_rate': 4.9570304302093216e-05, 'epoch': 0.21}
|
||||
{'loss': 0.2251, 'grad_norm': 0.34634077548980713, 'learning_rate': 4.938202040604898e-05, 'epoch': 0.25}
|
||||
{'loss': 0.2125, 'grad_norm': 0.33605319261550903, 'learning_rate': 4.916011797362123e-05, 'epoch': 0.28}
|
||||
{'loss': 0.2091, 'grad_norm': 0.3671645522117615, 'learning_rate': 4.890490296940496e-05, 'epoch': 0.32}
|
||||
{'loss': 0.1996, 'grad_norm': 0.3266342580318451, 'learning_rate': 4.861672729019797e-05, 'epoch': 0.35}
|
||||
{'loss': 0.1901, 'grad_norm': 0.37788864970207214, 'learning_rate': 4.829598827979682e-05, 'epoch': 0.39}
|
||||
{'loss': 0.183, 'grad_norm': 0.3393491208553314, 'learning_rate': 4.794312818112935e-05, 'epoch': 0.42}
|
||||
{'loss': 0.1823, 'grad_norm': 0.34580835700035095, 'learning_rate': 4.755863352647909e-05, 'epoch': 0.46}
|
||||
{'loss': 0.183, 'grad_norm': 0.33839499950408936, 'learning_rate': 4.7143034466642464e-05, 'epoch': 0.49}
|
||||
{'loss': 0.1837, 'grad_norm': 0.3656191825866699, 'learning_rate': 4.669690403994367e-05, 'epoch': 0.53}
|
||||
{'loss': 0.1711, 'grad_norm': 0.3499873876571655, 'learning_rate': 4.622085738211518e-05, 'epoch': 0.56}
|
||||
{'loss': 0.1738, 'grad_norm': 0.34751611948013306, 'learning_rate': 4.57155508781333e-05, 'epoch': 0.6}
|
||||
{'loss': 0.1716, 'grad_norm': 0.3428999185562134, 'learning_rate': 4.518168125717824e-05, 'epoch': 0.63}
|
||||
{'loss': 0.174, 'grad_norm': 0.37544649839401245, 'learning_rate': 4.4619984631966524e-05, 'epoch': 0.67}
|
||||
{'loss': 0.1673, 'grad_norm': 0.32618045806884766, 'learning_rate': 4.403123548378055e-05, 'epoch': 0.7}
|
||||
{'loss': 0.164, 'grad_norm': 0.3500118851661682, 'learning_rate': 4.341624559459447e-05, 'epoch': 0.74}
|
||||
{'loss': 0.1656, 'grad_norm': 0.35224637389183044, 'learning_rate': 4.2775862927769025e-05, 'epoch': 0.77}
|
||||
{'loss': 0.1641, 'grad_norm': 0.3303431570529938, 'learning_rate': 4.2110970458858546e-05, 'epoch': 0.81}
|
||||
{'loss': 0.1628, 'grad_norm': 0.3580450117588043, 'learning_rate': 4.1422484958142326e-05, 'epoch': 0.84}
|
||||
{'loss': 0.1637, 'grad_norm': 0.3214457035064697, 'learning_rate': 4.071135572655892e-05, 'epoch': 0.88}
|
||||
{'loss': 0.1626, 'grad_norm': 0.3646449148654938, 'learning_rate': 3.99785632867864e-05, 'epoch': 0.91}
|
||||
{'loss': 0.1629, 'grad_norm': 0.32008472084999084, 'learning_rate': 3.922511803127329e-05, 'epoch': 0.95}
|
||||
{'loss': 0.1563, 'grad_norm': 0.33964064717292786, 'learning_rate': 3.845205882908432e-05, 'epoch': 0.98}
|
||||
{'eval_loss': 0.15612632036209106, 'eval_runtime': 106.8808, 'eval_samples_per_second': 2.807, 'eval_steps_per_second': 0.356, 'epoch': 1.0}
|
||||
33%|█████████████████████████████████████████▎ | 285/855 [1:47:02<3:31:58, 22.31s/it/venv/main/lib/python3.10/site-packages/peft/utils/save_and_load.py:220: UserWarning: Setting `save_embedding_layers` to `True` as embedding layers found in `target_modules`.
|
||||
warnings.warn("Setting `save_embedding_layers` to `True` as embedding layers found in `target_modules`.")
|
||||
{'loss': 0.1529, 'grad_norm': 0.33749136328697205, 'learning_rate': 3.766045159348191e-05, 'epoch': 1.02}
|
||||
{'loss': 0.1499, 'grad_norm': 0.36011260747909546, 'learning_rate': 3.685138781221844e-05, 'epoch': 1.05}
|
||||
{'loss': 0.1473, 'grad_norm': 0.35543763637542725, 'learning_rate': 3.6025983042565795e-05, 'epoch': 1.09}
|
||||
{'loss': 0.1463, 'grad_norm': 0.34852930903434753, 'learning_rate': 3.51853753731572e-05, 'epoch': 1.12}
|
||||
{'loss': 0.1479, 'grad_norm': 0.35828718543052673, 'learning_rate': 3.433072385476237e-05, 'epoch': 1.16}
|
||||
{'loss': 0.1468, 'grad_norm': 0.33792921900749207, 'learning_rate': 3.3463206902159395e-05, 'epoch': 1.19}
|
||||
{'loss': 0.1489, 'grad_norm': 0.34330445528030396, 'learning_rate': 3.2584020669307146e-05, 'epoch': 1.23}
|
||||
{'loss': 0.1501, 'grad_norm': 0.33124926686286926, 'learning_rate': 3.169437740005849e-05, 'epoch': 1.26}
|
||||
{'loss': 0.1434, 'grad_norm': 0.33036795258522034, 'learning_rate': 3.079550375668821e-05, 'epoch': 1.3}
|
||||
{'loss': 0.1468, 'grad_norm': 0.33003684878349304, 'learning_rate': 2.9888639128540615e-05, 'epoch': 1.33}
|
||||
{'loss': 0.1455, 'grad_norm': 0.3746543824672699, 'learning_rate': 2.8975033923128642e-05, 'epoch': 1.37}
|
||||
{'loss': 0.1489, 'grad_norm': 0.3552297353744507, 'learning_rate': 2.8055947842040862e-05, 'epoch': 1.4}
|
||||
{'loss': 0.1436, 'grad_norm': 0.35433629155158997, 'learning_rate': 2.713264814403362e-05, 'epoch': 1.44}
|
||||
{'loss': 0.1477, 'grad_norm': 0.36622723937034607, 'learning_rate': 2.6206407897703095e-05, 'epoch': 1.47}
|
||||
{'loss': 0.1428, 'grad_norm': 0.32289159297943115, 'learning_rate': 2.5278504226146636e-05, 'epoch': 1.51}
|
||||
{'loss': 0.143, 'grad_norm': 0.3453490734100342, 'learning_rate': 2.4350216546033738e-05, 'epoch': 1.54}
|
||||
{'loss': 0.1398, 'grad_norm': 0.35188964009284973, 'learning_rate': 2.3422824803514384e-05, 'epoch': 1.58}
|
||||
{'loss': 0.1415, 'grad_norm': 0.36733925342559814, 'learning_rate': 2.2497607709397543e-05, 'epoch': 1.61}
|
||||
{'loss': 0.1424, 'grad_norm': 0.34543827176094055, 'learning_rate': 2.1575840976032867e-05, 'epoch': 1.65}
|
||||
{'loss': 0.1401, 'grad_norm': 0.34187060594558716, 'learning_rate': 2.0658795558326743e-05, 'epoch': 1.68}
|
||||
{'loss': 0.1366, 'grad_norm': 0.386068731546402, 'learning_rate': 1.974773590131805e-05, 'epoch': 1.72}
|
||||
{'loss': 0.1404, 'grad_norm': 0.34624332189559937, 'learning_rate': 1.884391819672991e-05, 'epoch': 1.75}
|
||||
{'loss': 0.1405, 'grad_norm': 0.32291245460510254, 'learning_rate': 1.794858865090123e-05, 'epoch': 1.79}
|
||||
{'loss': 0.1422, 'grad_norm': 0.3514968454837799, 'learning_rate': 1.7062981766486437e-05, 'epoch': 1.82}
|
||||
{'loss': 0.1389, 'grad_norm': 0.3431772291660309, 'learning_rate': 1.618831864029251e-05, 'epoch': 1.86}
|
||||
{'loss': 0.1363, 'grad_norm': 0.3377828896045685, 'learning_rate': 1.5325805279600286e-05, 'epoch': 1.89}
|
||||
{'loss': 0.1373, 'grad_norm': 0.3381596803665161, 'learning_rate': 1.447663093929163e-05, 'epoch': 1.93}
|
||||
{'loss': 0.1387, 'grad_norm': 0.3349764049053192, 'learning_rate': 1.3641966482075208e-05, 'epoch': 1.96}
|
||||
{'loss': 0.1376, 'grad_norm': 0.3536580801010132, 'learning_rate': 1.282296276407189e-05, 'epoch': 2.0}
|
||||
{'eval_loss': 0.13975860178470612, 'eval_runtime': 106.9279, 'eval_samples_per_second': 2.806, 'eval_steps_per_second': 0.355, 'epoch': 2.0}
|
||||
67%|██████████████████████████████████████████████████████████████████████████████████▋ | 570/855 [3:35:09<1:46:05, 22.33s/it/venv/main/lib/python3.10/site-packages/peft/utils/save_and_load.py:220: UserWarning: Setting `save_embedding_layers` to `True` as embedding layers found in `target_modules`.
|
||||
warnings.warn("Setting `save_embedding_layers` to `True` as embedding layers found in `target_modules`.")
|
||||
{'loss': 0.1307, 'grad_norm': 0.34761759638786316, 'learning_rate': 1.2020749047985627e-05, 'epoch': 2.04}
|
||||
{'loss': 0.1275, 'grad_norm': 0.3375226557254791, 'learning_rate': 1.1236431446047985e-05, 'epoch': 2.07}
|
||||
{'loss': 0.1263, 'grad_norm': 0.382931113243103, 'learning_rate': 1.0471091394883086e-05, 'epoch': 2.11}
|
||||
{'loss': 0.1299, 'grad_norm': 0.3302248418331146, 'learning_rate': 9.72578416439587e-06, 'epoch': 2.14}
|
||||
{'loss': 0.1316, 'grad_norm': 0.3511696457862854, 'learning_rate': 9.001537402739656e-06, 'epoch': 2.18}
|
||||
{'loss': 0.1333, 'grad_norm': 0.3526867926120758, 'learning_rate': 8.29934971936938e-06, 'epoch': 2.21}
|
||||
{'loss': 0.1284, 'grad_norm': 0.3544050455093384, 'learning_rate': 7.620189308133943e-06, 'epoch': 2.25}
|
||||
{'loss': 0.1278, 'grad_norm': 0.34632644057273865, 'learning_rate': 6.964992612306526e-06, 'epoch': 2.28}
|
||||
{'loss': 0.13, 'grad_norm': 0.3530580997467041, 'learning_rate': 6.334663033393229e-06, 'epoch': 2.32}
|
||||
{'loss': 0.1307, 'grad_norm': 0.4017309546470642, 'learning_rate': 5.730069685500669e-06, 'epoch': 2.35}
|
||||
{'loss': 0.1356, 'grad_norm': 0.3461137115955353, 'learning_rate': 5.1520461969797565e-06, 'epoch': 2.39}
|
||||
{'loss': 0.1276, 'grad_norm': 0.34974604845046997, 'learning_rate': 4.60138956099824e-06, 'epoch': 2.42}
|
||||
{'loss': 0.1284, 'grad_norm': 0.3364820182323456, 'learning_rate': 4.078859036626676e-06, 'epoch': 2.46}
|
||||
{'loss': 0.1252, 'grad_norm': 0.33579540252685547, 'learning_rate': 3.5851751019531088e-06, 'epoch': 2.49}
|
||||
{'loss': 0.1257, 'grad_norm': 0.3575705587863922, 'learning_rate': 3.121018460669986e-06, 'epoch': 2.53}
|
||||
{'loss': 0.1283, 'grad_norm': 0.34064194560050964, 'learning_rate': 2.687029103502972e-06, 'epoch': 2.56}
|
||||
{'loss': 0.1279, 'grad_norm': 0.337855726480484, 'learning_rate': 2.283805425775784e-06, 'epoch': 2.6}
|
||||
{'loss': 0.1285, 'grad_norm': 0.35349616408348083, 'learning_rate': 1.9119034023278637e-06, 'epoch': 2.63}
|
||||
{'loss': 0.1319, 'grad_norm': 0.34553083777427673, 'learning_rate': 1.5718358209224153e-06, 'epoch': 2.67}
|
||||
{'loss': 0.1283, 'grad_norm': 0.3534978926181793, 'learning_rate': 1.2640715752018778e-06, 'epoch': 2.7}
|
||||
{'loss': 0.1294, 'grad_norm': 0.3369212746620178, 'learning_rate': 9.890350181657126e-07, 'epoch': 2.74}
|
||||
{'loss': 0.1242, 'grad_norm': 0.3364659249782562, 'learning_rate': 7.471053770619352e-07, 'epoch': 2.77}
|
||||
{'loss': 0.128, 'grad_norm': 0.33932891488075256, 'learning_rate': 5.386162304991394e-07, 'epoch': 2.81}
|
||||
{'loss': 0.1295, 'grad_norm': 0.36040279269218445, 'learning_rate': 3.638550485000031e-07, 'epoch': 2.84}
|
||||
{'loss': 0.128, 'grad_norm': 0.3814302086830139, 'learning_rate': 2.230627961304993e-07, 'epoch': 2.88}
|
||||
{'loss': 0.1271, 'grad_norm': 0.3205372095108032, 'learning_rate': 1.1643360125123126e-07, 'epoch': 2.91}
|
||||
{'loss': 0.131, 'grad_norm': 0.37936437129974365, 'learning_rate': 4.411448684913666e-08, 'epoch': 2.95}
|
||||
{'loss': 0.1306, 'grad_norm': 0.3545021116733551, 'learning_rate': 6.205168318523802e-09, 'epoch': 2.98}
|
||||
{'eval_loss': 0.1355518102645874, 'eval_runtime': 105.9093, 'eval_samples_per_second': 2.833, 'eval_steps_per_second': 0.359, 'epoch': 3.0}
|
||||
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 855/855 [5:22:24<00:00, 22.17s/it/venv/main/lib/python3.10/site-packages/peft/utils/save_and_load.py:220: UserWarning: Setting `save_embedding_layers` to `True` as embedding layers found in `target_modules`.
|
||||
warnings.warn("Setting `save_embedding_layers` to `True` as embedding layers found in `target_modules`.")
|
||||
{'train_runtime': 19348.838, 'train_samples_per_second': 0.884, 'train_steps_per_second': 0.044, 'train_loss': 0.17026110399536223, 'epoch': 3.0}
|
||||
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 855/855 [5:22:29<00:00, 22.63s/it]
|
||||
Saving model...
|
||||
/venv/main/lib/python3.10/site-packages/peft/utils/save_and_load.py:220: UserWarning: Setting `save_embedding_layers` to `True` as embedding layers found in `target_modules`.
|
||||
warnings.warn("Setting `save_embedding_layers` to `True` as embedding layers found in `target_modules`.")
|
||||
Model saved to ./MaterialsAnalyst-AI-7B_LoRA_adapter
|
||||
Reference in New Issue
Block a user