初始化项目,由ModelHub XC社区提供模型
Model: CaffeineThief/ttp_sft_kanana-1.5_steps_tram-step1-seed44 Source: Original Platform
This commit is contained in:
36
.gitattributes
vendored
Normal file
36
.gitattributes
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
136
README.md
Normal file
136
README.md
Normal file
@@ -0,0 +1,136 @@
|
||||
---
|
||||
library_name: transformers
|
||||
license: apache-2.0
|
||||
base_model: kakaocorp/kanana-1.5-2.1b-instruct-2505
|
||||
tags:
|
||||
- axolotl
|
||||
- generated_from_trainer
|
||||
datasets:
|
||||
- tram2_train_step1.jsonl
|
||||
model-index:
|
||||
- name: ttp_sft_kanana-1.5_steps_tram-step1-seed44
|
||||
results: []
|
||||
---
|
||||
|
||||
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
||||
should probably proofread and complete it, then remove this comment. -->
|
||||
|
||||
[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
|
||||
<details><summary>See axolotl config</summary>
|
||||
|
||||
axolotl version: `0.12.2`
|
||||
```yaml
|
||||
base_model: kakaocorp/kanana-1.5-2.1b-instruct-2505
|
||||
hf_cache_dir: ../../../../data5/models
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: false
|
||||
|
||||
datasets:
|
||||
- path: tram2_train_step1.jsonl
|
||||
type: chat_template
|
||||
split: train
|
||||
seed: 44
|
||||
|
||||
dataset_prepared_path: preprocess
|
||||
val_set_size: 0
|
||||
output_dir: ./outputs-kanana-steps-tram-step1-seed44
|
||||
dataloader_num_workers: 32
|
||||
seed: 44
|
||||
|
||||
sequence_len: 3072
|
||||
sample_packing: false
|
||||
eval_sample_packing: false
|
||||
pad_to_sequence_len: false
|
||||
|
||||
plugins:
|
||||
- axolotl.integrations.liger.LigerPlugin
|
||||
liger_rope: true
|
||||
liger_rms_norm: true
|
||||
liger_swiglu: true
|
||||
liger_fused_linear_cross_entropy: true
|
||||
|
||||
wandb_project: TTP_SFT_LLM_RE
|
||||
wandb_entity:
|
||||
wandb_watch:
|
||||
wandb_name: ttp_sft_kanana-1.5_steps_tram-step1-seed44
|
||||
wandb_log_model:
|
||||
hub_model_id: CaffeineThief/ttp_sft_kanana-1.5_steps_tram-step1-seed44
|
||||
hub_private_repo: false
|
||||
|
||||
gradient_accumulation_steps: 1
|
||||
# micro_batch_size: 16 # for 3 GPUs
|
||||
micro_batch_size: 24 # for 2 GPUs
|
||||
num_epochs: 3
|
||||
optimizer: adamw_torch_fused
|
||||
lr_scheduler: cosine
|
||||
learning_rate: 2e-5
|
||||
|
||||
bf16: auto
|
||||
tf32: false
|
||||
|
||||
gradient_checkpointing: false
|
||||
resume_from_checkpoint:
|
||||
logging_steps: 1
|
||||
flash_attention: true
|
||||
|
||||
warmup_ratio: 0.05
|
||||
weight_decay: 0.01
|
||||
evals_per_epoch: 1
|
||||
saves_per_epoch: 1
|
||||
|
||||
fsdp:
|
||||
- full_shard
|
||||
- auto_wrap
|
||||
fsdp_config:
|
||||
fsdp_state_dict_type: FULL_STATE_DICT
|
||||
fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer
|
||||
fsdp_activation_checkpointing: true
|
||||
```
|
||||
|
||||
</details><br>
|
||||
|
||||
# ttp_sft_kanana-1.5_steps_tram-step1-seed44
|
||||
|
||||
This model is a fine-tuned version of [kakaocorp/kanana-1.5-2.1b-instruct-2505](https://huggingface.co/kakaocorp/kanana-1.5-2.1b-instruct-2505) on the tram2_train_step1.jsonl dataset.
|
||||
|
||||
## Model description
|
||||
|
||||
More information needed
|
||||
|
||||
## Intended uses & limitations
|
||||
|
||||
More information needed
|
||||
|
||||
## Training and evaluation data
|
||||
|
||||
More information needed
|
||||
|
||||
## Training procedure
|
||||
|
||||
### Training hyperparameters
|
||||
|
||||
The following hyperparameters were used during training:
|
||||
- learning_rate: 2e-05
|
||||
- train_batch_size: 24
|
||||
- eval_batch_size: 24
|
||||
- seed: 44
|
||||
- distributed_type: multi-GPU
|
||||
- num_devices: 2
|
||||
- total_train_batch_size: 48
|
||||
- total_eval_batch_size: 48
|
||||
- optimizer: Use adamw_torch_fused with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
|
||||
- lr_scheduler_type: cosine
|
||||
- lr_scheduler_warmup_steps: 10
|
||||
- training_steps: 204
|
||||
|
||||
### Training results
|
||||
|
||||
|
||||
|
||||
### Framework versions
|
||||
|
||||
- Transformers 4.55.2
|
||||
- Pytorch 2.6.0+cu124
|
||||
- Datasets 4.0.0
|
||||
- Tokenizers 0.21.4
|
||||
320
chat_template.jinja
Normal file
320
chat_template.jinja
Normal file
@@ -0,0 +1,320 @@
|
||||
{# version=v3-llama3.1 #}{%- macro append_new_param_info(param_declaration, comment_info, examples_info, depth) -%}
|
||||
{%- set offset = "" -%}
|
||||
{%- if depth >= 1 -%}
|
||||
{%- set offset = " " * depth -%}
|
||||
{%- endif -%}
|
||||
{%- if comment_info != "<|NONE|>" -%}
|
||||
{{ "\n" + offset + comment_info }}
|
||||
{%- if examples_info | length > 0 -%}
|
||||
{# Append each example info #}
|
||||
{%- for example in examples_info -%}
|
||||
{{ "\n" + offset + "// " + example|string|replace("'", '"') }}
|
||||
{%- endfor -%}
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
{{ "\n" + offset + param_declaration }}
|
||||
{%- endmacro -%}
|
||||
|
||||
{%- macro convert_data_type(param_type) -%}
    {#- Render the type name to use in the generated schema: the JSON-schema
        names "integer" and "float" both become "number"; any other value is
        emitted unchanged. -#}
    {{- "number" if param_type in ["integer", "float"] else param_type -}}
{%- endmacro -%}
|
||||
|
||||
{%- macro get_param_type(param) -%}
|
||||
{%- set param_type = "any" -%}
|
||||
|
||||
{%- if "type" in param -%}
|
||||
{%- set raw_param_type = param["type"] -%}
|
||||
{%- if raw_param_type is iterable and raw_param_type is not string -%}
|
||||
{%- set param_type = raw_param_type | join(" | ") -%}
|
||||
{%- else -%}
|
||||
{%- set param_type = raw_param_type -%}
|
||||
{%- endif -%}
|
||||
{{ convert_data_type(param_type) }}
|
||||
{%- elif "oneOf" in param -%}
|
||||
{%- set one_of_types = param["oneOf"]|selectattr("type", "defined")|list -%}
|
||||
{%- set one_of_types = one_of_types|map(attribute="type")|unique|list -%}
|
||||
{{ convert_data_type(one_of_types | join(" | ")) }}
|
||||
{%- endif -%}
|
||||
{%- endmacro -%}
|
||||
|
||||
{%- macro get_format_param(param) -%}
    {#- Render the "format" hint of a parameter schema.

        * If the schema has its own "format", emit it.
        * Otherwise, if it has "oneOf", emit every alternative's "format"
          joined with " or " (alternatives without a format are skipped).
        * If no format can be found, emit the sentinel "<|NONE|>", which
          callers compare against to decide whether a format exists.

        The separator is produced by join, so it no longer depends on whether
        the last alternative carries a format or whether formats repeat. -#}
    {%- if "format" in param -%}
        {{ param["format"] }}
    {%- elif "oneOf" in param -%}
        {%- set formats = param["oneOf"] | selectattr("format", "defined") | map(attribute="format") | list -%}
        {%- if formats -%}
            {{ formats | join(" or ") }}
        {%- else -%}
            {#- No alternative declares a format: report "none" rather than an empty string. -#}
            {{ "<|NONE|>" }}
        {%- endif -%}
    {%- else -%}
        {{ "<|NONE|>" }}
    {%- endif -%}
{%- endmacro -%}
|
||||
|
||||
{%- macro get_param_info(param) -%}
    {#- Render the one-line "// ..." comment that describes a parameter schema:
        its description (with a trailing period added if missing), default
        value (string defaults are double-quoted), format, and the
        maximum / minimum / maxLength / minLength constraints.
        Emits the sentinel "<|NONE|>" when the schema has none of these. -#}
    {%- set param_type = param.get("type", "any") -%}
    {%- set format_param = get_format_param(param) -%}
    {%- set constraint_fields = [("maximum", "Maximum"), ("minimum", "Minimum"), ("maxLength", "Maximum length"), ("minLength", "Minimum length")] -%}

    {#- Constraints are checked by key presence, not truthiness, so that a
        legitimate value of 0 (e.g. "minimum": 0) still produces a comment.
        This matches the presence test used when the constraints are emitted. -#}
    {%- if "description" in param or "default" in param or format_param != "<|NONE|>" or "maximum" in param or "minimum" in param or "maxLength" in param or "minLength" in param -%}
        {{ "//" }}
        {%- if "description" in param -%}
            {%- set desc = param["description"] -%}
            {%- if not desc.endswith(".") -%}
                {%- set desc = desc + "." -%}
            {%- endif -%}
            {{ " " + desc }}
        {%- endif -%}

        {%- if "default" in param -%}
            {%- set default_value = param["default"] -%}
            {%- if param_type == "string" -%}
                {%- set default_value = '"' ~ default_value ~ '"' -%}
            {%- endif -%}
            {{ " Default=" ~ default_value ~ "." }}
        {%- endif -%}

        {%- if format_param != "<|NONE|>" -%}
            {{ " Format=" ~ format_param }}
        {%- endif -%}

        {%- for field, field_name in constraint_fields -%}
            {%- if field in param -%}
                {{ " " + field_name ~ "=" ~ param[field] }}
            {%- endif -%}
        {%- endfor -%}
    {%- else -%}
        {{ "<|NONE|>" }}
    {%- endif -%}
{%- endmacro -%}
|
||||
|
||||
{%- macro get_enum_option_str(enum_options) -%}
    {#- Render an enum as a union of literals: string values are wrapped in
        double quotes, other values are emitted as-is, and consecutive values
        are separated by " | ". -#}
    {%- for v in enum_options -%}
        {%- if v is string -%}
            {{ '"' + v + '"' }}
        {%- else -%}
            {{ v }}
        {%- endif -%}
        {#- Use the loop position, not value equality with the last element:
            comparing values drops the separator after any earlier option that
            happens to equal the final one. -#}
        {%- if not loop.last -%}
            {{ " | " }}
        {%- endif -%}
    {%- endfor -%}
{%- endmacro -%}
|
||||
|
||||
{%- macro get_array_typescript(param_name, param_dic, depth) -%}
|
||||
{%- set offset = '' -%}
|
||||
{%- if depth >= 1 -%}
|
||||
{%- set offset = " " * depth -%}
|
||||
{%- endif -%}
|
||||
{%- set items_info = param_dic.get('items', {}) -%}
|
||||
|
||||
{%- if items_info|length == 0 -%}
|
||||
{%- if param_name -%}
|
||||
{{ "\n" + offset + param_name + ": []" }}
|
||||
{%- else -%}
|
||||
{{ "\n" + offset + "[]" }}
|
||||
{%- endif -%}
|
||||
{%- else -%}
|
||||
{%- set array_type = get_param_type(items_info) -%}
|
||||
{%- if array_type == 'object' -%}
|
||||
{%- if param_name -%}
|
||||
{{ "\n" + offset + param_name + ": {" }}
|
||||
{%- else -%}
|
||||
{{ "\n" + offset + "{" }}
|
||||
{%- endif -%}
|
||||
{{ get_parameter_typescript(items_info.get('properties', {}), items_info.get('required', []), depth + 1) -}}
|
||||
{{- "\n" + offset + "}[]" }}
|
||||
{%- elif array_type == 'array' -%}
|
||||
{%- set item_info = get_array_typescript(None, items_info, depth + 1) -%}
|
||||
{%- if not param_name -%}
|
||||
{{ "\n" + item_info + "[]" }}
|
||||
{%- else -%}
|
||||
{{ "\n" + offset + param_name + ": " + item_info|trim + "[]" }}
|
||||
{%- endif -%}
|
||||
{%- else -%}
|
||||
{%- if 'enum' in items_info -%}
|
||||
{%- set item_type = get_enum_option_str(items_info['enum']) -%}
|
||||
{%- if param_name is none -%}
|
||||
{{ "(" + item_type + ")[]"}}
|
||||
{%- else -%}
|
||||
{{ "\n" + offset + param_name + ": (" + item_type + ")[]" }}
|
||||
{%- endif -%}
|
||||
{%- else -%}
|
||||
{%- if param_name is none -%}
|
||||
{{ "\n" + array_type + "[]" }}
|
||||
{%- else -%}
|
||||
{{ "\n" + offset + param_name + ": " + array_type + "[]," }}
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
{%- endmacro -%}
|
||||
|
||||
{%- macro get_parameter_typescript(properties, required_params, depth=0) -%}
|
||||
{%- set res = "" -%}
|
||||
{%- for param_name, param in properties.items() -%}
|
||||
{%- if param is mapping -%}
|
||||
{%- set comment_info = get_param_info(param) -%}
|
||||
{# Param Examples #}
|
||||
{%- set examples_info = [] -%}
|
||||
{%- if "examples" in param -%}
|
||||
{%- set examples_info = ["Example " + param_name + ":"] -%}
|
||||
{%- set examples_info = examples_info + param["examples"] -%}
|
||||
{%- endif -%}
|
||||
|
||||
{# Param Name declaration #}
|
||||
{%- set param_declaration = param_name -%}
|
||||
{%- if required_params is iterable and param_name not in required_params -%}
|
||||
{%- set param_declaration = param_declaration + "?" -%}
|
||||
{%- endif -%}
|
||||
|
||||
{%- set param_type = get_param_type(param) -%}
|
||||
|
||||
{# Handle indentation based on depth #}
|
||||
{%- set offset = "" -%}
|
||||
{%- if depth >= 1 -%}
|
||||
{%- set offset = " " * depth -%}
|
||||
{%- endif -%}
|
||||
|
||||
{%- if param_type == "object" -%}
|
||||
{%- if comment_info != "<|NONE|>" -%}
|
||||
{{ "\n" + offset + comment_info }}
|
||||
{%- endif -%}
|
||||
{%- if examples_info|length > 0 -%}
|
||||
{%- for example in examples_info -%}
|
||||
{{ "\n" + offset + "// " + example|string|replace("'", '"') }}
|
||||
{%- endfor -%}
|
||||
{%- endif -%}
|
||||
{%- set param_declaration = param_declaration + ": {" -%}
|
||||
{{ "\n" + offset + param_declaration -}}
|
||||
{{- get_parameter_typescript(param.get("properties", {}), param.get("required", []), depth + 1) -}}
|
||||
{{- "\n" + offset + "}," }}
|
||||
{%- elif param_type == "array" -%}
|
||||
{%- set item_info = param.get("items", {}) -%}
|
||||
{%- if "type" not in item_info -%}
|
||||
{%- set param_declaration = param_declaration + ": []," -%}
|
||||
{{ append_new_param_info(param_declaration, comment_info, examples_info, depth) }}
|
||||
{%- else -%}
|
||||
{%- if comment_info != "<|NONE|>" -%}
|
||||
{{ "\n" + offset + comment_info }}
|
||||
{%- endif -%}
|
||||
{%- if examples_info|length > 0 -%}
|
||||
{%- for example in examples_info -%}
|
||||
{{ "\n" + offset + "// " + example|string|replace("'", '"') }}
|
||||
{%- endfor -%}
|
||||
{%- endif -%}
|
||||
{%- set array_declaration = get_array_typescript(param_declaration, param, depth) -%}
|
||||
{%- if not array_declaration.endswith(",") -%}
|
||||
{%- set array_declaration = array_declaration + "," -%}
|
||||
{%- endif -%}
|
||||
{{ array_declaration}}
|
||||
{%- endif -%}
|
||||
{%- else -%}
|
||||
{%- if "enum" in param -%}
|
||||
{%- set param_type = get_enum_option_str(param["enum"]) -%}
|
||||
{%- endif -%}
|
||||
{%- if "nullable" in param and param["nullable"] -%}
|
||||
{%- set param_type = param_type + " | null" -%}
|
||||
{%- endif -%}
|
||||
{%- set param_declaration = param_declaration + ": " + param_type + "," -%}
|
||||
{{ append_new_param_info(param_declaration, comment_info, examples_info, depth) }}
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
{%- endmacro -%}
|
||||
|
||||
{%- macro generate_schema_from_functions(functions, namespace='functions') -%}
|
||||
{{ "// Supported function definitions that should be called when necessary.\n" -}}
|
||||
{{- "namespace " + namespace + " {\n\n" -}}
|
||||
|
||||
{%- for function in functions -%}
|
||||
{%- if function.get("function") -%}
|
||||
{%- set function = function.get("function") -%}
|
||||
{%- endif -%}
|
||||
|
||||
{%- set function_name = function.get("name") -%}
|
||||
{%- if function_name -%}
|
||||
{%- set description = function.get('description', '') -%}
|
||||
{%- set parameters = function.get('parameters', {}) -%}
|
||||
{{- "// " + description + "\n" -}}
|
||||
{{- "type " + function_name -}}
|
||||
{%- if parameters and parameters.get("properties") -%}
|
||||
{{- " = (_: {" -}}
|
||||
{%- set required_params = parameters.get("required", []) -%}
|
||||
{{ get_parameter_typescript(parameters.get("properties"), required_params, 0) -}}
|
||||
{{- "\n}) => any;\n\n" }}
|
||||
{%- else -%}
|
||||
{{ " = () => any;\n\n" }}
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
{{ "} // namespace " + namespace }}
|
||||
{%- endmacro -%}
|
||||
{%- if not tools is defined -%}
|
||||
{%- set tools = none -%}
|
||||
{%- endif -%}
|
||||
|
||||
{%- set has_code_interpreter = tools | selectattr("type", "equalto", "code_interpreter") | list | length > 0 -%}
|
||||
{%- if has_code_interpreter -%}
|
||||
{%- set tools = tools | rejectattr("type", "equalto", "code_interpreter") | list -%}
|
||||
{%- endif -%}
|
||||
|
||||
{#- System message + builtin tools #}
|
||||
{{- bos_token + "<|start_header_id|>system<|end_header_id|>\n\n" }}
|
||||
{%- if has_code_interpreter %}
|
||||
{{- "Environment: ipython\n\n" }}
|
||||
{%- else -%}
|
||||
{{ "\n"}}
|
||||
{%- endif %}
|
||||
{%- if tools %}
|
||||
{{- "\nYou have access to the following functions:\n\n" }}
|
||||
{%- for t in tools %}
|
||||
{%- if "type" in t -%}
|
||||
{{ "Use the function '" + t["function"]["name"] + "' to '" + t["function"]["description"] + "'\n" + t["function"] | tojson() }}
|
||||
{%- else -%}
|
||||
{{ "Use the function '" + t["name"] + "' to '" + t["description"] + "'\n" + t | tojson }}
|
||||
{%- endif -%}
|
||||
{{- "\n\n" }}
|
||||
{%- endfor %}
|
||||
{{- '\nThink very carefully before calling functions.\nIf a you choose to call a function ONLY reply in the following format:\n<{start_tag}={function_name}>{parameters}{end_tag}\nwhere\n\nstart_tag => `<function`\nparameters => a JSON dict with the function argument name as key and function argument value as value.\nend_tag => `</function>`\n\nHere is an example,\n<function=example_function_name>{"example_name": "example_value"}</function>\n\nReminder:\n- If looking for real time information use relevant functions before falling back to brave_search\n- Function calls MUST follow the specified format, start with <function= and end with </function>\n- Required parameters MUST be specified\n- Only call one function at a time\n- Put the entire function call reply on one line\n\n' -}}
|
||||
{%- endif %}
|
||||
{{- "<|eot_id|>" -}}
|
||||
|
||||
{%- for message in messages -%}
|
||||
{%- if message['role'] == 'user' or message['role'] == 'system' -%}
|
||||
{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] + '<|eot_id|>' }}
|
||||
{%- elif message['role'] == 'tool' -%}
|
||||
{{ '<|start_header_id|>ipython<|end_header_id|>\n\n' + message['content'] + '<|eot_id|>' }}
|
||||
{%- else -%}
|
||||
{%- if (message['content'] and message['content']|length > 0) or ('tool_calls' in message and message['tool_calls'] and message['tool_calls']|length > 0) -%}
|
||||
{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'}}
|
||||
{%- endif -%}
|
||||
{%- if message['content'] and message['content']|length > 0 -%}
|
||||
{{ message['content'] }}
|
||||
{%- endif -%}
|
||||
{%- if 'tool_calls' in message and message['tool_calls'] and message['tool_calls']|length > 0 -%}
|
||||
{%- for tool_call in message['tool_calls'] -%}
|
||||
{%- if tool_call["function"]["name"] == "python" -%}
|
||||
{{ '<|python_tag|>' + tool_call['function']['arguments'] }}
|
||||
{%- else -%}
|
||||
{{ '<function=' + tool_call['function']['name'] + '>' + tool_call['function']['arguments'] + '</function>' }}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
{{ '<|eom_id|>' }}
|
||||
{%- elif message['content'] and message['content']|length > 0 -%}
|
||||
{{ '<|eot_id|>' }}
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
{%- if add_generation_prompt -%}
|
||||
{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
|
||||
{%- endif -%}
|
||||
30
config.json
Normal file
30
config.json
Normal file
@@ -0,0 +1,30 @@
|
||||
{
|
||||
"architectures": [
|
||||
"LlamaForCausalLM"
|
||||
],
|
||||
"attention_bias": false,
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": 128000,
|
||||
"eos_token_id": 128009,
|
||||
"head_dim": 128,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 1792,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 8064,
|
||||
"max_position_embeddings": 32768,
|
||||
"mlp_bias": false,
|
||||
"model_type": "llama",
|
||||
"num_attention_heads": 24,
|
||||
"num_hidden_layers": 32,
|
||||
"num_key_value_heads": 8,
|
||||
"pad_token_id": 128001,
|
||||
"pretraining_tp": 1,
|
||||
"rms_norm_eps": 1e-05,
|
||||
"rope_scaling": null,
|
||||
"rope_theta": 8000000.0,
|
||||
"tie_word_embeddings": true,
|
||||
"torch_dtype": "float32",
|
||||
"transformers_version": "4.55.2",
|
||||
"use_cache": false,
|
||||
"vocab_size": 128259
|
||||
}
|
||||
8
generation_config.json
Normal file
8
generation_config.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"_from_model_config": true,
|
||||
"bos_token_id": 128000,
|
||||
"do_sample": true,
|
||||
"eos_token_id": 128009,
|
||||
"pad_token_id": 128001,
|
||||
"transformers_version": "4.55.2"
|
||||
}
|
||||
3
model-00001-of-00002.safetensors
Normal file
3
model-00001-of-00002.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:aefec96ae72e76c2a40a402b5a4a1d2a04d696a2bd49eb3992324394d7d9670d
|
||||
size 4982330224
|
||||
3
model-00002-of-00002.safetensors
Normal file
3
model-00002-of-00002.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e2a7f80a0bf8d51c89fdad2fdc0e0255b08e3a656c90b2088b12731aee29d15d
|
||||
size 4285002608
|
||||
299
model.safetensors.index.json
Normal file
299
model.safetensors.index.json
Normal file
@@ -0,0 +1,299 @@
|
||||
{
|
||||
"metadata": {
|
||||
"total_parameters": 1043492352,
|
||||
"total_size": 9267299328
|
||||
},
|
||||
"weight_map": {
|
||||
"lm_head.weight": "model-00002-of-00002.safetensors",
|
||||
"model.embed_tokens.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.17.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.17.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.17.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.18.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.18.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.18.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.18.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.18.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.18.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.18.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.18.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.18.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.19.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.19.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.19.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.19.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.19.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.19.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.19.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.19.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.19.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.20.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.20.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.20.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.20.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.20.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.20.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.28.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.28.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.28.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.28.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.28.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.28.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.29.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.29.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.29.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.29.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.29.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.30.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.30.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.30.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.30.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.30.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.31.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.norm.weight": "model-00002-of-00002.safetensors"
|
||||
}
|
||||
}
|
||||
23
special_tokens_map.json
Normal file
23
special_tokens_map.json
Normal file
@@ -0,0 +1,23 @@
|
||||
{
|
||||
"bos_token": {
|
||||
"content": "<|begin_of_text|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"eos_token": {
|
||||
"content": "<|eot_id|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"pad_token": {
|
||||
"content": "<|end_of_text|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
||||
3
tokenizer.json
Normal file
3
tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:31d17e500c82ff116fc14d4dd0106a08e49bd3512d8a81473047121ef8ea09f9
|
||||
size 17210524
|
||||
2087
tokenizer_config.json
Normal file
2087
tokenizer_config.json
Normal file
File diff suppressed because it is too large
Load Diff
3
training_args.bin
Normal file
3
training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:3fb3f3447ec4048af4198631986cbf6c67af6f8301b981960c0a848d7cf670a4
|
||||
size 7224
|
||||
Reference in New Issue
Block a user