初始化项目,由ModelHub XC社区提供模型
Model: CaffeineThief/ttp_sft_kanana-1.5_steps_tram2_step4 Source: Original Platform
This commit is contained in:
36
.gitattributes
vendored
Normal file
36
.gitattributes
vendored
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.model filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||||
133
README.md
Normal file
133
README.md
Normal file
@@ -0,0 +1,133 @@
|
|||||||
|
---
|
||||||
|
library_name: transformers
|
||||||
|
license: apache-2.0
|
||||||
|
base_model: kakaocorp/kanana-1.5-2.1b-instruct-2505
|
||||||
|
tags:
|
||||||
|
- axolotl
|
||||||
|
- generated_from_trainer
|
||||||
|
datasets:
|
||||||
|
- tram2_train_step4_re.jsonl
|
||||||
|
model-index:
|
||||||
|
- name: ttp_sft_kanana-1.5_steps_tram2_step4
|
||||||
|
  results: []
|
||||||
|
---
|
||||||
|
|
||||||
|
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
||||||
|
should probably proofread and complete it, then remove this comment. -->
|
||||||
|
|
||||||
|
[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
|
||||||
|
<details><summary>See axolotl config</summary>
|
||||||
|
|
||||||
|
axolotl version: `0.12.2`
|
||||||
|
```yaml
|
||||||
|
base_model: kakaocorp/kanana-1.5-2.1b-instruct-2505
|
||||||
|
hf_cache_dir: ../../../../data5/models
|
||||||
|
|
||||||
|
load_in_8bit: false
|
||||||
|
load_in_4bit: false
|
||||||
|
|
||||||
|
datasets:
|
||||||
|
- path: tram2_train_step4_re.jsonl
|
||||||
|
  type: chat_template
|
||||||
|
  split: train
|
||||||
|
|
||||||
|
dataset_prepared_path: preprocess
|
||||||
|
val_set_size: 0
|
||||||
|
output_dir: ./outputs-kanana-steps-tram2-step4
|
||||||
|
dataloader_num_workers: 32
|
||||||
|
|
||||||
|
sequence_len: 3072
|
||||||
|
sample_packing: false
|
||||||
|
eval_sample_packing: false
|
||||||
|
pad_to_sequence_len: false
|
||||||
|
|
||||||
|
plugins:
|
||||||
|
- axolotl.integrations.liger.LigerPlugin
|
||||||
|
liger_rope: true
|
||||||
|
liger_rms_norm: true
|
||||||
|
liger_swiglu: true
|
||||||
|
liger_fused_linear_cross_entropy: true
|
||||||
|
|
||||||
|
wandb_project: TTP_SFT_LLM_RE
|
||||||
|
wandb_entity:
|
||||||
|
wandb_watch:
|
||||||
|
wandb_name: ttp_sft_kanana-1.5_steps_tram2_step4
|
||||||
|
wandb_log_model:
|
||||||
|
hub_model_id: CaffeineThief/ttp_sft_kanana-1.5_steps_tram2_step4
|
||||||
|
hub_private_repo: false
|
||||||
|
|
||||||
|
gradient_accumulation_steps: 1
|
||||||
|
micro_batch_size: 16
|
||||||
|
num_epochs: 3
|
||||||
|
optimizer: adamw_torch_fused
|
||||||
|
lr_scheduler: cosine
|
||||||
|
learning_rate: 2e-5
|
||||||
|
|
||||||
|
bf16: auto
|
||||||
|
tf32: false
|
||||||
|
|
||||||
|
gradient_checkpointing: false
|
||||||
|
resume_from_checkpoint:
|
||||||
|
logging_steps: 1
|
||||||
|
flash_attention: true
|
||||||
|
|
||||||
|
warmup_ratio: 0.05
|
||||||
|
weight_decay: 0.01
|
||||||
|
evals_per_epoch: 1
|
||||||
|
saves_per_epoch: 1
|
||||||
|
|
||||||
|
fsdp:
|
||||||
|
- full_shard
|
||||||
|
- auto_wrap
|
||||||
|
fsdp_config:
|
||||||
|
  fsdp_state_dict_type: FULL_STATE_DICT
|
||||||
|
  fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer
|
||||||
|
  fsdp_activation_checkpointing: true
|
||||||
|
```
|
||||||
|
|
||||||
|
</details><br>
|
||||||
|
|
||||||
|
# ttp_sft_kanana-1.5_steps_tram2_step4
|
||||||
|
|
||||||
|
This model is a fine-tuned version of [kakaocorp/kanana-1.5-2.1b-instruct-2505](https://huggingface.co/kakaocorp/kanana-1.5-2.1b-instruct-2505) on the tram2_train_step4_re.jsonl dataset.
|
||||||
|
|
||||||
|
## Model description
|
||||||
|
|
||||||
|
More information needed
|
||||||
|
|
||||||
|
## Intended uses & limitations
|
||||||
|
|
||||||
|
More information needed
|
||||||
|
|
||||||
|
## Training and evaluation data
|
||||||
|
|
||||||
|
More information needed
|
||||||
|
|
||||||
|
## Training procedure
|
||||||
|
|
||||||
|
### Training hyperparameters
|
||||||
|
|
||||||
|
The following hyperparameters were used during training:
|
||||||
|
- learning_rate: 2e-05
|
||||||
|
- train_batch_size: 16
|
||||||
|
- eval_batch_size: 16
|
||||||
|
- seed: 42
|
||||||
|
- distributed_type: multi-GPU
|
||||||
|
- num_devices: 3
|
||||||
|
- total_train_batch_size: 48
|
||||||
|
- total_eval_batch_size: 48
|
||||||
|
- optimizer: adamw_torch_fused with betas=(0.9, 0.999) and epsilon=1e-08; no additional optimizer arguments
|
||||||
|
- lr_scheduler_type: cosine
|
||||||
|
- lr_scheduler_warmup_steps: 10
|
||||||
|
- training_steps: 204
|
||||||
|
|
||||||
|
### Training results
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
### Framework versions
|
||||||
|
|
||||||
|
- Transformers 4.55.2
|
||||||
|
- Pytorch 2.6.0+cu124
|
||||||
|
- Datasets 4.0.0
|
||||||
|
- Tokenizers 0.21.4
|
||||||
320
chat_template.jinja
Normal file
320
chat_template.jinja
Normal file
@@ -0,0 +1,320 @@
|
|||||||
|
{# version=v3-llama3.1 #}{%- macro append_new_param_info(param_declaration, comment_info, examples_info, depth) -%}
    {#- Renders one parameter declaration on its own line, indented by `depth` copies of the indent unit.
        When comment_info is not the "<|NONE|>" sentinel, the comment line is written first, followed by
        one "// ..." line per entry of examples_info. Examples are skipped when there is no comment. -#}
    {%- set indent = (" " * depth) if depth >= 1 else "" -%}
    {%- if comment_info != "<|NONE|>" -%}
        {{ "\n" + indent + comment_info }}
        {#- Single quotes inside an example are rewritten to double quotes. -#}
        {%- for sample in examples_info -%}
            {{ "\n" + indent + "// " + sample|string|replace("'", '"') }}
        {%- endfor -%}
    {%- endif -%}
    {{ "\n" + indent + param_declaration }}
{%- endmacro -%}
|
||||||
|
|
||||||
|
{%- macro convert_data_type(param_type) -%}
    {#- Maps the type names "integer" and "float" to "number"; any other value is echoed unchanged. -#}
    {{- "number" if param_type in ["integer", "float"] else param_type -}}
{%- endmacro -%}
|
||||||
|
|
||||||
|
{%- macro get_param_type(param) -%}
    {#- Renders the type name for a schema fragment.
        - "type" present: a non-string iterable is joined with " | ", a single value is used as is,
          and the result is passed through convert_data_type.
        - otherwise "oneOf" present: the distinct "type" values of its entries are joined with " | "
          and passed through convert_data_type.
        - neither present: "any". Previously the "any" default was assigned but never emitted on
          this path, so the macro rendered an empty string for untyped fragments. -#}
    {%- if "type" in param -%}
        {%- set raw_param_type = param["type"] -%}
        {%- if raw_param_type is iterable and raw_param_type is not string -%}
            {%- set param_type = raw_param_type | join(" | ") -%}
        {%- else -%}
            {%- set param_type = raw_param_type -%}
        {%- endif -%}
        {{ convert_data_type(param_type) }}
    {%- elif "oneOf" in param -%}
        {#- Entries without a "type" are ignored before collecting the distinct type names. -#}
        {%- set one_of_types = param["oneOf"]|selectattr("type", "defined")|list -%}
        {%- set one_of_types = one_of_types|map(attribute="type")|unique|list -%}
        {{ convert_data_type(one_of_types | join(" | ")) }}
    {%- else -%}
        {{ "any" }}
    {%- endif -%}
{%- endmacro -%}
|
||||||
|
|
||||||
|
{%- macro get_format_param(param) -%}
    {#- Renders the format annotation for a schema fragment.
        - "format" present: that value.
        - otherwise "oneOf" present: the "format" values of the entries that define one, joined with
          " or "; the "<|NONE|>" sentinel when no entry defines a format.
        - neither present: the "<|NONE|>" sentinel.
        The separator is produced by join, so it no longer depends on comparing each entry with the
        last one (which left a dangling " or " when the last entry had no format, and returned an
        empty string instead of the sentinel when no entry had one). -#}
    {%- if "format" in param -%}
        {{ param["format"] }}
    {%- elif "oneOf" in param -%}
        {%- set formats = param["oneOf"] | selectattr("format", "defined") | map(attribute="format") | list -%}
        {%- if formats | length > 0 -%}
            {{ formats | join(" or ") }}
        {%- else -%}
            {{ "<|NONE|>" }}
        {%- endif -%}
    {%- else -%}
        {{ "<|NONE|>" }}
    {%- endif -%}
{%- endmacro -%}
|
||||||
|
|
||||||
|
{%- macro get_param_info(param) -%}
    {#- Renders the "//" comment for a parameter schema, or the "<|NONE|>" sentinel when the schema
        has nothing to describe. The comment is assembled, in order, from: the description (a
        trailing "." is appended when missing), " Default=<value>." (the value is wrapped in double
        quotes when the schema "type" is "string"), " Format=<format>" and one " <Label>=<value>"
        per bound key that is present.
        Bounds are detected by key presence, matching how they are printed below; a truthiness
        check would drop a lone `minimum: 0`. -#}
    {%- set param_type = param.get("type", "any") -%}
    {%- set format_param = get_format_param(param) -%}
    {%- set bound_labels = [("maximum", "Maximum"), ("minimum", "Minimum"), ("maxLength", "Maximum length"), ("minLength", "Minimum length")] -%}
    {%- set has_bound = "maximum" in param or "minimum" in param or "maxLength" in param or "minLength" in param -%}

    {%- if "description" in param or "default" in param or format_param != "<|NONE|>" or has_bound -%}
        {{ "//" }}
        {%- if "description" in param -%}
            {%- set desc = param["description"] -%}
            {%- if not desc.endswith(".") -%}
                {%- set desc = desc + "." -%}
            {%- endif -%}
            {{ " " + desc }}
        {%- endif -%}

        {%- if "default" in param -%}
            {%- set default_value = param["default"] -%}
            {%- if param_type == "string" -%}
                {%- set default_value = '"' ~ default_value ~ '"' -%}
            {%- endif -%}
            {{ " Default=" ~ default_value ~ "." }}
        {%- endif -%}

        {%- if format_param != "<|NONE|>" -%}
            {{ " Format=" ~ format_param }}
        {%- endif -%}

        {%- for field, field_name in bound_labels -%}
            {%- if field in param -%}
                {{ " " + field_name ~ "=" ~ param[field] }}
            {%- endif -%}
        {%- endfor -%}
    {%- else -%}
        {{ "<|NONE|>"}}
    {%- endif -%}
{%- endmacro -%}
|
||||||
|
|
||||||
|
{%- macro get_enum_option_str(enum_options) -%}
    {#- Renders enum values as a " | "-separated union. String values are wrapped in double quotes;
        other values are printed as they are.
        The separator is driven by loop position: comparing each value with the last element
        dropped the separator after any earlier value equal to the last one. -#}
    {%- for v in enum_options -%}
        {%- if v is string -%}
            {{ '"' + v + '"' }}
        {%- else -%}
            {{ v }}
        {%- endif -%}
        {%- if not loop.last -%}
            {{ " | " }}
        {%- endif -%}
    {%- endfor -%}
{%- endmacro -%}
|
||||||
|
|
||||||
|
{%- macro get_array_typescript(param_name, param_dic, depth) -%}
    {#- Renders the declaration of an array-typed schema `param_dic`, optionally prefixed with
        "<param_name>: ". The element schema is read from param_dic "items":
        - empty or missing items: "[]"
        - object elements: "{" + nested properties (one level deeper) + "}[]"
        - array elements: the nested array declaration followed by "[]"
        - enum elements: "(<options>)[]"
        - anything else: "<type>[]"; only the named form of this branch ends with a comma. -#}
    {%- set indent = (" " * depth) if depth >= 1 else "" -%}
    {%- set element_schema = param_dic.get('items', {}) -%}

    {%- if element_schema|length == 0 -%}
        {{ "\n" + indent + ((param_name + ": []") if param_name else "[]") }}
    {%- else -%}
        {%- set element_type = get_param_type(element_schema) -%}
        {%- if element_type == 'object' -%}
            {{ "\n" + indent + ((param_name + ": {") if param_name else "{") }}
            {{- get_parameter_typescript(element_schema.get('properties', {}), element_schema.get('required', []), depth + 1) -}}
            {{- "\n" + indent + "}[]" }}
        {%- elif element_type == 'array' -%}
            {%- set nested = get_array_typescript(None, element_schema, depth + 1) -%}
            {%- if param_name -%}
                {#- The nested declaration is trimmed so it sits on the same line as the name. -#}
                {{ "\n" + indent + param_name + ": " + nested|trim + "[]" }}
            {%- else -%}
                {{ "\n" + nested + "[]" }}
            {%- endif -%}
        {%- elif 'enum' in element_schema -%}
            {%- set choices = get_enum_option_str(element_schema['enum']) -%}
            {%- if param_name is none -%}
                {{ "(" + choices + ")[]" }}
            {%- else -%}
                {{ "\n" + indent + param_name + ": (" + choices + ")[]" }}
            {%- endif -%}
        {%- elif param_name is none -%}
            {{ "\n" + element_type + "[]" }}
        {%- else -%}
            {{ "\n" + indent + param_name + ": " + element_type + "[]," }}
        {%- endif -%}
    {%- endif -%}
{%- endmacro -%}
|
||||||
|
|
||||||
|
{%- macro get_parameter_typescript(properties, required_params, depth=0) -%}
    {#- Renders one declaration per entry of `properties` whose value is a mapping; other values are
        skipped. A name gets a "?" suffix when required_params is iterable and does not contain it.
        Object params open a "{ ... }," block and recurse one level deeper; arrays whose items carry
        a "type" are delegated to get_array_typescript (a trailing comma is added when missing);
        everything else goes through append_new_param_info. -#}
    {%- set indent = (" " * depth) if depth >= 1 else "" -%}
    {%- for param_name, param in properties.items() -%}
        {%- if param is mapping -%}
            {%- set comment_info = get_param_info(param) -%}

            {#- Example lines: a heading entry followed by the schema's own examples. -#}
            {%- if "examples" in param -%}
                {%- set examples_info = ["Example " + param_name + ":"] + param["examples"] -%}
            {%- else -%}
                {%- set examples_info = [] -%}
            {%- endif -%}

            {%- set is_optional = required_params is iterable and param_name not in required_params -%}
            {%- set param_declaration = (param_name + "?") if is_optional else param_name -%}

            {%- set param_type = get_param_type(param) -%}
            {%- set is_object = param_type == "object" -%}
            {%- set is_typed_array = param_type == "array" and "type" in param.get("items", {}) -%}

            {#- Objects and typed arrays write their comment and example lines here; the remaining
                cases hand them to append_new_param_info instead. -#}
            {%- if is_object or is_typed_array -%}
                {%- if comment_info != "<|NONE|>" -%}
                    {{ "\n" + indent + comment_info }}
                {%- endif -%}
                {%- for sample in examples_info -%}
                    {{ "\n" + indent + "// " + sample|string|replace("'", '"') }}
                {%- endfor -%}
            {%- endif -%}

            {%- if is_object -%}
                {{ "\n" + indent + param_declaration + ": {" -}}
                {{- get_parameter_typescript(param.get("properties", {}), param.get("required", []), depth + 1) -}}
                {{- "\n" + indent + "}," }}
            {%- elif is_typed_array -%}
                {%- set array_declaration = get_array_typescript(param_declaration, param, depth) -%}
                {{ array_declaration if array_declaration.endswith(",") else array_declaration + "," }}
            {%- elif param_type == "array" -%}
                {{ append_new_param_info(param_declaration + ": [],", comment_info, examples_info, depth) }}
            {%- else -%}
                {#- An enum replaces the type name; a truthy "nullable" appends " | null". -#}
                {%- if "enum" in param -%}
                    {%- set param_type = get_enum_option_str(param["enum"]) -%}
                {%- endif -%}
                {%- if "nullable" in param and param["nullable"] -%}
                    {%- set param_type = param_type + " | null" -%}
                {%- endif -%}
                {{ append_new_param_info(param_declaration + ": " + param_type + ",", comment_info, examples_info, depth) }}
            {%- endif -%}
        {%- endif -%}
    {%- endfor -%}
{%- endmacro -%}
|
||||||
|
|
||||||
|
{%- macro generate_schema_from_functions(functions, namespace='functions') -%}
    {#- Renders a "namespace <namespace> { ... }" block with one "type <name> = ... => any;"
        declaration per function that has a name. An entry may wrap the definition under a
        "function" key or be the definition itself. Functions whose "parameters" have "properties"
        get a "(_: { ... })" argument rendered by get_parameter_typescript; the rest take no argument. -#}
    {{- "// Supported function definitions that should be called when necessary.\n" -}}
    {{- "namespace " + namespace + " {\n\n" -}}

    {%- for entry in functions -%}
        {%- set spec = entry.get("function") or entry -%}
        {%- set function_name = spec.get("name") -%}
        {%- if function_name -%}
            {%- set parameters = spec.get('parameters', {}) -%}
            {{- "// " + spec.get('description', '') + "\n" + "type " + function_name -}}
            {%- if parameters and parameters.get("properties") -%}
                {{- " = (_: {" -}}
                {{- get_parameter_typescript(parameters.get("properties"), parameters.get("required", []), 0) -}}
                {{- "\n}) => any;\n\n" -}}
            {%- else -%}
                {{- " = () => any;\n\n" -}}
            {%- endif -%}
        {%- endif -%}
    {%- endfor -%}
    {{- "} // namespace " + namespace -}}
{%- endmacro -%}
|
||||||
|
{#- Treat an undefined `tools` variable the same as an explicit none. -#}
{%- if not tools is defined -%}
    {%- set tools = none -%}
{%- endif -%}

{#- Any tool whose "type" equals "code_interpreter" switches on the "Environment: ipython" line
    and is removed from the list of functions advertised below. -#}
{%- set has_code_interpreter = tools | selectattr("type", "equalto", "code_interpreter") | list | length > 0 -%}
{%- if has_code_interpreter -%}
    {%- set tools = tools | rejectattr("type", "equalto", "code_interpreter") | list -%}
{%- endif -%}

{#- System message + builtin tools #}
{{- bos_token + "<|start_header_id|>system<|end_header_id|>\n\n" }}
{%- if has_code_interpreter %}
    {{- "Environment: ipython\n\n" }}
{%- else -%}
    {{ "\n"}}
{%- endif %}
{%- if tools %}
    {{- "\nYou have access to the following functions:\n\n" }}
    {#- Each remaining tool is listed by name and description followed by its JSON dump. Entries
        with a "type" key are read through their "function" member; others are read directly. -#}
    {%- for t in tools %}
        {%- if "type" in t -%}
            {{ "Use the function '" + t["function"]["name"] + "' to '" + t["function"]["description"] + "'\n" + t["function"] | tojson() }}
        {%- else -%}
            {{ "Use the function '" + t["name"] + "' to '" + t["description"] + "'\n" + t | tojson }}
        {%- endif -%}
        {{- "\n\n" }}
    {%- endfor %}
    {{- '\nThink very carefully before calling functions.\nIf a you choose to call a function ONLY reply in the following format:\n<{start_tag}={function_name}>{parameters}{end_tag}\nwhere\n\nstart_tag => `<function`\nparameters => a JSON dict with the function argument name as key and function argument value as value.\nend_tag => `</function>`\n\nHere is an example,\n<function=example_function_name>{"example_name": "example_value"}</function>\n\nReminder:\n- If looking for real time information use relevant functions before falling back to brave_search\n- Function calls MUST follow the specified format, start with <function= and end with </function>\n- Required parameters MUST be specified\n- Only call one function at a time\n- Put the entire function call reply on one line\n\n' -}}
{%- endif %}
{{- "<|eot_id|>" -}}

{#- Conversation turns. "user" and "system" messages keep their role header, "tool" messages are
    written under the "ipython" header, and any other role is written only when it has content
    or tool calls. -#}
{%- for message in messages -%}
    {%- if message['role'] == 'user' or message['role'] == 'system' -%}
        {{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] + '<|eot_id|>' }}
    {%- elif message['role'] == 'tool' -%}
        {{ '<|start_header_id|>ipython<|end_header_id|>\n\n' + message['content'] + '<|eot_id|>' }}
    {%- else -%}
        {%- if (message['content'] and message['content']|length > 0) or ('tool_calls' in message and message['tool_calls'] and message['tool_calls']|length > 0) -%}
            {{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'}}
        {%- endif -%}
        {%- if message['content'] and message['content']|length > 0 -%}
            {{ message['content'] }}
        {%- endif -%}
        {%- if 'tool_calls' in message and message['tool_calls'] and message['tool_calls']|length > 0 -%}
            {%- for tool_call in message['tool_calls'] -%}
                {#- Arguments may arrive as a JSON string or as a mapping. Non-strings are serialised
                    first so the string concatenation below cannot fail on them. -#}
                {%- set call_args = tool_call['function']['arguments'] -%}
                {%- if call_args is not string -%}
                    {%- set call_args = call_args | tojson -%}
                {%- endif -%}
                {%- if tool_call["function"]["name"] == "python" -%}
                    {{ '<|python_tag|>' + call_args }}
                {%- else -%}
                    {{ '<function=' + tool_call['function']['name'] + '>' + call_args + '</function>' }}
                {%- endif -%}
            {%- endfor -%}
            {#- A turn that carries tool calls is closed with <|eom_id|>; a content-only turn with <|eot_id|>. -#}
            {{ '<|eom_id|>' }}
        {%- elif message['content'] and message['content']|length > 0 -%}
            {{ '<|eot_id|>' }}
        {%- endif -%}
    {%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
    {{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
{%- endif -%}
|
||||||
30
config.json
Normal file
30
config.json
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
{
|
||||||
|
"architectures": [
|
||||||
|
"LlamaForCausalLM"
|
||||||
|
],
|
||||||
|
"attention_bias": false,
|
||||||
|
"attention_dropout": 0.0,
|
||||||
|
"bos_token_id": 128000,
|
||||||
|
"eos_token_id": 128009,
|
||||||
|
"head_dim": 128,
|
||||||
|
"hidden_act": "silu",
|
||||||
|
"hidden_size": 1792,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"intermediate_size": 8064,
|
||||||
|
"max_position_embeddings": 32768,
|
||||||
|
"mlp_bias": false,
|
||||||
|
"model_type": "llama",
|
||||||
|
"num_attention_heads": 24,
|
||||||
|
"num_hidden_layers": 32,
|
||||||
|
"num_key_value_heads": 8,
|
||||||
|
"pad_token_id": 128001,
|
||||||
|
"pretraining_tp": 1,
|
||||||
|
"rms_norm_eps": 1e-05,
|
||||||
|
"rope_scaling": null,
|
||||||
|
"rope_theta": 8000000.0,
|
||||||
|
"tie_word_embeddings": true,
|
||||||
|
"torch_dtype": "float32",
|
||||||
|
"transformers_version": "4.55.2",
|
||||||
|
"use_cache": false,
|
||||||
|
"vocab_size": 128259
|
||||||
|
}
|
||||||
8
generation_config.json
Normal file
8
generation_config.json
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 128000,
|
||||||
|
"do_sample": true,
|
||||||
|
"eos_token_id": 128009,
|
||||||
|
"pad_token_id": 128001,
|
||||||
|
"transformers_version": "4.55.2"
|
||||||
|
}
|
||||||
3
model-00001-of-00002.safetensors
Normal file
3
model-00001-of-00002.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:6e01a548a6af1a79bc95e4bd2578c306b17ad2753b72756e9df64131479cd190
|
||||||
|
size 4982330224
|
||||||
3
model-00002-of-00002.safetensors
Normal file
3
model-00002-of-00002.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:c447fc87fe7a9ec0e1bf7e8a448047815d036d4ae9454bb061adeca8ba6481cf
|
||||||
|
size 4285002608
|
||||||
299
model.safetensors.index.json
Normal file
299
model.safetensors.index.json
Normal file
@@ -0,0 +1,299 @@
|
|||||||
|
{
|
||||||
|
"metadata": {
|
||||||
|
"total_parameters": 695661590,
|
||||||
|
"total_size": 9267299328
|
||||||
|
},
|
||||||
|
"weight_map": {
|
||||||
|
"lm_head.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.embed_tokens.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.17.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.17.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.17.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.18.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.18.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.18.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.18.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.18.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.18.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.18.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.18.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.18.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.19.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.19.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.19.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.19.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.19.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.19.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.19.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.19.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.19.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.20.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.20.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.20.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.20.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.20.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.20.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.28.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.28.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.28.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.28.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.28.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.28.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.29.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.29.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.29.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.29.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.29.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.30.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.30.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.30.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.30.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.30.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.31.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.norm.weight": "model-00002-of-00002.safetensors"
|
||||||
|
}
|
||||||
|
}
|
||||||
23
special_tokens_map.json
Normal file
23
special_tokens_map.json
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
{
|
||||||
|
"bos_token": {
|
||||||
|
"content": "<|begin_of_text|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"eos_token": {
|
||||||
|
"content": "<|eot_id|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"pad_token": {
|
||||||
|
"content": "<|end_of_text|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
}
|
||||||
|
}
|
||||||
3
tokenizer.json
Normal file
3
tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:31d17e500c82ff116fc14d4dd0106a08e49bd3512d8a81473047121ef8ea09f9
|
||||||
|
size 17210524
|
||||||
2087
tokenizer_config.json
Normal file
2087
tokenizer_config.json
Normal file
File diff suppressed because it is too large
Load Diff
3
training_args.bin
Normal file
3
training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:c212f5fdfeae9001c9946b98880b98dd31f013387c8234647ce95ac5157fea39
|
||||||
|
size 7224
|
||||||
Reference in New Issue
Block a user