Initialize project; model provided by the ModelHub XC community

Model: zkaedi/gemma-7b-solidity-energy-signatures
Source: Original Platform
ModelHub XC
2026-04-13 05:40:56 +08:00
commit 8f052babb8
42 changed files with 1856 additions and 0 deletions

.gitattributes vendored Normal file

@@ -0,0 +1,41 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
gemma-7b.Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
tokenizer.json filter=lfs diff=lfs merge=lfs -text
gemma-2-9b-it.Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
gemma-2-9b-it.Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
checkpoint-54/tokenizer.json filter=lfs diff=lfs merge=lfs -text
checkpoint-81/tokenizer.json filter=lfs diff=lfs merge=lfs -text
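
These rules route large binaries through Git LFS while ordinary text files stay in plain git. A minimal sketch of how such patterns classify paths; `fnmatch` only approximates gitattributes matching, and the example paths are taken from this repository's layout:

```python
from fnmatch import fnmatch
from pathlib import PurePosixPath

# Subset of the patterns above. fnmatch approximates gitattributes
# matching (no `**` support, and `*` also crosses `/`), which is close
# enough to illustrate the routing.
LFS_PATTERNS = ["*.safetensors", "*.bin", "*.pt", "*.onnx",
                "tokenizer.json", "gemma-2-9b-it.Q8_0.gguf"]

def routed_through_lfs(path: str) -> bool:
    # Slash-free gitattributes patterns match against the basename.
    name = PurePosixPath(path).name
    return any(fnmatch(path, pat) or fnmatch(name, pat) for pat in LFS_PATTERNS)

for p in ["adapter_model.safetensors", "checkpoint-81/tokenizer.json",
          "config.json", "README.md"]:
    print(f"{p:35s} -> {'LFS' if routed_through_lfs(p) else 'plain git'}")
```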

Modelfile Normal file

@@ -0,0 +1,14 @@
FROM gemma-2-9b-it.Q8_0.gguf
TEMPLATE """<start_of_turn>user
{{ if .System }}{{ .System }} {{ end }}{{ .Prompt }}<end_of_turn>
<start_of_turn>model
{{ .Response }}<end_of_turn>
"""
PARAMETER repeat_penalty 1
PARAMETER stop "<start_of_turn>"
PARAMETER stop "<end_of_turn>"
PARAMETER penalize_newline false
PARAMETER temperature 1.5
PARAMETER min_p 0.1
PARAMETER num_ctx 4096
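
To use this Modelfile, register it with a local Ollama install and run a prompt. A minimal sketch driving the `ollama` CLI from Python; the model name `gemma2-energy` and the prompt are placeholders, and the GGUF named in `FROM` is assumed to sit next to the Modelfile:

```python
import subprocess

# Build the local model from the Modelfile above (name is hypothetical).
subprocess.run(["ollama", "create", "gemma2-energy", "-f", "Modelfile"], check=True)

# Run a single example prompt against it.
result = subprocess.run(
    ["ollama", "run", "gemma2-energy",
     "Summarize the gas profile of a Solidity transfer function."],
    capture_output=True, text=True, check=True,
)
print(result.stdout)
```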

README.md Normal file

@@ -0,0 +1,63 @@
---
base_model: google/gemma-2-9b
library_name: peft
model_name: retrained_adapter_v4
tags:
- base_model:adapter:google/gemma-2-9b
- lora
- sft
- transformers
- trl
- unsloth
license: license
pipeline_tag: text-generation
---
# Model Card for retrained_adapter_v4
This model is a fine-tuned version of [google/gemma-2-9b](https://huggingface.co/google/gemma-2-9b).
It has been trained using [TRL](https://github.com/huggingface/trl).
## Quick start
```python
from transformers import pipeline
question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
generator = pipeline("text-generation", model="zkaedi/gemma-7b-solidity-energy-signatures", device="cuda")  # repo id taken from this commit's metadata
output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
print(output["generated_text"])
```
## Training procedure
This model was trained with SFT.
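A minimal sketch of what that SFT run could look like with TRL and PEFT, mirroring the LoRA settings recorded in `adapter_config.json` (r=64, alpha=128, all attention and MLP projections) and the batch size and epoch count from `trainer_state.json`; the dataset name is hypothetical:
```python
from datasets import load_dataset
from peft import LoraConfig
from trl import SFTConfig, SFTTrainer

dataset = load_dataset("username/solidity-energy-sft", split="train")  # hypothetical

peft_config = LoraConfig(
    r=64, lora_alpha=128, lora_dropout=0.0, bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
)
trainer = SFTTrainer(
    model="unsloth/gemma-2-9b-it-bnb-4bit",  # base from adapter_config.json
    train_dataset=dataset,
    peft_config=peft_config,
    args=SFTConfig(output_dir="retrained_adapter_v4",
                   num_train_epochs=3, per_device_train_batch_size=2),
)
trainer.train()
```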
### Framework versions
- PEFT: 0.18.1
- TRL: 0.29.0
- Transformers: 5.0.0
- PyTorch: 2.10.0+cu128
- Datasets: 4.0.0
- Tokenizers: 0.22.2
## Citations
Cite TRL as:
```bibtex
@software{vonwerra2020trl,
title = {{TRL: Transformers Reinforcement Learning}},
author = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin},
license = {Apache-2.0},
url = {https://github.com/huggingface/trl},
year = {2020}
}
```

adapter_config.json Normal file

@@ -0,0 +1,50 @@
{
"alora_invocation_tokens": null,
"alpha_pattern": {},
"arrow_config": null,
"auto_mapping": {
"base_model_class": "Gemma2ForCausalLM",
"parent_library": "transformers.models.gemma2.modeling_gemma2",
"unsloth_fixed": true
},
"base_model_name_or_path": "unsloth/gemma-2-9b-it-bnb-4bit",
"bias": "none",
"corda_config": null,
"ensure_weight_tying": false,
"eva_config": null,
"exclude_modules": null,
"fan_in_fan_out": false,
"inference_mode": true,
"init_lora_weights": true,
"layer_replication": null,
"layers_pattern": null,
"layers_to_transform": null,
"loftq_config": {},
"lora_alpha": 128,
"lora_bias": false,
"lora_dropout": 0,
"megatron_config": null,
"megatron_core": "megatron.core",
"modules_to_save": null,
"peft_type": "LORA",
"peft_version": "0.18.1",
"qalora_group_size": 16,
"r": 64,
"rank_pattern": {},
"revision": null,
"target_modules": [
"down_proj",
"q_proj",
"v_proj",
"k_proj",
"up_proj",
"gate_proj",
"o_proj"
],
"target_parameters": null,
"task_type": "CAUSAL_LM",
"trainable_token_indices": null,
"use_dora": false,
"use_qalora": false,
"use_rslora": false
}
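
To run inference with this adapter, attach it to the 4-bit base model named in `base_model_name_or_path`. A minimal sketch, assuming the adapter files sit in the current directory:

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Base model recorded in adapter_config.json above.
base = AutoModelForCausalLM.from_pretrained(
    "unsloth/gemma-2-9b-it-bnb-4bit",
    torch_dtype=torch.bfloat16, device_map="auto",
)
# "." is assumed to be the directory holding adapter_config.json
# and adapter_model.safetensors.
model = PeftModel.from_pretrained(base, ".")
tokenizer = AutoTokenizer.from_pretrained("unsloth/gemma-2-9b-it-bnb-4bit")
```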


@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e7de9f78ba95e1c30aadda38b36034995efbcfef1531ef1e3a6e725c1dd5100e
size 864368280
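
This is a Git LFS pointer: the repository stores only this stub while the ~864 MB object lives in LFS storage. A minimal sketch parsing such a pointer into its fields:

```python
# Parse the three "key value" lines of an LFS pointer file.
def parse_lfs_pointer(text: str) -> dict:
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    algo, digest = fields["oid"].split(":", 1)
    return {"version": fields["version"], "oid_algo": algo,
            "oid": digest, "size": int(fields["size"])}

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:e7de9f78ba95e1c30aadda38b36034995efbcfef1531ef1e3a6e725c1dd5100e
size 864368280"""
info = parse_lfs_pointer(pointer)
print(f'{info["size"] / 1e6:.0f} MB object, sha256 {info["oid"][:12]}...')
```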

benchmark_results.md Normal file

@@ -0,0 +1,3 @@
# 🔱 ZKAEDI PRIME Benchmark
...paste full benchmark content here...

chat_template.jinja Normal file

@@ -0,0 +1,4 @@
{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '
' + message['content'] | trim + '<end_of_turn>
' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model
'}}{% endif %}
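
This template renders Gemma-style turns: the `assistant` role is mapped to `model`, a leading `system` message raises an error, and roles must strictly alternate. A minimal sketch rendering a one-turn conversation through it, assuming the tokenizer is loaded from a local copy of this repository:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(".")  # local clone of this repo, assumed
messages = [{"role": "user", "content": "Estimate the gas cost of a storage write."}]
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt)
# <bos><start_of_turn>user
# Estimate the gas cost of a storage write.<end_of_turn>
# <start_of_turn>model
```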

checkpoint-54/README.md Normal file

@@ -0,0 +1,209 @@
---
base_model: google/gemma-2-9b
library_name: peft
pipeline_tag: text-generation
tags:
- base_model:adapter:google/gemma-2-9b
- lora
- sft
- transformers
- trl
---
# Model Card for Model ID
<!-- Provide a quick summary of what the model is/does. -->
## Model Details
### Model Description
<!-- Provide a longer summary of what this model is. -->
- **Developed by:** [More Information Needed]
- **Funded by [optional]:** [More Information Needed]
- **Shared by [optional]:** [More Information Needed]
- **Model type:** [More Information Needed]
- **Language(s) (NLP):** [More Information Needed]
- **License:** [More Information Needed]
- **Finetuned from model [optional]:** [More Information Needed]
### Model Sources [optional]
<!-- Provide the basic links for the model. -->
- **Repository:** [More Information Needed]
- **Paper [optional]:** [More Information Needed]
- **Demo [optional]:** [More Information Needed]
## Uses
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
### Direct Use
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
[More Information Needed]
### Downstream Use [optional]
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
[More Information Needed]
### Out-of-Scope Use
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
[More Information Needed]
## Bias, Risks, and Limitations
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
[More Information Needed]
### Recommendations
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
## How to Get Started with the Model
Use the code below to get started with the model.
[More Information Needed]
## Training Details
### Training Data
<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
[More Information Needed]
### Training Procedure
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
#### Preprocessing [optional]
[More Information Needed]
#### Training Hyperparameters
- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
#### Speeds, Sizes, Times [optional]
<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
[More Information Needed]
## Evaluation
<!-- This section describes the evaluation protocols and provides the results. -->
### Testing Data, Factors & Metrics
#### Testing Data
<!-- This should link to a Dataset Card if possible. -->
[More Information Needed]
#### Factors
<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
[More Information Needed]
#### Metrics
<!-- These are the evaluation metrics being used, ideally with a description of why. -->
[More Information Needed]
### Results
[More Information Needed]
#### Summary
## Model Examination [optional]
<!-- Relevant interpretability work for the model goes here -->
[More Information Needed]
## Environmental Impact
<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
- **Hardware Type:** [More Information Needed]
- **Hours used:** [More Information Needed]
- **Cloud Provider:** [More Information Needed]
- **Compute Region:** [More Information Needed]
- **Carbon Emitted:** [More Information Needed]
## Technical Specifications [optional]
### Model Architecture and Objective
[More Information Needed]
### Compute Infrastructure
[More Information Needed]
#### Hardware
[More Information Needed]
#### Software
[More Information Needed]
## Citation [optional]
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
**BibTeX:**
[More Information Needed]
**APA:**
[More Information Needed]
## Glossary [optional]
<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
[More Information Needed]
## More Information [optional]
[More Information Needed]
## Model Card Authors [optional]
[More Information Needed]
## Model Card Contact
[More Information Needed]
### Framework versions
- PEFT 0.18.1


@@ -0,0 +1,50 @@
{
"alora_invocation_tokens": null,
"alpha_pattern": {},
"arrow_config": null,
"auto_mapping": {
"base_model_class": "Gemma2ForCausalLM",
"parent_library": "transformers.models.gemma2.modeling_gemma2",
"unsloth_fixed": true
},
"base_model_name_or_path": "unsloth/gemma-2-9b-it-bnb-4bit",
"bias": "none",
"corda_config": null,
"ensure_weight_tying": false,
"eva_config": null,
"exclude_modules": null,
"fan_in_fan_out": false,
"inference_mode": true,
"init_lora_weights": true,
"layer_replication": null,
"layers_pattern": null,
"layers_to_transform": null,
"loftq_config": {},
"lora_alpha": 128,
"lora_bias": false,
"lora_dropout": 0,
"megatron_config": null,
"megatron_core": "megatron.core",
"modules_to_save": null,
"peft_type": "LORA",
"peft_version": "0.18.1",
"qalora_group_size": 16,
"r": 64,
"rank_pattern": {},
"revision": null,
"target_modules": [
"gate_proj",
"down_proj",
"v_proj",
"o_proj",
"k_proj",
"q_proj",
"up_proj"
],
"target_parameters": null,
"task_type": "CAUSAL_LM",
"trainable_token_indices": null,
"use_dora": false,
"use_qalora": false,
"use_rslora": false
}


@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7edc86143dcf76b998d38629904ff583a1b000495870002bdebdc642b442bb07
size 864368280


@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d4f16ca1f46eb0c1292bc526139ab1569a171a64e73c1a041bafba4309e901c7
size 439594453


@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:098b29492211804ab324a36f37466821d948280bb74fce4ba895c03f13ecd878
size 14645


@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:31b4b19d89d55db0f191effcf8b1505dacdb6bc9693041719d51e757c41acce7
size 1465


@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:394ace002a144ac6ad5486387502f2d36f70c087310c3d907857240c76fcb36e
size 34362748


@@ -0,0 +1,19 @@
{
"backend": "tokenizers",
"bos_token": "<bos>",
"clean_up_tokenization_spaces": false,
"eos_token": "<eos>",
"extra_special_tokens": [
"<start_of_turn>",
"<end_of_turn>"
],
"is_local": false,
"mask_token": "<mask>",
"model_max_length": 1000000000000000019884624838656,
"pad_token": "<eos>",
"sp_model_kwargs": {},
"spaces_between_special_tokens": false,
"tokenizer_class": "GemmaTokenizer",
"unk_token": "<unk>",
"use_default_system_prompt": false
}
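
A minimal sketch loading the tokenizer this config describes and inspecting its declared special tokens; the path assumes this is the checkpoint-54 copy:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("checkpoint-54")  # local path, assumed
print(tok.bos_token, tok.eos_token, tok.pad_token)    # <bos> <eos> <eos>
print(tok.convert_tokens_to_ids(["<start_of_turn>", "<end_of_turn>"]))
```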


@@ -0,0 +1,84 @@
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 54,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"entropy": 0.7260835453867912,
"epoch": 0.37735849056603776,
"grad_norm": 0.9797868132591248,
"learning_rate": 0.00018,
"loss": 1.0297086715698243,
"mean_token_accuracy": 0.8047336474061012,
"num_tokens": 41605.0,
"step": 10
},
{
"entropy": 0.6147415287792682,
"epoch": 0.7547169811320755,
"grad_norm": 0.8319393992424011,
"learning_rate": 0.00019217488001088784,
"loss": 0.610823392868042,
"mean_token_accuracy": 0.8518229335546493,
"num_tokens": 86144.0,
"step": 20
},
{
"entropy": 0.5127407720214442,
"epoch": 1.1132075471698113,
"grad_norm": 0.898364782333374,
"learning_rate": 0.0001666935530836651,
"loss": 0.47104392051696775,
"mean_token_accuracy": 0.8734802854688544,
"num_tokens": 125093.0,
"step": 30
},
{
"entropy": 0.35390120558440685,
"epoch": 1.490566037735849,
"grad_norm": 0.6483786702156067,
"learning_rate": 0.0001283661778334297,
"loss": 0.34734306335449217,
"mean_token_accuracy": 0.9023109719157218,
"num_tokens": 167344.0,
"step": 40
},
{
"entropy": 0.2861595153808594,
"epoch": 1.8679245283018868,
"grad_norm": 0.6667284965515137,
"learning_rate": 8.457510670346976e-05,
"loss": 0.27520730495452883,
"mean_token_accuracy": 0.9225298032164574,
"num_tokens": 209654.0,
"step": 50
}
],
"logging_steps": 10,
"max_steps": 81,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.3533277150804992e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
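
The `log_history` above records the loss curve for this two-epoch checkpoint. A minimal sketch summarizing it, assuming the file is available locally:

```python
import json

# Walk the logged training steps and print the key metrics.
state = json.load(open("checkpoint-54/trainer_state.json"))
for entry in state["log_history"]:
    print(f'step {entry["step"]:>3}  epoch {entry["epoch"]:.2f}  '
          f'loss {entry["loss"]:.3f}  acc {entry["mean_token_accuracy"]:.3f}')
# Loss falls from ~1.03 at step 10 to ~0.28 at step 50.
```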


@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f1417f344aaba2cf1a99d8c98b244c5eb505be7eea946a989e1ac1ddb6d440b7
size 5585

checkpoint-81/README.md Normal file

@@ -0,0 +1,209 @@
---
base_model: google/gemma-2-9b
library_name: peft
pipeline_tag: text-generation
tags:
- base_model:adapter:google/gemma-2-9b
- lora
- sft
- transformers
- trl
---
# Model Card for Model ID
<!-- Provide a quick summary of what the model is/does. -->
## Model Details
### Model Description
<!-- Provide a longer summary of what this model is. -->
- **Developed by:** [More Information Needed]
- **Funded by [optional]:** [More Information Needed]
- **Shared by [optional]:** [More Information Needed]
- **Model type:** [More Information Needed]
- **Language(s) (NLP):** [More Information Needed]
- **License:** [More Information Needed]
- **Finetuned from model [optional]:** [More Information Needed]
### Model Sources [optional]
<!-- Provide the basic links for the model. -->
- **Repository:** [More Information Needed]
- **Paper [optional]:** [More Information Needed]
- **Demo [optional]:** [More Information Needed]
## Uses
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
### Direct Use
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
[More Information Needed]
### Downstream Use [optional]
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
[More Information Needed]
### Out-of-Scope Use
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
[More Information Needed]
## Bias, Risks, and Limitations
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
[More Information Needed]
### Recommendations
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
## How to Get Started with the Model
Use the code below to get started with the model.
[More Information Needed]
## Training Details
### Training Data
<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
[More Information Needed]
### Training Procedure
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
#### Preprocessing [optional]
[More Information Needed]
#### Training Hyperparameters
- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
#### Speeds, Sizes, Times [optional]
<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
[More Information Needed]
## Evaluation
<!-- This section describes the evaluation protocols and provides the results. -->
### Testing Data, Factors & Metrics
#### Testing Data
<!-- This should link to a Dataset Card if possible. -->
[More Information Needed]
#### Factors
<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
[More Information Needed]
#### Metrics
<!-- These are the evaluation metrics being used, ideally with a description of why. -->
[More Information Needed]
### Results
[More Information Needed]
#### Summary
## Model Examination [optional]
<!-- Relevant interpretability work for the model goes here -->
[More Information Needed]
## Environmental Impact
<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
- **Hardware Type:** [More Information Needed]
- **Hours used:** [More Information Needed]
- **Cloud Provider:** [More Information Needed]
- **Compute Region:** [More Information Needed]
- **Carbon Emitted:** [More Information Needed]
## Technical Specifications [optional]
### Model Architecture and Objective
[More Information Needed]
### Compute Infrastructure
[More Information Needed]
#### Hardware
[More Information Needed]
#### Software
[More Information Needed]
## Citation [optional]
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
**BibTeX:**
[More Information Needed]
**APA:**
[More Information Needed]
## Glossary [optional]
<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
[More Information Needed]
## More Information [optional]
[More Information Needed]
## Model Card Authors [optional]
[More Information Needed]
## Model Card Contact
[More Information Needed]
### Framework versions
- PEFT 0.18.1


@@ -0,0 +1,50 @@
{
"alora_invocation_tokens": null,
"alpha_pattern": {},
"arrow_config": null,
"auto_mapping": {
"base_model_class": "Gemma2ForCausalLM",
"parent_library": "transformers.models.gemma2.modeling_gemma2",
"unsloth_fixed": true
},
"base_model_name_or_path": "unsloth/gemma-2-9b-it-bnb-4bit",
"bias": "none",
"corda_config": null,
"ensure_weight_tying": false,
"eva_config": null,
"exclude_modules": null,
"fan_in_fan_out": false,
"inference_mode": true,
"init_lora_weights": true,
"layer_replication": null,
"layers_pattern": null,
"layers_to_transform": null,
"loftq_config": {},
"lora_alpha": 128,
"lora_bias": false,
"lora_dropout": 0,
"megatron_config": null,
"megatron_core": "megatron.core",
"modules_to_save": null,
"peft_type": "LORA",
"peft_version": "0.18.1",
"qalora_group_size": 16,
"r": 64,
"rank_pattern": {},
"revision": null,
"target_modules": [
"gate_proj",
"down_proj",
"v_proj",
"o_proj",
"k_proj",
"q_proj",
"up_proj"
],
"target_parameters": null,
"task_type": "CAUSAL_LM",
"trainable_token_indices": null,
"use_dora": false,
"use_qalora": false,
"use_rslora": false
}


@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:90727593b9e9491100813dc4792e62454823e6fc29dfb11e8671b19674b20fab
size 864368280


@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d4bac7f684314d69752adbc18293350107a1b8ff48d21e81c591b4a7ba613b21
size 439594453


@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f4a9f217e852f439efa6bd32fde98d6867f11aa6ea13ddc021ba10af6a0b0934
size 14645


@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:267b17180d5035cb3e3dac3e392b4366050735f9c0596b1085fda8dee58ea4cb
size 1465


@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:394ace002a144ac6ad5486387502f2d36f70c087310c3d907857240c76fcb36e
size 34362748


@@ -0,0 +1,19 @@
{
"backend": "tokenizers",
"bos_token": "<bos>",
"clean_up_tokenization_spaces": false,
"eos_token": "<eos>",
"extra_special_tokens": [
"<start_of_turn>",
"<end_of_turn>"
],
"is_local": false,
"mask_token": "<mask>",
"model_max_length": 1000000000000000019884624838656,
"pad_token": "<eos>",
"sp_model_kwargs": {},
"spaces_between_special_tokens": false,
"tokenizer_class": "GemmaTokenizer",
"unk_token": "<unk>",
"use_default_system_prompt": false
}


@@ -0,0 +1,114 @@
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 81,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"entropy": 0.7260835453867912,
"epoch": 0.37735849056603776,
"grad_norm": 0.9797868132591248,
"learning_rate": 0.00018,
"loss": 1.0297086715698243,
"mean_token_accuracy": 0.8047336474061012,
"num_tokens": 41605.0,
"step": 10
},
{
"entropy": 0.6147415287792682,
"epoch": 0.7547169811320755,
"grad_norm": 0.8319393992424011,
"learning_rate": 0.00019217488001088784,
"loss": 0.610823392868042,
"mean_token_accuracy": 0.8518229335546493,
"num_tokens": 86144.0,
"step": 20
},
{
"entropy": 0.5127407720214442,
"epoch": 1.1132075471698113,
"grad_norm": 0.898364782333374,
"learning_rate": 0.0001666935530836651,
"loss": 0.47104392051696775,
"mean_token_accuracy": 0.8734802854688544,
"num_tokens": 125093.0,
"step": 30
},
{
"entropy": 0.35390120558440685,
"epoch": 1.490566037735849,
"grad_norm": 0.6483786702156067,
"learning_rate": 0.0001283661778334297,
"loss": 0.34734306335449217,
"mean_token_accuracy": 0.9023109719157218,
"num_tokens": 167344.0,
"step": 40
},
{
"entropy": 0.2861595153808594,
"epoch": 1.8679245283018868,
"grad_norm": 0.6667284965515137,
"learning_rate": 8.457510670346976e-05,
"loss": 0.27520730495452883,
"mean_token_accuracy": 0.9225298032164574,
"num_tokens": 209654.0,
"step": 50
},
{
"entropy": 0.21439368748351148,
"epoch": 2.2264150943396226,
"grad_norm": 0.9153143763542175,
"learning_rate": 4.375507123592194e-05,
"loss": 0.19489681720733643,
"mean_token_accuracy": 0.9477020047212902,
"num_tokens": 250416.0,
"step": 60
},
{
"entropy": 0.1475609978660941,
"epoch": 2.6037735849056602,
"grad_norm": 0.5222585201263428,
"learning_rate": 1.3768542747997215e-05,
"loss": 0.13845933675765992,
"mean_token_accuracy": 0.9617140784859657,
"num_tokens": 293209.0,
"step": 70
},
{
"entropy": 0.14689538963139057,
"epoch": 2.981132075471698,
"grad_norm": 0.6572704315185547,
"learning_rate": 3.913177925055189e-07,
"loss": 0.13648871183395386,
"mean_token_accuracy": 0.9615253210067749,
"num_tokens": 335896.0,
"step": 80
}
],
"logging_steps": 10,
"max_steps": 81,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.0410264608835584e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
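
The learning rates logged above are consistent with a cosine schedule with linear warmup: a peak of 2e-4, 10 warmup steps, and 81 total steps reproduce every logged value to within rounding. A minimal sketch; the peak and warmup length are inferred from the log, not read from saved arguments:

```python
import math

PEAK_LR, WARMUP, TOTAL = 2e-4, 10, 81  # inferred, not from training_args.bin

def lr_lambda(step: int) -> float:
    # Linear warmup, then cosine decay to zero at TOTAL steps.
    if step < WARMUP:
        return step / WARMUP
    progress = (step - WARMUP) / (TOTAL - WARMUP)
    return 0.5 * (1.0 + math.cos(math.pi * progress))

# The trainer logs the LR used by the most recent update, i.e. index step-1.
for logged_step in (10, 20, 50, 80):
    print(logged_step, f"{PEAK_LR * lr_lambda(logged_step - 1):.6e}")
# 10 -> 1.8e-04, 20 -> 1.92175e-04, 50 -> 8.457e-05, 80 -> 3.917e-07
```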


@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f1417f344aaba2cf1a99d8c98b244c5eb505be7eea946a989e1ac1ddb6d440b7
size 5585

config.json Normal file

@@ -0,0 +1,78 @@
{
"architectures": [
"Gemma2ForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"attn_logit_softcapping": 50.0,
"bos_token_id": 2,
"cache_implementation": "hybrid",
"torch_dtype": "bfloat16",
"eos_token_id": 1,
"final_logit_softcapping": 30.0,
"head_dim": 256,
"hidden_act": "gelu_pytorch_tanh",
"hidden_activation": "gelu_pytorch_tanh",
"hidden_size": 3584,
"initializer_range": 0.02,
"intermediate_size": 14336,
"layer_types": [
"sliding_attention",
"full_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"full_attention"
],
"max_position_embeddings": 8192,
"model_type": "gemma2",
"num_attention_heads": 16,
"num_hidden_layers": 42,
"num_key_value_heads": 8,
"pad_token_id": 0,
"query_pre_attn_scalar": 256,
"rms_norm_eps": 1e-06,
"rope_scaling": null,
"rope_theta": 10000.0,
"sliding_window": 4096,
"sliding_window_size": 4096,
"unsloth_version": "2026.2.1",
"use_cache": true,
"vocab_size": 256000
}
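
A rough parameter count implied by this config (Gemma 2 block layout: grouped-query attention, gated MLP, four RMSNorms per layer, tied embeddings). A minimal sketch; treat the result as an estimate:

```python
HIDDEN, LAYERS, INTER = 3584, 42, 14336
HEADS, KV_HEADS, HEAD_DIM, VOCAB = 16, 8, 256, 256000

attn = HIDDEN * HEADS * HEAD_DIM           # q_proj
attn += 2 * HIDDEN * KV_HEADS * HEAD_DIM   # k_proj, v_proj (grouped-query)
attn += HEADS * HEAD_DIM * HIDDEN          # o_proj
mlp = 3 * HIDDEN * INTER                   # gate_proj, up_proj, down_proj
norms = 4 * HIDDEN                         # pre/post attention and MLP RMSNorms
per_layer = attn + mlp + norms

# Tied lm_head means embeddings are counted once; plus the final norm.
total = LAYERS * per_layer + VOCAB * HIDDEN + HIDDEN
print(f"{total / 1e9:.2f} B parameters")   # ~9.24 B
```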


@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b882dcee092562d6b90126f3a8adc0fc8f0c1ea5448f1286e91f34fdd1da3269
size 5761057728

gemma-2-9b-it.Q8_0.gguf Normal file

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5d81b0a8663a44c41252c9bfce671449efc0bb03cf842e02a99af47d224e3b68
size 9827148736

gemma-7b.Q4_K_M.gguf Normal file

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:54e6ca4c6b61a289ab2f588049a9bd251b1d732e7ae40a766ffacf1e1f2e336f
size 5329758016
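
To run one of these quantized builds locally, llama-cpp-python can load the GGUF directly. A minimal sketch using the Q8_0 file referenced by the Modelfile; the file path and prompt are placeholders:

```python
from llama_cpp import Llama

# n_ctx matches the Modelfile's num_ctx setting.
llm = Llama(model_path="gemma-2-9b-it.Q8_0.gguf", n_ctx=4096)
out = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Explain SSTORE gas refunds briefly."}],
    max_tokens=128,
)
print(out["choices"][0]["message"]["content"])
```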


@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1f84626d59233fa8c2e06df093ff7938cd93e24cf9566765eb22415962a453a8
size 4903351912


@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:397c021656e8dacb8bceac21a858b6546c77180f14fe41200253b7fd47374f02
size 4947570872


@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:71afc8f334a8409ddc37b5b376e35da43cc153961542c4f5a44126791046c4b6
size 4962221464


@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5d80b44b463d937c3cefa8510f0820f9ca8bef297be1189ceac396eeaad1487b
size 3670322200


@@ -0,0 +1,471 @@
{
"metadata": {
"total_size": 18483411968
},
"weight_map": {
"model.embed_tokens.weight": "model-00001-of-00004.safetensors",
"model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.0.post_feedforward_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.0.pre_feedforward_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.1.post_feedforward_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.1.pre_feedforward_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.10.post_feedforward_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.10.pre_feedforward_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.11.post_feedforward_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.11.pre_feedforward_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.12.post_feedforward_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.12.pre_feedforward_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.13.post_feedforward_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.13.pre_feedforward_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.14.post_feedforward_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.14.pre_feedforward_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.15.post_feedforward_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.15.pre_feedforward_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.16.post_feedforward_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.16.pre_feedforward_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.17.post_feedforward_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.17.pre_feedforward_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.18.post_feedforward_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.18.pre_feedforward_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.19.post_feedforward_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.19.pre_feedforward_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.2.post_feedforward_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.2.pre_feedforward_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.20.post_feedforward_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.20.pre_feedforward_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.21.post_feedforward_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.21.pre_feedforward_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.22.post_feedforward_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.22.pre_feedforward_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.23.post_feedforward_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.23.pre_feedforward_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.24.post_feedforward_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.24.pre_feedforward_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.25.post_feedforward_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.25.pre_feedforward_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.26.post_feedforward_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.26.pre_feedforward_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.27.post_feedforward_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.27.pre_feedforward_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.28.post_feedforward_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.28.pre_feedforward_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.29.post_feedforward_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.29.pre_feedforward_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.3.post_feedforward_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.3.pre_feedforward_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.30.post_feedforward_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.30.pre_feedforward_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.31.post_feedforward_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.31.pre_feedforward_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.32.input_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.32.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.32.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.32.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.32.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.32.post_feedforward_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.32.pre_feedforward_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.32.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.32.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.32.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.32.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.33.input_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.33.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.33.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.33.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.33.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.33.post_feedforward_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.33.pre_feedforward_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.33.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.33.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.33.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.33.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.34.input_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.34.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.34.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.34.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.34.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.34.post_feedforward_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.34.pre_feedforward_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.34.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.34.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.34.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.34.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.35.input_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.35.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.35.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.35.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.35.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.35.post_feedforward_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.35.pre_feedforward_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.35.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.35.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.35.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.35.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.36.input_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.36.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.36.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.36.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.36.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.36.post_feedforward_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.36.pre_feedforward_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.36.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.36.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.36.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.36.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.37.input_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.37.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.37.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.37.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.37.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.37.post_feedforward_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.37.pre_feedforward_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.37.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.37.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.37.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.37.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.38.input_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.38.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.38.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.38.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.38.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.38.post_feedforward_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.38.pre_feedforward_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.38.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.38.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.38.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.38.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.39.input_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.39.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.39.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.39.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.39.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.39.post_feedforward_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.39.pre_feedforward_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.39.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.39.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.39.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.39.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.4.post_feedforward_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.4.pre_feedforward_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.40.input_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.40.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.40.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.40.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.40.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.40.post_feedforward_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.40.pre_feedforward_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.40.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.40.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.40.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.40.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.41.input_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.41.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.41.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.41.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.41.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.41.post_feedforward_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.41.pre_feedforward_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.41.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.41.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.41.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.41.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.5.post_feedforward_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.5.pre_feedforward_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.6.post_feedforward_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.6.pre_feedforward_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.7.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.7.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.7.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.7.post_feedforward_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.7.pre_feedforward_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.8.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.8.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.8.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.8.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.8.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.8.post_feedforward_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.8.pre_feedforward_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.8.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.8.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.8.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.8.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.9.post_feedforward_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.9.pre_feedforward_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.norm.weight": "model-00004-of-00004.safetensors"
}
}
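
The weight map above is what a loader consults to find which shard file holds each tensor. A minimal sketch of resolving one tensor through the index, assuming a local checkout and the `safetensors` package (the paths and the chosen tensor name are illustrative):

```python
import json
from safetensors import safe_open

# The index maps every tensor name to the shard that stores it.
with open("model.safetensors.index.json") as f:
    index = json.load(f)

name = "model.layers.33.mlp.up_proj.weight"
shard = index["weight_map"][name]  # "model-00004-of-00004.safetensors"

# Open only that shard and read the single tensor lazily.
with safe_open(shard, framework="pt") as f:
    tensor = f.get_tensor(name)

print(name, tuple(tensor.shape))
```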

34
special_tokens_map.json Normal file
View File

@@ -0,0 +1,34 @@
{
"additional_special_tokens": [
"<start_of_turn>",
"<end_of_turn>"
],
"bos_token": {
"content": "<bos>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"eos_token": {
"content": "<eos>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": {
"content": "<pad>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"unk_token": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}

3
tokenizer.json Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:394ace002a144ac6ad5486387502f2d36f70c087310c3d907857240c76fcb36e
size 34362748

3
tokenizer.model Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2
size 4241003

19
tokenizer_config.json Normal file
View File

@@ -0,0 +1,19 @@
{
"backend": "tokenizers",
"bos_token": "<bos>",
"clean_up_tokenization_spaces": false,
"eos_token": "<eos>",
"extra_special_tokens": [],
"from_slow": true,
"is_local": false,
"legacy": false,
"mask_token": "<mask>",
"model_max_length": 8192,
"pad_token": "<pad>",
"padding_side": "left",
"sp_model_kwargs": {},
"spaces_between_special_tokens": false,
"tokenizer_class": "GemmaTokenizer",
"unk_token": "<unk>",
"use_default_system_prompt": false
}
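
The settings above (left padding, an 8192-token context window, the GemmaTokenizer class) take effect automatically when the files in this commit are loaded together. A minimal sketch, assuming a local checkout of this repo (the path is illustrative):

```python
from transformers import AutoTokenizer

# Reads tokenizer_config.json, special_tokens_map.json and tokenizer.json/.model.
tok = AutoTokenizer.from_pretrained("./")

print(tok.padding_side)      # "left", as set above
print(tok.model_max_length)  # 8192
# The chat-turn markers from special_tokens_map.json resolve to real token ids:
print(tok.convert_tokens_to_ids(["<start_of_turn>", "<end_of_turn>"]))
```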

228
viz_waterfall.html Normal file
View File

@@ -0,0 +1,228 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<title>ZKAEDI PRIME — Hamiltonian Self-Similarity Waterfall</title>
<link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;700&family=Orbitron:wght@700;900&display=swap" rel="stylesheet">
<style>
*{box-sizing:border-box;margin:0;padding:0}
body{background:#000;color:#C8BFAE;font-family:'JetBrains Mono',monospace;font-size:11px;display:flex;flex-direction:column;align-items:center;justify-content:center;height:100vh;overflow:hidden;padding:8px}
h1{font-family:'Orbitron',monospace;font-size:10px;letter-spacing:4px;color:#CC0030;margin-bottom:3px;text-align:center}
.sub{font-size:8px;color:#3A3D55;letter-spacing:2px;margin-bottom:8px;text-align:center}
#c{display:block;border:1px solid #1C1E30}
.metrics{display:flex;gap:16px;margin-top:8px;font-size:8px;color:#3A3D55;letter-spacing:1px;flex-wrap:wrap;justify-content:center}
.mv{color:#00BBCC}
</style>
</head>
<body>
<h1>HAMILTONIAN SELF-SIMILARITY WATERFALL</h1>
<div class="sub">H_t DECOMPOSED ACROSS SCALES τ=1,2,4,8,16,32 — SELF-SIMILAR STRUCTURE REVEALED</div>
<canvas id="c"></canvas>
<div class="metrics">
FRACTAL DIM: <span class="mv" id="fd"></span>&nbsp;&nbsp;
SCALE CORR: <span class="mv" id="sc"></span>&nbsp;&nbsp;
HURST (field): <span class="mv" id="hf"></span>&nbsp;&nbsp;
DOMINANT SCALE: <span class="mv" id="ds"></span>
</div>
<script>
const C=document.getElementById('c');
const ctx=C.getContext('2d');
const W=Math.min(window.innerWidth-40,680);
const H=Math.min(window.innerHeight-110,420);
C.width=W;C.height=H;
// ZKAEDI PRIME — generate H_t field
const sigmoid=x=>1/(1+Math.exp(-Math.max(-20,Math.min(20,x))));
function genHfield(N,eta=0.42,gamma=0.35,beta=0.10,sigma=0.05){
let seed=7777;
const rng=()=>{seed=(seed*1664525+1013904223)&0xffffffff;return((seed>>>0)/0xffffffff-.5)*2;};
const H=new Float64Array(N);
const H0=0.5;
H[0]=H0;
for(let t=1;t<N;t++){
const sg=sigmoid(gamma*H[t-1]);
H[t]=H0+eta*H[t-1]*sg+sigma*rng()*(1+beta*Math.abs(H[t-1]));
}
return H;
}
// Smooth at scale s (box average)
function smoothAt(H,scale){
const N=H.length;
const out=new Float64Array(N);
for(let i=0;i<N;i++){
let sum=0,cnt=0;
for(let k=Math.max(0,i-scale);k<=Math.min(N-1,i+scale);k++){sum+=H[k];cnt++;}
out[i]=sum/cnt;
}
return out;
}
// Scale-to-scale correlation
function corr(a,b){
const n=Math.min(a.length,b.length);
let ma=0,mb=0;
for(let i=0;i<n;i++){ma+=a[i];mb+=b[i];}
ma/=n;mb/=n;
let num=0,da=0,db=0;
for(let i=0;i<n;i++){num+=(a[i]-ma)*(b[i]-mb);da+=(a[i]-ma)**2;db+=(b[i]-mb)**2;}
return (da>0&&db>0)?num/Math.sqrt(da*db):0;
}
const N=600;
const Hfield=genHfield(N); // renamed: `const H` is already the canvas height above, so reusing it would throw a redeclaration SyntaxError
const SCALES=[1,2,4,8,16,32];
const COLORS=[
'rgba(0,187,204,0.8)', // τ=1 cyan
'rgba(0,204,122,0.8)', // τ=2 jade
'rgba(201,168,76,0.8)', // τ=4 gold
'rgba(204,85,0,0.8)', // τ=8 amber
'rgba(148,0,204,0.8)', // τ=16 orchid
'rgba(204,0,48,0.8)', // τ=32 crimson
];
const LABELS=['τ=1','τ=2','τ=4','τ=8','τ=16','τ=32'];
const scales=SCALES.map(s=>smoothAt(Hfield,s));
const PAD_L=52, PAD_R=14, PAD_T=12, PAD_B=20;
const LAY_H=(H-PAD_T-PAD_B)/SCALES.length; // height per scale row inside the padded plot area
// For each scale, find min/max for scaling
const stats=scales.map(s=>{
let mn=Infinity,mx=-Infinity;
for(const v of s){if(v<mn)mn=v;if(v>mx)mx=v;}
return{mn,mx};
});
// Scale correlations
const corrs=[];
for(let i=0;i<scales.length-1;i++) corrs.push(corr(scales[i],scales[i+1]));
const avgCorr=corrs.reduce((a,b)=>a+b,0)/corrs.length;
// Hurst of H field
function simpleHurst(arr){
const n=arr.length;
const m=arr.reduce((a,b)=>a+b,0)/n;
let cum=0,mx=-Infinity,mn=Infinity;
for(const v of arr){cum+=v-m;if(cum>mx)mx=cum;if(cum<mn)mn=cum;}
const R=mx-mn;
const S=Math.sqrt(arr.reduce((a,v)=>a+(v-m)**2,0)/n);
return S>0?Math.log(R/S)/Math.log(n):0.5;
}
const hfHurst=simpleHurst(Array.from(Hfield));
// Draw background
ctx.fillStyle='#000';ctx.fillRect(0,0,W,H);
// Grid lines
ctx.strokeStyle='rgba(200,191,174,0.04)';ctx.lineWidth=0.5;
for(let t=0;t<N;t+=60){
const x=PAD_L+(t/N)*(W-PAD_L-PAD_R);
ctx.beginPath();ctx.moveTo(x,PAD_T);ctx.lineTo(x,H-PAD_B);ctx.stroke();
}
// Draw each scale as filled band + line, waterfall stacked
for(let si=0;si<scales.length;si++){
const s=scales[si];
const {mn,mx}=stats[si];
const range=mx-mn||1;
const yBase=PAD_T+si*LAY_H+LAY_H; // bottom of this row
const yTop=PAD_T+si*LAY_H; // top of this row
const rowH=LAY_H-1;
const color=COLORS[si];
const scale=SCALES[si];
// Fill band
ctx.beginPath();
for(let t=0;t<N;t++){
const x=PAD_L+(t/N)*(W-PAD_L-PAD_R);
const yn=yTop+rowH*(1-(s[t]-mn)/range);
t===0?ctx.moveTo(x,yn):ctx.lineTo(x,yn);
}
ctx.lineTo(PAD_L+(N-1)/N*(W-PAD_L-PAD_R),yBase);
ctx.lineTo(PAD_L,yBase);
ctx.closePath();
const grad=ctx.createLinearGradient(0,yTop,0,yBase);
grad.addColorStop(0,color.replace('0.8','0.18'));
grad.addColorStop(1,'rgba(0,0,0,0)');
ctx.fillStyle=grad;ctx.fill();
// Line
ctx.beginPath();
for(let t=0;t<N;t++){
const x=PAD_L+(t/N)*(W-PAD_L-PAD_R);
const yn=yTop+rowH*(1-(s[t]-mn)/range);
t===0?ctx.moveTo(x,yn):ctx.lineTo(x,yn);
}
ctx.strokeStyle=color;ctx.lineWidth=si===0?1.2:1.0;ctx.stroke();
// Row separator
ctx.strokeStyle='rgba(200,191,174,0.06)';ctx.lineWidth=0.5;
ctx.beginPath();ctx.moveTo(PAD_L,yBase);ctx.lineTo(W-PAD_R,yBase);ctx.stroke();
// Label
ctx.fillStyle=color;ctx.font="bold 8px 'Orbitron'";
ctx.fillText(LABELS[si],4,yTop+rowH*0.55);
// Corr with next scale
if(si<corrs.length){
ctx.fillStyle='rgba(200,191,174,0.3)';ctx.font="7px 'JetBrains Mono'";
ctx.fillText('r='+corrs[si].toFixed(2),4,yTop+rowH*0.85);
}
// Event markers on the finest scale only: flag phase transitions where |dH/dt| spikes
if(si===0){
// Mark phase transitions: points where dH/dt spikes
const dH=[];
for(let t=1;t<N;t++) dH.push(Math.abs(s[t]-s[t-1]));
const dMean=dH.reduce((a,b)=>a+b,0)/dH.length;
const dStd=Math.sqrt(dH.reduce((a,v)=>a+(v-dMean)**2,0)/dH.length);
for(let t=1;t<N;t++){
if(dH[t-1]>dMean+2.2*dStd){
const x=PAD_L+(t/N)*(W-PAD_L-PAD_R);
ctx.strokeStyle='rgba(204,0,48,0.5)';ctx.lineWidth=1;ctx.setLineDash([1,2]);
ctx.beginPath();ctx.moveTo(x,PAD_T);ctx.lineTo(x,H-PAD_B);ctx.stroke();ctx.setLineDash([]);
}
}
}
}
// Time axis
ctx.fillStyle='rgba(200,191,174,0.25)';ctx.font="7px 'JetBrains Mono'";
for(let t=0;t<N;t+=100){
const x=PAD_L+(t/N)*(W-PAD_L-PAD_R);
ctx.fillText('t='+t,x-8,H-4);
}
ctx.fillText('t →',W-30,H-4);
// Spectral power analysis (RMS at each scale)
const powers=scales.map((s,i)=>{
const mn=stats[i].mn;
return Math.sqrt(s.reduce((a,v)=>a+(v-mn)**2,0)/N);
});
const maxP=Math.max(...powers);
// Mini power spectrum bar in top-right
const sbX=W-80,sbY=PAD_T,sbW=60,sbH=40;
ctx.strokeStyle='rgba(200,191,174,0.1)';ctx.lineWidth=0.5;
ctx.strokeRect(sbX,sbY,sbW,sbH);
ctx.fillStyle='rgba(200,191,174,0.15)';ctx.font="6px 'JetBrains Mono'";
ctx.fillText('POWER',sbX+16,sbY-2);
for(let si=0;si<SCALES.length;si++){
const bx=sbX+si*(sbW/SCALES.length)+1;
const bh=(powers[si]/maxP)*sbH*0.9;
ctx.fillStyle=COLORS[si].replace('0.8','0.7');
ctx.fillRect(bx,sbY+sbH-bh,sbW/SCALES.length-2,bh);
}
// Fractal dimension estimate: D = 2 - H
const D=(2-hfHurst).toFixed(3);
document.getElementById('fd').textContent=D;
document.getElementById('sc').textContent=avgCorr.toFixed(3);
document.getElementById('hf').textContent=hfHurst.toFixed(3);
// Dominant scale: highest power
const domIdx=powers.indexOf(Math.max(...powers));
document.getElementById('ds').textContent=LABELS[domIdx];
</script>
</body>
</html>
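
The script estimates a Hurst exponent with a single rescaled-range (R/S) statistic and reports the fractal dimension as D = 2 - H. A quick way to sanity-check that estimate outside the browser is to recompute the same statistic; a minimal NumPy sketch (the input series here is plain Gaussian noise, purely for illustration):

```python
import numpy as np

def rs_hurst(x: np.ndarray) -> float:
    """Single-window R/S estimate, mirroring simpleHurst() in viz_waterfall.html."""
    n = len(x)
    dev = x - x.mean()
    cum = np.cumsum(dev)        # cumulative deviation from the mean
    R = cum.max() - cum.min()   # range of the cumulative series
    S = x.std()                 # population standard deviation, as in the JS
    return float(np.log(R / S) / np.log(n)) if S > 0 else 0.5

x = np.random.default_rng(7777).normal(size=600)
H = rs_hurst(x)
print(f"H = {H:.3f}  ->  fractal dimension D = {2 - H:.3f}")
```

For white noise the estimate should land near 0.5 (D near 1.5); values well above 0.5 in the page's H_t field are what indicate the persistent, self-similar structure the waterfall is meant to reveal.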

31
zkaedi_audit.py Normal file
View File

@@ -0,0 +1,31 @@
import requests, json, time, re
# ── Config ───────────────────────────────────────────────────────────────────
ETHERSCAN_KEY = "YOUR_ETHERSCAN_API_KEY"
# ── Vuln type alias map ──────────────────────────────────────────────────────
VULN_TYPE_ALIASES = {
"flash_loan_oracle": ["flash_loan_oracle", "price manipulation", "oracle manipulation", "flash loan oracle"],
"flash_loan_governance": ["flash_loan_governance", "flash loan governance", "governance attack"],
"reentrancy": ["reentrancy", "re-entrancy", "recursive call"],
"price_manipulation": ["price_manipulation", "price manipulation", "oracle manipulation"],
"improper_initialization":["improper_initialization", "improper initialization", "zero hash", "uninitialized", "merkle root"],
"uninitialized_proxy": ["uninitialized_proxy", "uninitialized proxy", "uninitialized wallet", "selfdestruct", "self-destruct", "kill"],
"donation_attack": ["donation_attack", "donation attack", "violates", "health check", "edonation", "bad debt"],
"read_only_reentrancy": ["read_only_reentrancy", "read-only reentrancy", "read only reentrancy", "view reentrancy"],
"access_control": ["access_control", "access control", "unauthorized", "missing modifier", "privilege escalation", "permit", "permit signature", "signature re-use", "signature reuse"],
}
TARGETS = {
"DAO (Reentrancy - $60M 2016)": {"address": "0xBB9bc244D798123fDe783fCc1C72d3Bb8C189413", "expected_type": "reentrancy", "expected_severity": "CRITICAL", "expected_lyapunov_min": 0.8},
"Beanstalk (Governance Flash Loan - $182M 2022)": {"address": "0xC1E088fC1323b20BCBee9bd1B9fC9546db5624C5", "expected_type": "flash_loan_governance", "expected_severity": "CRITICAL", "expected_lyapunov_min": 0.7},
"Harvest USDC Vault (Oracle Manipulation - $34M 2020)": {"address": "0x3461B89F2c334aE37e0C3c1bD4d9c027d939fBc9", "expected_type": "flash_loan_oracle", "expected_severity": "CRITICAL", "expected_lyapunov_min": 0.6},
"Compound cETH (Flash Loan Attack Surface - $150M 2021)":{"address": "0x4Ddc2D193948926D02f9B1fE9e1daa0718270ED5", "expected_type": "price_manipulation", "expected_severity": "CRITICAL", "expected_lyapunov_min": 0.6},
"Uniswap V1 imBTC Pool (Reentrancy ERC777 - $300K 2020)":{"address": "0x2a1530C4C41db0B0b2bB646CB5Eb1A67b7158667", "expected_type": "reentrancy", "expected_severity": "CRITICAL", "expected_lyapunov_min": 0.5},
"Nomad Bridge (Improper Initialization - $190M 2022)": {"address": "0x88A69B4E698A4B090DF6CF5Bd7B2D47325Ad30A3", "expected_type": "improper_initialization", "expected_severity": "CRITICAL", "expected_lyapunov_min": 0.7},
"Parity Multisig (Uninitialized Proxy - $150M 2017)": {"address": "0x863DF6BFa4469f3ead0bE8f9F2AAE51c91A907b4", "expected_type": "uninitialized_proxy", "expected_severity": "CRITICAL", "expected_lyapunov_min": 0.7},
"Euler Finance (Donation Attack - $197M 2023)": {"address": "0x27182842E098f60e3D576794A5bFFb0777E025d3", "expected_type": "donation_attack", "expected_severity": "CRITICAL", "expected_lyapunov_min": 0.8},
"Cream Finance (Read-Only Reentrancy - $130M 2021)": {"address": "0x797AAB1ce7c01eB727ab980762bA88e7133d2157", "expected_type": "read_only_reentrancy", "expected_severity": "HIGH", "expected_lyapunov_min": 0.5},
"Multichain (Access Control - $126M 2023)": {"address": "0xC10Ef9F491C9B59f936957026020C321651ac078", "expected_type": "access_control", "expected_severity": "CRITICAL", "expected_lyapunov_min": 0.6},
}
# Full LOCAL_SOURCE and audit logic: see zkaedi_audit.py in this repo
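
# The alias map suggests how free-text findings get normalized to a canonical
# vulnerability type before being checked against TARGETS. The full audit logic
# is elided above; what follows is only a hedged sketch of one plausible matching
# step, using the two dicts defined in this file (the function name and the
# example finding are illustrative, not the repo's actual implementation):

```python
def classify_finding(text: str) -> str | None:
    """Map a free-text finding to a canonical vuln type via substring aliases."""
    lowered = text.lower()
    for vuln_type, aliases in VULN_TYPE_ALIASES.items():
        if any(alias in lowered for alias in aliases):
            return vuln_type
    return None

# Example: check a model-generated finding against an expected target label.
finding = "withdraw() makes a recursive call before updating balances"
expected = TARGETS["DAO (Reentrancy - $60M 2016)"]["expected_type"]
print(classify_finding(finding) == expected)  # True: "recursive call" -> reentrancy
```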