初始化项目,由ModelHub XC社区提供模型

Model: Josephgflowers/Tinyllama-1.5B-Cinder-Test-4
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-05-22 00:52:12 +08:00
commit c977dccde2
15 changed files with 391 additions and 0 deletions

49
.gitattributes vendored Normal file
View File

@@ -0,0 +1,49 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bin.* filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zstandard filter=lfs diff=lfs merge=lfs -text
*.tfevents* filter=lfs diff=lfs merge=lfs -text
*.db* filter=lfs diff=lfs merge=lfs -text
*.ark* filter=lfs diff=lfs merge=lfs -text
**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text
**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text
**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.gguf* filter=lfs diff=lfs merge=lfs -text
*.ggml filter=lfs diff=lfs merge=lfs -text
*.llamafile* filter=lfs diff=lfs merge=lfs -text
*.pt2 filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
tokenizer.json filter=lfs diff=lfs merge=lfs -text

5
README.md Normal file
View File

@@ -0,0 +1,5 @@
---
license: mit
---
This is a depth-upscaled model built from the 616M Cinder model and Cinder v2. This model still needs further training; I'm putting it up for testing. More information coming. Maybe. Lol. Here is a brief description of the project: I'm mixing a lot of techniques that I found interesting and have been testing. HF Cosmo is not great but decent, and it was fully trained in 4 days using a mix of more fine-tuned, directed datasets and some synthetic textbook-style datasets. So I used pruning and a similar data mix to Cosmo's on TinyLlama (which was trained on a ton of data for an extended time for its size) to keep the TinyLlama model coherent during pruning. Now I am trying to depth-upscale it using my pruned model and an original, taking a majority of the layers of each and combining them to create a larger model. Then it needs more training, then fine-tuning. Then, theoretically, it will be a well-performing 1.5B model (that didn't need full-scale training). Test 2: some training, re-depth-upscaled with Cinder Reason 1.3B, merged back with the 1.5B model, and trained slightly. Training continues from this model for the next iteration. This is the next iteration, with one pass over MetaMath and step-by-step reasoning with Cinder.
And a little of Textbooks Are All You Need Lite.

29
config.json Normal file
View File

@@ -0,0 +1,29 @@
{
"_name_or_path": "/home/joe/Downloads/so_much",
"architectures": [
"LlamaForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 1,
"eos_token_id": 2,
"hidden_act": "silu",
"hidden_size": 2048,
"initializer_range": 0.02,
"intermediate_size": 5632,
"max_position_embeddings": 2048,
"model_type": "llama",
"num_attention_heads": 32,
"num_hidden_layers": 31,
"num_key_value_heads": 4,
"pretraining_tp": 1,
"rms_norm_eps": 1e-05,
"rope_scaling": null,
"rope_theta": 10000.0,
"tie_word_embeddings": false,
"torch_dtype": "float16",
"transformers_version": "4.38.0.dev0",
"unsloth_version": "2024.1",
"use_cache": false,
"vocab_size": 32000
}

1
configuration.json Normal file
View File

@@ -0,0 +1 @@
{"framework": "pytorch", "task": "text-generation", "allow_remote": true}

7
generation_config.json Normal file
View File

@@ -0,0 +1,7 @@
{
"_from_model_config": true,
"bos_token_id": 1,
"eos_token_id": 2,
"transformers_version": "4.38.0.dev0",
"use_cache": false
}

3
model.safetensors Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ef32ccef672d5de3e5f2be43e0a1be3cb7061cc11bd8751c0c818f751415bfe8
size 2992926176

3
optimizer.pt Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b6c6a9e7b695f7687e731b7a15386f2a736e885cb744ab237ee84633b183ff84
size 5454522

3
rng_state.pth Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:386fcc8cc1089aade9450d86fb239ea3483f455fd2d78d8378645feecfec9d69
size 14244

3
scheduler.pt Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3f9625c4bd6137497fdc071f2ef3c6109f815959a4f84d167e89493c9fe28650
size 1064

30
special_tokens_map.json Normal file
View File

@@ -0,0 +1,30 @@
{
"bos_token": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"eos_token": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"unk_token": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}

3
tokenizer.json Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bcd04f0eadf90287bd26e1a183ac487d8a141b09b06aecb7725bbdd343640f2e
size 1842767

3
tokenizer.model Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
size 499723

46
tokenizer_config.json Normal file
View File

@@ -0,0 +1,46 @@
{
"add_bos_token": true,
"add_eos_token": false,
"added_tokens_decoder": {
"0": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"bos_token": "<s>",
"chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
"clean_up_tokenization_spaces": false,
"eos_token": "</s>",
"legacy": false,
"max_length": 2048,
"model_max_length": 2048,
"pad_token": "</s>",
"padding_side": "right",
"sp_model_kwargs": {},
"stride": 0,
"tokenizer_class": "LlamaTokenizer",
"truncation_side": "right",
"truncation_strategy": "longest_first",
"unk_token": "<unk>",
"use_default_system_prompt": false
}

203
trainer_state.json Normal file
View File

@@ -0,0 +1,203 @@
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.056700482783555585,
"eval_steps": 500,
"global_step": 2666,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 0.9096835851669312,
"learning_rate": 4.989366000978328e-05,
"loss": 0.5573,
"step": 100
},
{
"epoch": 0.0,
"grad_norm": 0.8342915177345276,
"learning_rate": 4.978732001956656e-05,
"loss": 0.5416,
"step": 200
},
{
"epoch": 0.01,
"grad_norm": 1.401437759399414,
"learning_rate": 4.968098002934984e-05,
"loss": 0.5475,
"step": 300
},
{
"epoch": 0.01,
"grad_norm": 0.7456225156784058,
"learning_rate": 4.957464003913312e-05,
"loss": 0.5302,
"step": 400
},
{
"epoch": 0.01,
"grad_norm": 0.966705858707428,
"learning_rate": 4.94683000489164e-05,
"loss": 0.5257,
"step": 500
},
{
"epoch": 0.01,
"grad_norm": 0.8829948902130127,
"learning_rate": 4.936302345860184e-05,
"loss": 0.5185,
"step": 600
},
{
"epoch": 0.01,
"grad_norm": 0.8967320322990417,
"learning_rate": 4.925668346838512e-05,
"loss": 0.5195,
"step": 700
},
{
"epoch": 0.02,
"grad_norm": 0.8852934241294861,
"learning_rate": 4.9151406878070574e-05,
"loss": 0.5117,
"step": 800
},
{
"epoch": 0.02,
"grad_norm": 1.0393266677856445,
"learning_rate": 4.9045066887853846e-05,
"loss": 0.5027,
"step": 900
},
{
"epoch": 0.02,
"grad_norm": 1.016802191734314,
"learning_rate": 4.8938726897637125e-05,
"loss": 0.5099,
"step": 1000
},
{
"epoch": 0.02,
"grad_norm": 1.0666687488555908,
"learning_rate": 4.8832386907420405e-05,
"loss": 0.5129,
"step": 1100
},
{
"epoch": 0.03,
"grad_norm": 1.0762836933135986,
"learning_rate": 4.872604691720369e-05,
"loss": 0.5277,
"step": 1200
},
{
"epoch": 0.03,
"grad_norm": 0.8559880256652832,
"learning_rate": 4.861970692698696e-05,
"loss": 0.4987,
"step": 1300
},
{
"epoch": 0.03,
"grad_norm": 1.0834448337554932,
"learning_rate": 4.851336693677024e-05,
"loss": 0.5171,
"step": 1400
},
{
"epoch": 0.03,
"grad_norm": 0.9118117690086365,
"learning_rate": 4.840702694655352e-05,
"loss": 0.5073,
"step": 1500
},
{
"epoch": 0.03,
"grad_norm": 1.029111623764038,
"learning_rate": 4.830068695633681e-05,
"loss": 0.5085,
"step": 1600
},
{
"epoch": 0.04,
"grad_norm": 0.9440446496009827,
"learning_rate": 4.819434696612008e-05,
"loss": 0.4978,
"step": 1700
},
{
"epoch": 0.04,
"grad_norm": 0.8061498403549194,
"learning_rate": 4.808800697590336e-05,
"loss": 0.4859,
"step": 1800
},
{
"epoch": 0.04,
"grad_norm": 1.1587491035461426,
"learning_rate": 4.798166698568664e-05,
"loss": 0.5029,
"step": 1900
},
{
"epoch": 0.04,
"grad_norm": 0.8422715067863464,
"learning_rate": 4.787532699546992e-05,
"loss": 0.507,
"step": 2000
},
{
"epoch": 0.04,
"grad_norm": 1.0634592771530151,
"learning_rate": 4.77689870052532e-05,
"loss": 0.4967,
"step": 2100
},
{
"epoch": 0.05,
"grad_norm": 1.1594743728637695,
"learning_rate": 4.7662647015036477e-05,
"loss": 0.4993,
"step": 2200
},
{
"epoch": 0.05,
"grad_norm": 1.0729972124099731,
"learning_rate": 4.7556307024819756e-05,
"loss": 0.496,
"step": 2300
},
{
"epoch": 0.05,
"grad_norm": 1.0069080591201782,
"learning_rate": 4.7449967034603035e-05,
"loss": 0.5016,
"step": 2400
},
{
"epoch": 0.05,
"grad_norm": 0.9722279906272888,
"learning_rate": 4.7343627044386314e-05,
"loss": 0.4883,
"step": 2500
},
{
"epoch": 0.06,
"grad_norm": 0.760681688785553,
"learning_rate": 4.7237287054169594e-05,
"loss": 0.4899,
"step": 2600
}
],
"logging_steps": 100,
"max_steps": 47019,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 1333,
"total_flos": 7.500219014216417e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}

3
training_args.bin Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e57c4dc9131f9bf65b5c29340629c4a87fa288a08d427753cdc7fbe9a295c893
size 4920