初始化项目,由ModelHub XC社区提供模型

Model: nisten/Biggie-SmoLlm-0.4B
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-05-12 21:01:52 +08:00
commit d09c411e33
13 changed files with 147408 additions and 0 deletions

3
.gitattributes vendored Normal file
View File

@@ -0,0 +1,3 @@
biggie_trained_5bit.gguf filter=lfs diff=lfs merge=lfs -text
biggie_trained_4bit.gguf filter=lfs diff=lfs merge=lfs -text
biggie_trained_2bit.gguf filter=lfs diff=lfs merge=lfs -text

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f43c27a28456e93ca9ec1e9488d9f197457948793fad5382572d95e498c700d7
size 478351744

21
README.md Normal file
View File

@@ -0,0 +1,21 @@
---
base_model: HuggingFaceTB/SmolLM-360M-Instruct
---
###Coherent Frankenstein of smolLm-0.36b upped to 0.4b
This took about 5 hours of semi-automated continuous merging to figure out the recipe.
Model is smarter, and UNTRAINED. Uploaded it for training. Yet it performs well as is even quantized to 8bit.
8bit gguf included for testing.
```bash
wget https://huggingface.co/nisten/Biggie-SmoLlm-0.4B/resolve/main/Biggie_SmolLM_400M_q8_0.gguf
```
```verilog
./llama-cli -ngl 99 -co --temp 0 -p "How to build a city on Mars via calculating Aldrin-Cycler orbits?" -m Biggie_SmolLM_400M_q8_0.gguf -cnv -fa --keep -1
```
![image/png](https://cdn-uploads.huggingface.co/production/uploads/6379683a81c1783a4a2ddba8/XgF2kz3Zz0Jqz7BEVZ96h.png)

3
biggie_trained_2bit.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:06eb7e7b92b30a27a23f8efa1439f490c7bcc5e6f8895682bbe1830a048f5b50
size 228460064

3
biggie_trained_4bit.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:003ae79c4a3ea090c4fed367ad411a29c81bb968cc5ad5df43db7a407669159b
size 306892064

3
biggie_trained_5bit.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:efc6502de6a9ae1acb385ba703eb9e526ca3897ff6cb7089639b3756937b2ef2
size 346866464

30
config.json Normal file
View File

@@ -0,0 +1,30 @@
{
"_name_or_path": "/Users/n/hf/SmolLM-360M-Instruct",
"architectures": [
"LlamaForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 1,
"eos_token_id": 2,
"hidden_act": "silu",
"hidden_size": 960,
"initializer_range": 0.02,
"intermediate_size": 2560,
"max_position_embeddings": 2048,
"mlp_bias": false,
"model_type": "llama",
"num_attention_heads": 15,
"num_hidden_layers": 36,
"num_key_value_heads": 5,
"pad_token_id": 2,
"pretraining_tp": 1,
"rms_norm_eps": 1e-05,
"rope_scaling": null,
"rope_theta": 10000.0,
"tie_word_embeddings": true,
"torch_dtype": "bfloat16",
"transformers_version": "4.43.3",
"use_cache": true,
"vocab_size": 49152
}

48901
merges.txt Normal file

File diff suppressed because it is too large Load Diff

3
pytorch_model.bin Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:26cb4687dd1eea3cec627535feeb782ab0916a09ada9e5a8df70728c571455ca
size 723767570

34
special_tokens_map.json Normal file
View File

@@ -0,0 +1,34 @@
{
"additional_special_tokens": [
"<|im_start|>",
"<|im_end|>"
],
"bos_token": {
"content": "<|im_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"eos_token": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"unk_token": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}

98249
tokenizer.json Normal file

File diff suppressed because it is too large Load Diff

154
tokenizer_config.json Normal file
View File

@@ -0,0 +1,154 @@
{
"add_prefix_space": false,
"added_tokens_decoder": {
"0": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<|im_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"3": {
"content": "<repo_name>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"4": {
"content": "<reponame>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"5": {
"content": "<file_sep>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"6": {
"content": "<filename>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"7": {
"content": "<gh_stars>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"8": {
"content": "<issue_start>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"9": {
"content": "<issue_comment>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"10": {
"content": "<issue_closed>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"11": {
"content": "<jupyter_start>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"12": {
"content": "<jupyter_text>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"13": {
"content": "<jupyter_code>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14": {
"content": "<jupyter_output>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"15": {
"content": "<jupyter_script>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"16": {
"content": "<empty_output>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [
"<|im_start|>",
"<|im_end|>"
],
"bos_token": "<|im_start|>",
"chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
"clean_up_tokenization_spaces": false,
"eos_token": "<|im_end|>",
"model_max_length": 2048,
"pad_token": "<|im_end|>",
"tokenizer_class": "GPT2Tokenizer",
"unk_token": "<|endoftext|>",
"vocab_size": 49152
}

1
vocab.json Normal file

File diff suppressed because one or more lines are too long