初始化项目，由ModelHub XC社区提供模型

Model: nisten/Biggie-SmoLlm-0.4B Source: Original Platform
2026-05-12 21:01:52 +08:00
commit d09c411e33
13 changed files with 147408 additions and 0 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1,3 @@
 biggie_trained_5bit.gguf filter=lfs diff=lfs merge=lfs -text
 biggie_trained_4bit.gguf filter=lfs diff=lfs merge=lfs -text
 biggie_trained_2bit.gguf filter=lfs diff=lfs merge=lfs -text
--- a/Biggie_SmolLM_400M_q8_0.gguf
+++ b/Biggie_SmolLM_400M_q8_0.gguf
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:f43c27a28456e93ca9ec1e9488d9f197457948793fad5382572d95e498c700d7
 size 478351744
--- a/README.md
+++ b/README.md
@@ -0,0 +1,21 @@
 ---
 base_model: HuggingFaceTB/SmolLM-360M-Instruct
 ---
 ### Coherent Frankenstein of SmolLM-0.36B upped to 0.4B
 This took about 5 hours of semi-automated continuous merging to figure out the recipe.
 The model is smarter, yet UNTRAINED; it was uploaded so that it can be trained. Even so, it performs well as is, even when quantized to 8-bit.
 An 8-bit GGUF is included for testing.
 ```bash
 wget https://huggingface.co/nisten/Biggie-SmoLlm-0.4B/resolve/main/Biggie_SmolLM_400M_q8_0.gguf
 ```
 ```bash
 ./llama-cli -ngl 99 -co --temp 0 -p "How to build a city on Mars via calculating Aldrin-Cycler orbits?" -m Biggie_SmolLM_400M_q8_0.gguf -cnv -fa --keep -1
 ```
 ![image/png](https://cdn-uploads.huggingface.co/production/uploads/6379683a81c1783a4a2ddba8/XgF2kz3Zz0Jqz7BEVZ96h.png)
--- a/biggie_trained_2bit.gguf
+++ b/biggie_trained_2bit.gguf
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:06eb7e7b92b30a27a23f8efa1439f490c7bcc5e6f8895682bbe1830a048f5b50
 size 228460064
--- a/biggie_trained_4bit.gguf
+++ b/biggie_trained_4bit.gguf
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:003ae79c4a3ea090c4fed367ad411a29c81bb968cc5ad5df43db7a407669159b
 size 306892064
--- a/biggie_trained_5bit.gguf
+++ b/biggie_trained_5bit.gguf
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:efc6502de6a9ae1acb385ba703eb9e526ca3897ff6cb7089639b3756937b2ef2
 size 346866464
--- a/config.json
+++ b/config.json
@@ -0,0 +1,30 @@
 {
  "_name_or_path": "/Users/n/hf/SmolLM-360M-Instruct",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 960,
  "initializer_range": 0.02,
  "intermediate_size": 2560,
  "max_position_embeddings": 2048,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 15,
  "num_hidden_layers": 36,
  "num_key_value_heads": 5,
  "pad_token_id": 2,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": true,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.43.3",
  "use_cache": true,
  "vocab_size": 49152
 }
--- a/merges.txt
+++ b/merges.txt
--- a/pytorch_model.bin
+++ b/pytorch_model.bin
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:26cb4687dd1eea3cec627535feeb782ab0916a09ada9e5a8df70728c571455ca
 size 723767570
--- a/special_tokens_map.json
+++ b/special_tokens_map.json
@@ -0,0 +1,34 @@
 {
  "additional_special_tokens": [
    "<|im_start|>",
    "<|im_end|>"
  ],
  "bos_token": {
    "content": "<|im_start|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "<|im_end|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<|im_end|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
 }
--- a/tokenizer.json
+++ b/tokenizer.json
--- a/tokenizer_config.json
+++ b/tokenizer_config.json
@@ -0,0 +1,154 @@
 {
  "add_prefix_space": false,
  "added_tokens_decoder": {
    "0": {
      "content": "<|endoftext|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<|im_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "<|im_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "3": {
      "content": "<repo_name>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "4": {
      "content": "<reponame>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "5": {
      "content": "<file_sep>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "6": {
      "content": "<filename>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "7": {
      "content": "<gh_stars>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "8": {
      "content": "<issue_start>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "9": {
      "content": "<issue_comment>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "10": {
      "content": "<issue_closed>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "11": {
      "content": "<jupyter_start>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "12": {
      "content": "<jupyter_text>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "13": {
      "content": "<jupyter_code>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "14": {
      "content": "<jupyter_output>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "15": {
      "content": "<jupyter_script>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "16": {
      "content": "<empty_output>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "additional_special_tokens": [
    "<|im_start|>",
    "<|im_end|>"
  ],
  "bos_token": "<|im_start|>",
  "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
  "clean_up_tokenization_spaces": false,
  "eos_token": "<|im_end|>",
  "model_max_length": 2048,
  "pad_token": "<|im_end|>",
  "tokenizer_class": "GPT2Tokenizer",
  "unk_token": "<|endoftext|>",
  "vocab_size": 49152
 }
--- a/vocab.json
+++ b/vocab.json