初始化项目,由ModelHub XC社区提供模型

Model: ferrotorch/smollm-135m
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-06-04 17:40:17 +08:00
commit fe9de6e7f1
10 changed files with 98603 additions and 0 deletions

35
.gitattributes vendored Normal file
View File

@@ -0,0 +1,35 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text

73
README.md Normal file
View File

@@ -0,0 +1,73 @@
---
license: apache-2.0
tags:
- text-generation
- llama
- ferrotorch
---
# `ferrotorch/smollm-135m`
SmolLM-135M (HuggingFaceTB/SmolLM-135M). Llama-architecture causal LM, 135M parameters, 30 layers / 9 q-heads / 3 kv-heads (GQA), hidden=576, intermediate=1536, vocab=49152, tie_word_embeddings=true, rope_theta=10000.0. Apache 2.0 license. Pinned as the real-artifact baseline for causal LM parity vs `transformers==4.50.3` (#1147).
## Provenance
* Upstream: `HuggingFaceTB/SmolLM-135M` (apache-2.0).
* Conversion script: [`ferrotorch/scripts/pin_pretrained_llm_weights.py`](https://github.com/dollspace/ferrotorch/blob/main/scripts/pin_pretrained_llm_weights.py).
* Ferrotorch issue: <https://github.com/dollspace/ferrotorch/issues/1147>.
* Number of trainable parameters: **134,515,008**.
* SHA-256 of `model.safetensors` (this file is pinned in
`ferrotorch-hub/src/registry.rs`): `c7a387d6fe81ca6dd304aeb809bda3932ff1bbef3ca41c9484502f2f448dc093`.
* Config snapshot: hidden=576, layers=30,
heads=9, kv_heads=3,
intermediate=1536, vocab=49152,
tie_word_embeddings=True,
rope_theta=10000.0,
rms_norm_eps=1e-05.
## Value-parity probe
Three extra files are uploaded so the ferrotorch-side harness can
reproduce the parity verdict without re-running the upstream
transformers model:
* `_value_parity_input.txt` — the verbatim prompt string the
harness tokenizes (`"The quick brown fox jumps over the lazy"`).
* `_value_parity_token_ids.json` — the tokenizer's output for that
prompt (with the upstream tokenizer's `add_special_tokens=True`).
* `_value_parity_output.bin` — float32 logits dumped from a fresh
`transformers.AutoModelForCausalLM.from_pretrained(..., torch_dtype=torch.float32)`
single-prefill forward pass on those token ids (no cache).
Format: `[u32 ndim][u32 × ndim shape][f32 × prod(shape) data]`
little-endian; identical layout to the vision-side dumps.
## How to load
```rust
use ferrotorch_hub::load_pretrained;
use ferrotorch_llama::{LlamaConfig, LlamaForCausalLM};
use ferrotorch_hub::HfTransformerConfig;
let state = load_pretrained::<f32>("smollm-135m")?;
let hf_cfg = HfTransformerConfig::from_file("config.json")?;
let cfg = LlamaConfig::from_hf(&hf_cfg)?;
let mut model = LlamaForCausalLM::<f32>::new(cfg)?;
model.load_hf_state_dict(&state, /* strict = */ true)?;
```
## Upstream license
```
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
```

1
_value_parity_input.txt Normal file
View File

@@ -0,0 +1 @@
The quick brown fox jumps over the lazy

3
_value_parity_output.bin Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c81524998b96290832c3dd8ae62b4c910ea9a561b350f55e2e4f9c14f5cc2b8d
size 1572880

View File

@@ -0,0 +1 @@
[504, 2365, 6354, 16438, 27003, 690, 260, 23790]

29
config.json Normal file
View File

@@ -0,0 +1,29 @@
{
"_name_or_path": "/fsx/elie_bakouch/checkpoints/final-149M/600000",
"architectures": [
"LlamaForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 0,
"eos_token_id": 0,
"hidden_act": "silu",
"hidden_size": 576,
"initializer_range": 0.02,
"intermediate_size": 1536,
"max_position_embeddings": 2048,
"mlp_bias": false,
"model_type": "llama",
"num_attention_heads": 9,
"num_hidden_layers": 30,
"num_key_value_heads": 3,
"pretraining_tp": 1,
"rms_norm_eps": 1e-05,
"rope_scaling": null,
"rope_theta": 10000.0,
"tie_word_embeddings": true,
"torch_dtype": "bfloat16",
"transformers_version": "4.41.2",
"use_cache": true,
"vocab_size": 49152
}

3
model.safetensors Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c7a387d6fe81ca6dd304aeb809bda3932ff1bbef3ca41c9484502f2f448dc093
size 538090408

42
special_tokens_map.json Normal file
View File

@@ -0,0 +1,42 @@
{
"additional_special_tokens": [
"<|endoftext|>",
"<|im_start|>",
"<|im_end|>",
"<repo_name>",
"<reponame>",
"<file_sep>",
"<filename>",
"<gh_stars>",
"<issue_start>",
"<issue_comment>",
"<issue_closed>",
"<jupyter_start>",
"<jupyter_text>",
"<jupyter_code>",
"<jupyter_output>",
"<jupyter_script>",
"<empty_output>"
],
"bos_token": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"eos_token": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"unk_token": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}

98249
tokenizer.json Normal file

File diff suppressed because it is too large Load Diff

167
tokenizer_config.json Normal file
View File

@@ -0,0 +1,167 @@
{
"add_prefix_space": false,
"added_tokens_decoder": {
"0": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<|im_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"3": {
"content": "<repo_name>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"4": {
"content": "<reponame>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"5": {
"content": "<file_sep>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"6": {
"content": "<filename>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"7": {
"content": "<gh_stars>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"8": {
"content": "<issue_start>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"9": {
"content": "<issue_comment>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"10": {
"content": "<issue_closed>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"11": {
"content": "<jupyter_start>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"12": {
"content": "<jupyter_text>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"13": {
"content": "<jupyter_code>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14": {
"content": "<jupyter_output>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"15": {
"content": "<jupyter_script>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"16": {
"content": "<empty_output>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [
"<|endoftext|>",
"<|im_start|>",
"<|im_end|>",
"<repo_name>",
"<reponame>",
"<file_sep>",
"<filename>",
"<gh_stars>",
"<issue_start>",
"<issue_comment>",
"<issue_closed>",
"<jupyter_start>",
"<jupyter_text>",
"<jupyter_code>",
"<jupyter_output>",
"<jupyter_script>",
"<empty_output>"
],
"bos_token": "<|endoftext|>",
"clean_up_tokenization_spaces": false,
"eos_token": "<|endoftext|>",
"model_max_length": 1000000000000000019884624838656,
"tokenizer_class": "GPT2Tokenizer",
"unk_token": "<|endoftext|>",
"vocab_size": 49152
}