初始化项目,由ModelHub XC社区提供模型

Model: RichardErkhov/apple_-_OpenELM-1_1B-gguf
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-06-04 04:10:17 +08:00
commit b155e64ec3
24 changed files with 357 additions and 0 deletions

57
.gitattributes vendored Normal file
View File

@@ -0,0 +1,57 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
OpenELM-1_1B.Q2_K.gguf filter=lfs diff=lfs merge=lfs -text
OpenELM-1_1B.IQ3_XS.gguf filter=lfs diff=lfs merge=lfs -text
OpenELM-1_1B.IQ3_S.gguf filter=lfs diff=lfs merge=lfs -text
OpenELM-1_1B.Q3_K_S.gguf filter=lfs diff=lfs merge=lfs -text
OpenELM-1_1B.IQ3_M.gguf filter=lfs diff=lfs merge=lfs -text
OpenELM-1_1B.Q3_K.gguf filter=lfs diff=lfs merge=lfs -text
OpenELM-1_1B.Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text
OpenELM-1_1B.Q3_K_L.gguf filter=lfs diff=lfs merge=lfs -text
OpenELM-1_1B.IQ4_XS.gguf filter=lfs diff=lfs merge=lfs -text
OpenELM-1_1B.Q4_0.gguf filter=lfs diff=lfs merge=lfs -text
OpenELM-1_1B.IQ4_NL.gguf filter=lfs diff=lfs merge=lfs -text
OpenELM-1_1B.Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text
OpenELM-1_1B.Q4_K.gguf filter=lfs diff=lfs merge=lfs -text
OpenELM-1_1B.Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
OpenELM-1_1B.Q4_1.gguf filter=lfs diff=lfs merge=lfs -text
OpenELM-1_1B.Q5_0.gguf filter=lfs diff=lfs merge=lfs -text
OpenELM-1_1B.Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text
OpenELM-1_1B.Q5_K.gguf filter=lfs diff=lfs merge=lfs -text
OpenELM-1_1B.Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text
OpenELM-1_1B.Q5_1.gguf filter=lfs diff=lfs merge=lfs -text
OpenELM-1_1B.Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
OpenELM-1_1B.Q8_0.gguf filter=lfs diff=lfs merge=lfs -text

3
OpenELM-1_1B.IQ3_M.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f69a8de904510c78eb42eb3b21c3a9b30d5c90e1647712c13b2006288df4a358
size 521805856

3
OpenELM-1_1B.IQ3_S.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:905d8da32fe98038843743b1cafcc6acc945d76155e423119998ecde5bb3616f
size 490784800

3
OpenELM-1_1B.IQ3_XS.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9daeda110f2bc58a5188893556f17fd8da51203251788215f45d596ebbc35f8c
size 469403680

3
OpenELM-1_1B.IQ4_NL.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5a02e3226ac97682b30f9529498f260cb0b82235a526877f56c89fdf42549547
size 626995232

3
OpenELM-1_1B.IQ4_XS.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b1c103322d78727eaedc90d92a2c3d8a1dee7cfa829e49aea8aaf37ef5bad1bc
size 595677216

3
OpenELM-1_1B.Q2_K.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9179907fdf9862e5d35a297f9a5f3df550de71763aed75d4493b33964eb59150
size 423612448

3
OpenELM-1_1B.Q3_K.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5ffa20828175a6cf7776c2c1966b0aa91b8c6c6a54f50c6549e916b2ffefd194
size 555560992

3
OpenELM-1_1B.Q3_K_L.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6cbe1e251c097b428701a94b4b34c18ac88b4402a7866abfaad0ba45063006df
size 599404576

3
OpenELM-1_1B.Q3_K_M.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5ffa20828175a6cf7776c2c1966b0aa91b8c6c6a54f50c6549e916b2ffefd194
size 555560992

3
OpenELM-1_1B.Q3_K_S.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8589ce4540b74d987212151966deaebb1a10cc5a98cab90eaa0df4b452786bfa
size 490784800

3
OpenELM-1_1B.Q4_0.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:88f114edd73e12d9ddeaa5117d12ca3952596124dc39ce7c70bb902d7560a5c3
size 625487904

3
OpenELM-1_1B.Q4_1.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7b407997d3e5226737410f555f107d2dbf2f1d8f352b043e20b1b468f3497a9b
size 688877600

3
OpenELM-1_1B.Q4_K.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:454e2f73370a8ec705180234e1ed9ea232e65f5556295ed6d2f915b312e77a13
size 672919584

3
OpenELM-1_1B.Q4_K_M.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:454e2f73370a8ec705180234e1ed9ea232e65f5556295ed6d2f915b312e77a13
size 672919584

3
OpenELM-1_1B.Q4_K_S.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1d08c8b9770f9ca5ad9c04231cb1be304156add6f66957f1bc4b45d564827c93
size 626995232

3
OpenELM-1_1B.Q5_0.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8e7eb34c5fe4e4458d8b857bda0083c16ca0439b0076518d6db8c3c0468805f6
size 752267296

3
OpenELM-1_1B.Q5_1.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2ff1512bee2a1d360c284783fcc98023cf040e6f2f62640dd9e67cd548e7ab16
size 815656992

3
OpenELM-1_1B.Q5_K.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5a93137e6827039f56fb0634f38c313e178e194f2b6fc5563cf6a58adf9ad079
size 785795104

3
OpenELM-1_1B.Q5_K_M.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5a93137e6827039f56fb0634f38c313e178e194f2b6fc5563cf6a58adf9ad079
size 785795104

3
OpenELM-1_1B.Q5_K_S.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a3794c5ef9f6cc7a784cc59ff3116a3427170250ade149213727d7284dcb2177
size 752267296

3
OpenELM-1_1B.Q6_K.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:be817c4669234620e63854de570f14da99dae93ea6e7757a54ccc7e0dd312846
size 886970400

3
OpenELM-1_1B.Q8_0.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9c83a402a02ab424b5128bf22bd8c0bebf0b07a7cc771c55c72b11d833bec270
size 1148477472

234
README.md Normal file
View File

@@ -0,0 +1,234 @@
Quantization made by Richard Erkhov.
[Github](https://github.com/RichardErkhov)
[Discord](https://discord.gg/pvy7H8DZMG)
[Request more models](https://github.com/RichardErkhov/quant_request)
OpenELM-1_1B - GGUF
- Model creator: https://huggingface.co/apple/
- Original model: https://huggingface.co/apple/OpenELM-1_1B/
| Name | Quant method | Size |
| ---- | ---- | ---- |
| [OpenELM-1_1B.Q2_K.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-1_1B-gguf/blob/main/OpenELM-1_1B.Q2_K.gguf) | Q2_K | 0.39GB |
| [OpenELM-1_1B.IQ3_XS.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-1_1B-gguf/blob/main/OpenELM-1_1B.IQ3_XS.gguf) | IQ3_XS | 0.44GB |
| [OpenELM-1_1B.IQ3_S.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-1_1B-gguf/blob/main/OpenELM-1_1B.IQ3_S.gguf) | IQ3_S | 0.46GB |
| [OpenELM-1_1B.Q3_K_S.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-1_1B-gguf/blob/main/OpenELM-1_1B.Q3_K_S.gguf) | Q3_K_S | 0.46GB |
| [OpenELM-1_1B.IQ3_M.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-1_1B-gguf/blob/main/OpenELM-1_1B.IQ3_M.gguf) | IQ3_M | 0.49GB |
| [OpenELM-1_1B.Q3_K.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-1_1B-gguf/blob/main/OpenELM-1_1B.Q3_K.gguf) | Q3_K | 0.52GB |
| [OpenELM-1_1B.Q3_K_M.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-1_1B-gguf/blob/main/OpenELM-1_1B.Q3_K_M.gguf) | Q3_K_M | 0.52GB |
| [OpenELM-1_1B.Q3_K_L.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-1_1B-gguf/blob/main/OpenELM-1_1B.Q3_K_L.gguf) | Q3_K_L | 0.56GB |
| [OpenELM-1_1B.IQ4_XS.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-1_1B-gguf/blob/main/OpenELM-1_1B.IQ4_XS.gguf) | IQ4_XS | 0.55GB |
| [OpenELM-1_1B.Q4_0.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-1_1B-gguf/blob/main/OpenELM-1_1B.Q4_0.gguf) | Q4_0 | 0.58GB |
| [OpenELM-1_1B.IQ4_NL.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-1_1B-gguf/blob/main/OpenELM-1_1B.IQ4_NL.gguf) | IQ4_NL | 0.58GB |
| [OpenELM-1_1B.Q4_K_S.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-1_1B-gguf/blob/main/OpenELM-1_1B.Q4_K_S.gguf) | Q4_K_S | 0.58GB |
| [OpenELM-1_1B.Q4_K.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-1_1B-gguf/blob/main/OpenELM-1_1B.Q4_K.gguf) | Q4_K | 0.63GB |
| [OpenELM-1_1B.Q4_K_M.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-1_1B-gguf/blob/main/OpenELM-1_1B.Q4_K_M.gguf) | Q4_K_M | 0.63GB |
| [OpenELM-1_1B.Q4_1.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-1_1B-gguf/blob/main/OpenELM-1_1B.Q4_1.gguf) | Q4_1 | 0.64GB |
| [OpenELM-1_1B.Q5_0.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-1_1B-gguf/blob/main/OpenELM-1_1B.Q5_0.gguf) | Q5_0 | 0.7GB |
| [OpenELM-1_1B.Q5_K_S.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-1_1B-gguf/blob/main/OpenELM-1_1B.Q5_K_S.gguf) | Q5_K_S | 0.7GB |
| [OpenELM-1_1B.Q5_K.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-1_1B-gguf/blob/main/OpenELM-1_1B.Q5_K.gguf) | Q5_K | 0.73GB |
| [OpenELM-1_1B.Q5_K_M.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-1_1B-gguf/blob/main/OpenELM-1_1B.Q5_K_M.gguf) | Q5_K_M | 0.73GB |
| [OpenELM-1_1B.Q5_1.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-1_1B-gguf/blob/main/OpenELM-1_1B.Q5_1.gguf) | Q5_1 | 0.76GB |
| [OpenELM-1_1B.Q6_K.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-1_1B-gguf/blob/main/OpenELM-1_1B.Q6_K.gguf) | Q6_K | 0.83GB |
| [OpenELM-1_1B.Q8_0.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-1_1B-gguf/blob/main/OpenELM-1_1B.Q8_0.gguf) | Q8_0 | 1.07GB |
Original model description:
---
license: other
license_name: apple-sample-code-license
license_link: LICENSE
---
# OpenELM
*Sachin Mehta, Mohammad Hossein Sekhavat, Qingqing Cao, Maxwell Horton, Yanzi Jin, Chenfan Sun, Iman Mirzadeh, Mahyar Najibi, Dmitry Belenko, Peter Zatloukal, Mohammad Rastegari*
We introduce **OpenELM**, a family of **Open** **E**fficient **L**anguage **M**odels. OpenELM uses a layer-wise scaling strategy to efficiently allocate parameters within each layer of the transformer model, leading to enhanced accuracy. We pretrained OpenELM models using the [CoreNet](https://github.com/apple/corenet) library. We release both pretrained and instruction tuned models with 270M, 450M, 1.1B and 3B parameters.
Our pre-training dataset contains RefinedWeb, deduplicated PILE, a subset of RedPajama, and a subset of Dolma v1.6, totaling approximately 1.8 trillion tokens. Please check license agreements and terms of these datasets before using them.
## Usage
We have provided an example function to generate output from OpenELM models loaded via [HuggingFace Hub](https://huggingface.co/docs/hub/) in `generate_openelm.py`.
You can try the model by running the following command:
```
python generate_openelm.py --model apple/OpenELM-1_1B --hf_access_token [HF_ACCESS_TOKEN] --prompt 'Once upon a time there was' --generate_kwargs repetition_penalty=1.2
```
Please refer to [this link](https://huggingface.co/docs/hub/security-tokens) to obtain your Hugging Face access token.
Additional arguments to the Hugging Face generate function can be passed via `generate_kwargs`. As an example, to speed up inference, you can try [lookup token speculative generation](https://huggingface.co/docs/transformers/generation_strategies) by passing the `prompt_lookup_num_tokens` argument as follows:
```
python generate_openelm.py --model apple/OpenELM-1_1B --hf_access_token [HF_ACCESS_TOKEN] --prompt 'Once upon a time there was' --generate_kwargs repetition_penalty=1.2 prompt_lookup_num_tokens=10
```
Alternatively, try model-wise speculative generation with an [assistive model](https://huggingface.co/blog/assisted-generation) by passing a smaller model through the `assistant_model` argument, for example:
```
python generate_openelm.py --model apple/OpenELM-1_1B --hf_access_token [HF_ACCESS_TOKEN] --prompt 'Once upon a time there was' --generate_kwargs repetition_penalty=1.2 --assistant_model [SMALLER_MODEL]
```
## Main Results
### Zero-Shot
| **Model Size** | **ARC-c** | **ARC-e** | **BoolQ** | **HellaSwag** | **PIQA** | **SciQ** | **WinoGrande** | **Average** |
|-----------------------------------------------------------------------------|-----------|-----------|-----------|---------------|-----------|-----------|----------------|-------------|
| [OpenELM-270M](https://huggingface.co/apple/OpenELM-270M) | 26.45 | 45.08 | **53.98** | 46.71 | 69.75 | **84.70** | **53.91** | 54.37 |
| [OpenELM-270M-Instruct](https://huggingface.co/apple/OpenELM-270M-Instruct) | **30.55** | **46.68** | 48.56 | **52.07** | **70.78** | 84.40 | 52.72 | **55.11** |
| [OpenELM-450M](https://huggingface.co/apple/OpenELM-450M) | 27.56 | 48.06 | 55.78 | 53.97 | 72.31 | 87.20 | 58.01 | 57.56 |
| [OpenELM-450M-Instruct](https://huggingface.co/apple/OpenELM-450M-Instruct) | **30.38** | **50.00** | **60.37** | **59.34** | **72.63** | **88.00** | **58.96** | **59.95** |
| [OpenELM-1_1B](https://huggingface.co/apple/OpenELM-1_1B) | 32.34 | **55.43** | 63.58 | 64.81 | **75.57** | **90.60** | 61.72 | 63.44 |
| [OpenELM-1_1B-Instruct](https://huggingface.co/apple/OpenELM-1_1B-Instruct) | **37.97** | 52.23 | **70.00** | **71.20** | 75.03 | 89.30 | **62.75** | **65.50** |
| [OpenELM-3B](https://huggingface.co/apple/OpenELM-3B) | 35.58 | 59.89 | 67.40 | 72.44 | 78.24 | **92.70** | 65.51 | 67.39 |
| [OpenELM-3B-Instruct](https://huggingface.co/apple/OpenELM-3B-Instruct) | **39.42** | **61.74** | **68.17** | **76.36** | **79.00** | 92.50 | **66.85** | **69.15** |
### OpenLLM Leaderboard
| **Model Size**                                                              | **ARC-c** | **HellaSwag** | **MMLU**  | **TruthfulQA** | **WinoGrande** | **Average** |
|-----------------------------------------------------------------------------|-----------|---------------|-----------|----------------|----------------|-------------|
| [OpenELM-270M](https://huggingface.co/apple/OpenELM-270M)                   | 27.65     | 47.15         | 25.72     | **39.24**      | **53.83**      | 38.72       |
| [OpenELM-270M-Instruct](https://huggingface.co/apple/OpenELM-270M-Instruct) | **32.51** | **51.58**     | **26.70** | 38.72          | 53.20          | **40.54**   |
| [OpenELM-450M](https://huggingface.co/apple/OpenELM-450M)                   | 30.20     | 53.86         | **26.01** | 40.18          | 57.22          | 41.50       |
| [OpenELM-450M-Instruct](https://huggingface.co/apple/OpenELM-450M-Instruct) | **33.53** | **59.31**     | 25.41     | **40.48**      | **58.33**      | **43.41**   |
| [OpenELM-1_1B](https://huggingface.co/apple/OpenELM-1_1B)                   | 36.69     | 65.71         | **27.05** | 36.98          | 63.22          | 45.93       |
| [OpenELM-1_1B-Instruct](https://huggingface.co/apple/OpenELM-1_1B-Instruct) | **41.55** | **71.83**     | 25.65     | **45.95**      | **64.72**      | **49.94**   |
| [OpenELM-3B](https://huggingface.co/apple/OpenELM-3B)                       | 42.24     | 73.28         | **26.76** | 34.98          | 67.25          | 48.90       |
| [OpenELM-3B-Instruct](https://huggingface.co/apple/OpenELM-3B-Instruct)     | **47.70** | **76.87**     | 24.80     | **38.76**      | **67.96**      | **51.22**   |
### LLM360
| **Model Size** | **ARC-c** | **CrowS-Pairs** | **HellaSwag** | **MMLU** | **PIQA** | **RACE** | **TruthfulQA** | **WinoGrande** | **Average** |
|-----------------------------------------------------------------------------|-----------|-----------------|---------------|-----------|-----------|-----------|----------------|----------------|-------------|
| [OpenELM-270M](https://huggingface.co/apple/OpenELM-270M) | 27.65 | **66.79** | 47.15 | 25.72 | 69.75 | 30.91 | **39.24** | **53.83** | 45.13 |
| [OpenELM-270M-Instruct](https://huggingface.co/apple/OpenELM-270M-Instruct) | **32.51** | 66.01 | **51.58** | **26.70** | **70.78** | 33.78 | 38.72 | 53.20 | **46.66** |
| [OpenELM-450M](https://huggingface.co/apple/OpenELM-450M) | 30.20 | **68.63** | 53.86 | **26.01** | 72.31 | 33.11 | 40.18 | 57.22 | 47.69 |
| [OpenELM-450M-Instruct](https://huggingface.co/apple/OpenELM-450M-Instruct) | **33.53** | 67.44 | **59.31** | 25.41 | **72.63** | **36.84** | **40.48** | **58.33** | **49.25** |
| [OpenELM-1_1B](https://huggingface.co/apple/OpenELM-1_1B) | 36.69 | **71.74** | 65.71 | **27.05** | **75.57** | 36.46 | 36.98 | 63.22 | 51.68 |
| [OpenELM-1_1B-Instruct](https://huggingface.co/apple/OpenELM-1_1B-Instruct) | **41.55** | 71.02 | **71.83** | 25.65 | 75.03 | **39.43** | **45.95** | **64.72** | **54.40** |
| [OpenELM-3B](https://huggingface.co/apple/OpenELM-3B) | 42.24 | **73.29** | 73.28 | **26.76** | 78.24 | **38.76** | 34.98 | 67.25 | 54.35 |
| [OpenELM-3B-Instruct](https://huggingface.co/apple/OpenELM-3B-Instruct) | **47.70** | 72.33 | **76.87** | 24.80 | **79.00** | 38.47 | **38.76** | **67.96** | **55.73** |
See the [technical report](https://arxiv.org/abs/2404.14619v1) for more results and comparisons.
## Evaluation
### Setup
Install the following dependencies:
```bash
# install public lm-eval-harness
harness_repo="public-lm-eval-harness"
git clone https://github.com/EleutherAI/lm-evaluation-harness "${harness_repo}"
cd "${harness_repo}"
# use main branch on 03-15-2024, SHA is dc90fec
git checkout dc90fec
pip install -e .
cd ..
# 66d6242 is the main branch on 2024-04-01
pip install datasets@git+https://github.com/huggingface/datasets.git@66d6242
# quote the version specifiers: an unquoted ">" is a shell redirection, which
# would drop the constraint and create files named "=0.15.2", "=4.38.2", "=0.2.0"
pip install 'tokenizers>=0.15.2' 'transformers>=4.38.2' 'sentencepiece>=0.2.0'
```
### Evaluate OpenELM
```bash
# OpenELM-1_1B
hf_model=apple/OpenELM-1_1B
tokenizer=meta-llama/Llama-2-7b-hf
# lm-eval-harness sets add_bos_token to False by default, but OpenELM uses the
# LLaMA tokenizer, which requires add_bos_token to be True
add_bos_token=True
batch_size=1
# -p: do not fail when the output directory already exists (e.g. on a re-run)
mkdir -p lm_eval_output

# Run lm_eval for one few-shot setting.
#   $1  number of few-shot examples
#   $2  comma-separated list of lm-eval task names
# Results go to ./lm_eval_output/<run_name>; combined stdout/stderr is also
# copied to ./lm_eval_output/eval-<run_name>.log.
run_eval() {
  local shot=$1
  local task=$2
  # "/" in the model id and "," in the task list are replaced with "_"
  local run_name="${hf_model//\//_}_${task//,/_}-${shot}shot"
  lm_eval --model hf \
    --model_args "pretrained=${hf_model},trust_remote_code=True,add_bos_token=${add_bos_token},tokenizer=${tokenizer}" \
    --tasks "${task}" \
    --device cuda:0 \
    --num_fewshot "${shot}" \
    --output_path "./lm_eval_output/${run_name}" \
    --batch_size "${batch_size}" 2>&1 | tee "./lm_eval_output/eval-${run_name}.log"
}

run_eval 0 arc_challenge,arc_easy,boolq,hellaswag,piqa,race,winogrande,sciq,truthfulqa_mc2
run_eval 5 mmlu,winogrande
run_eval 25 arc_challenge,crows_pairs_english
run_eval 10 hellaswag
```
## Bias, Risks, and Limitations
The release of OpenELM models aims to empower and enrich the open research community by providing access to state-of-the-art language models. Trained on publicly available datasets, these models are made available without any safety guarantees. Consequently, there exists the possibility of these models producing outputs that are inaccurate, harmful, biased, or objectionable in response to user prompts. Thus, it is imperative for users and developers to undertake thorough safety testing and implement appropriate filtering mechanisms tailored to their specific requirements.
## Citation
If you find our work useful, please cite:
```BibTex
@article{mehtaOpenELMEfficientLanguage2024,
title = {{OpenELM}: {An} {Efficient} {Language} {Model} {Family} with {Open} {Training} and {Inference} {Framework}},
shorttitle = {{OpenELM}},
url = {https://arxiv.org/abs/2404.14619v1},
language = {en},
urldate = {2024-04-24},
journal = {arXiv.org},
author = {Mehta, Sachin and Sekhavat, Mohammad Hossein and Cao, Qingqing and Horton, Maxwell and Jin, Yanzi and Sun, Chenfan and Mirzadeh, Iman and Najibi, Mahyar and Belenko, Dmitry and Zatloukal, Peter and Rastegari, Mohammad},
month = apr,
year = {2024},
}
@inproceedings{mehta2022cvnets,
author = {Mehta, Sachin and Abdolhosseini, Farzad and Rastegari, Mohammad},
title = {CVNets: High Performance Library for Computer Vision},
year = {2022},
booktitle = {Proceedings of the 30th ACM International Conference on Multimedia},
series = {MM '22}
}
```