初始化项目,由ModelHub XC社区提供模型

Model: RichardErkhov/apple_-_OpenELM-270M-gguf
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-06-04 04:19:20 +08:00
commit 4711dfa477
24 changed files with 357 additions and 0 deletions

57
.gitattributes vendored Normal file
View File

@@ -0,0 +1,57 @@
# Generic binary/archive/model-weight patterns stored through Git LFS.
# Each line sets the filter, diff and merge drivers to "lfs" and unsets the
# "text" attribute (-text) for paths matching the pattern.
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
# All GGUF quantizations are stored through Git LFS. A single glob replaces the
# per-file entries (OpenELM-270M.<quant>.gguf) so that any quantization added
# later is tracked automatically instead of being committed as a regular blob.
*.gguf filter=lfs diff=lfs merge=lfs -text

3
OpenELM-270M.IQ3_M.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:41a5a96be8430ffe0a7abf94400fa9bd6af68cb2d05832b47718626d573b8a11
size 140721824

3
OpenELM-270M.IQ3_S.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5e13c5db8cda0f991b209721a46fd9a4c7f50697b2c8a84039b950fa298968e5
size 133562784

3
OpenELM-270M.IQ3_XS.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c6772ab627a4367f827e218707db9bfc74220fc23fbfee5157ab6756326f9441
size 128954784

3
OpenELM-270M.IQ4_NL.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4c2b6ac4a6d191f8d3d36bc893942c94ceacd5568a56c98d00e2eca2ae44268a
size 164465824

3
OpenELM-270M.IQ4_XS.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5a81743fd2d9050ac04cc3d53f43f487c14d327dd42f32d340d1a2087214e7be
size 157333664

3
OpenELM-270M.Q2_K.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9e27b7abf90c1c25f8ac74be5aa975f088e65be3e26094b7c0ffbd66ba3f2cb7
size 118303904

3
OpenELM-270M.Q3_K.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:81e844ebba88f6c69b4e3aaea6bcfe30efcd7e481ab242182205e3d204b25987
size 148460704

3
OpenELM-270M.Q3_K_L.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7e6f7a435955b8c16aaa355984ca7af41811d21108b40ad473fcaf414cdbe716
size 158373024

3
OpenELM-270M.Q3_K_M.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:81e844ebba88f6c69b4e3aaea6bcfe30efcd7e481ab242182205e3d204b25987
size 148460704

3
OpenELM-270M.Q3_K_S.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2299e69ebabbfd0a2909b0d9a23aaf59d8f0f5934b7dcfaf55e6d40f05ec5601
size 133562784

3
OpenELM-270M.Q4_0.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2ea769d5f685a34e6ba1ebab3ff68d1b15445b8052356a51b80037476addc520
size 164179104

3
OpenELM-270M.Q4_1.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f49fb963c74142788abc0abdd16f5ad1e478cb479d4cc4c56b71b9cefeaf5dae
size 178586784

3
OpenELM-270M.Q4_K.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6bb72b05e63b2f1878acde5c2c98cba3e1835b3018e5c6ccdd1b2619d8f66b6a
size 175399584

3
OpenELM-270M.Q4_K_M.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6bb72b05e63b2f1878acde5c2c98cba3e1835b3018e5c6ccdd1b2619d8f66b6a
size 175399584

3
OpenELM-270M.Q4_K_S.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b4b9ac122a13f37f04555f8772fc47189b185d261666400a24cf433cd9dba317
size 164465824

3
OpenELM-270M.Q5_0.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:dcc6d743cf121b2c6abca276e8f0ce4c0c284c8743dc274e77b7de0b5f4d1085
size 192994464

3
OpenELM-270M.Q5_1.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d35a3f4282ad3eb6640ecff5972eeb14a6e4ef5d350d0a59e627aa78afb0b203
size 207402144

3
OpenELM-270M.Q5_K.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f2564878e0bb0d8382f2da5dc8b1d175fa49f7f043a4024910e6184eaae16d06
size 200893344

3
OpenELM-270M.Q5_K_M.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f2564878e0bb0d8382f2da5dc8b1d175fa49f7f043a4024910e6184eaae16d06
size 200893344

3
OpenELM-270M.Q5_K_S.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:02559fb4dfc94e7f79e70736b0ed88005cc3a37966928952d0390f4cbf22298f
size 192994464

3
OpenELM-270M.Q6_K.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9e395d378777990b2a87b2b7a0188a882be46cc61d2e3184185bf3c0575727b7
size 223610784

3
OpenELM-270M.Q8_0.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5510c60cbff63becab589480c5eb175798f60db06752fe05ddde4572d6432c93
size 289360544

234
README.md Normal file
View File

@@ -0,0 +1,234 @@
Quantization made by Richard Erkhov.
[Github](https://github.com/RichardErkhov)
[Discord](https://discord.gg/pvy7H8DZMG)
[Request more models](https://github.com/RichardErkhov/quant_request)
OpenELM-270M - GGUF
- Model creator: https://huggingface.co/apple/
- Original model: https://huggingface.co/apple/OpenELM-270M/
| Name | Quant method | Size |
| ---- | ---- | ---- |
| [OpenELM-270M.Q2_K.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-270M-gguf/blob/main/OpenELM-270M.Q2_K.gguf) | Q2_K | 0.11GB |
| [OpenELM-270M.IQ3_XS.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-270M-gguf/blob/main/OpenELM-270M.IQ3_XS.gguf) | IQ3_XS | 0.12GB |
| [OpenELM-270M.IQ3_S.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-270M-gguf/blob/main/OpenELM-270M.IQ3_S.gguf) | IQ3_S | 0.12GB |
| [OpenELM-270M.Q3_K_S.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-270M-gguf/blob/main/OpenELM-270M.Q3_K_S.gguf) | Q3_K_S | 0.12GB |
| [OpenELM-270M.IQ3_M.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-270M-gguf/blob/main/OpenELM-270M.IQ3_M.gguf) | IQ3_M | 0.13GB |
| [OpenELM-270M.Q3_K.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-270M-gguf/blob/main/OpenELM-270M.Q3_K.gguf) | Q3_K | 0.14GB |
| [OpenELM-270M.Q3_K_M.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-270M-gguf/blob/main/OpenELM-270M.Q3_K_M.gguf) | Q3_K_M | 0.14GB |
| [OpenELM-270M.Q3_K_L.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-270M-gguf/blob/main/OpenELM-270M.Q3_K_L.gguf) | Q3_K_L | 0.15GB |
| [OpenELM-270M.IQ4_XS.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-270M-gguf/blob/main/OpenELM-270M.IQ4_XS.gguf) | IQ4_XS | 0.15GB |
| [OpenELM-270M.Q4_0.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-270M-gguf/blob/main/OpenELM-270M.Q4_0.gguf) | Q4_0 | 0.15GB |
| [OpenELM-270M.IQ4_NL.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-270M-gguf/blob/main/OpenELM-270M.IQ4_NL.gguf) | IQ4_NL | 0.15GB |
| [OpenELM-270M.Q4_K_S.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-270M-gguf/blob/main/OpenELM-270M.Q4_K_S.gguf) | Q4_K_S | 0.15GB |
| [OpenELM-270M.Q4_K.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-270M-gguf/blob/main/OpenELM-270M.Q4_K.gguf) | Q4_K | 0.16GB |
| [OpenELM-270M.Q4_K_M.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-270M-gguf/blob/main/OpenELM-270M.Q4_K_M.gguf) | Q4_K_M | 0.16GB |
| [OpenELM-270M.Q4_1.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-270M-gguf/blob/main/OpenELM-270M.Q4_1.gguf) | Q4_1 | 0.17GB |
| [OpenELM-270M.Q5_0.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-270M-gguf/blob/main/OpenELM-270M.Q5_0.gguf) | Q5_0 | 0.18GB |
| [OpenELM-270M.Q5_K_S.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-270M-gguf/blob/main/OpenELM-270M.Q5_K_S.gguf) | Q5_K_S | 0.18GB |
| [OpenELM-270M.Q5_K.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-270M-gguf/blob/main/OpenELM-270M.Q5_K.gguf) | Q5_K | 0.19GB |
| [OpenELM-270M.Q5_K_M.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-270M-gguf/blob/main/OpenELM-270M.Q5_K_M.gguf) | Q5_K_M | 0.19GB |
| [OpenELM-270M.Q5_1.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-270M-gguf/blob/main/OpenELM-270M.Q5_1.gguf) | Q5_1 | 0.19GB |
| [OpenELM-270M.Q6_K.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-270M-gguf/blob/main/OpenELM-270M.Q6_K.gguf) | Q6_K | 0.21GB |
| [OpenELM-270M.Q8_0.gguf](https://huggingface.co/RichardErkhov/apple_-_OpenELM-270M-gguf/blob/main/OpenELM-270M.Q8_0.gguf) | Q8_0 | 0.27GB |
Original model description:

```yaml
---
license: other
license_name: apple-sample-code-license
license_link: LICENSE
---
```

# OpenELM
*Sachin Mehta, Mohammad Hossein Sekhavat, Qingqing Cao, Maxwell Horton, Yanzi Jin, Chenfan Sun, Iman Mirzadeh, Mahyar Najibi, Dmitry Belenko, Peter Zatloukal, Mohammad Rastegari*
We introduce **OpenELM**, a family of **Open** **E**fficient **L**anguage **M**odels. OpenELM uses a layer-wise scaling strategy to efficiently allocate parameters within each layer of the transformer model, leading to enhanced accuracy. We pretrained OpenELM models using the [CoreNet](https://github.com/apple/corenet) library. We release both pretrained and instruction-tuned models with 270M, 450M, 1.1B, and 3B parameters.
Our pre-training dataset contains RefinedWeb, deduplicated PILE, a subset of RedPajama, and a subset of Dolma v1.6, totaling approximately 1.8 trillion tokens. Please check license agreements and terms of these datasets before using them.
## Usage
We have provided an example function to generate output from OpenELM models loaded via [HuggingFace Hub](https://huggingface.co/docs/hub/) in `generate_openelm.py`.
You can try the model by running the following command:
```
# [HF_ACCESS_TOKEN] is a placeholder: replace it with your own access token before running.
python generate_openelm.py --model apple/OpenELM-270M --hf_access_token [HF_ACCESS_TOKEN] --prompt 'Once upon a time there was' --generate_kwargs repetition_penalty=1.2
```
Please refer to [this link](https://huggingface.co/docs/hub/security-tokens) to obtain your Hugging Face access token.
Additional arguments to the Hugging Face `generate` function can be passed via `generate_kwargs`. For example, to speed up inference, you can try [lookup token speculative generation](https://huggingface.co/docs/transformers/generation_strategies) by passing the `prompt_lookup_num_tokens` argument as follows:
```
# Same command with prompt_lookup_num_tokens=10 appended to --generate_kwargs.
# [HF_ACCESS_TOKEN] is a placeholder: replace it with your own access token.
python generate_openelm.py --model apple/OpenELM-270M --hf_access_token [HF_ACCESS_TOKEN] --prompt 'Once upon a time there was' --generate_kwargs repetition_penalty=1.2 prompt_lookup_num_tokens=10
```
Alternatively, try model-wise speculative generation with an [assistant model](https://huggingface.co/blog/assisted-generation) by passing a smaller model through the `assistant_model` argument, for example:
```
# [HF_ACCESS_TOKEN] and [SMALLER_MODEL] are placeholders: replace them with your
# own access token and the smaller model to pass as --assistant_model.
python generate_openelm.py --model apple/OpenELM-270M --hf_access_token [HF_ACCESS_TOKEN] --prompt 'Once upon a time there was' --generate_kwargs repetition_penalty=1.2 --assistant_model [SMALLER_MODEL]
```
## Main Results
### Zero-Shot
| **Model Size** | **ARC-c** | **ARC-e** | **BoolQ** | **HellaSwag** | **PIQA** | **SciQ** | **WinoGrande** | **Average** |
|-----------------------------------------------------------------------------|-----------|-----------|-----------|---------------|-----------|-----------|----------------|-------------|
| [OpenELM-270M](https://huggingface.co/apple/OpenELM-270M) | 26.45 | 45.08 | **53.98** | 46.71 | 69.75 | **84.70** | **53.91** | 54.37 |
| [OpenELM-270M-Instruct](https://huggingface.co/apple/OpenELM-270M-Instruct) | **30.55** | **46.68** | 48.56 | **52.07** | **70.78** | 84.40 | 52.72 | **55.11** |
| [OpenELM-450M](https://huggingface.co/apple/OpenELM-450M) | 27.56 | 48.06 | 55.78 | 53.97 | 72.31 | 87.20 | 58.01 | 57.56 |
| [OpenELM-450M-Instruct](https://huggingface.co/apple/OpenELM-450M-Instruct) | **30.38** | **50.00** | **60.37** | **59.34** | **72.63** | **88.00** | **58.96** | **59.95** |
| [OpenELM-1_1B](https://huggingface.co/apple/OpenELM-1_1B) | 32.34 | **55.43** | 63.58 | 64.81 | **75.57** | **90.60** | 61.72 | 63.44 |
| [OpenELM-1_1B-Instruct](https://huggingface.co/apple/OpenELM-1_1B-Instruct) | **37.97** | 52.23 | **70.00** | **71.20** | 75.03 | 89.30 | **62.75** | **65.50** |
| [OpenELM-3B](https://huggingface.co/apple/OpenELM-3B) | 35.58 | 59.89 | 67.40 | 72.44 | 78.24 | **92.70** | 65.51 | 67.39 |
| [OpenELM-3B-Instruct](https://huggingface.co/apple/OpenELM-3B-Instruct) | **39.42** | **61.74** | **68.17** | **76.36** | **79.00** | 92.50 | **66.85** | **69.15** |
### LLM360
| **Model Size** | **ARC-c** | **HellaSwag** | **MMLU** | **TruthfulQA** | **WinoGrande** | **Average** |
|-----------------------------------------------------------------------------|-----------|---------------|-----------|----------------|----------------|-------------|
| [OpenELM-270M](https://huggingface.co/apple/OpenELM-270M) | 27.65 | 47.15 | 25.72 | **39.24** | **53.83** | 38.72 |
| [OpenELM-270M-Instruct](https://huggingface.co/apple/OpenELM-270M-Instruct) | **32.51** | **51.58** | **26.70** | 38.72 | 53.20 | **40.54** |
| [OpenELM-450M](https://huggingface.co/apple/OpenELM-450M) | 30.20 | 53.86 | **26.01** | 40.18 | 57.22 | 41.50 |
| [OpenELM-450M-Instruct](https://huggingface.co/apple/OpenELM-450M-Instruct) | **33.53** | **59.31** | 25.41 | **40.48** | **58.33** | **43.41** |
| [OpenELM-1_1B](https://huggingface.co/apple/OpenELM-1_1B) | 36.69 | 65.71 | **27.05** | 36.98 | 63.22 | 45.93 |
| [OpenELM-1_1B-Instruct](https://huggingface.co/apple/OpenELM-1_1B-Instruct) | **41.55** | **71.83** | 25.65 | **45.95** | **64.72** | **49.94** |
| [OpenELM-3B](https://huggingface.co/apple/OpenELM-3B) | 42.24 | 73.28 | **26.76** | 34.98 | 67.25 | 48.90 |
| [OpenELM-3B-Instruct](https://huggingface.co/apple/OpenELM-3B-Instruct) | **47.70** | **76.87** | 24.80 | **38.76** | **67.96** | **51.22** |
### OpenLLM Leaderboard
| **Model Size** | **ARC-c** | **CrowS-Pairs** | **HellaSwag** | **MMLU** | **PIQA** | **RACE** | **TruthfulQA** | **WinoGrande** | **Average** |
|-----------------------------------------------------------------------------|-----------|-----------------|---------------|-----------|-----------|-----------|----------------|----------------|-------------|
| [OpenELM-270M](https://huggingface.co/apple/OpenELM-270M) | 27.65 | **66.79** | 47.15 | 25.72 | 69.75 | 30.91 | **39.24** | **53.83** | 45.13 |
| [OpenELM-270M-Instruct](https://huggingface.co/apple/OpenELM-270M-Instruct) | **32.51** | 66.01 | **51.58** | **26.70** | **70.78** | 33.78 | 38.72 | 53.20 | **46.66** |
| [OpenELM-450M](https://huggingface.co/apple/OpenELM-450M) | 30.20 | **68.63** | 53.86 | **26.01** | 72.31 | 33.11 | 40.18 | 57.22 | 47.69 |
| [OpenELM-450M-Instruct](https://huggingface.co/apple/OpenELM-450M-Instruct) | **33.53** | 67.44 | **59.31** | 25.41 | **72.63** | **36.84** | **40.48** | **58.33** | **49.25** |
| [OpenELM-1_1B](https://huggingface.co/apple/OpenELM-1_1B) | 36.69 | **71.74** | 65.71 | **27.05** | **75.57** | 36.46 | 36.98 | 63.22 | 51.68 |
| [OpenELM-1_1B-Instruct](https://huggingface.co/apple/OpenELM-1_1B-Instruct) | **41.55** | 71.02 | **71.83** | 25.65 | 75.03 | **39.43** | **45.95** | **64.72** | **54.40** |
| [OpenELM-3B](https://huggingface.co/apple/OpenELM-3B) | 42.24 | **73.29** | 73.28 | **26.76** | 78.24 | **38.76** | 34.98 | 67.25 | 54.35 |
| [OpenELM-3B-Instruct](https://huggingface.co/apple/OpenELM-3B-Instruct) | **47.70** | 72.33 | **76.87** | 24.80 | **79.00** | 38.47 | **38.76** | **67.96** | **55.73** |
See the [technical report](https://arxiv.org/abs/2404.14619v1) for more results and comparisons.
## Evaluation
### Setup
Install the following dependencies:
```bash
# Install the public lm-eval-harness at a pinned commit, plus the Python
# packages used for evaluation.
harness_repo="public-lm-eval-harness"
git clone https://github.com/EleutherAI/lm-evaluation-harness "${harness_repo}"
cd "${harness_repo}"
# use main branch on 03-15-2024, SHA is dc90fec
git checkout dc90fec
pip install -e .
cd ..
# 66d6242 is the main branch on 2024-04-01
pip install "datasets@git+https://github.com/huggingface/datasets.git@66d6242"
# The version specifiers must be quoted: an unquoted ">" is parsed by the shell
# as output redirection, which drops the constraint and creates stray files
# named "=0.15.2", "=4.38.2" and "=0.2.0".
pip install "tokenizers>=0.15.2" "transformers>=4.38.2" "sentencepiece>=0.2.0"
```
### Evaluate OpenELM
```bash
# OpenELM-270M
hf_model=apple/OpenELM-270M

# OpenELM uses the LLaMA tokenizer.
tokenizer=meta-llama/Llama-2-7b-hf
# This flag is needed because lm-eval-harness sets add_bos_token to False by
# default, but the LLaMA tokenizer requires add_bos_token to be True.
add_bos_token=True
batch_size=1

# -p keeps a re-run from failing when the output directory already exists.
mkdir -p lm_eval_output

# Run lm_eval on a task list at a given few-shot count.
#   $1 - number of few-shot examples
#   $2 - comma-separated lm-eval task names
# Results go to ./lm_eval_output/<tag> and the combined stdout/stderr is also
# copied to ./lm_eval_output/eval-<tag>.log.
run_lm_eval() {
    local shot="$1"
    local task="$2"
    # Replace "/" in the model id and "," in the task list with "_" to build
    # the name shared by the output path and the log file.
    local run_tag="${hf_model//\//_}_${task//,/_}-${shot}shot"

    lm_eval --model hf \
        --model_args "pretrained=${hf_model},trust_remote_code=True,add_bos_token=${add_bos_token},tokenizer=${tokenizer}" \
        --tasks "${task}" \
        --device cuda:0 \
        --num_fewshot "${shot}" \
        --output_path "./lm_eval_output/${run_tag}" \
        --batch_size "${batch_size}" 2>&1 | tee "./lm_eval_output/eval-${run_tag}.log"
}

# 0-shot
run_lm_eval 0 arc_challenge,arc_easy,boolq,hellaswag,piqa,race,winogrande,sciq,truthfulqa_mc2
# 5-shot
run_lm_eval 5 mmlu,winogrande
# 25-shot
run_lm_eval 25 arc_challenge,crows_pairs_english
# 10-shot
run_lm_eval 10 hellaswag
```
## Bias, Risks, and Limitations
The release of OpenELM models aims to empower and enrich the open research community by providing access to state-of-the-art language models. Trained on publicly available datasets, these models are made available without any safety guarantees. Consequently, there exists the possibility of these models producing outputs that are inaccurate, harmful, biased, or objectionable in response to user prompts. Thus, it is imperative for users and developers to undertake thorough safety testing and implement appropriate filtering mechanisms tailored to their specific requirements.
## Citation
If you find our work useful, please cite:
```BibTex
@article{mehtaOpenELMEfficientLanguage2024,
title = {{OpenELM}: {An} {Efficient} {Language} {Model} {Family} with {Open} {Training} and {Inference} {Framework}},
shorttitle = {{OpenELM}},
url = {https://arxiv.org/abs/2404.14619v1},
language = {en},
urldate = {2024-04-24},
journal = {arXiv.org},
author = {Mehta, Sachin and Sekhavat, Mohammad Hossein and Cao, Qingqing and Horton, Maxwell and Jin, Yanzi and Sun, Chenfan and Mirzadeh, Iman and Najibi, Mahyar and Belenko, Dmitry and Zatloukal, Peter and Rastegari, Mohammad},
month = apr,
year = {2024},
}
@inproceedings{mehta2022cvnets,
author = {Mehta, Sachin and Abdolhosseini, Farzad and Rastegari, Mohammad},
title = {CVNets: High Performance Library for Computer Vision},
year = {2022},
booktitle = {Proceedings of the 30th ACM International Conference on Multimedia},
series = {MM '22}
}
```