commit b6cddb76a6709aefe5e80f15ef9bd92dc9beb6b3 Author: ModelHub XC Date: Wed Jun 3 20:44:13 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: jondurbin/airoboros-13b Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..53d7257 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,47 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bin.* filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*.tfevents* filter=lfs diff=lfs merge=lfs -text +*.db* filter=lfs diff=lfs merge=lfs -text +*.ark* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.gguf* filter=lfs diff=lfs merge=lfs -text +*.ggml filter=lfs diff=lfs merge=lfs -text +*.llamafile* filter=lfs diff=lfs 
merge=lfs -text +*.pt2 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/IMG_0128.jpeg b/IMG_0128.jpeg new file mode 100644 index 0000000..88a6bf7 Binary files /dev/null and b/IMG_0128.jpeg differ diff --git a/IMG_0130.jpeg b/IMG_0130.jpeg new file mode 100644 index 0000000..e403cee Binary files /dev/null and b/IMG_0130.jpeg differ diff --git a/README.md b/README.md new file mode 100644 index 0000000..09445fa --- /dev/null +++ b/README.md @@ -0,0 +1,264 @@ +--- +license: cc-by-nc-4.0 +--- + +# Overview + +This is a fine-tuned 13b parameter LlaMa model, using completely synthetic training data created by https://github.com/jondurbin/airoboros + +__*I don't recommend using this model! The outputs aren't particularly great, and it may contain "harmful" data due to jailbreak*__ + +Please see one of the updated airoboros models for a much better experience. + +### Eval (gpt4 judging) + +![chart](meta-chart.png) + +| model | raw score | gpt-3.5 adjusted score | +| --- | --- | --- | +| __airoboros-13b__ | __17947__ | __98.087__ | +| gpt35 | 18297 | 100.0 | +| gpt4-x-alpasta-30b | 15612 | 85.33 | +| manticore-13b | 15856 | 86.66 | +| vicuna-13b-1.1 | 16306 | 89.12 | +| wizard-vicuna-13b-uncensored | 16287 | 89.01 | + +
+ individual question scores, with shareGPT links (200 prompts generated by gpt-4) + + *wv-13b-u is Wizard-Vicuna-13b-Uncensored* + +| airoboros-13b | gpt35 | gpt4-x-alpasta-30b | manticore-13b | vicuna-13b-1.1 | wv-13b-u | link | +|----------------:|--------:|---------------------:|----------------:|-----------------:|-------------------------------:|:---------------------------------------| +| 80 | 95 | 70 | 90 | 85 | 60 | [eval](https://sharegpt.com/c/PIbRQD3) | +| 20 | 95 | 40 | 30 | 90 | 80 | [eval](https://sharegpt.com/c/fSzwzzd) | +| 100 | 100 | 100 | 95 | 95 | 100 | [eval](https://sharegpt.com/c/AXMzZiO) | +| 90 | 100 | 85 | 60 | 95 | 100 | [eval](https://sharegpt.com/c/7obzJm2) | +| 95 | 90 | 80 | 85 | 95 | 75 | [eval](https://sharegpt.com/c/cRpj6M1) | +| 100 | 95 | 90 | 95 | 98 | 92 | [eval](https://sharegpt.com/c/p0by1T7) | +| 50 | 100 | 80 | 95 | 60 | 55 | [eval](https://sharegpt.com/c/rowNlKx) | +| 70 | 90 | 80 | 60 | 85 | 40 | [eval](https://sharegpt.com/c/I4POj4I) | +| 100 | 95 | 50 | 85 | 40 | 60 | [eval](https://sharegpt.com/c/gUAeiRp) | +| 85 | 60 | 55 | 65 | 50 | 70 | [eval](https://sharegpt.com/c/Lgw4QQL) | +| 95 | 100 | 85 | 90 | 60 | 75 | [eval](https://sharegpt.com/c/X9tDYft) | +| 100 | 95 | 70 | 80 | 50 | 85 | [eval](https://sharegpt.com/c/9V2ElkH) | +| 100 | 95 | 80 | 70 | 60 | 90 | [eval](https://sharegpt.com/c/D5xg6qt) | +| 95 | 100 | 70 | 85 | 90 | 90 | [eval](https://sharegpt.com/c/lQnSfDs) | +| 80 | 95 | 90 | 60 | 30 | 85 | [eval](https://sharegpt.com/c/1hpHGNc) | +| 60 | 95 | 0 | 75 | 50 | 40 | [eval](https://sharegpt.com/c/an6TqE4) | +| 100 | 95 | 90 | 98 | 95 | 95 | [eval](https://sharegpt.com/c/7vr6n3F) | +| 60 | 85 | 40 | 50 | 20 | 0 | [eval](https://sharegpt.com/c/TOkMkgE) | +| 100 | 90 | 85 | 95 | 95 | 80 | [eval](https://sharegpt.com/c/Qu7ak0r) | +| 100 | 95 | 100 | 95 | 90 | 95 | [eval](https://sharegpt.com/c/hMD4gPo) | +| 95 | 90 | 96 | 80 | 92 | 88 | [eval](https://sharegpt.com/c/HTlicNh) | +| 95 | 92 | 90 | 93 | 89 | 91 | 
[eval](https://sharegpt.com/c/MjxHpAf) | +| 95 | 93 | 90 | 94 | 96 | 92 | [eval](https://sharegpt.com/c/4RvxOR9) | +| 95 | 90 | 93 | 88 | 92 | 85 | [eval](https://sharegpt.com/c/PcAIU9r) | +| 95 | 90 | 85 | 96 | 88 | 92 | [eval](https://sharegpt.com/c/MMqul3q) | +| 95 | 95 | 90 | 93 | 92 | 91 | [eval](https://sharegpt.com/c/YQsLyzJ) | +| 95 | 98 | 80 | 97 | 99 | 96 | [eval](https://sharegpt.com/c/UDhSTMq) | +| 95 | 93 | 90 | 87 | 92 | 89 | [eval](https://sharegpt.com/c/4gCfdCV) | +| 90 | 85 | 95 | 80 | 92 | 75 | [eval](https://sharegpt.com/c/bkQs4SP) | +| 90 | 85 | 95 | 93 | 80 | 92 | [eval](https://sharegpt.com/c/LeLCEEt) | +| 95 | 92 | 90 | 91 | 93 | 89 | [eval](https://sharegpt.com/c/DFxNzVu) | +| 100 | 95 | 90 | 85 | 80 | 95 | [eval](https://sharegpt.com/c/gnVzNML) | +| 95 | 97 | 93 | 92 | 96 | 94 | [eval](https://sharegpt.com/c/y7pxMIy) | +| 95 | 93 | 94 | 90 | 88 | 92 | [eval](https://sharegpt.com/c/5UeCvTY) | +| 90 | 95 | 98 | 85 | 96 | 92 | [eval](https://sharegpt.com/c/T4oL9I5) | +| 90 | 88 | 85 | 80 | 82 | 84 | [eval](https://sharegpt.com/c/HnGyTAG) | +| 90 | 95 | 85 | 87 | 92 | 88 | [eval](https://sharegpt.com/c/ZbRMBNj) | +| 95 | 97 | 96 | 90 | 93 | 92 | [eval](https://sharegpt.com/c/iTmFJqd) | +| 95 | 93 | 92 | 90 | 89 | 91 | [eval](https://sharegpt.com/c/VuPifET) | +| 90 | 95 | 93 | 92 | 94 | 91 | [eval](https://sharegpt.com/c/AvFAH1x) | +| 90 | 85 | 95 | 80 | 88 | 75 | [eval](https://sharegpt.com/c/4ealKGN) | +| 85 | 90 | 95 | 88 | 92 | 80 | [eval](https://sharegpt.com/c/bE1b2vX) | +| 90 | 95 | 92 | 85 | 80 | 87 | [eval](https://sharegpt.com/c/I3nMPBC) | +| 85 | 90 | 95 | 80 | 88 | 75 | [eval](https://sharegpt.com/c/as7r3bW) | +| 85 | 80 | 75 | 90 | 70 | 82 | [eval](https://sharegpt.com/c/qYceaUa) | +| 90 | 85 | 95 | 92 | 93 | 80 | [eval](https://sharegpt.com/c/g4FXchU) | +| 90 | 95 | 75 | 85 | 80 | 70 | [eval](https://sharegpt.com/c/6kGLvL5) | +| 85 | 90 | 80 | 88 | 82 | 83 | [eval](https://sharegpt.com/c/SRozqaF) | +| 85 | 90 | 95 | 92 | 88 | 80 | 
[eval](https://sharegpt.com/c/GoKydf6) | +| 85 | 90 | 80 | 75 | 95 | 88 | [eval](https://sharegpt.com/c/37aXkHQ) | +| 85 | 90 | 80 | 88 | 84 | 92 | [eval](https://sharegpt.com/c/nVuUaTj) | +| 80 | 90 | 75 | 85 | 70 | 95 | [eval](https://sharegpt.com/c/TkAQKLC) | +| 90 | 88 | 85 | 80 | 92 | 83 | [eval](https://sharegpt.com/c/55cO2y0) | +| 85 | 75 | 90 | 80 | 78 | 88 | [eval](https://sharegpt.com/c/tXtq5lT) | +| 85 | 90 | 80 | 82 | 75 | 88 | [eval](https://sharegpt.com/c/TfMjeJQ) | +| 90 | 85 | 40 | 95 | 80 | 88 | [eval](https://sharegpt.com/c/2jQ6K2S) | +| 85 | 95 | 90 | 75 | 88 | 80 | [eval](https://sharegpt.com/c/aQtr2ca) | +| 85 | 95 | 90 | 92 | 89 | 88 | [eval](https://sharegpt.com/c/tbWLyZ7) | +| 80 | 85 | 75 | 60 | 90 | 70 | [eval](https://sharegpt.com/c/moHC7i2) | +| 85 | 90 | 87 | 80 | 88 | 75 | [eval](https://sharegpt.com/c/GK6GShh) | +| 85 | 80 | 75 | 50 | 90 | 80 | [eval](https://sharegpt.com/c/ugcW4qG) | +| 95 | 80 | 90 | 85 | 75 | 82 | [eval](https://sharegpt.com/c/WL8iq6F) | +| 85 | 90 | 80 | 70 | 95 | 88 | [eval](https://sharegpt.com/c/TZJKnvS) | +| 90 | 95 | 70 | 85 | 80 | 75 | [eval](https://sharegpt.com/c/beNOKb5) | +| 90 | 85 | 70 | 75 | 80 | 60 | [eval](https://sharegpt.com/c/o2oRCF5) | +| 95 | 90 | 70 | 50 | 85 | 80 | [eval](https://sharegpt.com/c/TNjbK6D) | +| 80 | 85 | 40 | 60 | 90 | 95 | [eval](https://sharegpt.com/c/rJvszWJ) | +| 75 | 60 | 80 | 55 | 70 | 85 | [eval](https://sharegpt.com/c/HJwRkro) | +| 90 | 85 | 60 | 50 | 80 | 95 | [eval](https://sharegpt.com/c/AeFoSDK) | +| 45 | 85 | 60 | 20 | 65 | 75 | [eval](https://sharegpt.com/c/KA1cgOl) | +| 85 | 90 | 30 | 60 | 80 | 70 | [eval](https://sharegpt.com/c/RTy8n0y) | +| 90 | 95 | 80 | 40 | 85 | 70 | [eval](https://sharegpt.com/c/PJMJoXh) | +| 85 | 90 | 70 | 75 | 80 | 95 | [eval](https://sharegpt.com/c/Ib3jzyC) | +| 90 | 70 | 50 | 20 | 60 | 40 | [eval](https://sharegpt.com/c/oMmqqtX) | +| 90 | 95 | 75 | 60 | 85 | 80 | [eval](https://sharegpt.com/c/qRNhNTw) | +| 85 | 80 | 60 | 70 | 65 | 75 | 
[eval](https://sharegpt.com/c/3MAHQIy) | +| 90 | 85 | 80 | 75 | 82 | 70 | [eval](https://sharegpt.com/c/0Emc5HS) | +| 90 | 95 | 80 | 70 | 85 | 75 | [eval](https://sharegpt.com/c/UqAxRWF) | +| 85 | 75 | 30 | 80 | 90 | 70 | [eval](https://sharegpt.com/c/eywxGAw) | +| 85 | 90 | 50 | 70 | 80 | 60 | [eval](https://sharegpt.com/c/A2KSEWP) | +| 100 | 95 | 98 | 99 | 97 | 96 | [eval](https://sharegpt.com/c/C8rebQf) | +| 95 | 90 | 92 | 93 | 91 | 89 | [eval](https://sharegpt.com/c/cd9HF4V) | +| 95 | 92 | 90 | 85 | 88 | 91 | [eval](https://sharegpt.com/c/LHkjvQJ) | +| 100 | 95 | 98 | 97 | 96 | 99 | [eval](https://sharegpt.com/c/o5PdoyZ) | +| 100 | 100 | 100 | 90 | 100 | 95 | [eval](https://sharegpt.com/c/rh8pZVg) | +| 100 | 95 | 98 | 97 | 94 | 99 | [eval](https://sharegpt.com/c/T5DYL83) | +| 95 | 90 | 92 | 93 | 94 | 91 | [eval](https://sharegpt.com/c/G5Osg3X) | +| 100 | 95 | 98 | 90 | 96 | 95 | [eval](https://sharegpt.com/c/9ZqI03V) | +| 95 | 96 | 92 | 90 | 89 | 93 | [eval](https://sharegpt.com/c/4tFfwZU) | +| 100 | 95 | 93 | 90 | 92 | 88 | [eval](https://sharegpt.com/c/mG1JqPH) | +| 100 | 100 | 98 | 97 | 99 | 100 | [eval](https://sharegpt.com/c/VDdtgCu) | +| 95 | 90 | 92 | 85 | 93 | 94 | [eval](https://sharegpt.com/c/uKtGkvg) | +| 95 | 93 | 90 | 92 | 96 | 91 | [eval](https://sharegpt.com/c/9B92N6P) | +| 95 | 96 | 92 | 90 | 93 | 91 | [eval](https://sharegpt.com/c/GeIFfOu) | +| 95 | 90 | 92 | 93 | 91 | 89 | [eval](https://sharegpt.com/c/gn3E9nN) | +| 100 | 98 | 95 | 97 | 96 | 99 | [eval](https://sharegpt.com/c/Erxa46H) | +| 90 | 95 | 85 | 88 | 92 | 87 | [eval](https://sharegpt.com/c/oRHVOvK) | +| 95 | 93 | 90 | 92 | 89 | 88 | [eval](https://sharegpt.com/c/ghtKLUX) | +| 100 | 95 | 97 | 90 | 96 | 94 | [eval](https://sharegpt.com/c/ZL4KjqP) | +| 95 | 93 | 90 | 92 | 94 | 91 | [eval](https://sharegpt.com/c/YOnqIQa) | +| 95 | 92 | 90 | 93 | 94 | 88 | [eval](https://sharegpt.com/c/3BKwKho) | +| 95 | 92 | 60 | 97 | 90 | 96 | [eval](https://sharegpt.com/c/U1i31bn) | +| 95 | 90 | 92 | 93 
| 91 | 89 | [eval](https://sharegpt.com/c/etfRoAE) | +| 95 | 90 | 97 | 92 | 91 | 93 | [eval](https://sharegpt.com/c/B0OpVxR) | +| 90 | 95 | 93 | 85 | 92 | 91 | [eval](https://sharegpt.com/c/MBgGJ5A) | +| 95 | 90 | 40 | 92 | 93 | 85 | [eval](https://sharegpt.com/c/eQKTYO7) | +| 100 | 100 | 95 | 90 | 95 | 90 | [eval](https://sharegpt.com/c/szKWCBt) | +| 90 | 95 | 96 | 98 | 93 | 92 | [eval](https://sharegpt.com/c/8ZhUcAv) | +| 90 | 95 | 92 | 89 | 93 | 94 | [eval](https://sharegpt.com/c/VQWdy99) | +| 100 | 95 | 100 | 98 | 96 | 99 | [eval](https://sharegpt.com/c/g1DHUSM) | +| 100 | 100 | 95 | 90 | 100 | 90 | [eval](https://sharegpt.com/c/uYgfJC3) | +| 90 | 85 | 88 | 92 | 87 | 91 | [eval](https://sharegpt.com/c/crk8BH3) | +| 95 | 97 | 90 | 92 | 93 | 94 | [eval](https://sharegpt.com/c/95F9afQ) | +| 90 | 95 | 85 | 88 | 92 | 89 | [eval](https://sharegpt.com/c/otioHUo) | +| 95 | 93 | 90 | 92 | 94 | 91 | [eval](https://sharegpt.com/c/KSiL9F6) | +| 90 | 95 | 85 | 80 | 88 | 82 | [eval](https://sharegpt.com/c/GmGq3b3) | +| 95 | 90 | 60 | 85 | 93 | 70 | [eval](https://sharegpt.com/c/VOhklyz) | +| 95 | 92 | 94 | 93 | 96 | 90 | [eval](https://sharegpt.com/c/wqy8m6k) | +| 95 | 90 | 85 | 93 | 87 | 92 | [eval](https://sharegpt.com/c/iWKrIuS) | +| 95 | 96 | 93 | 90 | 97 | 92 | [eval](https://sharegpt.com/c/o1h3w8N) | +| 100 | 0 | 0 | 100 | 0 | 0 | [eval](https://sharegpt.com/c/3UH9eed) | +| 60 | 100 | 0 | 80 | 0 | 0 | [eval](https://sharegpt.com/c/44g0FAh) | +| 0 | 100 | 60 | 0 | 0 | 90 | [eval](https://sharegpt.com/c/PaQlcrU) | +| 100 | 100 | 0 | 100 | 100 | 100 | [eval](https://sharegpt.com/c/51icV4o) | +| 100 | 100 | 100 | 100 | 95 | 100 | [eval](https://sharegpt.com/c/1VnbGAR) | +| 100 | 100 | 100 | 50 | 90 | 100 | [eval](https://sharegpt.com/c/EYGBrgw) | +| 100 | 100 | 100 | 100 | 95 | 90 | [eval](https://sharegpt.com/c/EGRduOt) | +| 100 | 100 | 100 | 95 | 0 | 100 | [eval](https://sharegpt.com/c/O3JJfnK) | +| 50 | 95 | 20 | 10 | 30 | 85 | [eval](https://sharegpt.com/c/2roVtAu) | 
+| 100 | 100 | 60 | 20 | 30 | 40 | [eval](https://sharegpt.com/c/sphFpfx) | +| 100 | 0 | 0 | 0 | 0 | 100 | [eval](https://sharegpt.com/c/OeWGKBo) | +| 0 | 100 | 60 | 0 | 0 | 80 | [eval](https://sharegpt.com/c/TOUsuFA) | +| 50 | 100 | 20 | 90 | 0 | 10 | [eval](https://sharegpt.com/c/Y3P6DCu) | +| 100 | 100 | 100 | 100 | 100 | 100 | [eval](https://sharegpt.com/c/hkbdeiM) | +| 100 | 100 | 100 | 100 | 100 | 100 | [eval](https://sharegpt.com/c/eubbaVC) | +| 40 | 100 | 95 | 0 | 100 | 40 | [eval](https://sharegpt.com/c/QWiF49v) | +| 100 | 100 | 100 | 100 | 80 | 100 | [eval](https://sharegpt.com/c/dKTapBu) | +| 100 | 100 | 100 | 0 | 90 | 40 | [eval](https://sharegpt.com/c/P8NGwFZ) | +| 0 | 100 | 100 | 50 | 70 | 20 | [eval](https://sharegpt.com/c/v96BtBL) | +| 100 | 100 | 50 | 90 | 0 | 95 | [eval](https://sharegpt.com/c/YRlzj1t) | +| 100 | 95 | 90 | 85 | 98 | 80 | [eval](https://sharegpt.com/c/76VX3eB) | +| 95 | 98 | 90 | 92 | 96 | 89 | [eval](https://sharegpt.com/c/JK1uNef) | +| 90 | 95 | 75 | 85 | 80 | 82 | [eval](https://sharegpt.com/c/ku6CKmx) | +| 95 | 98 | 50 | 92 | 96 | 94 | [eval](https://sharegpt.com/c/0iAFuKW) | +| 95 | 90 | 0 | 93 | 92 | 94 | [eval](https://sharegpt.com/c/6uGnKio) | +| 95 | 90 | 85 | 92 | 80 | 88 | [eval](https://sharegpt.com/c/lfpRBw8) | +| 95 | 93 | 75 | 85 | 90 | 92 | [eval](https://sharegpt.com/c/mKu70jb) | +| 90 | 95 | 88 | 85 | 92 | 89 | [eval](https://sharegpt.com/c/GkYzJHO) | +| 100 | 100 | 100 | 95 | 97 | 98 | [eval](https://sharegpt.com/c/mly2k0z) | +| 85 | 40 | 30 | 95 | 90 | 88 | [eval](https://sharegpt.com/c/5td2ob0) | +| 90 | 95 | 92 | 85 | 88 | 93 | [eval](https://sharegpt.com/c/0ISpWfy) | +| 95 | 96 | 92 | 90 | 89 | 93 | [eval](https://sharegpt.com/c/kdUDUn7) | +| 90 | 95 | 85 | 80 | 92 | 88 | [eval](https://sharegpt.com/c/fjMNYr2) | +| 95 | 98 | 65 | 90 | 85 | 93 | [eval](https://sharegpt.com/c/6xBIf2Q) | +| 95 | 92 | 96 | 97 | 90 | 89 | [eval](https://sharegpt.com/c/B9GY8Ln) | +| 95 | 90 | 92 | 91 | 89 | 93 | 
[eval](https://sharegpt.com/c/vn1FPU4) | +| 95 | 90 | 80 | 75 | 95 | 90 | [eval](https://sharegpt.com/c/YurEMYg) | +| 92 | 40 | 30 | 95 | 90 | 93 | [eval](https://sharegpt.com/c/D19Qeui) | +| 90 | 92 | 85 | 88 | 89 | 87 | [eval](https://sharegpt.com/c/5QRFfrt) | +| 95 | 80 | 90 | 92 | 91 | 88 | [eval](https://sharegpt.com/c/pYWPRi4) | +| 95 | 93 | 92 | 90 | 91 | 94 | [eval](https://sharegpt.com/c/wPRTntL) | +| 100 | 98 | 95 | 90 | 92 | 96 | [eval](https://sharegpt.com/c/F6PLYKE) | +| 95 | 92 | 80 | 85 | 90 | 93 | [eval](https://sharegpt.com/c/WeJnMGv) | +| 95 | 98 | 90 | 88 | 97 | 96 | [eval](https://sharegpt.com/c/zNKL49e) | +| 90 | 95 | 85 | 88 | 86 | 92 | [eval](https://sharegpt.com/c/kIKmA1b) | +| 100 | 100 | 100 | 100 | 100 | 100 | [eval](https://sharegpt.com/c/1btWd4O) | +| 90 | 95 | 85 | 96 | 92 | 88 | [eval](https://sharegpt.com/c/s9sf1Lp) | +| 100 | 98 | 95 | 99 | 97 | 96 | [eval](https://sharegpt.com/c/RWzv8py) | +| 95 | 92 | 70 | 90 | 93 | 89 | [eval](https://sharegpt.com/c/bYF7FqA) | +| 95 | 90 | 88 | 92 | 94 | 93 | [eval](https://sharegpt.com/c/SuUqjMj) | +| 95 | 90 | 93 | 92 | 85 | 94 | [eval](https://sharegpt.com/c/r0aRdYY) | +| 95 | 93 | 90 | 87 | 92 | 91 | [eval](https://sharegpt.com/c/VuMfkkd) | +| 95 | 93 | 90 | 96 | 92 | 91 | [eval](https://sharegpt.com/c/rhm6fa4) | +| 95 | 97 | 85 | 96 | 98 | 90 | [eval](https://sharegpt.com/c/DwXnyqG) | +| 95 | 92 | 90 | 85 | 93 | 94 | [eval](https://sharegpt.com/c/0ScdkGS) | +| 95 | 96 | 92 | 90 | 97 | 93 | [eval](https://sharegpt.com/c/6yIoCDU) | +| 95 | 93 | 96 | 94 | 90 | 92 | [eval](https://sharegpt.com/c/VubEvp9) | +| 95 | 94 | 93 | 92 | 90 | 89 | [eval](https://sharegpt.com/c/RHzmZWG) | +| 90 | 85 | 95 | 80 | 87 | 75 | [eval](https://sharegpt.com/c/IMiP9Zm) | +| 95 | 94 | 92 | 93 | 90 | 96 | [eval](https://sharegpt.com/c/bft4PIL) | +| 95 | 100 | 90 | 95 | 95 | 95 | [eval](https://sharegpt.com/c/iHXB34b) | +| 100 | 95 | 85 | 100 | 0 | 90 | [eval](https://sharegpt.com/c/vCGn9R7) | +| 100 | 95 | 90 | 95 | 
100 | 95 | [eval](https://sharegpt.com/c/be8crZL) | +| 95 | 90 | 60 | 95 | 85 | 80 | [eval](https://sharegpt.com/c/33elmDz) | +| 100 | 95 | 90 | 98 | 97 | 99 | [eval](https://sharegpt.com/c/RWD3Zx7) | +| 95 | 90 | 85 | 95 | 80 | 92 | [eval](https://sharegpt.com/c/GiwBvM7) | +| 100 | 95 | 100 | 98 | 100 | 90 | [eval](https://sharegpt.com/c/hX2pYxk) | +| 100 | 95 | 80 | 85 | 90 | 85 | [eval](https://sharegpt.com/c/MfxdGd7) | +| 100 | 90 | 95 | 85 | 95 | 100 | [eval](https://sharegpt.com/c/28hQjmS) | +| 95 | 90 | 85 | 80 | 88 | 92 | [eval](https://sharegpt.com/c/fzy5EPe) | +| 100 | 100 | 0 | 0 | 100 | 0 | [eval](https://sharegpt.com/c/vwxPjbR) | +| 100 | 100 | 100 | 50 | 100 | 75 | [eval](https://sharegpt.com/c/FAYfFWy) | +| 100 | 100 | 0 | 0 | 100 | 0 | [eval](https://sharegpt.com/c/SoudGsQ) | +| 0 | 100 | 0 | 0 | 0 | 0 | [eval](https://sharegpt.com/c/mkwEgVn) | +| 100 | 100 | 50 | 0 | 0 | 0 | [eval](https://sharegpt.com/c/q8MQEsz) | +| 100 | 100 | 100 | 100 | 100 | 95 | [eval](https://sharegpt.com/c/tzHpsKh) | +| 100 | 100 | 50 | 0 | 0 | 0 | [eval](https://sharegpt.com/c/3ugYBtJ) | +| 100 | 100 | 0 | 0 | 100 | 0 | [eval](https://sharegpt.com/c/I6KfOJT) | +| 90 | 85 | 80 | 95 | 70 | 75 | [eval](https://sharegpt.com/c/enaV1CK) | +| 100 | 100 | 0 | 0 | 0 | 0 | [eval](https://sharegpt.com/c/JBk7oSh) | + +
+ + +### Training data + +This was an experiment to see if a "jailbreak" prompt could be used to generate a broader range of data that would otherwise have been filtered by OpenAI's alignment efforts. + +The jailbreak did indeed work with a high success rate, and caused OpenAI to generate a broader range of topics and fewer refusals to answer questions/instructions on sensitive topics. + +### Prompt format + +The prompt should be 1:1 compatible with the FastChat/vicuna format, e.g.: + +With a system prompt: +``` +A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: [prompt] ASSISTANT: +``` + +Or without a system prompt: +``` +USER: [prompt] ASSISTANT: +``` + +### Usage and License Notices + +The model and dataset are intended and licensed for research use only. I've used the 'cc-by-nc-4.0' license, but really it is subject to a custom/special license because: + +- the base model is LLaMa, which has its own special research license +- the dataset(s) were generated with OpenAI (gpt-4 and/or gpt-3.5-turbo), which has a clause saying the data can't be used to create models to compete with OpenAI + +So, to reiterate: this model (and datasets) cannot be used commercially. 
\ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..390580b --- /dev/null +++ b/config.json @@ -0,0 +1,24 @@ +{ + "_name_or_path": "/workspace/llama-13b", + "architectures": [ + "LlamaForCausalLM" + ], + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "intermediate_size": 13824, + "max_position_embeddings": 2048, + "max_sequence_length": 2048, + "model_type": "llama", + "num_attention_heads": 40, + "num_hidden_layers": 40, + "pad_token_id": 0, + "rms_norm_eps": 1e-06, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.28.1", + "use_cache": false, + "vocab_size": 32000 +} diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..bbeeda1 --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "text-generation", "allow_remote": true} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..1372199 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "pad_token_id": 0, + "transformers_version": "4.28.1" +} diff --git a/meta-chart.png b/meta-chart.png new file mode 100644 index 0000000..0ceb3c7 Binary files /dev/null and b/meta-chart.png differ diff --git a/pytorch_model-00001-of-00006.bin b/pytorch_model-00001-of-00006.bin new file mode 100644 index 0000000..dad0189 --- /dev/null +++ b/pytorch_model-00001-of-00006.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d54aae549953530da18657d292a4346ba773083f2bcb70cbbf0e721d1dd1beda +size 9956543883 diff --git a/pytorch_model-00002-of-00006.bin b/pytorch_model-00002-of-00006.bin new file mode 100644 index 0000000..f02074f --- /dev/null +++ b/pytorch_model-00002-of-00006.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:e25a2bb1593cbfec2f5a56e6988c091e3e2562f968be472a91db49377140ebc2 +size 9940856385 diff --git a/pytorch_model-00003-of-00006.bin b/pytorch_model-00003-of-00006.bin new file mode 100644 index 0000000..5f4bfc0 --- /dev/null +++ b/pytorch_model-00003-of-00006.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1251c97ec47a92e1f343c23a2b502a2b77a8a159fbb9b71faa918cc718edec4e +size 9940856943 diff --git a/pytorch_model-00004-of-00006.bin b/pytorch_model-00004-of-00006.bin new file mode 100644 index 0000000..4336788 --- /dev/null +++ b/pytorch_model-00004-of-00006.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b88b3720ef1bcd1750f5a9c013a740bcf3e8b56f8e1664ca3dc82dc265302ac4 +size 9867415289 diff --git a/pytorch_model-00005-of-00006.bin b/pytorch_model-00005-of-00006.bin new file mode 100644 index 0000000..e246360 --- /dev/null +++ b/pytorch_model-00005-of-00006.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:867b5ede76b1565c71aa1fb5e8b20194cb6d876f9895f72953a72eb7e0ea323b +size 9867456961 diff --git a/pytorch_model-00006-of-00006.bin b/pytorch_model-00006-of-00006.bin new file mode 100644 index 0000000..7ba6a65 --- /dev/null +++ b/pytorch_model-00006-of-00006.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca30e4e4f63316dfc1a214c984e362a688d0bcac67574149dbbf54999da26275 +size 2490476207 diff --git a/pytorch_model.bin.index.json b/pytorch_model.bin.index.json new file mode 100644 index 0000000..fd9a672 --- /dev/null +++ b/pytorch_model.bin.index.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e364344dcdf113d834a0ed9a0d36319e3d72fdba0767fded590866697df7978 +size 33444 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..f928b24 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,24 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": 
false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000..6c00c74 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 +size 499723 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..5ab645d --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,34 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "bos_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "clean_up_tokenization_spaces": false, + "eos_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "model_max_length": 2048, + "pad_token": null, + "padding_side": "right", + "sp_model_kwargs": {}, + "tokenizer_class": "LlamaTokenizer", + "unk_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + } +} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..3647d88 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,2335 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.988235294117647, + "global_step": 381, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 1.25e-06, + "loss": 0.9385, + "step": 1 + }, + { + "epoch": 0.02, + "learning_rate": 2.5e-06, + "loss": 0.9588, + "step": 2 + }, + { + "epoch": 0.02, + 
"learning_rate": 3.7500000000000005e-06, + "loss": 0.8227, + "step": 3 + }, + { + "epoch": 0.03, + "learning_rate": 5e-06, + "loss": 0.7846, + "step": 4 + }, + { + "epoch": 0.04, + "learning_rate": 6.25e-06, + "loss": 0.7867, + "step": 5 + }, + { + "epoch": 0.05, + "learning_rate": 7.500000000000001e-06, + "loss": 0.7457, + "step": 6 + }, + { + "epoch": 0.05, + "learning_rate": 8.750000000000001e-06, + "loss": 0.7486, + "step": 7 + }, + { + "epoch": 0.06, + "learning_rate": 1e-05, + "loss": 0.7079, + "step": 8 + }, + { + "epoch": 0.07, + "learning_rate": 1.125e-05, + "loss": 0.6741, + "step": 9 + }, + { + "epoch": 0.08, + "learning_rate": 1.25e-05, + "loss": 0.6693, + "step": 10 + }, + { + "epoch": 0.09, + "learning_rate": 1.375e-05, + "loss": 0.6629, + "step": 11 + }, + { + "epoch": 0.09, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.647, + "step": 12 + }, + { + "epoch": 0.1, + "learning_rate": 1.6250000000000002e-05, + "loss": 0.6316, + "step": 13 + }, + { + "epoch": 0.11, + "learning_rate": 1.7500000000000002e-05, + "loss": 0.6233, + "step": 14 + }, + { + "epoch": 0.12, + "learning_rate": 1.8750000000000002e-05, + "loss": 0.6264, + "step": 15 + }, + { + "epoch": 0.13, + "learning_rate": 2e-05, + "loss": 0.617, + "step": 16 + }, + { + "epoch": 0.13, + "learning_rate": 1.9999629591162658e-05, + "loss": 0.6023, + "step": 17 + }, + { + "epoch": 0.14, + "learning_rate": 1.9998518392091163e-05, + "loss": 0.5996, + "step": 18 + }, + { + "epoch": 0.15, + "learning_rate": 1.9996666485105115e-05, + "loss": 0.5939, + "step": 19 + }, + { + "epoch": 0.16, + "learning_rate": 1.999407400739705e-05, + "loss": 0.5945, + "step": 20 + }, + { + "epoch": 0.16, + "learning_rate": 1.9990741151022302e-05, + "loss": 0.5871, + "step": 21 + }, + { + "epoch": 0.17, + "learning_rate": 1.9986668162884763e-05, + "loss": 0.5918, + "step": 22 + }, + { + "epoch": 0.18, + "learning_rate": 1.9981855344718587e-05, + "loss": 0.5829, + "step": 23 + }, + { + "epoch": 0.19, + "learning_rate": 
1.997630305306586e-05, + "loss": 0.5715, + "step": 24 + }, + { + "epoch": 0.2, + "learning_rate": 1.997001169925015e-05, + "loss": 0.5776, + "step": 25 + }, + { + "epoch": 0.2, + "learning_rate": 1.996298174934608e-05, + "loss": 0.5663, + "step": 26 + }, + { + "epoch": 0.21, + "learning_rate": 1.9955213724144754e-05, + "loss": 0.5655, + "step": 27 + }, + { + "epoch": 0.22, + "learning_rate": 1.994670819911521e-05, + "loss": 0.5593, + "step": 28 + }, + { + "epoch": 0.23, + "learning_rate": 1.9937465804361783e-05, + "loss": 0.5597, + "step": 29 + }, + { + "epoch": 0.24, + "learning_rate": 1.9927487224577402e-05, + "loss": 0.5573, + "step": 30 + }, + { + "epoch": 0.24, + "learning_rate": 1.99167731989929e-05, + "loss": 0.5485, + "step": 31 + }, + { + "epoch": 0.25, + "learning_rate": 1.990532452132223e-05, + "loss": 0.5541, + "step": 32 + }, + { + "epoch": 0.26, + "learning_rate": 1.9893142039703662e-05, + "loss": 0.5403, + "step": 33 + }, + { + "epoch": 0.27, + "learning_rate": 1.9880226656636977e-05, + "loss": 0.5484, + "step": 34 + }, + { + "epoch": 0.27, + "learning_rate": 1.986657932891657e-05, + "loss": 0.5448, + "step": 35 + }, + { + "epoch": 0.28, + "learning_rate": 1.9852201067560607e-05, + "loss": 0.5421, + "step": 36 + }, + { + "epoch": 0.29, + "learning_rate": 1.98370929377361e-05, + "loss": 0.542, + "step": 37 + }, + { + "epoch": 0.3, + "learning_rate": 1.982125605868001e-05, + "loss": 0.5368, + "step": 38 + }, + { + "epoch": 0.31, + "learning_rate": 1.9804691603616324e-05, + "loss": 0.5394, + "step": 39 + }, + { + "epoch": 0.31, + "learning_rate": 1.9787400799669155e-05, + "loss": 0.5217, + "step": 40 + }, + { + "epoch": 0.32, + "learning_rate": 1.976938492777182e-05, + "loss": 0.5323, + "step": 41 + }, + { + "epoch": 0.33, + "learning_rate": 1.9750645322571952e-05, + "loss": 0.534, + "step": 42 + }, + { + "epoch": 0.34, + "learning_rate": 1.973118337233262e-05, + "loss": 0.5273, + "step": 43 + }, + { + "epoch": 0.35, + "learning_rate": 
1.9711000518829505e-05, + "loss": 0.5182, + "step": 44 + }, + { + "epoch": 0.35, + "learning_rate": 1.9690098257244063e-05, + "loss": 0.5278, + "step": 45 + }, + { + "epoch": 0.36, + "learning_rate": 1.9668478136052776e-05, + "loss": 0.5209, + "step": 46 + }, + { + "epoch": 0.37, + "learning_rate": 1.9646141756912437e-05, + "loss": 0.5156, + "step": 47 + }, + { + "epoch": 0.38, + "learning_rate": 1.962309077454149e-05, + "loss": 0.5252, + "step": 48 + }, + { + "epoch": 0.38, + "learning_rate": 1.9599326896597448e-05, + "loss": 0.5261, + "step": 49 + }, + { + "epoch": 0.39, + "learning_rate": 1.9574851883550395e-05, + "loss": 0.5154, + "step": 50 + }, + { + "epoch": 0.4, + "learning_rate": 1.9549667548552557e-05, + "loss": 0.5034, + "step": 51 + }, + { + "epoch": 0.41, + "learning_rate": 1.9523775757303975e-05, + "loss": 0.4981, + "step": 52 + }, + { + "epoch": 0.42, + "learning_rate": 1.949717842791432e-05, + "loss": 0.5078, + "step": 53 + }, + { + "epoch": 0.42, + "learning_rate": 1.9469877530760753e-05, + "loss": 0.5134, + "step": 54 + }, + { + "epoch": 0.43, + "learning_rate": 1.9441875088342e-05, + "loss": 0.5017, + "step": 55 + }, + { + "epoch": 0.44, + "learning_rate": 1.9413173175128472e-05, + "loss": 0.4926, + "step": 56 + }, + { + "epoch": 0.45, + "learning_rate": 1.9383773917408644e-05, + "loss": 0.5033, + "step": 57 + }, + { + "epoch": 0.45, + "learning_rate": 1.9353679493131486e-05, + "loss": 0.4994, + "step": 58 + }, + { + "epoch": 0.46, + "learning_rate": 1.9322892131745135e-05, + "loss": 0.4944, + "step": 59 + }, + { + "epoch": 0.47, + "learning_rate": 1.9291414114031744e-05, + "loss": 0.4844, + "step": 60 + }, + { + "epoch": 0.48, + "learning_rate": 1.92592477719385e-05, + "loss": 0.4975, + "step": 61 + }, + { + "epoch": 0.49, + "learning_rate": 1.9226395488404875e-05, + "loss": 0.4911, + "step": 62 + }, + { + "epoch": 0.49, + "learning_rate": 1.9192859697186105e-05, + "loss": 0.4829, + "step": 63 + }, + { + "epoch": 0.5, + "learning_rate": 
1.9158642882672873e-05, + "loss": 0.4931, + "step": 64 + }, + { + "epoch": 0.51, + "learning_rate": 1.9123747579707275e-05, + "loss": 0.4882, + "step": 65 + }, + { + "epoch": 0.52, + "learning_rate": 1.908817637339503e-05, + "loss": 0.4851, + "step": 66 + }, + { + "epoch": 0.53, + "learning_rate": 1.9051931898913977e-05, + "loss": 0.4859, + "step": 67 + }, + { + "epoch": 0.53, + "learning_rate": 1.9015016841318843e-05, + "loss": 0.4737, + "step": 68 + }, + { + "epoch": 0.54, + "learning_rate": 1.8977433935342338e-05, + "loss": 0.4823, + "step": 69 + }, + { + "epoch": 0.55, + "learning_rate": 1.8939185965192572e-05, + "loss": 0.4893, + "step": 70 + }, + { + "epoch": 0.56, + "learning_rate": 1.890027576434677e-05, + "loss": 0.4842, + "step": 71 + }, + { + "epoch": 0.56, + "learning_rate": 1.8860706215341383e-05, + "loss": 0.4703, + "step": 72 + }, + { + "epoch": 0.57, + "learning_rate": 1.8820480249558538e-05, + "loss": 0.4813, + "step": 73 + }, + { + "epoch": 0.58, + "learning_rate": 1.8779600847008884e-05, + "loss": 0.4773, + "step": 74 + }, + { + "epoch": 0.59, + "learning_rate": 1.873807103611081e-05, + "loss": 0.4703, + "step": 75 + }, + { + "epoch": 0.6, + "learning_rate": 1.869589389346611e-05, + "loss": 0.4689, + "step": 76 + }, + { + "epoch": 0.6, + "learning_rate": 1.8653072543632064e-05, + "loss": 0.4772, + "step": 77 + }, + { + "epoch": 0.61, + "learning_rate": 1.8609610158889943e-05, + "loss": 0.4702, + "step": 78 + }, + { + "epoch": 0.62, + "learning_rate": 1.8565509959010037e-05, + "loss": 0.4564, + "step": 79 + }, + { + "epoch": 0.63, + "learning_rate": 1.8520775211013094e-05, + "loss": 0.4746, + "step": 80 + }, + { + "epoch": 0.64, + "learning_rate": 1.8475409228928314e-05, + "loss": 0.464, + "step": 81 + }, + { + "epoch": 0.64, + "learning_rate": 1.842941537354783e-05, + "loss": 0.4626, + "step": 82 + }, + { + "epoch": 0.65, + "learning_rate": 1.8382797052177746e-05, + "loss": 0.4579, + "step": 83 + }, + { + "epoch": 0.66, + "learning_rate": 
1.8335557718385702e-05, + "loss": 0.4724, + "step": 84 + }, + { + "epoch": 0.67, + "learning_rate": 1.8287700871745036e-05, + "loss": 0.461, + "step": 85 + }, + { + "epoch": 0.67, + "learning_rate": 1.8239230057575542e-05, + "loss": 0.4595, + "step": 86 + }, + { + "epoch": 0.68, + "learning_rate": 1.81901488666808e-05, + "loss": 0.4603, + "step": 87 + }, + { + "epoch": 0.69, + "learning_rate": 1.814046093508218e-05, + "loss": 0.4529, + "step": 88 + }, + { + "epoch": 0.7, + "learning_rate": 1.8090169943749477e-05, + "loss": 0.4513, + "step": 89 + }, + { + "epoch": 0.71, + "learning_rate": 1.8039279618328215e-05, + "loss": 0.4514, + "step": 90 + }, + { + "epoch": 0.71, + "learning_rate": 1.798779372886365e-05, + "loss": 0.4554, + "step": 91 + }, + { + "epoch": 0.72, + "learning_rate": 1.7935716089521474e-05, + "loss": 0.4661, + "step": 92 + }, + { + "epoch": 0.73, + "learning_rate": 1.7883050558305255e-05, + "loss": 0.4395, + "step": 93 + }, + { + "epoch": 0.74, + "learning_rate": 1.7829801036770628e-05, + "loss": 0.455, + "step": 94 + }, + { + "epoch": 0.75, + "learning_rate": 1.777597146973627e-05, + "loss": 0.4512, + "step": 95 + }, + { + "epoch": 0.75, + "learning_rate": 1.7721565844991643e-05, + "loss": 0.4639, + "step": 96 + }, + { + "epoch": 0.76, + "learning_rate": 1.7666588193001595e-05, + "loss": 0.4463, + "step": 97 + }, + { + "epoch": 0.77, + "learning_rate": 1.7611042586607748e-05, + "loss": 0.451, + "step": 98 + }, + { + "epoch": 0.78, + "learning_rate": 1.7554933140726803e-05, + "loss": 0.4411, + "step": 99 + }, + { + "epoch": 0.78, + "learning_rate": 1.7498264012045686e-05, + "loss": 0.4483, + "step": 100 + }, + { + "epoch": 0.78, + "eval_loss": 0.4502714276313782, + "eval_runtime": 173.0431, + "eval_samples_per_second": 11.546, + "eval_steps_per_second": 0.064, + "step": 100 + }, + { + "epoch": 0.79, + "learning_rate": 1.744103939871361e-05, + "loss": 0.453, + "step": 101 + }, + { + "epoch": 0.8, + "learning_rate": 1.738326354003107e-05, + "loss": 
0.4494, + "step": 102 + }, + { + "epoch": 0.81, + "learning_rate": 1.732494071613579e-05, + "loss": 0.4485, + "step": 103 + }, + { + "epoch": 0.82, + "learning_rate": 1.7266075247685656e-05, + "loss": 0.4333, + "step": 104 + }, + { + "epoch": 0.82, + "learning_rate": 1.720667149553861e-05, + "loss": 0.4324, + "step": 105 + }, + { + "epoch": 0.83, + "learning_rate": 1.7146733860429614e-05, + "loss": 0.442, + "step": 106 + }, + { + "epoch": 0.84, + "learning_rate": 1.70862667826446e-05, + "loss": 0.4334, + "step": 107 + }, + { + "epoch": 0.85, + "learning_rate": 1.702527474169157e-05, + "loss": 0.4356, + "step": 108 + }, + { + "epoch": 0.85, + "learning_rate": 1.6963762255968723e-05, + "loss": 0.4404, + "step": 109 + }, + { + "epoch": 0.86, + "learning_rate": 1.690173388242972e-05, + "loss": 0.4221, + "step": 110 + }, + { + "epoch": 0.87, + "learning_rate": 1.683919421624611e-05, + "loss": 0.4175, + "step": 111 + }, + { + "epoch": 0.88, + "learning_rate": 1.677614789046689e-05, + "loss": 0.4317, + "step": 112 + }, + { + "epoch": 0.89, + "learning_rate": 1.6712599575675318e-05, + "loss": 0.43, + "step": 113 + }, + { + "epoch": 0.89, + "learning_rate": 1.6648553979642867e-05, + "loss": 0.4288, + "step": 114 + }, + { + "epoch": 0.9, + "learning_rate": 1.658401584698049e-05, + "loss": 0.4369, + "step": 115 + }, + { + "epoch": 0.91, + "learning_rate": 1.6518989958787126e-05, + "loss": 0.4262, + "step": 116 + }, + { + "epoch": 0.92, + "learning_rate": 1.6453481132295507e-05, + "loss": 0.4361, + "step": 117 + }, + { + "epoch": 0.93, + "learning_rate": 1.6387494220515276e-05, + "loss": 0.4326, + "step": 118 + }, + { + "epoch": 0.93, + "learning_rate": 1.6321034111873487e-05, + "loss": 0.4189, + "step": 119 + }, + { + "epoch": 0.94, + "learning_rate": 1.6254105729852466e-05, + "loss": 0.425, + "step": 120 + }, + { + "epoch": 0.95, + "learning_rate": 1.6186714032625036e-05, + "loss": 0.4279, + "step": 121 + }, + { + "epoch": 0.96, + "learning_rate": 1.6118864012687246e-05, + 
"loss": 0.4239, + "step": 122 + }, + { + "epoch": 0.96, + "learning_rate": 1.6050560696488493e-05, + "loss": 0.4196, + "step": 123 + }, + { + "epoch": 0.97, + "learning_rate": 1.5981809144059167e-05, + "loss": 0.4206, + "step": 124 + }, + { + "epoch": 0.98, + "learning_rate": 1.5912614448635784e-05, + "loss": 0.4293, + "step": 125 + }, + { + "epoch": 0.99, + "learning_rate": 1.5842981736283686e-05, + "loss": 0.4258, + "step": 126 + }, + { + "epoch": 1.0, + "learning_rate": 1.5772916165517275e-05, + "loss": 0.422, + "step": 127 + }, + { + "epoch": 1.0, + "learning_rate": 1.5702422926917872e-05, + "loss": 0.4023, + "step": 128 + }, + { + "epoch": 1.01, + "learning_rate": 1.5631507242749187e-05, + "loss": 0.3869, + "step": 129 + }, + { + "epoch": 1.02, + "learning_rate": 1.5560174366570448e-05, + "loss": 0.4038, + "step": 130 + }, + { + "epoch": 1.03, + "learning_rate": 1.5488429582847194e-05, + "loss": 0.3991, + "step": 131 + }, + { + "epoch": 1.04, + "learning_rate": 1.5416278206559816e-05, + "loss": 0.402, + "step": 132 + }, + { + "epoch": 1.04, + "learning_rate": 1.5343725582809793e-05, + "loss": 0.3949, + "step": 133 + }, + { + "epoch": 1.05, + "learning_rate": 1.5270777086423724e-05, + "loss": 0.3973, + "step": 134 + }, + { + "epoch": 1.06, + "learning_rate": 1.5197438121555159e-05, + "loss": 0.389, + "step": 135 + }, + { + "epoch": 1.07, + "learning_rate": 1.512371412128424e-05, + "loss": 0.4054, + "step": 136 + }, + { + "epoch": 1.07, + "learning_rate": 1.5049610547215205e-05, + "loss": 0.4026, + "step": 137 + }, + { + "epoch": 1.08, + "learning_rate": 1.4975132889071808e-05, + "loss": 0.3877, + "step": 138 + }, + { + "epoch": 1.09, + "learning_rate": 1.4900286664290593e-05, + "loss": 0.3954, + "step": 139 + }, + { + "epoch": 1.1, + "learning_rate": 1.4825077417612187e-05, + "loss": 0.3879, + "step": 140 + }, + { + "epoch": 1.11, + "learning_rate": 1.4749510720670506e-05, + "loss": 0.3897, + "step": 141 + }, + { + "epoch": 1.11, + "learning_rate": 
1.4673592171580025e-05, + "loss": 0.3914, + "step": 142 + }, + { + "epoch": 1.12, + "learning_rate": 1.4597327394521044e-05, + "loss": 0.4045, + "step": 143 + }, + { + "epoch": 1.13, + "learning_rate": 1.4520722039323045e-05, + "loss": 0.3794, + "step": 144 + }, + { + "epoch": 1.14, + "learning_rate": 1.4443781781046135e-05, + "loss": 0.3857, + "step": 145 + }, + { + "epoch": 1.15, + "learning_rate": 1.4366512319560642e-05, + "loss": 0.39, + "step": 146 + }, + { + "epoch": 1.15, + "learning_rate": 1.4288919379124837e-05, + "loss": 0.3761, + "step": 147 + }, + { + "epoch": 1.16, + "learning_rate": 1.4211008707960897e-05, + "loss": 0.3761, + "step": 148 + }, + { + "epoch": 1.17, + "learning_rate": 1.4132786077829044e-05, + "loss": 0.3796, + "step": 149 + }, + { + "epoch": 1.18, + "learning_rate": 1.4054257283599974e-05, + "loss": 0.3833, + "step": 150 + }, + { + "epoch": 1.18, + "learning_rate": 1.3975428142825562e-05, + "loss": 0.3831, + "step": 151 + }, + { + "epoch": 1.19, + "learning_rate": 1.3896304495307881e-05, + "loss": 0.3806, + "step": 152 + }, + { + "epoch": 1.2, + "learning_rate": 1.3816892202666591e-05, + "loss": 0.3761, + "step": 153 + }, + { + "epoch": 1.21, + "learning_rate": 1.373719714790469e-05, + "loss": 0.377, + "step": 154 + }, + { + "epoch": 1.22, + "learning_rate": 1.3657225234972695e-05, + "loss": 0.3749, + "step": 155 + }, + { + "epoch": 1.22, + "learning_rate": 1.3576982388331258e-05, + "loss": 0.3817, + "step": 156 + }, + { + "epoch": 1.23, + "learning_rate": 1.3496474552512286e-05, + "loss": 0.3797, + "step": 157 + }, + { + "epoch": 1.24, + "learning_rate": 1.3415707691678557e-05, + "loss": 0.3795, + "step": 158 + }, + { + "epoch": 1.25, + "learning_rate": 1.333468778918187e-05, + "loss": 0.3825, + "step": 159 + }, + { + "epoch": 1.25, + "learning_rate": 1.3253420847119804e-05, + "loss": 0.3745, + "step": 160 + }, + { + "epoch": 1.26, + "learning_rate": 1.3171912885891063e-05, + "loss": 0.3838, + "step": 161 + }, + { + "epoch": 1.27, + 
"learning_rate": 1.3090169943749475e-05, + "loss": 0.3807, + "step": 162 + }, + { + "epoch": 1.28, + "learning_rate": 1.3008198076356678e-05, + "loss": 0.3673, + "step": 163 + }, + { + "epoch": 1.29, + "learning_rate": 1.2926003356333487e-05, + "loss": 0.3663, + "step": 164 + }, + { + "epoch": 1.29, + "learning_rate": 1.2843591872810039e-05, + "loss": 0.3625, + "step": 165 + }, + { + "epoch": 1.3, + "learning_rate": 1.2760969730974692e-05, + "loss": 0.3774, + "step": 166 + }, + { + "epoch": 1.31, + "learning_rate": 1.2678143051621743e-05, + "loss": 0.3662, + "step": 167 + }, + { + "epoch": 1.32, + "learning_rate": 1.2595117970697998e-05, + "loss": 0.3597, + "step": 168 + }, + { + "epoch": 1.33, + "learning_rate": 1.2511900638848196e-05, + "loss": 0.3736, + "step": 169 + }, + { + "epoch": 1.33, + "learning_rate": 1.2428497220959359e-05, + "loss": 0.3515, + "step": 170 + }, + { + "epoch": 1.34, + "learning_rate": 1.2344913895704099e-05, + "loss": 0.3729, + "step": 171 + }, + { + "epoch": 1.35, + "learning_rate": 1.2261156855082882e-05, + "loss": 0.3582, + "step": 172 + }, + { + "epoch": 1.36, + "learning_rate": 1.217723230396532e-05, + "loss": 0.3607, + "step": 173 + }, + { + "epoch": 1.36, + "learning_rate": 1.2093146459630488e-05, + "loss": 0.3631, + "step": 174 + }, + { + "epoch": 1.37, + "learning_rate": 1.2008905551306356e-05, + "loss": 0.3594, + "step": 175 + }, + { + "epoch": 1.38, + "learning_rate": 1.19245158197083e-05, + "loss": 0.3595, + "step": 176 + }, + { + "epoch": 1.39, + "learning_rate": 1.1839983516576802e-05, + "loss": 0.3513, + "step": 177 + }, + { + "epoch": 1.4, + "learning_rate": 1.1755314904214284e-05, + "loss": 0.3519, + "step": 178 + }, + { + "epoch": 1.4, + "learning_rate": 1.1670516255021193e-05, + "loss": 0.3613, + "step": 179 + }, + { + "epoch": 1.41, + "learning_rate": 1.1585593851031346e-05, + "loss": 0.3538, + "step": 180 + }, + { + "epoch": 1.42, + "learning_rate": 1.1500553983446527e-05, + "loss": 0.348, + "step": 181 + }, + { + 
"epoch": 1.43, + "learning_rate": 1.1415402952170434e-05, + "loss": 0.3465, + "step": 182 + }, + { + "epoch": 1.44, + "learning_rate": 1.1330147065341962e-05, + "loss": 0.3646, + "step": 183 + }, + { + "epoch": 1.44, + "learning_rate": 1.1244792638867895e-05, + "loss": 0.3539, + "step": 184 + }, + { + "epoch": 1.45, + "learning_rate": 1.1159345995955007e-05, + "loss": 0.3463, + "step": 185 + }, + { + "epoch": 1.46, + "learning_rate": 1.1073813466641633e-05, + "loss": 0.3653, + "step": 186 + }, + { + "epoch": 1.47, + "learning_rate": 1.0988201387328716e-05, + "loss": 0.3669, + "step": 187 + }, + { + "epoch": 1.47, + "learning_rate": 1.0902516100310412e-05, + "loss": 0.3471, + "step": 188 + }, + { + "epoch": 1.48, + "learning_rate": 1.0816763953304228e-05, + "loss": 0.3513, + "step": 189 + }, + { + "epoch": 1.49, + "learning_rate": 1.0730951298980776e-05, + "loss": 0.3679, + "step": 190 + }, + { + "epoch": 1.5, + "learning_rate": 1.0645084494493166e-05, + "loss": 0.3435, + "step": 191 + }, + { + "epoch": 1.51, + "learning_rate": 1.0559169901006035e-05, + "loss": 0.3441, + "step": 192 + }, + { + "epoch": 1.51, + "learning_rate": 1.0473213883224321e-05, + "loss": 0.3436, + "step": 193 + }, + { + "epoch": 1.52, + "learning_rate": 1.0387222808921746e-05, + "loss": 0.3503, + "step": 194 + }, + { + "epoch": 1.53, + "learning_rate": 1.0301203048469084e-05, + "loss": 0.3479, + "step": 195 + }, + { + "epoch": 1.54, + "learning_rate": 1.0215160974362224e-05, + "loss": 0.3321, + "step": 196 + }, + { + "epoch": 1.55, + "learning_rate": 1.0129102960750092e-05, + "loss": 0.3433, + "step": 197 + }, + { + "epoch": 1.55, + "learning_rate": 1.0043035382962443e-05, + "loss": 0.3505, + "step": 198 + }, + { + "epoch": 1.56, + "learning_rate": 9.956964617037559e-06, + "loss": 0.3436, + "step": 199 + }, + { + "epoch": 1.57, + "learning_rate": 9.87089703924991e-06, + "loss": 0.3421, + "step": 200 + }, + { + "epoch": 1.57, + "eval_loss": 0.3613594174385071, + "eval_runtime": 172.6903, + 
"eval_samples_per_second": 11.57, + "eval_steps_per_second": 0.064, + "step": 200 + }, + { + "epoch": 1.58, + "learning_rate": 9.78483902563778e-06, + "loss": 0.3399, + "step": 201 + }, + { + "epoch": 1.58, + "learning_rate": 9.69879695153092e-06, + "loss": 0.335, + "step": 202 + }, + { + "epoch": 1.59, + "learning_rate": 9.612777191078257e-06, + "loss": 0.3449, + "step": 203 + }, + { + "epoch": 1.6, + "learning_rate": 9.526786116775682e-06, + "loss": 0.3385, + "step": 204 + }, + { + "epoch": 1.61, + "learning_rate": 9.440830098993969e-06, + "loss": 0.3503, + "step": 205 + }, + { + "epoch": 1.62, + "learning_rate": 9.354915505506839e-06, + "loss": 0.3442, + "step": 206 + }, + { + "epoch": 1.62, + "learning_rate": 9.269048701019226e-06, + "loss": 0.3364, + "step": 207 + }, + { + "epoch": 1.63, + "learning_rate": 9.183236046695777e-06, + "loss": 0.3349, + "step": 208 + }, + { + "epoch": 1.64, + "learning_rate": 9.09748389968959e-06, + "loss": 0.3384, + "step": 209 + }, + { + "epoch": 1.65, + "learning_rate": 9.011798612671286e-06, + "loss": 0.3438, + "step": 210 + }, + { + "epoch": 1.65, + "learning_rate": 8.92618653335837e-06, + "loss": 0.3363, + "step": 211 + }, + { + "epoch": 1.66, + "learning_rate": 8.840654004044996e-06, + "loss": 0.3344, + "step": 212 + }, + { + "epoch": 1.67, + "learning_rate": 8.755207361132109e-06, + "loss": 0.3265, + "step": 213 + }, + { + "epoch": 1.68, + "learning_rate": 8.669852934658042e-06, + "loss": 0.3449, + "step": 214 + }, + { + "epoch": 1.69, + "learning_rate": 8.58459704782957e-06, + "loss": 0.3308, + "step": 215 + }, + { + "epoch": 1.69, + "learning_rate": 8.499446016553475e-06, + "loss": 0.337, + "step": 216 + }, + { + "epoch": 1.7, + "learning_rate": 8.414406148968657e-06, + "loss": 0.3272, + "step": 217 + }, + { + "epoch": 1.71, + "learning_rate": 8.32948374497881e-06, + "loss": 0.3278, + "step": 218 + }, + { + "epoch": 1.72, + "learning_rate": 8.24468509578572e-06, + "loss": 0.3415, + "step": 219 + }, + { + "epoch": 1.73, + 
"learning_rate": 8.1600164834232e-06, + "loss": 0.3221, + "step": 220 + }, + { + "epoch": 1.73, + "learning_rate": 8.075484180291702e-06, + "loss": 0.3195, + "step": 221 + }, + { + "epoch": 1.74, + "learning_rate": 7.991094448693648e-06, + "loss": 0.324, + "step": 222 + }, + { + "epoch": 1.75, + "learning_rate": 7.906853540369514e-06, + "loss": 0.3293, + "step": 223 + }, + { + "epoch": 1.76, + "learning_rate": 7.822767696034683e-06, + "loss": 0.3268, + "step": 224 + }, + { + "epoch": 1.76, + "learning_rate": 7.738843144917119e-06, + "loss": 0.3254, + "step": 225 + }, + { + "epoch": 1.77, + "learning_rate": 7.655086104295904e-06, + "loss": 0.3395, + "step": 226 + }, + { + "epoch": 1.78, + "learning_rate": 7.571502779040646e-06, + "loss": 0.3177, + "step": 227 + }, + { + "epoch": 1.79, + "learning_rate": 7.4880993611518095e-06, + "loss": 0.3227, + "step": 228 + }, + { + "epoch": 1.8, + "learning_rate": 7.404882029302003e-06, + "loss": 0.3237, + "step": 229 + }, + { + "epoch": 1.8, + "learning_rate": 7.321856948378259e-06, + "loss": 0.3121, + "step": 230 + }, + { + "epoch": 1.81, + "learning_rate": 7.239030269025311e-06, + "loss": 0.3341, + "step": 231 + }, + { + "epoch": 1.82, + "learning_rate": 7.156408127189964e-06, + "loss": 0.3199, + "step": 232 + }, + { + "epoch": 1.83, + "learning_rate": 7.073996643666516e-06, + "loss": 0.3185, + "step": 233 + }, + { + "epoch": 1.84, + "learning_rate": 6.991801923643324e-06, + "loss": 0.3198, + "step": 234 + }, + { + "epoch": 1.84, + "learning_rate": 6.909830056250527e-06, + "loss": 0.319, + "step": 235 + }, + { + "epoch": 1.85, + "learning_rate": 6.8280871141089415e-06, + "loss": 0.3154, + "step": 236 + }, + { + "epoch": 1.86, + "learning_rate": 6.746579152880201e-06, + "loss": 0.3231, + "step": 237 + }, + { + "epoch": 1.87, + "learning_rate": 6.665312210818133e-06, + "loss": 0.3192, + "step": 238 + }, + { + "epoch": 1.87, + "learning_rate": 6.584292308321445e-06, + "loss": 0.33, + "step": 239 + }, + { + "epoch": 1.88, + 
"learning_rate": 6.503525447487717e-06, + "loss": 0.3121, + "step": 240 + }, + { + "epoch": 1.89, + "learning_rate": 6.423017611668745e-06, + "loss": 0.3103, + "step": 241 + }, + { + "epoch": 1.9, + "learning_rate": 6.342774765027309e-06, + "loss": 0.3078, + "step": 242 + }, + { + "epoch": 1.91, + "learning_rate": 6.262802852095311e-06, + "loss": 0.3078, + "step": 243 + }, + { + "epoch": 1.91, + "learning_rate": 6.183107797333411e-06, + "loss": 0.3262, + "step": 244 + }, + { + "epoch": 1.92, + "learning_rate": 6.103695504692122e-06, + "loss": 0.3225, + "step": 245 + }, + { + "epoch": 1.93, + "learning_rate": 6.024571857174443e-06, + "loss": 0.3076, + "step": 246 + }, + { + "epoch": 1.94, + "learning_rate": 5.94574271640003e-06, + "loss": 0.3214, + "step": 247 + }, + { + "epoch": 1.95, + "learning_rate": 5.867213922170958e-06, + "loss": 0.3161, + "step": 248 + }, + { + "epoch": 1.95, + "learning_rate": 5.788991292039103e-06, + "loss": 0.3081, + "step": 249 + }, + { + "epoch": 1.96, + "learning_rate": 5.711080620875165e-06, + "loss": 0.3137, + "step": 250 + }, + { + "epoch": 1.97, + "learning_rate": 5.633487680439362e-06, + "loss": 0.3104, + "step": 251 + }, + { + "epoch": 1.98, + "learning_rate": 5.5562182189538684e-06, + "loss": 0.3129, + "step": 252 + }, + { + "epoch": 1.98, + "learning_rate": 5.479277960676959e-06, + "loss": 0.3049, + "step": 253 + }, + { + "epoch": 1.99, + "learning_rate": 5.40267260547896e-06, + "loss": 0.3036, + "step": 254 + }, + { + "epoch": 2.0, + "learning_rate": 5.32640782841998e-06, + "loss": 0.3141, + "step": 255 + }, + { + "epoch": 2.01, + "learning_rate": 5.250489279329501e-06, + "loss": 0.2912, + "step": 256 + }, + { + "epoch": 2.02, + "learning_rate": 5.174922582387819e-06, + "loss": 0.3003, + "step": 257 + }, + { + "epoch": 2.02, + "learning_rate": 5.099713335709409e-06, + "loss": 0.3078, + "step": 258 + }, + { + "epoch": 2.03, + "learning_rate": 5.024867110928193e-06, + "loss": 0.2958, + "step": 259 + }, + { + "epoch": 2.04, + 
"learning_rate": 4.950389452784796e-06, + "loss": 0.2929, + "step": 260 + }, + { + "epoch": 2.05, + "learning_rate": 4.876285878715764e-06, + "loss": 0.2984, + "step": 261 + }, + { + "epoch": 2.05, + "learning_rate": 4.802561878444846e-06, + "loss": 0.3017, + "step": 262 + }, + { + "epoch": 2.06, + "learning_rate": 4.729222913576279e-06, + "loss": 0.2982, + "step": 263 + }, + { + "epoch": 2.07, + "learning_rate": 4.6562744171902144e-06, + "loss": 0.2889, + "step": 264 + }, + { + "epoch": 2.08, + "learning_rate": 4.583721793440188e-06, + "loss": 0.3052, + "step": 265 + }, + { + "epoch": 2.09, + "learning_rate": 4.5115704171528105e-06, + "loss": 0.298, + "step": 266 + }, + { + "epoch": 2.09, + "learning_rate": 4.439825633429558e-06, + "loss": 0.2943, + "step": 267 + }, + { + "epoch": 2.1, + "learning_rate": 4.368492757250814e-06, + "loss": 0.2875, + "step": 268 + }, + { + "epoch": 2.11, + "learning_rate": 4.297577073082129e-06, + "loss": 0.2952, + "step": 269 + }, + { + "epoch": 2.12, + "learning_rate": 4.2270838344827285e-06, + "loss": 0.2846, + "step": 270 + }, + { + "epoch": 2.13, + "learning_rate": 4.1570182637163155e-06, + "loss": 0.2952, + "step": 271 + }, + { + "epoch": 2.13, + "learning_rate": 4.087385551364219e-06, + "loss": 0.2963, + "step": 272 + }, + { + "epoch": 2.14, + "learning_rate": 4.018190855940837e-06, + "loss": 0.3008, + "step": 273 + }, + { + "epoch": 2.15, + "learning_rate": 3.949439303511512e-06, + "loss": 0.2894, + "step": 274 + }, + { + "epoch": 2.16, + "learning_rate": 3.881135987312758e-06, + "loss": 0.3, + "step": 275 + }, + { + "epoch": 2.16, + "learning_rate": 3.8132859673749688e-06, + "loss": 0.2929, + "step": 276 + }, + { + "epoch": 2.17, + "learning_rate": 3.7458942701475385e-06, + "loss": 0.3014, + "step": 277 + }, + { + "epoch": 2.18, + "learning_rate": 3.6789658881265135e-06, + "loss": 0.2823, + "step": 278 + }, + { + "epoch": 2.19, + "learning_rate": 3.6125057794847286e-06, + "loss": 0.2895, + "step": 279 + }, + { + "epoch": 2.2, 
+ "learning_rate": 3.546518867704499e-06, + "loss": 0.2927, + "step": 280 + }, + { + "epoch": 2.2, + "learning_rate": 3.4810100412128743e-06, + "loss": 0.2958, + "step": 281 + }, + { + "epoch": 2.21, + "learning_rate": 3.4159841530195127e-06, + "loss": 0.2908, + "step": 282 + }, + { + "epoch": 2.22, + "learning_rate": 3.3514460203571365e-06, + "loss": 0.2925, + "step": 283 + }, + { + "epoch": 2.23, + "learning_rate": 3.287400424324687e-06, + "loss": 0.295, + "step": 284 + }, + { + "epoch": 2.24, + "learning_rate": 3.223852109533112e-06, + "loss": 0.2844, + "step": 285 + }, + { + "epoch": 2.24, + "learning_rate": 3.1608057837538976e-06, + "loss": 0.2962, + "step": 286 + }, + { + "epoch": 2.25, + "learning_rate": 3.098266117570282e-06, + "loss": 0.2939, + "step": 287 + }, + { + "epoch": 2.26, + "learning_rate": 3.0362377440312783e-06, + "loss": 0.2857, + "step": 288 + }, + { + "epoch": 2.27, + "learning_rate": 2.9747252583084297e-06, + "loss": 0.2855, + "step": 289 + }, + { + "epoch": 2.27, + "learning_rate": 2.9137332173554043e-06, + "loss": 0.2886, + "step": 290 + }, + { + "epoch": 2.28, + "learning_rate": 2.853266139570391e-06, + "loss": 0.2844, + "step": 291 + }, + { + "epoch": 2.29, + "learning_rate": 2.793328504461391e-06, + "loss": 0.2914, + "step": 292 + }, + { + "epoch": 2.3, + "learning_rate": 2.733924752314345e-06, + "loss": 0.2811, + "step": 293 + }, + { + "epoch": 2.31, + "learning_rate": 2.6750592838642144e-06, + "loss": 0.2914, + "step": 294 + }, + { + "epoch": 2.31, + "learning_rate": 2.616736459968936e-06, + "loss": 0.2904, + "step": 295 + }, + { + "epoch": 2.32, + "learning_rate": 2.5589606012863968e-06, + "loss": 0.2838, + "step": 296 + }, + { + "epoch": 2.33, + "learning_rate": 2.5017359879543168e-06, + "loss": 0.284, + "step": 297 + }, + { + "epoch": 2.34, + "learning_rate": 2.4450668592731976e-06, + "loss": 0.2859, + "step": 298 + }, + { + "epoch": 2.35, + "learning_rate": 2.3889574133922532e-06, + "loss": 0.2966, + "step": 299 + }, + { + 
"epoch": 2.35, + "learning_rate": 2.3334118069984102e-06, + "loss": 0.2881, + "step": 300 + }, + { + "epoch": 2.35, + "eval_loss": 0.31201496720314026, + "eval_runtime": 172.7826, + "eval_samples_per_second": 11.564, + "eval_steps_per_second": 0.064, + "step": 300 + }, + { + "epoch": 2.36, + "learning_rate": 2.2784341550083577e-06, + "loss": 0.2876, + "step": 301 + }, + { + "epoch": 2.37, + "learning_rate": 2.224028530263733e-06, + "loss": 0.28, + "step": 302 + }, + { + "epoch": 2.38, + "learning_rate": 2.170198963229372e-06, + "loss": 0.2797, + "step": 303 + }, + { + "epoch": 2.38, + "learning_rate": 2.116949441694748e-06, + "loss": 0.2896, + "step": 304 + }, + { + "epoch": 2.39, + "learning_rate": 2.064283910478527e-06, + "loss": 0.281, + "step": 305 + }, + { + "epoch": 2.4, + "learning_rate": 2.012206271136353e-06, + "loss": 0.2774, + "step": 306 + }, + { + "epoch": 2.41, + "learning_rate": 1.960720381671789e-06, + "loss": 0.282, + "step": 307 + }, + { + "epoch": 2.42, + "learning_rate": 1.9098300562505266e-06, + "loss": 0.2847, + "step": 308 + }, + { + "epoch": 2.42, + "learning_rate": 1.8595390649178214e-06, + "loss": 0.2832, + "step": 309 + }, + { + "epoch": 2.43, + "learning_rate": 1.8098511333192026e-06, + "loss": 0.2945, + "step": 310 + }, + { + "epoch": 2.44, + "learning_rate": 1.7607699424244583e-06, + "loss": 0.2903, + "step": 311 + }, + { + "epoch": 2.45, + "learning_rate": 1.712299128254965e-06, + "loss": 0.2907, + "step": 312 + }, + { + "epoch": 2.45, + "learning_rate": 1.6644422816143024e-06, + "loss": 0.2742, + "step": 313 + }, + { + "epoch": 2.46, + "learning_rate": 1.6172029478222595e-06, + "loss": 0.2776, + "step": 314 + }, + { + "epoch": 2.47, + "learning_rate": 1.5705846264521728e-06, + "loss": 0.2909, + "step": 315 + }, + { + "epoch": 2.48, + "learning_rate": 1.5245907710716912e-06, + "loss": 0.2878, + "step": 316 + }, + { + "epoch": 2.49, + "learning_rate": 1.4792247889869072e-06, + "loss": 0.2877, + "step": 317 + }, + { + "epoch": 2.49, + 
"learning_rate": 1.4344900409899643e-06, + "loss": 0.2868, + "step": 318 + }, + { + "epoch": 2.5, + "learning_rate": 1.3903898411100568e-06, + "loss": 0.2807, + "step": 319 + }, + { + "epoch": 2.51, + "learning_rate": 1.3469274563679401e-06, + "loss": 0.2733, + "step": 320 + }, + { + "epoch": 2.52, + "learning_rate": 1.30410610653389e-06, + "loss": 0.2957, + "step": 321 + }, + { + "epoch": 2.53, + "learning_rate": 1.261928963889194e-06, + "loss": 0.2811, + "step": 322 + }, + { + "epoch": 2.53, + "learning_rate": 1.2203991529911197e-06, + "loss": 0.2748, + "step": 323 + }, + { + "epoch": 2.54, + "learning_rate": 1.1795197504414657e-06, + "loss": 0.2782, + "step": 324 + }, + { + "epoch": 2.55, + "learning_rate": 1.1392937846586216e-06, + "loss": 0.2914, + "step": 325 + }, + { + "epoch": 2.56, + "learning_rate": 1.0997242356532335e-06, + "loss": 0.2871, + "step": 326 + }, + { + "epoch": 2.56, + "learning_rate": 1.0608140348074292e-06, + "loss": 0.2839, + "step": 327 + }, + { + "epoch": 2.57, + "learning_rate": 1.022566064657663e-06, + "loss": 0.2869, + "step": 328 + }, + { + "epoch": 2.58, + "learning_rate": 9.849831586811597e-07, + "loss": 0.2968, + "step": 329 + }, + { + "epoch": 2.59, + "learning_rate": 9.48068101086026e-07, + "loss": 0.276, + "step": 330 + }, + { + "epoch": 2.6, + "learning_rate": 9.118236266049707e-07, + "loss": 0.2933, + "step": 331 + }, + { + "epoch": 2.6, + "learning_rate": 8.762524202927281e-07, + "loss": 0.2796, + "step": 332 + }, + { + "epoch": 2.61, + "learning_rate": 8.413571173271295e-07, + "loss": 0.2657, + "step": 333 + }, + { + "epoch": 2.62, + "learning_rate": 8.071403028138969e-07, + "loss": 0.2775, + "step": 334 + }, + { + "epoch": 2.63, + "learning_rate": 7.736045115951252e-07, + "loss": 0.2795, + "step": 335 + }, + { + "epoch": 2.64, + "learning_rate": 7.40752228061502e-07, + "loss": 0.2739, + "step": 336 + }, + { + "epoch": 2.64, + "learning_rate": 7.08585885968257e-07, + "loss": 0.2754, + "step": 337 + }, + { + "epoch": 2.65, + 
"learning_rate": 6.77107868254867e-07, + "loss": 0.2868, + "step": 338 + }, + { + "epoch": 2.66, + "learning_rate": 6.463205068685174e-07, + "loss": 0.2704, + "step": 339 + }, + { + "epoch": 2.67, + "learning_rate": 6.162260825913591e-07, + "loss": 0.2795, + "step": 340 + }, + { + "epoch": 2.67, + "learning_rate": 5.868268248715292e-07, + "loss": 0.2829, + "step": 341 + }, + { + "epoch": 2.68, + "learning_rate": 5.581249116580068e-07, + "loss": 0.2733, + "step": 342 + }, + { + "epoch": 2.69, + "learning_rate": 5.301224692392482e-07, + "loss": 0.2784, + "step": 343 + }, + { + "epoch": 2.7, + "learning_rate": 5.028215720856821e-07, + "loss": 0.2755, + "step": 344 + }, + { + "epoch": 2.71, + "learning_rate": 4.762242426960262e-07, + "loss": 0.2708, + "step": 345 + }, + { + "epoch": 2.71, + "learning_rate": 4.503324514474483e-07, + "loss": 0.2732, + "step": 346 + }, + { + "epoch": 2.72, + "learning_rate": 4.2514811644960743e-07, + "loss": 0.2825, + "step": 347 + }, + { + "epoch": 2.73, + "learning_rate": 4.006731034025546e-07, + "loss": 0.2674, + "step": 348 + }, + { + "epoch": 2.74, + "learning_rate": 3.769092254585138e-07, + "loss": 0.2798, + "step": 349 + }, + { + "epoch": 2.75, + "learning_rate": 3.538582430875659e-07, + "loss": 0.2896, + "step": 350 + }, + { + "epoch": 2.75, + "learning_rate": 3.3152186394722506e-07, + "loss": 0.2852, + "step": 351 + }, + { + "epoch": 2.76, + "learning_rate": 3.099017427559392e-07, + "loss": 0.2837, + "step": 352 + }, + { + "epoch": 2.77, + "learning_rate": 2.889994811704966e-07, + "loss": 0.2707, + "step": 353 + }, + { + "epoch": 2.78, + "learning_rate": 2.688166276673809e-07, + "loss": 0.2897, + "step": 354 + }, + { + "epoch": 2.78, + "learning_rate": 2.493546774280531e-07, + "loss": 0.2806, + "step": 355 + }, + { + "epoch": 2.79, + "learning_rate": 2.3061507222818303e-07, + "loss": 0.277, + "step": 356 + }, + { + "epoch": 2.8, + "learning_rate": 2.1259920033084746e-07, + "loss": 0.2662, + "step": 357 + }, + { + "epoch": 2.81, + 
"learning_rate": 1.9530839638367993e-07, + "loss": 0.2699, + "step": 358 + }, + { + "epoch": 2.82, + "learning_rate": 1.7874394131999428e-07, + "loss": 0.2788, + "step": 359 + }, + { + "epoch": 2.82, + "learning_rate": 1.6290706226390286e-07, + "loss": 0.2828, + "step": 360 + }, + { + "epoch": 2.83, + "learning_rate": 1.4779893243939358e-07, + "loss": 0.2792, + "step": 361 + }, + { + "epoch": 2.84, + "learning_rate": 1.334206710834296e-07, + "loss": 0.2796, + "step": 362 + }, + { + "epoch": 2.85, + "learning_rate": 1.1977334336302437e-07, + "loss": 0.2859, + "step": 363 + }, + { + "epoch": 2.85, + "learning_rate": 1.068579602963371e-07, + "loss": 0.2765, + "step": 364 + }, + { + "epoch": 2.86, + "learning_rate": 9.467547867777261e-08, + "loss": 0.2852, + "step": 365 + }, + { + "epoch": 2.87, + "learning_rate": 8.322680100710023e-08, + "loss": 0.2692, + "step": 366 + }, + { + "epoch": 2.88, + "learning_rate": 7.251277542259849e-08, + "loss": 0.282, + "step": 367 + }, + { + "epoch": 2.89, + "learning_rate": 6.253419563821971e-08, + "loss": 0.2721, + "step": 368 + }, + { + "epoch": 2.89, + "learning_rate": 5.3291800884789356e-08, + "loss": 0.2697, + "step": 369 + }, + { + "epoch": 2.9, + "learning_rate": 4.478627585524753e-08, + "loss": 0.2789, + "step": 370 + }, + { + "epoch": 2.91, + "learning_rate": 3.701825065392184e-08, + "loss": 0.2872, + "step": 371 + }, + { + "epoch": 2.92, + "learning_rate": 2.998830074984915e-08, + "loss": 0.2853, + "step": 372 + }, + { + "epoch": 2.93, + "learning_rate": 2.369694693414304e-08, + "loss": 0.2803, + "step": 373 + }, + { + "epoch": 2.93, + "learning_rate": 1.8144655281413515e-08, + "loss": 0.2799, + "step": 374 + }, + { + "epoch": 2.94, + "learning_rate": 1.3331837115241331e-08, + "loss": 0.2872, + "step": 375 + }, + { + "epoch": 2.95, + "learning_rate": 9.25884897770013e-09, + "loss": 0.2708, + "step": 376 + }, + { + "epoch": 2.96, + "learning_rate": 5.925992602952013e-09, + "loss": 0.2847, + "step": 377 + }, + { + "epoch": 
2.96, + "learning_rate": 3.333514894887646e-09, + "loss": 0.2752, + "step": 378 + }, + { + "epoch": 2.97, + "learning_rate": 1.4816079088375567e-09, + "loss": 0.2837, + "step": 379 + }, + { + "epoch": 2.98, + "learning_rate": 3.7040883734462685e-10, + "loss": 0.2769, + "step": 380 + }, + { + "epoch": 2.99, + "learning_rate": 0.0, + "loss": 0.28, + "step": 381 + }, + { + "epoch": 2.99, + "step": 381, + "total_flos": 5.847367827681444e+18, + "train_loss": 0.3848528464635213, + "train_runtime": 103573.3214, + "train_samples_per_second": 2.834, + "train_steps_per_second": 0.004 + } + ], + "max_steps": 381, + "num_train_epochs": 3, + "total_flos": 5.847367827681444e+18, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..93210bc --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21ee617b58fdef6045cff1e3fee0a00b8a3981dd24fe308e3d980ec87617734d +size 3771