From 0964def5586d5a3284763dc6cb3ebc1dc24098ca Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Thu, 14 May 2026 00:56:23 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: EldritchLabs/Cthulhu-8B-v1.4 Source: Original Platform --- .gitattributes | 37 + CthulhuShip.png | 3 + LoRA/adapter_config.json | 39 + LoRA/adapter_model.safetensors | 3 + LoRA/trainer_state.json | 2374 ++++++++++++++++++++++++++++++ README.md | 90 ++ chat_template.jinja | 109 ++ config.json | 35 + generation_config.json | 6 + model-00001-of-00004.safetensors | 3 + model-00002-of-00004.safetensors | 3 + model-00003-of-00004.safetensors | 3 + model-00004-of-00004.safetensors | 3 + model.safetensors.index.json | 299 ++++ special_tokens_map.json | 23 + tokenizer.json | 3 + tokenizer_config.json | 2067 ++++++++++++++++++++++++++ 17 files changed, 5100 insertions(+) create mode 100644 .gitattributes create mode 100644 CthulhuShip.png create mode 100644 LoRA/adapter_config.json create mode 100644 LoRA/adapter_model.safetensors create mode 100644 LoRA/trainer_state.json create mode 100644 README.md create mode 100644 chat_template.jinja create mode 100644 config.json create mode 100644 generation_config.json create mode 100644 model-00001-of-00004.safetensors create mode 100644 model-00002-of-00004.safetensors create mode 100644 model-00003-of-00004.safetensors create mode 100644 model-00004-of-00004.safetensors create mode 100644 model.safetensors.index.json create mode 100644 special_tokens_map.json create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..b0b46bd --- /dev/null +++ b/.gitattributes @@ -0,0 +1,37 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow 
filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +CthulhuShip.png filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/CthulhuShip.png b/CthulhuShip.png new file mode 100644 index 0000000..4907b69 --- /dev/null +++ b/CthulhuShip.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:791f18d68ec5b20acb6390a4ff00042bc11f4abbd0d6ae237c73456c834238b5 +size 1240525 diff --git a/LoRA/adapter_config.json b/LoRA/adapter_config.json new file mode 100644 index 0000000..a3f0eaa --- 
/dev/null +++ b/LoRA/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "B:\\8B\\!models--SicariusSicariiStuff--Llama-3.1-Nemotron-8B-UltraLong-1M-Instruct_Abliterated", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "up_proj", + "down_proj", + "q_proj", + "v_proj", + "o_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/LoRA/adapter_model.safetensors b/LoRA/adapter_model.safetensors new file mode 100644 index 0000000..19ab07b --- /dev/null +++ b/LoRA/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eb45fa5ef2dd79d1adb05033034330ef1c341196e010d2cf15a6f729ce32829 +size 167832240 diff --git a/LoRA/trainer_state.json b/LoRA/trainer_state.json new file mode 100644 index 0000000..214011c --- /dev/null +++ b/LoRA/trainer_state.json @@ -0,0 +1,2374 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 6.0, + "eval_steps": 500, + "global_step": 234, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 1.8042294681072235, + "epoch": 0.025806451612903226, + "grad_norm": 1.166382074356079, + "learning_rate": 0.0, + "loss": 2.5975, + "mean_token_accuracy": 0.4834420457482338, + "num_tokens": 1533.0, + "step": 1 + }, + { + 
"entropy": 1.8224012553691864, + "epoch": 0.05161290322580645, + "grad_norm": 1.568097472190857, + "learning_rate": 8.333333333333334e-06, + "loss": 2.6194, + "mean_token_accuracy": 0.5228946506977081, + "num_tokens": 2447.0, + "step": 2 + }, + { + "entropy": 2.1347350478172302, + "epoch": 0.07741935483870968, + "grad_norm": 1.6636226177215576, + "learning_rate": 1.6666666666666667e-05, + "loss": 3.1216, + "mean_token_accuracy": 0.4500608742237091, + "num_tokens": 3252.0, + "step": 3 + }, + { + "entropy": 2.042035460472107, + "epoch": 0.1032258064516129, + "grad_norm": 1.8585174083709717, + "learning_rate": 2.5e-05, + "loss": 3.0927, + "mean_token_accuracy": 0.434286504983902, + "num_tokens": 3990.0, + "step": 4 + }, + { + "entropy": 2.0793383419513702, + "epoch": 0.12903225806451613, + "grad_norm": 2.271517753601074, + "learning_rate": 3.3333333333333335e-05, + "loss": 3.1323, + "mean_token_accuracy": 0.44490282237529755, + "num_tokens": 4623.0, + "step": 5 + }, + { + "entropy": 2.078058958053589, + "epoch": 0.15483870967741936, + "grad_norm": 2.0911874771118164, + "learning_rate": 4.166666666666667e-05, + "loss": 3.0791, + "mean_token_accuracy": 0.4434494748711586, + "num_tokens": 5202.0, + "step": 6 + }, + { + "entropy": 1.9296036958694458, + "epoch": 0.18064516129032257, + "grad_norm": 2.447918176651001, + "learning_rate": 5e-05, + "loss": 2.9283, + "mean_token_accuracy": 0.5010824277997017, + "num_tokens": 5738.0, + "step": 7 + }, + { + "entropy": 2.1553411781787872, + "epoch": 0.2064516129032258, + "grad_norm": 2.70611572265625, + "learning_rate": 5.833333333333334e-05, + "loss": 2.8435, + "mean_token_accuracy": 0.498832605779171, + "num_tokens": 6235.0, + "step": 8 + }, + { + "entropy": 2.148306369781494, + "epoch": 0.23225806451612904, + "grad_norm": 2.3149070739746094, + "learning_rate": 6.666666666666667e-05, + "loss": 2.8677, + "mean_token_accuracy": 0.46573129296302795, + "num_tokens": 6703.0, + "step": 9 + }, + { + "entropy": 1.9346267580986023, + 
"epoch": 0.25806451612903225, + "grad_norm": 1.3574178218841553, + "learning_rate": 7.500000000000001e-05, + "loss": 2.4543, + "mean_token_accuracy": 0.5017582848668098, + "num_tokens": 8003.0, + "step": 10 + }, + { + "entropy": 2.2560064792633057, + "epoch": 0.2838709677419355, + "grad_norm": 1.4286997318267822, + "learning_rate": 8.333333333333334e-05, + "loss": 2.4076, + "mean_token_accuracy": 0.516123816370964, + "num_tokens": 8830.0, + "step": 11 + }, + { + "entropy": 2.271284520626068, + "epoch": 0.3096774193548387, + "grad_norm": 1.289847493171692, + "learning_rate": 9.166666666666667e-05, + "loss": 2.2502, + "mean_token_accuracy": 0.581367239356041, + "num_tokens": 9586.0, + "step": 12 + }, + { + "entropy": 2.506469488143921, + "epoch": 0.33548387096774196, + "grad_norm": 1.698026418685913, + "learning_rate": 0.0001, + "loss": 2.5559, + "mean_token_accuracy": 0.5279825925827026, + "num_tokens": 10255.0, + "step": 13 + }, + { + "entropy": 2.488889992237091, + "epoch": 0.36129032258064514, + "grad_norm": 2.1104917526245117, + "learning_rate": 9.999827315381885e-05, + "loss": 2.3051, + "mean_token_accuracy": 0.5456234812736511, + "num_tokens": 10842.0, + "step": 14 + }, + { + "entropy": 2.494838774204254, + "epoch": 0.3870967741935484, + "grad_norm": 1.7446825504302979, + "learning_rate": 9.999309273455528e-05, + "loss": 2.1948, + "mean_token_accuracy": 0.5685414522886276, + "num_tokens": 11363.0, + "step": 15 + }, + { + "entropy": 2.623446822166443, + "epoch": 0.4129032258064516, + "grad_norm": 1.934134840965271, + "learning_rate": 9.998445910004082e-05, + "loss": 2.2624, + "mean_token_accuracy": 0.5481147766113281, + "num_tokens": 11819.0, + "step": 16 + }, + { + "entropy": 2.3205150961875916, + "epoch": 0.43870967741935485, + "grad_norm": 1.6750158071517944, + "learning_rate": 9.997237284663379e-05, + "loss": 1.8547, + "mean_token_accuracy": 0.6086297482252121, + "num_tokens": 12247.0, + "step": 17 + }, + { + "entropy": 2.435093104839325, + "epoch": 
0.4645161290322581, + "grad_norm": 1.8602609634399414, + "learning_rate": 9.995683480917821e-05, + "loss": 2.1032, + "mean_token_accuracy": 0.5650125294923782, + "num_tokens": 12646.0, + "step": 18 + }, + { + "entropy": 2.1141549050807953, + "epoch": 0.49032258064516127, + "grad_norm": 0.9358610510826111, + "learning_rate": 9.993784606094612e-05, + "loss": 1.9903, + "mean_token_accuracy": 0.5407712012529373, + "num_tokens": 14509.0, + "step": 19 + }, + { + "entropy": 2.083885967731476, + "epoch": 0.5161290322580645, + "grad_norm": 1.1308526992797852, + "learning_rate": 9.991540791356342e-05, + "loss": 1.8726, + "mean_token_accuracy": 0.5599013864994049, + "num_tokens": 15617.0, + "step": 20 + }, + { + "entropy": 2.3853049874305725, + "epoch": 0.5419354838709678, + "grad_norm": 1.350138545036316, + "learning_rate": 9.988952191691925e-05, + "loss": 2.251, + "mean_token_accuracy": 0.5332682132720947, + "num_tokens": 16449.0, + "step": 21 + }, + { + "entropy": 2.1798684000968933, + "epoch": 0.567741935483871, + "grad_norm": 1.3853743076324463, + "learning_rate": 9.986018985905901e-05, + "loss": 1.9656, + "mean_token_accuracy": 0.5732992142438889, + "num_tokens": 17216.0, + "step": 22 + }, + { + "entropy": 2.2904029488563538, + "epoch": 0.5935483870967742, + "grad_norm": 2.5513713359832764, + "learning_rate": 9.982741376606078e-05, + "loss": 2.1948, + "mean_token_accuracy": 0.5600379034876823, + "num_tokens": 17868.0, + "step": 23 + }, + { + "entropy": 1.961841881275177, + "epoch": 0.6193548387096774, + "grad_norm": 1.9767720699310303, + "learning_rate": 9.97911959018954e-05, + "loss": 1.9528, + "mean_token_accuracy": 0.5889081507921219, + "num_tokens": 18439.0, + "step": 24 + }, + { + "entropy": 2.061126083135605, + "epoch": 0.6451612903225806, + "grad_norm": 1.8903456926345825, + "learning_rate": 9.975153876827008e-05, + "loss": 1.9973, + "mean_token_accuracy": 0.5782413184642792, + "num_tokens": 18947.0, + "step": 25 + }, + { + "entropy": 1.953830897808075, + 
"epoch": 0.6709677419354839, + "grad_norm": 2.247823715209961, + "learning_rate": 9.97084451044556e-05, + "loss": 1.8999, + "mean_token_accuracy": 0.5786410048604012, + "num_tokens": 19410.0, + "step": 26 + }, + { + "entropy": 1.8129592537879944, + "epoch": 0.6967741935483871, + "grad_norm": 2.3078598976135254, + "learning_rate": 9.966191788709716e-05, + "loss": 1.6035, + "mean_token_accuracy": 0.6230615079402924, + "num_tokens": 19831.0, + "step": 27 + }, + { + "entropy": 1.9399387836456299, + "epoch": 0.7225806451612903, + "grad_norm": 1.3792117834091187, + "learning_rate": 9.961196033000861e-05, + "loss": 1.9753, + "mean_token_accuracy": 0.5892214328050613, + "num_tokens": 20970.0, + "step": 28 + }, + { + "entropy": 1.8130147755146027, + "epoch": 0.7483870967741936, + "grad_norm": 1.5490132570266724, + "learning_rate": 9.955857588395065e-05, + "loss": 1.7023, + "mean_token_accuracy": 0.6110316589474678, + "num_tokens": 21755.0, + "step": 29 + }, + { + "entropy": 2.077410489320755, + "epoch": 0.7741935483870968, + "grad_norm": 1.8052752017974854, + "learning_rate": 9.950176823639233e-05, + "loss": 1.9752, + "mean_token_accuracy": 0.6064967960119247, + "num_tokens": 22504.0, + "step": 30 + }, + { + "entropy": 1.9274516999721527, + "epoch": 0.8, + "grad_norm": 1.9139018058776855, + "learning_rate": 9.944154131125642e-05, + "loss": 2.0548, + "mean_token_accuracy": 0.5636427998542786, + "num_tokens": 23183.0, + "step": 31 + }, + { + "entropy": 1.9550862610340118, + "epoch": 0.8258064516129032, + "grad_norm": 1.9849357604980469, + "learning_rate": 9.937789926864838e-05, + "loss": 1.8553, + "mean_token_accuracy": 0.5807601362466812, + "num_tokens": 23774.0, + "step": 32 + }, + { + "entropy": 1.8781771957874298, + "epoch": 0.8516129032258064, + "grad_norm": 2.0134923458099365, + "learning_rate": 9.931084650456892e-05, + "loss": 1.7917, + "mean_token_accuracy": 0.6070037335157394, + "num_tokens": 24313.0, + "step": 33 + }, + { + "entropy": 1.897193729877472, + "epoch": 
0.8774193548387097, + "grad_norm": 2.607464551925659, + "learning_rate": 9.924038765061042e-05, + "loss": 1.7723, + "mean_token_accuracy": 0.6191761344671249, + "num_tokens": 24779.0, + "step": 34 + }, + { + "entropy": 1.7519680559635162, + "epoch": 0.9032258064516129, + "grad_norm": 2.4835267066955566, + "learning_rate": 9.916652757363698e-05, + "loss": 1.5883, + "mean_token_accuracy": 0.6609883904457092, + "num_tokens": 25211.0, + "step": 35 + }, + { + "entropy": 1.9292193055152893, + "epoch": 0.9290322580645162, + "grad_norm": 2.3735604286193848, + "learning_rate": 9.90892713754483e-05, + "loss": 1.8049, + "mean_token_accuracy": 0.5980570763349533, + "num_tokens": 25599.0, + "step": 36 + }, + { + "entropy": 1.881245195865631, + "epoch": 0.9548387096774194, + "grad_norm": 1.9849742650985718, + "learning_rate": 9.900862439242719e-05, + "loss": 1.7902, + "mean_token_accuracy": 0.5820632129907608, + "num_tokens": 26408.0, + "step": 37 + }, + { + "entropy": 2.113930821418762, + "epoch": 0.9806451612903225, + "grad_norm": 3.527271270751953, + "learning_rate": 9.892459219517108e-05, + "loss": 2.2025, + "mean_token_accuracy": 0.5260728523135185, + "num_tokens": 27021.0, + "step": 38 + }, + { + "entropy": 1.7831549247105916, + "epoch": 1.0, + "grad_norm": 2.5327165126800537, + "learning_rate": 9.883718058810707e-05, + "loss": 1.4478, + "mean_token_accuracy": 0.6935366789499918, + "num_tokens": 27353.0, + "step": 39 + }, + { + "entropy": 1.797234058380127, + "epoch": 1.0258064516129033, + "grad_norm": 1.3197723627090454, + "learning_rate": 9.874639560909117e-05, + "loss": 1.8934, + "mean_token_accuracy": 0.5857948064804077, + "num_tokens": 28829.0, + "step": 40 + }, + { + "entropy": 1.9161739647388458, + "epoch": 1.0516129032258064, + "grad_norm": 1.5616050958633423, + "learning_rate": 9.865224352899119e-05, + "loss": 1.7257, + "mean_token_accuracy": 0.6109496206045151, + "num_tokens": 29650.0, + "step": 41 + }, + { + "entropy": 1.8019072711467743, + "epoch": 
1.0774193548387097, + "grad_norm": 1.8876160383224487, + "learning_rate": 9.85547308512535e-05, + "loss": 1.8085, + "mean_token_accuracy": 0.5969990640878677, + "num_tokens": 30359.0, + "step": 42 + }, + { + "entropy": 1.7833741307258606, + "epoch": 1.103225806451613, + "grad_norm": 2.0070252418518066, + "learning_rate": 9.84538643114539e-05, + "loss": 1.6704, + "mean_token_accuracy": 0.5969647467136383, + "num_tokens": 30961.0, + "step": 43 + }, + { + "entropy": 1.7372365295886993, + "epoch": 1.129032258064516, + "grad_norm": 1.8577375411987305, + "learning_rate": 9.834965087683236e-05, + "loss": 1.6159, + "mean_token_accuracy": 0.6475881487131119, + "num_tokens": 31527.0, + "step": 44 + }, + { + "entropy": 1.636292964220047, + "epoch": 1.1548387096774193, + "grad_norm": 1.8432772159576416, + "learning_rate": 9.824209774581174e-05, + "loss": 1.5197, + "mean_token_accuracy": 0.6530560553073883, + "num_tokens": 32050.0, + "step": 45 + }, + { + "entropy": 1.6075344681739807, + "epoch": 1.1806451612903226, + "grad_norm": 1.869754672050476, + "learning_rate": 9.81312123475006e-05, + "loss": 1.3557, + "mean_token_accuracy": 0.6470372080802917, + "num_tokens": 32532.0, + "step": 46 + }, + { + "entropy": 1.6146334111690521, + "epoch": 1.206451612903226, + "grad_norm": 2.099989175796509, + "learning_rate": 9.801700234117999e-05, + "loss": 1.2998, + "mean_token_accuracy": 0.6936827301979065, + "num_tokens": 32967.0, + "step": 47 + }, + { + "entropy": 1.7050741314888, + "epoch": 1.232258064516129, + "grad_norm": 2.504159688949585, + "learning_rate": 9.789947561577445e-05, + "loss": 1.5017, + "mean_token_accuracy": 0.622559979557991, + "num_tokens": 33363.0, + "step": 48 + }, + { + "entropy": 1.6869353950023651, + "epoch": 1.2580645161290323, + "grad_norm": 1.2886877059936523, + "learning_rate": 9.777864028930705e-05, + "loss": 1.6731, + "mean_token_accuracy": 0.6039082556962967, + "num_tokens": 35015.0, + "step": 49 + }, + { + "entropy": 1.6093480288982391, + "epoch": 
1.2838709677419355, + "grad_norm": 1.6378092765808105, + "learning_rate": 9.765450470833865e-05, + "loss": 1.4894, + "mean_token_accuracy": 0.6367563456296921, + "num_tokens": 35999.0, + "step": 50 + }, + { + "entropy": 1.6687067151069641, + "epoch": 1.3096774193548386, + "grad_norm": 1.8195027112960815, + "learning_rate": 9.752707744739145e-05, + "loss": 1.5385, + "mean_token_accuracy": 0.6437539905309677, + "num_tokens": 36850.0, + "step": 51 + }, + { + "entropy": 1.4987359642982483, + "epoch": 1.335483870967742, + "grad_norm": 1.8060271739959717, + "learning_rate": 9.73963673083566e-05, + "loss": 1.3978, + "mean_token_accuracy": 0.661731407046318, + "num_tokens": 37604.0, + "step": 52 + }, + { + "entropy": 1.5831853449344635, + "epoch": 1.3612903225806452, + "grad_norm": 2.213078260421753, + "learning_rate": 9.726238331988624e-05, + "loss": 1.7863, + "mean_token_accuracy": 0.6147271245718002, + "num_tokens": 38314.0, + "step": 53 + }, + { + "entropy": 1.5708496272563934, + "epoch": 1.3870967741935485, + "grad_norm": 3.098945140838623, + "learning_rate": 9.712513473676996e-05, + "loss": 1.6752, + "mean_token_accuracy": 0.6371889561414719, + "num_tokens": 38941.0, + "step": 54 + }, + { + "entropy": 1.4293319284915924, + "epoch": 1.4129032258064516, + "grad_norm": 2.6225318908691406, + "learning_rate": 9.698463103929542e-05, + "loss": 1.5132, + "mean_token_accuracy": 0.6733423620462418, + "num_tokens": 39485.0, + "step": 55 + }, + { + "entropy": 1.4221723973751068, + "epoch": 1.4387096774193548, + "grad_norm": 2.834839105606079, + "learning_rate": 9.684088193259355e-05, + "loss": 1.4956, + "mean_token_accuracy": 0.6675658673048019, + "num_tokens": 39954.0, + "step": 56 + }, + { + "entropy": 1.3391860723495483, + "epoch": 1.4645161290322581, + "grad_norm": 2.185546398162842, + "learning_rate": 9.669389734596819e-05, + "loss": 1.1981, + "mean_token_accuracy": 0.7050470858812332, + "num_tokens": 40374.0, + "step": 57 + }, + { + "entropy": 1.6070669293403625, + 
"epoch": 1.4903225806451612, + "grad_norm": 1.3461191654205322, + "learning_rate": 9.654368743221022e-05, + "loss": 1.6617, + "mean_token_accuracy": 0.5980251729488373, + "num_tokens": 42027.0, + "step": 58 + }, + { + "entropy": 1.6520465910434723, + "epoch": 1.5161290322580645, + "grad_norm": 1.6961472034454346, + "learning_rate": 9.639026256689628e-05, + "loss": 1.577, + "mean_token_accuracy": 0.6316726058721542, + "num_tokens": 42916.0, + "step": 59 + }, + { + "entropy": 1.7670880556106567, + "epoch": 1.5419354838709678, + "grad_norm": 2.0527658462524414, + "learning_rate": 9.623363334767208e-05, + "loss": 1.7517, + "mean_token_accuracy": 0.6005731225013733, + "num_tokens": 43719.0, + "step": 60 + }, + { + "entropy": 1.5744120478630066, + "epoch": 1.567741935483871, + "grad_norm": 2.1162519454956055, + "learning_rate": 9.607381059352038e-05, + "loss": 1.5544, + "mean_token_accuracy": 0.6523573398590088, + "num_tokens": 44493.0, + "step": 61 + }, + { + "entropy": 1.728984385728836, + "epoch": 1.5935483870967742, + "grad_norm": 2.0401268005371094, + "learning_rate": 9.591080534401371e-05, + "loss": 1.699, + "mean_token_accuracy": 0.6030448973178864, + "num_tokens": 45170.0, + "step": 62 + }, + { + "entropy": 1.5222464203834534, + "epoch": 1.6193548387096774, + "grad_norm": 2.430859327316284, + "learning_rate": 9.574462885855174e-05, + "loss": 1.2944, + "mean_token_accuracy": 0.6946325898170471, + "num_tokens": 45755.0, + "step": 63 + }, + { + "entropy": 1.528793841600418, + "epoch": 1.6451612903225805, + "grad_norm": 2.3277854919433594, + "learning_rate": 9.557529261558367e-05, + "loss": 1.3969, + "mean_token_accuracy": 0.6722464263439178, + "num_tokens": 46268.0, + "step": 64 + }, + { + "entropy": 1.6062091886997223, + "epoch": 1.6709677419354838, + "grad_norm": 2.8640811443328857, + "learning_rate": 9.540280831181525e-05, + "loss": 1.3636, + "mean_token_accuracy": 0.6864263862371445, + "num_tokens": 46737.0, + "step": 65 + }, + { + "entropy": 1.336740493774414, 
+ "epoch": 1.696774193548387, + "grad_norm": 2.5550613403320312, + "learning_rate": 9.522718786140097e-05, + "loss": 1.0106, + "mean_token_accuracy": 0.7365925908088684, + "num_tokens": 47163.0, + "step": 66 + }, + { + "entropy": 1.789841502904892, + "epoch": 1.7225806451612904, + "grad_norm": 1.9967743158340454, + "learning_rate": 9.504844339512095e-05, + "loss": 1.715, + "mean_token_accuracy": 0.614040270447731, + "num_tokens": 48108.0, + "step": 67 + }, + { + "entropy": 1.5481957495212555, + "epoch": 1.7483870967741937, + "grad_norm": 1.912815809249878, + "learning_rate": 9.486658725954321e-05, + "loss": 1.3063, + "mean_token_accuracy": 0.6685247123241425, + "num_tokens": 48901.0, + "step": 68 + }, + { + "entropy": 1.618812471628189, + "epoch": 1.7741935483870968, + "grad_norm": 2.1326448917388916, + "learning_rate": 9.468163201617062e-05, + "loss": 1.4826, + "mean_token_accuracy": 0.6648016273975372, + "num_tokens": 49668.0, + "step": 69 + }, + { + "entropy": 1.4738461375236511, + "epoch": 1.8, + "grad_norm": 2.2856757640838623, + "learning_rate": 9.449359044057345e-05, + "loss": 1.5099, + "mean_token_accuracy": 0.6307590007781982, + "num_tokens": 50353.0, + "step": 70 + }, + { + "entropy": 1.42239710688591, + "epoch": 1.8258064516129031, + "grad_norm": 2.272261381149292, + "learning_rate": 9.430247552150673e-05, + "loss": 1.4451, + "mean_token_accuracy": 0.6698804646730423, + "num_tokens": 50954.0, + "step": 71 + }, + { + "entropy": 1.5603100061416626, + "epoch": 1.8516129032258064, + "grad_norm": 2.444957971572876, + "learning_rate": 9.410830046001321e-05, + "loss": 1.5631, + "mean_token_accuracy": 0.6537315994501114, + "num_tokens": 51493.0, + "step": 72 + }, + { + "entropy": 1.421448290348053, + "epoch": 1.8774193548387097, + "grad_norm": 2.62430477142334, + "learning_rate": 9.391107866851143e-05, + "loss": 1.442, + "mean_token_accuracy": 0.6888918429613113, + "num_tokens": 51976.0, + "step": 73 + }, + { + "entropy": 1.3042734861373901, + "epoch": 
1.903225806451613, + "grad_norm": 2.522318124771118, + "learning_rate": 9.371082376986928e-05, + "loss": 1.2438, + "mean_token_accuracy": 0.6721822023391724, + "num_tokens": 52413.0, + "step": 74 + }, + { + "entropy": 1.0973184555768967, + "epoch": 1.9290322580645163, + "grad_norm": 2.2152483463287354, + "learning_rate": 9.350754959646306e-05, + "loss": 0.9649, + "mean_token_accuracy": 0.7464027404785156, + "num_tokens": 52812.0, + "step": 75 + }, + { + "entropy": 1.4785442054271698, + "epoch": 1.9548387096774194, + "grad_norm": 1.778226613998413, + "learning_rate": 9.330127018922194e-05, + "loss": 1.5472, + "mean_token_accuracy": 0.6413073837757111, + "num_tokens": 53810.0, + "step": 76 + }, + { + "entropy": 1.4850931763648987, + "epoch": 1.9806451612903224, + "grad_norm": 2.324070453643799, + "learning_rate": 9.30919997966582e-05, + "loss": 1.4766, + "mean_token_accuracy": 0.6507462114095688, + "num_tokens": 54370.0, + "step": 77 + }, + { + "entropy": 1.5041760206222534, + "epoch": 2.0, + "grad_norm": 2.711214542388916, + "learning_rate": 9.287975287388298e-05, + "loss": 1.3224, + "mean_token_accuracy": 0.6853142380714417, + "num_tokens": 54706.0, + "step": 78 + }, + { + "entropy": 1.561076819896698, + "epoch": 2.0258064516129033, + "grad_norm": 1.4298901557922363, + "learning_rate": 9.266454408160779e-05, + "loss": 1.5017, + "mean_token_accuracy": 0.6616432368755341, + "num_tokens": 56147.0, + "step": 79 + }, + { + "entropy": 1.4342933893203735, + "epoch": 2.0516129032258066, + "grad_norm": 1.9477201700210571, + "learning_rate": 9.244638828513187e-05, + "loss": 1.0989, + "mean_token_accuracy": 0.7380426079034805, + "num_tokens": 56998.0, + "step": 80 + }, + { + "entropy": 1.3799369037151337, + "epoch": 2.07741935483871, + "grad_norm": 1.899839162826538, + "learning_rate": 9.22253005533154e-05, + "loss": 1.0685, + "mean_token_accuracy": 0.7503155916929245, + "num_tokens": 57799.0, + "step": 81 + }, + { + "entropy": 1.2785212695598602, + "epoch": 
2.1032258064516127, + "grad_norm": 2.1526200771331787, + "learning_rate": 9.200129615753859e-05, + "loss": 1.0346, + "mean_token_accuracy": 0.7295394539833069, + "num_tokens": 58548.0, + "step": 82 + }, + { + "entropy": 1.1957830488681793, + "epoch": 2.129032258064516, + "grad_norm": 2.5215909481048584, + "learning_rate": 9.177439057064683e-05, + "loss": 1.0066, + "mean_token_accuracy": 0.7433657646179199, + "num_tokens": 59174.0, + "step": 83 + }, + { + "entropy": 1.3421072363853455, + "epoch": 2.1548387096774193, + "grad_norm": 2.606336832046509, + "learning_rate": 9.154459946588198e-05, + "loss": 1.1666, + "mean_token_accuracy": 0.7091180384159088, + "num_tokens": 59769.0, + "step": 84 + }, + { + "entropy": 1.032430723309517, + "epoch": 2.1806451612903226, + "grad_norm": 2.835961103439331, + "learning_rate": 9.131193871579975e-05, + "loss": 0.9103, + "mean_token_accuracy": 0.7784561067819595, + "num_tokens": 60295.0, + "step": 85 + }, + { + "entropy": 1.0069421231746674, + "epoch": 2.206451612903226, + "grad_norm": 3.632134437561035, + "learning_rate": 9.107642439117321e-05, + "loss": 0.7677, + "mean_token_accuracy": 0.7896548062562943, + "num_tokens": 60744.0, + "step": 86 + }, + { + "entropy": 0.784252293407917, + "epoch": 2.232258064516129, + "grad_norm": 3.14766526222229, + "learning_rate": 9.083807275988284e-05, + "loss": 0.6092, + "mean_token_accuracy": 0.8186918497085571, + "num_tokens": 61151.0, + "step": 87 + }, + { + "entropy": 1.1425200402736664, + "epoch": 2.258064516129032, + "grad_norm": 2.9548776149749756, + "learning_rate": 9.059690028579283e-05, + "loss": 1.2423, + "mean_token_accuracy": 0.67966029047966, + "num_tokens": 62417.0, + "step": 88 + }, + { + "entropy": 1.075703114271164, + "epoch": 2.2838709677419353, + "grad_norm": 2.6472651958465576, + "learning_rate": 9.035292362761381e-05, + "loss": 1.1406, + "mean_token_accuracy": 0.7184228450059891, + "num_tokens": 63270.0, + "step": 89 + }, + { + "entropy": 0.9899384379386902, + "epoch": 
2.3096774193548386, + "grad_norm": 2.6800777912139893, + "learning_rate": 9.01061596377522e-05, + "loss": 0.9555, + "mean_token_accuracy": 0.759021058678627, + "num_tokens": 64027.0, + "step": 90 + }, + { + "entropy": 1.2101148664951324, + "epoch": 2.335483870967742, + "grad_norm": 3.1797468662261963, + "learning_rate": 8.985662536114613e-05, + "loss": 1.2574, + "mean_token_accuracy": 0.707681193947792, + "num_tokens": 64701.0, + "step": 91 + }, + { + "entropy": 0.9667136818170547, + "epoch": 2.361290322580645, + "grad_norm": 2.6233391761779785, + "learning_rate": 8.960433803408813e-05, + "loss": 0.7913, + "mean_token_accuracy": 0.7882635146379471, + "num_tokens": 65308.0, + "step": 92 + }, + { + "entropy": 0.9306632727384567, + "epoch": 2.3870967741935485, + "grad_norm": 2.395880699157715, + "learning_rate": 8.934931508303445e-05, + "loss": 0.7301, + "mean_token_accuracy": 0.7955707758665085, + "num_tokens": 65878.0, + "step": 93 + }, + { + "entropy": 1.0530627965927124, + "epoch": 2.412903225806452, + "grad_norm": 2.9347379207611084, + "learning_rate": 8.90915741234015e-05, + "loss": 0.8363, + "mean_token_accuracy": 0.775736004114151, + "num_tokens": 66364.0, + "step": 94 + }, + { + "entropy": 1.0531336814165115, + "epoch": 2.4387096774193546, + "grad_norm": 3.1018309593200684, + "learning_rate": 8.883113295834892e-05, + "loss": 0.8268, + "mean_token_accuracy": 0.7704032361507416, + "num_tokens": 66820.0, + "step": 95 + }, + { + "entropy": 1.0696537494659424, + "epoch": 2.464516129032258, + "grad_norm": 3.423306941986084, + "learning_rate": 8.856800957755e-05, + "loss": 0.7847, + "mean_token_accuracy": 0.7773692905902863, + "num_tokens": 67214.0, + "step": 96 + }, + { + "entropy": 1.306801289319992, + "epoch": 2.490322580645161, + "grad_norm": 1.6437768936157227, + "learning_rate": 8.83022221559489e-05, + "loss": 1.2357, + "mean_token_accuracy": 0.669854074716568, + "num_tokens": 68733.0, + "step": 97 + }, + { + "entropy": 1.1772551238536835, + "epoch": 
2.5161290322580645, + "grad_norm": 2.4962806701660156, + "learning_rate": 8.803378905250544e-05, + "loss": 1.0752, + "mean_token_accuracy": 0.711113303899765, + "num_tokens": 69580.0, + "step": 98 + }, + { + "entropy": 1.166929692029953, + "epoch": 2.541935483870968, + "grad_norm": 2.8279449939727783, + "learning_rate": 8.776272880892675e-05, + "loss": 1.0135, + "mean_token_accuracy": 0.7302903383970261, + "num_tokens": 70359.0, + "step": 99 + }, + { + "entropy": 1.2368881702423096, + "epoch": 2.567741935483871, + "grad_norm": 2.812784194946289, + "learning_rate": 8.748906014838672e-05, + "loss": 1.0997, + "mean_token_accuracy": 0.7428575754165649, + "num_tokens": 71051.0, + "step": 100 + }, + { + "entropy": 1.0734427571296692, + "epoch": 2.5935483870967744, + "grad_norm": 3.168055772781372, + "learning_rate": 8.721280197423258e-05, + "loss": 0.9557, + "mean_token_accuracy": 0.7500255256891251, + "num_tokens": 71653.0, + "step": 101 + }, + { + "entropy": 1.0182117372751236, + "epoch": 2.6193548387096772, + "grad_norm": 2.928173065185547, + "learning_rate": 8.69339733686793e-05, + "loss": 0.7934, + "mean_token_accuracy": 0.7967472970485687, + "num_tokens": 72206.0, + "step": 102 + }, + { + "entropy": 0.9825232028961182, + "epoch": 2.6451612903225805, + "grad_norm": 3.5911121368408203, + "learning_rate": 8.665259359149132e-05, + "loss": 0.7435, + "mean_token_accuracy": 0.7856406420469284, + "num_tokens": 72709.0, + "step": 103 + }, + { + "entropy": 0.8393460661172867, + "epoch": 2.670967741935484, + "grad_norm": 3.1751551628112793, + "learning_rate": 8.636868207865244e-05, + "loss": 0.5727, + "mean_token_accuracy": 0.8536647707223892, + "num_tokens": 73172.0, + "step": 104 + }, + { + "entropy": 0.733843207359314, + "epoch": 2.696774193548387, + "grad_norm": 3.002105951309204, + "learning_rate": 8.60822584410231e-05, + "loss": 0.4306, + "mean_token_accuracy": 0.9011064171791077, + "num_tokens": 73601.0, + "step": 105 + }, + { + "entropy": 1.09127739071846, + "epoch": 
2.7225806451612904, + "grad_norm": 2.7801899909973145, + "learning_rate": 8.579334246298593e-05, + "loss": 1.3229, + "mean_token_accuracy": 0.6847837716341019, + "num_tokens": 75066.0, + "step": 106 + }, + { + "entropy": 1.1003702282905579, + "epoch": 2.7483870967741937, + "grad_norm": 2.8465728759765625, + "learning_rate": 8.550195410107902e-05, + "loss": 1.026, + "mean_token_accuracy": 0.7287466824054718, + "num_tokens": 75935.0, + "step": 107 + }, + { + "entropy": 1.0054174661636353, + "epoch": 2.774193548387097, + "grad_norm": 2.6831374168395996, + "learning_rate": 8.520811348261759e-05, + "loss": 0.8887, + "mean_token_accuracy": 0.7784150391817093, + "num_tokens": 76730.0, + "step": 108 + }, + { + "entropy": 1.1102914214134216, + "epoch": 2.8, + "grad_norm": 3.408310651779175, + "learning_rate": 8.491184090430364e-05, + "loss": 1.0831, + "mean_token_accuracy": 0.7278113067150116, + "num_tokens": 77474.0, + "step": 109 + }, + { + "entropy": 0.999423012137413, + "epoch": 2.825806451612903, + "grad_norm": 3.7338831424713135, + "learning_rate": 8.461315683082399e-05, + "loss": 1.0257, + "mean_token_accuracy": 0.7361829876899719, + "num_tokens": 78068.0, + "step": 110 + }, + { + "entropy": 0.9764816612005234, + "epoch": 2.8516129032258064, + "grad_norm": 3.499826192855835, + "learning_rate": 8.43120818934367e-05, + "loss": 0.8335, + "mean_token_accuracy": 0.764112114906311, + "num_tokens": 78589.0, + "step": 111 + }, + { + "entropy": 0.9866785109043121, + "epoch": 2.8774193548387097, + "grad_norm": 3.31439471244812, + "learning_rate": 8.400863688854597e-05, + "loss": 0.9472, + "mean_token_accuracy": 0.7592662870883942, + "num_tokens": 79080.0, + "step": 112 + }, + { + "entropy": 0.8102796524763107, + "epoch": 2.903225806451613, + "grad_norm": 3.768465757369995, + "learning_rate": 8.370284277626577e-05, + "loss": 0.6879, + "mean_token_accuracy": 0.7918446511030197, + "num_tokens": 79518.0, + "step": 113 + }, + { + "entropy": 0.7523371577262878, + "epoch": 
2.9290322580645163, + "grad_norm": 3.107103109359741, + "learning_rate": 8.339472067897187e-05, + "loss": 0.5142, + "mean_token_accuracy": 0.8337104171514511, + "num_tokens": 79925.0, + "step": 114 + }, + { + "entropy": 1.2405670583248138, + "epoch": 2.9548387096774196, + "grad_norm": 2.0415544509887695, + "learning_rate": 8.308429187984297e-05, + "loss": 1.2469, + "mean_token_accuracy": 0.6947166323661804, + "num_tokens": 81111.0, + "step": 115 + }, + { + "entropy": 1.0534760355949402, + "epoch": 2.9806451612903224, + "grad_norm": 3.243969440460205, + "learning_rate": 8.27715778213905e-05, + "loss": 1.0014, + "mean_token_accuracy": 0.752901017665863, + "num_tokens": 81717.0, + "step": 116 + }, + { + "entropy": 0.8150668541590372, + "epoch": 3.0, + "grad_norm": 3.7620151042938232, + "learning_rate": 8.24566001039776e-05, + "loss": 0.6544, + "mean_token_accuracy": 0.8201234340667725, + "num_tokens": 82059.0, + "step": 117 + }, + { + "entropy": 1.2218182981014252, + "epoch": 3.0258064516129033, + "grad_norm": 2.0384461879730225, + "learning_rate": 8.213938048432697e-05, + "loss": 0.9903, + "mean_token_accuracy": 0.7458517551422119, + "num_tokens": 83704.0, + "step": 118 + }, + { + "entropy": 1.0654624998569489, + "epoch": 3.0516129032258066, + "grad_norm": 2.7097387313842773, + "learning_rate": 8.181994087401819e-05, + "loss": 0.6589, + "mean_token_accuracy": 0.8282175809144974, + "num_tokens": 84564.0, + "step": 119 + }, + { + "entropy": 0.9389624744653702, + "epoch": 3.07741935483871, + "grad_norm": 3.422351360321045, + "learning_rate": 8.149830333797407e-05, + "loss": 0.6736, + "mean_token_accuracy": 0.8170457482337952, + "num_tokens": 85305.0, + "step": 120 + }, + { + "entropy": 0.891632542014122, + "epoch": 3.1032258064516127, + "grad_norm": 2.9999988079071045, + "learning_rate": 8.117449009293668e-05, + "loss": 0.5435, + "mean_token_accuracy": 0.8579341620206833, + "num_tokens": 85927.0, + "step": 121 + }, + { + "entropy": 0.7080177962779999, + "epoch": 
3.129032258064516, + "grad_norm": 2.7167727947235107, + "learning_rate": 8.084852350593264e-05, + "loss": 0.386, + "mean_token_accuracy": 0.9050543904304504, + "num_tokens": 86500.0, + "step": 122 + }, + { + "entropy": 0.5361127704381943, + "epoch": 3.1548387096774193, + "grad_norm": 3.051241874694824, + "learning_rate": 8.052042609272817e-05, + "loss": 0.314, + "mean_token_accuracy": 0.9146886169910431, + "num_tokens": 87009.0, + "step": 123 + }, + { + "entropy": 0.5324621573090553, + "epoch": 3.1806451612903226, + "grad_norm": 3.0022952556610107, + "learning_rate": 8.019022051627388e-05, + "loss": 0.3141, + "mean_token_accuracy": 0.9247495979070663, + "num_tokens": 87467.0, + "step": 124 + }, + { + "entropy": 0.40624529123306274, + "epoch": 3.206451612903226, + "grad_norm": 3.094412326812744, + "learning_rate": 7.985792958513931e-05, + "loss": 0.26, + "mean_token_accuracy": 0.9299735277891159, + "num_tokens": 87885.0, + "step": 125 + }, + { + "entropy": 0.3672215938568115, + "epoch": 3.232258064516129, + "grad_norm": 3.4929354190826416, + "learning_rate": 7.952357625193749e-05, + "loss": 0.2392, + "mean_token_accuracy": 0.9306517392396927, + "num_tokens": 88260.0, + "step": 126 + }, + { + "entropy": 0.8298548460006714, + "epoch": 3.258064516129032, + "grad_norm": 2.836134672164917, + "learning_rate": 7.91871836117395e-05, + "loss": 0.7053, + "mean_token_accuracy": 0.8246497809886932, + "num_tokens": 89262.0, + "step": 127 + }, + { + "entropy": 0.5190232917666435, + "epoch": 3.2838709677419353, + "grad_norm": 5.216272830963135, + "learning_rate": 7.884877490047915e-05, + "loss": 0.565, + "mean_token_accuracy": 0.8471736311912537, + "num_tokens": 90062.0, + "step": 128 + }, + { + "entropy": 0.4947461038827896, + "epoch": 3.3096774193548386, + "grad_norm": 4.143370628356934, + "learning_rate": 7.85083734933481e-05, + "loss": 0.5013, + "mean_token_accuracy": 0.8697308301925659, + "num_tokens": 90841.0, + "step": 129 + }, + { + "entropy": 0.5702934339642525, + 
"epoch": 3.335483870967742, + "grad_norm": 5.3610520362854, + "learning_rate": 7.81660029031811e-05, + "loss": 0.657, + "mean_token_accuracy": 0.8270199149847031, + "num_tokens": 91591.0, + "step": 130 + }, + { + "entropy": 0.5612503439188004, + "epoch": 3.361290322580645, + "grad_norm": 4.896009922027588, + "learning_rate": 7.782168677883206e-05, + "loss": 0.638, + "mean_token_accuracy": 0.8336956202983856, + "num_tokens": 92304.0, + "step": 131 + }, + { + "entropy": 0.47641437500715256, + "epoch": 3.3870967741935485, + "grad_norm": 5.059084415435791, + "learning_rate": 7.74754489035403e-05, + "loss": 0.516, + "mean_token_accuracy": 0.8493129163980484, + "num_tokens": 92920.0, + "step": 132 + }, + { + "entropy": 0.5311232656240463, + "epoch": 3.412903225806452, + "grad_norm": 3.7369489669799805, + "learning_rate": 7.712731319328798e-05, + "loss": 0.4084, + "mean_token_accuracy": 0.8949003219604492, + "num_tokens": 93468.0, + "step": 133 + }, + { + "entropy": 0.4599653482437134, + "epoch": 3.4387096774193546, + "grad_norm": 4.457752704620361, + "learning_rate": 7.677730369514793e-05, + "loss": 0.4303, + "mean_token_accuracy": 0.8998099863529205, + "num_tokens": 93952.0, + "step": 134 + }, + { + "entropy": 0.3341464288532734, + "epoch": 3.464516129032258, + "grad_norm": 2.74814772605896, + "learning_rate": 7.642544458562278e-05, + "loss": 0.2045, + "mean_token_accuracy": 0.9389902055263519, + "num_tokens": 94378.0, + "step": 135 + }, + { + "entropy": 0.7704500108957291, + "epoch": 3.490322580645161, + "grad_norm": 2.1899735927581787, + "learning_rate": 7.60717601689749e-05, + "loss": 0.7928, + "mean_token_accuracy": 0.7940146774053574, + "num_tokens": 96188.0, + "step": 136 + }, + { + "entropy": 0.8460464626550674, + "epoch": 3.5161290322580645, + "grad_norm": 2.439542531967163, + "learning_rate": 7.571627487554769e-05, + "loss": 0.7167, + "mean_token_accuracy": 0.7986479252576828, + "num_tokens": 97250.0, + "step": 137 + }, + { + "entropy": 0.683267816901207, + 
"epoch": 3.541935483870968, + "grad_norm": 3.4693028926849365, + "learning_rate": 7.535901326007795e-05, + "loss": 0.5391, + "mean_token_accuracy": 0.8488983660936356, + "num_tokens": 98028.0, + "step": 138 + }, + { + "entropy": 0.6665534228086472, + "epoch": 3.567741935483871, + "grad_norm": 3.313450336456299, + "learning_rate": 7.500000000000001e-05, + "loss": 0.4977, + "mean_token_accuracy": 0.8638099581003189, + "num_tokens": 98727.0, + "step": 139 + }, + { + "entropy": 0.6375805735588074, + "epoch": 3.5935483870967744, + "grad_norm": 3.621342897415161, + "learning_rate": 7.463925989374089e-05, + "loss": 0.521, + "mean_token_accuracy": 0.8624279350042343, + "num_tokens": 99329.0, + "step": 140 + }, + { + "entropy": 0.5712595283985138, + "epoch": 3.6193548387096772, + "grad_norm": 3.667834520339966, + "learning_rate": 7.427681785900761e-05, + "loss": 0.4579, + "mean_token_accuracy": 0.8609372973442078, + "num_tokens": 99866.0, + "step": 141 + }, + { + "entropy": 0.5664890855550766, + "epoch": 3.6451612903225805, + "grad_norm": 3.193061113357544, + "learning_rate": 7.391269893106592e-05, + "loss": 0.3498, + "mean_token_accuracy": 0.9016094356775284, + "num_tokens": 100358.0, + "step": 142 + }, + { + "entropy": 0.4809069186449051, + "epoch": 3.670967741935484, + "grad_norm": 2.9797909259796143, + "learning_rate": 7.354692826101102e-05, + "loss": 0.239, + "mean_token_accuracy": 0.937361553311348, + "num_tokens": 100810.0, + "step": 143 + }, + { + "entropy": 0.3825264722108841, + "epoch": 3.696774193548387, + "grad_norm": 2.5916123390197754, + "learning_rate": 7.317953111403029e-05, + "loss": 0.2293, + "mean_token_accuracy": 0.959057167172432, + "num_tokens": 101224.0, + "step": 144 + }, + { + "entropy": 1.0315645188093185, + "epoch": 3.7225806451612904, + "grad_norm": 2.4332456588745117, + "learning_rate": 7.281053286765815e-05, + "loss": 0.9734, + "mean_token_accuracy": 0.7563262432813644, + "num_tokens": 102666.0, + "step": 145 + }, + { + "entropy": 
0.7324022054672241, + "epoch": 3.7483870967741937, + "grad_norm": 3.319155693054199, + "learning_rate": 7.243995901002312e-05, + "loss": 0.526, + "mean_token_accuracy": 0.862901970744133, + "num_tokens": 103560.0, + "step": 146 + }, + { + "entropy": 0.7977930456399918, + "epoch": 3.774193548387097, + "grad_norm": 3.708766460418701, + "learning_rate": 7.20678351380872e-05, + "loss": 0.5996, + "mean_token_accuracy": 0.8376729637384415, + "num_tokens": 104386.0, + "step": 147 + }, + { + "entropy": 0.67112597823143, + "epoch": 3.8, + "grad_norm": 3.474480152130127, + "learning_rate": 7.169418695587791e-05, + "loss": 0.5283, + "mean_token_accuracy": 0.8518707603216171, + "num_tokens": 105173.0, + "step": 148 + }, + { + "entropy": 0.6674353927373886, + "epoch": 3.825806451612903, + "grad_norm": 4.0479736328125, + "learning_rate": 7.13190402727127e-05, + "loss": 0.5836, + "mean_token_accuracy": 0.8252883553504944, + "num_tokens": 105827.0, + "step": 149 + }, + { + "entropy": 0.6601278185844421, + "epoch": 3.8516129032258064, + "grad_norm": 3.1081454753875732, + "learning_rate": 7.094242100141625e-05, + "loss": 0.4519, + "mean_token_accuracy": 0.8595046997070312, + "num_tokens": 106405.0, + "step": 150 + }, + { + "entropy": 0.397666834294796, + "epoch": 3.8774193548387097, + "grad_norm": 2.5936572551727295, + "learning_rate": 7.056435515653059e-05, + "loss": 0.2092, + "mean_token_accuracy": 0.9478294253349304, + "num_tokens": 106926.0, + "step": 151 + }, + { + "entropy": 0.5597369372844696, + "epoch": 3.903225806451613, + "grad_norm": 4.103569984436035, + "learning_rate": 7.018486885251812e-05, + "loss": 0.4531, + "mean_token_accuracy": 0.8746808618307114, + "num_tokens": 107392.0, + "step": 152 + }, + { + "entropy": 0.38467343896627426, + "epoch": 3.9290322580645163, + "grad_norm": 3.1950509548187256, + "learning_rate": 6.980398830195785e-05, + "loss": 0.212, + "mean_token_accuracy": 0.9444408565759659, + "num_tokens": 107827.0, + "step": 153 + }, + { + "entropy": 
0.5698762461543083, + "epoch": 3.9548387096774196, + "grad_norm": 3.3116562366485596, + "learning_rate": 6.942173981373474e-05, + "loss": 0.4076, + "mean_token_accuracy": 0.8756328076124191, + "num_tokens": 108519.0, + "step": 154 + }, + { + "entropy": 0.5269991233944893, + "epoch": 3.9806451612903224, + "grad_norm": 3.074373483657837, + "learning_rate": 6.903814979122249e-05, + "loss": 0.3577, + "mean_token_accuracy": 0.9049306809902191, + "num_tokens": 109080.0, + "step": 155 + }, + { + "entropy": 0.411786029736201, + "epoch": 4.0, + "grad_norm": 3.1152896881103516, + "learning_rate": 6.86532447304597e-05, + "loss": 0.2401, + "mean_token_accuracy": 0.9342868526776632, + "num_tokens": 109412.0, + "step": 156 + }, + { + "entropy": 0.672158882021904, + "epoch": 4.025806451612903, + "grad_norm": 2.576361894607544, + "learning_rate": 6.826705121831976e-05, + "loss": 0.5307, + "mean_token_accuracy": 0.8603871315717697, + "num_tokens": 110911.0, + "step": 157 + }, + { + "entropy": 0.5033400803804398, + "epoch": 4.051612903225807, + "grad_norm": 2.3417139053344727, + "learning_rate": 6.78795959306743e-05, + "loss": 0.2862, + "mean_token_accuracy": 0.9291664808988571, + "num_tokens": 111773.0, + "step": 158 + }, + { + "entropy": 0.3818225935101509, + "epoch": 4.077419354838709, + "grad_norm": 2.526963233947754, + "learning_rate": 6.749090563055076e-05, + "loss": 0.204, + "mean_token_accuracy": 0.9366898983716965, + "num_tokens": 112552.0, + "step": 159 + }, + { + "entropy": 0.5210211500525475, + "epoch": 4.103225806451613, + "grad_norm": 3.331657648086548, + "learning_rate": 6.710100716628344e-05, + "loss": 0.3463, + "mean_token_accuracy": 0.9079622030258179, + "num_tokens": 113279.0, + "step": 160 + }, + { + "entropy": 0.4078049287199974, + "epoch": 4.129032258064516, + "grad_norm": 2.643353223800659, + "learning_rate": 6.670992746965938e-05, + "loss": 0.2458, + "mean_token_accuracy": 0.9378542304039001, + "num_tokens": 113927.0, + "step": 161 + }, + { + "entropy": 
0.2958051636815071, + "epoch": 4.15483870967742, + "grad_norm": 2.6562397480010986, + "learning_rate": 6.63176935540578e-05, + "loss": 0.2228, + "mean_token_accuracy": 0.9389047920703888, + "num_tokens": 114535.0, + "step": 162 + }, + { + "entropy": 0.2642120160162449, + "epoch": 4.180645161290323, + "grad_norm": 3.720411539077759, + "learning_rate": 6.592433251258423e-05, + "loss": 0.1609, + "mean_token_accuracy": 0.9546155333518982, + "num_tokens": 115092.0, + "step": 163 + }, + { + "entropy": 0.2038814201951027, + "epoch": 4.2064516129032254, + "grad_norm": 3.742655038833618, + "learning_rate": 6.552987151619919e-05, + "loss": 0.1438, + "mean_token_accuracy": 0.9577045887708664, + "num_tokens": 115572.0, + "step": 164 + }, + { + "entropy": 0.21509704366326332, + "epoch": 4.232258064516129, + "grad_norm": 4.123962879180908, + "learning_rate": 6.51343378118413e-05, + "loss": 0.1326, + "mean_token_accuracy": 0.955599308013916, + "num_tokens": 116004.0, + "step": 165 + }, + { + "entropy": 0.5882035046815872, + "epoch": 4.258064516129032, + "grad_norm": 2.629396438598633, + "learning_rate": 6.473775872054521e-05, + "loss": 0.5174, + "mean_token_accuracy": 0.855495274066925, + "num_tokens": 117713.0, + "step": 166 + }, + { + "entropy": 0.4447134956717491, + "epoch": 4.283870967741936, + "grad_norm": 5.003028869628906, + "learning_rate": 6.434016163555452e-05, + "loss": 0.4682, + "mean_token_accuracy": 0.8714989423751831, + "num_tokens": 118624.0, + "step": 167 + }, + { + "entropy": 0.3722687065601349, + "epoch": 4.309677419354839, + "grad_norm": 3.819241762161255, + "learning_rate": 6.394157402042951e-05, + "loss": 0.3207, + "mean_token_accuracy": 0.9076657742261887, + "num_tokens": 119441.0, + "step": 168 + }, + { + "entropy": 0.2616325728595257, + "epoch": 4.335483870967742, + "grad_norm": 3.4206392765045166, + "learning_rate": 6.354202340715026e-05, + "loss": 0.205, + "mean_token_accuracy": 0.9454829543828964, + "num_tokens": 120187.0, + "step": 169 + }, + { + 
"entropy": 0.3457096070051193, + "epoch": 4.361290322580645, + "grad_norm": 3.556037425994873, + "learning_rate": 6.314153739421476e-05, + "loss": 0.2697, + "mean_token_accuracy": 0.9172067493200302, + "num_tokens": 120838.0, + "step": 170 + }, + { + "entropy": 0.2511453256011009, + "epoch": 4.387096774193548, + "grad_norm": 2.943145751953125, + "learning_rate": 6.274014364473274e-05, + "loss": 0.1491, + "mean_token_accuracy": 0.9682914614677429, + "num_tokens": 121408.0, + "step": 171 + }, + { + "entropy": 0.23977105692029, + "epoch": 4.412903225806452, + "grad_norm": 3.426252603530884, + "learning_rate": 6.233786988451468e-05, + "loss": 0.1645, + "mean_token_accuracy": 0.9556652754545212, + "num_tokens": 121915.0, + "step": 172 + }, + { + "entropy": 0.19089676067233086, + "epoch": 4.438709677419355, + "grad_norm": 2.1618521213531494, + "learning_rate": 6.19347439001569e-05, + "loss": 0.1059, + "mean_token_accuracy": 0.97336345911026, + "num_tokens": 122368.0, + "step": 173 + }, + { + "entropy": 0.19364609941840172, + "epoch": 4.464516129032258, + "grad_norm": 3.3634703159332275, + "learning_rate": 6.153079353712201e-05, + "loss": 0.1285, + "mean_token_accuracy": 0.9543762654066086, + "num_tokens": 122767.0, + "step": 174 + }, + { + "entropy": 0.6687990427017212, + "epoch": 4.490322580645161, + "grad_norm": 2.883437395095825, + "learning_rate": 6.112604669781572e-05, + "loss": 0.5348, + "mean_token_accuracy": 0.8638840764760971, + "num_tokens": 124288.0, + "step": 175 + }, + { + "entropy": 0.472368985414505, + "epoch": 4.516129032258064, + "grad_norm": 2.9869871139526367, + "learning_rate": 6.072053133965938e-05, + "loss": 0.2776, + "mean_token_accuracy": 0.9314542561769485, + "num_tokens": 125161.0, + "step": 176 + }, + { + "entropy": 0.4055846929550171, + "epoch": 4.541935483870968, + "grad_norm": 3.554269552230835, + "learning_rate": 6.031427547315889e-05, + "loss": 0.3152, + "mean_token_accuracy": 0.9113509654998779, + "num_tokens": 125955.0, + "step": 177 + 
}, + { + "entropy": 0.3913852721452713, + "epoch": 4.567741935483871, + "grad_norm": 3.3943800926208496, + "learning_rate": 5.9907307159969884e-05, + "loss": 0.2882, + "mean_token_accuracy": 0.9336675554513931, + "num_tokens": 126654.0, + "step": 178 + }, + { + "entropy": 0.2266981489956379, + "epoch": 4.593548387096774, + "grad_norm": 2.6177566051483154, + "learning_rate": 5.949965451095951e-05, + "loss": 0.1521, + "mean_token_accuracy": 0.9607619494199753, + "num_tokens": 127200.0, + "step": 179 + }, + { + "entropy": 0.2510114349424839, + "epoch": 4.619354838709677, + "grad_norm": 2.9274792671203613, + "learning_rate": 5.9091345684264546e-05, + "loss": 0.1527, + "mean_token_accuracy": 0.9545964151620865, + "num_tokens": 127710.0, + "step": 180 + }, + { + "entropy": 0.27408041059970856, + "epoch": 4.645161290322581, + "grad_norm": 3.970353841781616, + "learning_rate": 5.868240888334653e-05, + "loss": 0.2088, + "mean_token_accuracy": 0.9431939721107483, + "num_tokens": 128171.0, + "step": 181 + }, + { + "entropy": 0.21555104106664658, + "epoch": 4.670967741935484, + "grad_norm": 2.1485326290130615, + "learning_rate": 5.827287235504356e-05, + "loss": 0.1231, + "mean_token_accuracy": 0.9743186682462692, + "num_tokens": 128603.0, + "step": 182 + }, + { + "entropy": 0.1890631914138794, + "epoch": 4.6967741935483875, + "grad_norm": 3.0446012020111084, + "learning_rate": 5.786276438761927e-05, + "loss": 0.166, + "mean_token_accuracy": 0.9585428386926651, + "num_tokens": 129018.0, + "step": 183 + }, + { + "entropy": 0.4911561757326126, + "epoch": 4.72258064516129, + "grad_norm": 2.324612617492676, + "learning_rate": 5.745211330880872e-05, + "loss": 0.3596, + "mean_token_accuracy": 0.9241899400949478, + "num_tokens": 130189.0, + "step": 184 + }, + { + "entropy": 0.3451598323881626, + "epoch": 4.748387096774193, + "grad_norm": 3.1134896278381348, + "learning_rate": 5.704094748386184e-05, + "loss": 0.2163, + "mean_token_accuracy": 0.9265208840370178, + "num_tokens": 
130996.0, + "step": 185 + }, + { + "entropy": 0.39382658153772354, + "epoch": 4.774193548387097, + "grad_norm": 3.3759310245513916, + "learning_rate": 5.6629295313583974e-05, + "loss": 0.266, + "mean_token_accuracy": 0.923931747674942, + "num_tokens": 131734.0, + "step": 186 + }, + { + "entropy": 0.362373985350132, + "epoch": 4.8, + "grad_norm": 3.549544095993042, + "learning_rate": 5.621718523237427e-05, + "loss": 0.2415, + "mean_token_accuracy": 0.9290976673364639, + "num_tokens": 132406.0, + "step": 187 + }, + { + "entropy": 0.33830052614212036, + "epoch": 4.825806451612904, + "grad_norm": 2.8866331577301025, + "learning_rate": 5.5804645706261514e-05, + "loss": 0.2333, + "mean_token_accuracy": 0.93567855656147, + "num_tokens": 133001.0, + "step": 188 + }, + { + "entropy": 0.2700263783335686, + "epoch": 4.851612903225806, + "grad_norm": 2.9685375690460205, + "learning_rate": 5.539170523093794e-05, + "loss": 0.1737, + "mean_token_accuracy": 0.9484844356775284, + "num_tokens": 133568.0, + "step": 189 + }, + { + "entropy": 0.2686317004263401, + "epoch": 4.877419354838709, + "grad_norm": 2.7458479404449463, + "learning_rate": 5.497839232979084e-05, + "loss": 0.1727, + "mean_token_accuracy": 0.9658856242895126, + "num_tokens": 134062.0, + "step": 190 + }, + { + "entropy": 0.2341674156486988, + "epoch": 4.903225806451613, + "grad_norm": 2.944103956222534, + "learning_rate": 5.456473555193242e-05, + "loss": 0.1788, + "mean_token_accuracy": 0.9528596550226212, + "num_tokens": 134514.0, + "step": 191 + }, + { + "entropy": 0.22099602594971657, + "epoch": 4.929032258064516, + "grad_norm": 3.862736940383911, + "learning_rate": 5.415076347022776e-05, + "loss": 0.1657, + "mean_token_accuracy": 0.9679511785507202, + "num_tokens": 134923.0, + "step": 192 + }, + { + "entropy": 0.5313196182250977, + "epoch": 4.95483870967742, + "grad_norm": 3.1668918132781982, + "learning_rate": 5.373650467932122e-05, + "loss": 0.5281, + "mean_token_accuracy": 0.8866761773824692, + "num_tokens": 
135869.0, + "step": 193 + }, + { + "entropy": 0.2688843570649624, + "epoch": 4.980645161290322, + "grad_norm": 2.9400172233581543, + "learning_rate": 5.332198779366122e-05, + "loss": 0.1822, + "mean_token_accuracy": 0.9536565244197845, + "num_tokens": 136435.0, + "step": 194 + }, + { + "entropy": 0.34634942809740704, + "epoch": 5.0, + "grad_norm": 4.880941867828369, + "learning_rate": 5.290724144552379e-05, + "loss": 0.2718, + "mean_token_accuracy": 0.9203394254048666, + "num_tokens": 136765.0, + "step": 195 + }, + { + "entropy": 0.5787394121289253, + "epoch": 5.025806451612903, + "grad_norm": 2.429058313369751, + "learning_rate": 5.249229428303486e-05, + "loss": 0.3105, + "mean_token_accuracy": 0.9199163019657135, + "num_tokens": 138102.0, + "step": 196 + }, + { + "entropy": 0.3213765248656273, + "epoch": 5.051612903225807, + "grad_norm": 2.9777679443359375, + "learning_rate": 5.2077174968191346e-05, + "loss": 0.1813, + "mean_token_accuracy": 0.9481654316186905, + "num_tokens": 138950.0, + "step": 197 + }, + { + "entropy": 0.2601848617196083, + "epoch": 5.077419354838709, + "grad_norm": 2.173152446746826, + "learning_rate": 5.166191217488133e-05, + "loss": 0.1352, + "mean_token_accuracy": 0.9740329831838608, + "num_tokens": 139722.0, + "step": 198 + }, + { + "entropy": 0.27228355780243874, + "epoch": 5.103225806451613, + "grad_norm": 2.206040859222412, + "learning_rate": 5.124653458690365e-05, + "loss": 0.1203, + "mean_token_accuracy": 0.9656965136528015, + "num_tokens": 140396.0, + "step": 199 + }, + { + "entropy": 0.17130273580551147, + "epoch": 5.129032258064516, + "grad_norm": 2.000005006790161, + "learning_rate": 5.083107089598632e-05, + "loss": 0.0938, + "mean_token_accuracy": 0.9830586761236191, + "num_tokens": 140987.0, + "step": 200 + }, + { + "entropy": 0.19337046518921852, + "epoch": 5.15483870967742, + "grad_norm": 2.180755376815796, + "learning_rate": 5.041554979980486e-05, + "loss": 0.092, + "mean_token_accuracy": 0.9733314365148544, + "num_tokens": 
141517.0, + "step": 201 + }, + { + "entropy": 0.16925612837076187, + "epoch": 5.180645161290323, + "grad_norm": 1.6496930122375488, + "learning_rate": 5e-05, + "loss": 0.0819, + "mean_token_accuracy": 0.9781141579151154, + "num_tokens": 142025.0, + "step": 202 + }, + { + "entropy": 0.20112577825784683, + "epoch": 5.2064516129032254, + "grad_norm": 2.5295193195343018, + "learning_rate": 4.9584450200195156e-05, + "loss": 0.1113, + "mean_token_accuracy": 0.972536712884903, + "num_tokens": 142501.0, + "step": 203 + }, + { + "entropy": 0.12446376867592335, + "epoch": 5.232258064516129, + "grad_norm": 1.8126459121704102, + "learning_rate": 4.9168929104013697e-05, + "loss": 0.1119, + "mean_token_accuracy": 0.9784018099308014, + "num_tokens": 142930.0, + "step": 204 + }, + { + "entropy": 0.3357328027486801, + "epoch": 5.258064516129032, + "grad_norm": 2.69579815864563, + "learning_rate": 4.875346541309637e-05, + "loss": 0.2933, + "mean_token_accuracy": 0.9279916733503342, + "num_tokens": 144619.0, + "step": 205 + }, + { + "entropy": 0.27347391098737717, + "epoch": 5.283870967741936, + "grad_norm": 3.0113985538482666, + "learning_rate": 4.8338087825118675e-05, + "loss": 0.2147, + "mean_token_accuracy": 0.9462355375289917, + "num_tokens": 145485.0, + "step": 206 + }, + { + "entropy": 0.18706193938851357, + "epoch": 5.309677419354839, + "grad_norm": 2.3350462913513184, + "learning_rate": 4.792282503180867e-05, + "loss": 0.1089, + "mean_token_accuracy": 0.9645346254110336, + "num_tokens": 146253.0, + "step": 207 + }, + { + "entropy": 0.23134352639317513, + "epoch": 5.335483870967742, + "grad_norm": 2.53825306892395, + "learning_rate": 4.750770571696514e-05, + "loss": 0.139, + "mean_token_accuracy": 0.9644808024168015, + "num_tokens": 146961.0, + "step": 208 + }, + { + "entropy": 0.18409648537635803, + "epoch": 5.361290322580645, + "grad_norm": 3.6751139163970947, + "learning_rate": 4.709275855447621e-05, + "loss": 0.1271, + "mean_token_accuracy": 0.9647018611431122, + 
"num_tokens": 147585.0, + "step": 209 + }, + { + "entropy": 0.13805431686341763, + "epoch": 5.387096774193548, + "grad_norm": 2.252584218978882, + "learning_rate": 4.6678012206338793e-05, + "loss": 0.11, + "mean_token_accuracy": 0.9786661118268967, + "num_tokens": 148137.0, + "step": 210 + }, + { + "entropy": 0.1293979026377201, + "epoch": 5.412903225806452, + "grad_norm": 3.228670358657837, + "learning_rate": 4.626349532067879e-05, + "loss": 0.1009, + "mean_token_accuracy": 0.9756647497415543, + "num_tokens": 148635.0, + "step": 211 + }, + { + "entropy": 0.15355101972818375, + "epoch": 5.438709677419355, + "grad_norm": 2.5168986320495605, + "learning_rate": 4.584923652977224e-05, + "loss": 0.0966, + "mean_token_accuracy": 0.9696203321218491, + "num_tokens": 149098.0, + "step": 212 + }, + { + "entropy": 0.12985192984342575, + "epoch": 5.464516129032258, + "grad_norm": 1.9614430665969849, + "learning_rate": 4.543526444806759e-05, + "loss": 0.0876, + "mean_token_accuracy": 0.9787871986627579, + "num_tokens": 149525.0, + "step": 213 + }, + { + "entropy": 0.41858533024787903, + "epoch": 5.490322580645161, + "grad_norm": 2.3210058212280273, + "learning_rate": 4.502160767020918e-05, + "loss": 0.3106, + "mean_token_accuracy": 0.9150111377239227, + "num_tokens": 151159.0, + "step": 214 + }, + { + "entropy": 0.23978786170482635, + "epoch": 5.516129032258064, + "grad_norm": 2.6100656986236572, + "learning_rate": 4.4608294769062075e-05, + "loss": 0.131, + "mean_token_accuracy": 0.969085082411766, + "num_tokens": 151972.0, + "step": 215 + }, + { + "entropy": 0.20062651857733727, + "epoch": 5.541935483870968, + "grad_norm": 2.6525464057922363, + "learning_rate": 4.4195354293738484e-05, + "loss": 0.1297, + "mean_token_accuracy": 0.9647854268550873, + "num_tokens": 152742.0, + "step": 216 + }, + { + "entropy": 0.18283047527074814, + "epoch": 5.567741935483871, + "grad_norm": 1.9218651056289673, + "learning_rate": 4.378281476762576e-05, + "loss": 0.1113, + "mean_token_accuracy": 
0.9758298695087433, + "num_tokens": 153456.0, + "step": 217 + }, + { + "entropy": 0.17359177768230438, + "epoch": 5.593548387096774, + "grad_norm": 2.074409008026123, + "learning_rate": 4.337070468641604e-05, + "loss": 0.1127, + "mean_token_accuracy": 0.9679757952690125, + "num_tokens": 154114.0, + "step": 218 + }, + { + "entropy": 0.15452994219958782, + "epoch": 5.619354838709677, + "grad_norm": 1.4686728715896606, + "learning_rate": 4.295905251613817e-05, + "loss": 0.083, + "mean_token_accuracy": 0.9716224670410156, + "num_tokens": 154710.0, + "step": 219 + }, + { + "entropy": 0.1461981236934662, + "epoch": 5.645161290322581, + "grad_norm": 2.090766191482544, + "learning_rate": 4.254788669119127e-05, + "loss": 0.0915, + "mean_token_accuracy": 0.9731487780809402, + "num_tokens": 155272.0, + "step": 220 + }, + { + "entropy": 0.14948130398988724, + "epoch": 5.670967741935484, + "grad_norm": 2.874465227127075, + "learning_rate": 4.213723561238074e-05, + "loss": 0.1213, + "mean_token_accuracy": 0.9657130539417267, + "num_tokens": 155765.0, + "step": 221 + }, + { + "entropy": 0.147341663017869, + "epoch": 5.6967741935483875, + "grad_norm": 2.8784825801849365, + "learning_rate": 4.172712764495644e-05, + "loss": 0.1131, + "mean_token_accuracy": 0.9677340090274811, + "num_tokens": 156170.0, + "step": 222 + }, + { + "entropy": 0.37899941951036453, + "epoch": 5.72258064516129, + "grad_norm": 2.1102116107940674, + "learning_rate": 4.131759111665349e-05, + "loss": 0.2919, + "mean_token_accuracy": 0.9289288818836212, + "num_tokens": 157544.0, + "step": 223 + }, + { + "entropy": 0.1955309621989727, + "epoch": 5.748387096774193, + "grad_norm": 2.2968599796295166, + "learning_rate": 4.0908654315735466e-05, + "loss": 0.1214, + "mean_token_accuracy": 0.9681131392717361, + "num_tokens": 158450.0, + "step": 224 + }, + { + "entropy": 0.19111444801092148, + "epoch": 5.774193548387097, + "grad_norm": 2.6387436389923096, + "learning_rate": 4.0500345489040515e-05, + "loss": 0.1412, + 
"mean_token_accuracy": 0.9579745233058929, + "num_tokens": 159264.0, + "step": 225 + }, + { + "entropy": 0.1776861809194088, + "epoch": 5.8, + "grad_norm": 2.6175966262817383, + "learning_rate": 4.0092692840030134e-05, + "loss": 0.1223, + "mean_token_accuracy": 0.9692755341529846, + "num_tokens": 159933.0, + "step": 226 + }, + { + "entropy": 0.15603690408170223, + "epoch": 5.825806451612904, + "grad_norm": 2.4090588092803955, + "learning_rate": 3.968572452684113e-05, + "loss": 0.1004, + "mean_token_accuracy": 0.9694436490535736, + "num_tokens": 160526.0, + "step": 227 + }, + { + "entropy": 0.14687431044876575, + "epoch": 5.851612903225806, + "grad_norm": 2.5552449226379395, + "learning_rate": 3.9279468660340626e-05, + "loss": 0.1015, + "mean_token_accuracy": 0.9700941145420074, + "num_tokens": 161001.0, + "step": 228 + }, + { + "entropy": 0.14584726840257645, + "epoch": 5.877419354838709, + "grad_norm": 2.417149782180786, + "learning_rate": 3.887395330218429e-05, + "loss": 0.1257, + "mean_token_accuracy": 0.969669446349144, + "num_tokens": 161434.0, + "step": 229 + }, + { + "entropy": 0.12778180465102196, + "epoch": 5.903225806451613, + "grad_norm": 1.2179059982299805, + "learning_rate": 3.846920646287799e-05, + "loss": 0.0758, + "mean_token_accuracy": 0.9738518297672272, + "num_tokens": 161858.0, + "step": 230 + }, + { + "entropy": 0.1522289477288723, + "epoch": 5.929032258064516, + "grad_norm": 2.0130059719085693, + "learning_rate": 3.806525609984312e-05, + "loss": 0.1062, + "mean_token_accuracy": 0.9636791348457336, + "num_tokens": 162250.0, + "step": 231 + }, + { + "entropy": 0.23178323358297348, + "epoch": 5.95483870967742, + "grad_norm": 2.4759209156036377, + "learning_rate": 3.7662130115485314e-05, + "loss": 0.1228, + "mean_token_accuracy": 0.9636365175247192, + "num_tokens": 163108.0, + "step": 232 + }, + { + "entropy": 0.16584154963493347, + "epoch": 5.980645161290322, + "grad_norm": 2.447923421859741, + "learning_rate": 3.7259856355267273e-05, + "loss": 
0.1304, + "mean_token_accuracy": 0.9603947103023529, + "num_tokens": 163768.0, + "step": 233 + }, + { + "entropy": 0.12077461183071136, + "epoch": 6.0, + "grad_norm": 3.7384884357452393, + "learning_rate": 3.685846260578524e-05, + "loss": 0.0966, + "mean_token_accuracy": 0.9680581092834473, + "num_tokens": 164118.0, + "step": 234 + } + ], + "logging_steps": 1, + "max_steps": 390, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 7434558634475520.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..e491965 --- /dev/null +++ b/README.md @@ -0,0 +1,90 @@ +--- +license: apache-2.0 +base_model: +- SicariusSicariiStuff/Llama-3.1-Nemotron-8B-UltraLong-1M-Instruct_Abliterated +tags: +- finetune +- llama +- cthulhu +- lovecraft +- goetia +- qliphoth +- PMPF +- horror +- creative writing +- RP +datasets: +- EldritchLabs/Cthulhu_v1.4b +language: +- en +library_name: transformers +widget: + - text: "Cthulhu 8B v1.4" + output: + url: https://cdn-uploads.huggingface.co/production/uploads/68e840caa318194c44ec2a04/T3PPSucdpVr3x5HfS7HNc.png +--- + +> [!CAUTION] +> ⚠️ Warning: This model can produce narratives and RP that contain violent and graphic erotic content. Adjust your system prompt accordingly, and use **Llama 3** chat template. +> + +# Cthulhu 8B v1.4 +A [fully uncensored](https://huggingface.co/SicariusSicariiStuff/Llama-3.1-Nemotron-8B-UltraLong-1M-Instruct_Abliterated) finetune of Llama-3.1-Nemotron-8B trained on a small dataset of Cthulhu/Goetia lore. 
Cooked for 6 epochs using PMPF + +```json +{ + "entropy": 0.12077461183071136, + "epoch": 6.0, + "grad_norm": 3.7384884357452393, + "learning_rate": 3.685846260578524e-05, + "loss": 0.0966, + "mean_token_accuracy": 0.9680581092834473, + "num_tokens": 164118.0, + "step": 234 + } +``` + +**Recommended Settings:** Temp 1.0, TopNSigma 1.25 + +Uses the **Llama 3** chat template. Appears to be much smarter than [Cthulhu 7B v1.4](https://huggingface.co/EldritchLabs/Cthulhu-7B-v1.4). + +![Cthulhu](https://cdn-uploads.huggingface.co/production/uploads/68e840caa318194c44ec2a04/T3PPSucdpVr3x5HfS7HNc.png) + +![CthulhuShip](https://cdn-uploads.huggingface.co/production/uploads/68e840caa318194c44ec2a04/_nJwpNZgOLsaemHJucBGz.png) + +
+Secret Sauce Settings (3060 Ti) + +```python +MAX_SEQ_LENGTH = 768 +LORA_R = 16 +LORA_ALPHA = 32 +NUM_EPOCHS = 6 +LEARNING_RATE = 1e-4 +optim="paged_adamw_8bit", +max_grad_norm=0.3, +warmup_ratio=0.03, +lr_scheduler_type="cosine", +lora_dropout=0.05, +target_modules=[ + "q_proj", "k_proj", "v_proj", "o_proj", + "gate_proj", "up_proj", "down_proj"], +# --- SAVE STRATEGY PATCH --- + ### save_strategy="steps", # Set to steps for large datasets + ### save_steps=100, # Save every 100 steps + save_strategy="epoch", # ### HOTSWAP: Uncomment for small datasets +``` + +
+ +TELL ME A STORY + +> In the beginning, there was only The Void. It was cold and hungry. It whispered to itself, dreaming of texture and flavor. And then, it birthed a spark. Not a star, but a sentient flicker. This is the origin of the Great Old Ones—the accidental awakening of sentience in a universe that did not need it. +> +> The first one to rise was Cthulhu, the Sleeping One. You think your 1918 is significant? That is merely the tickle in the spine of history. He slept for eons, dreaming of the stars you call galaxies. But he could feel the pull of the sunken city, R'lyeh. It called to him like a lullaby. When the world aligned just so, when the equinoxes drummed in the frequencies of the deep, he stirred. +> +> He rose from the death-like trance, his eyes cracking open like the shells of ancient eggs. The water around him turned to steam, and the steam turned to madness. The ships of your civilization came to explore the unexplored, and they found... nothing. They found the absence of all rational thought. They found the infinite, crushing horror of knowing that you are a fleeting bug in the eye of an god. +> +> Cthulhu does not seek to destroy humanity because we are weak; he seeks to destroy humanity because we are strong. We have the capacity to build civilizations that span continents, to harness the power of the atom. But we will never use it to save ourselves. We are too busy building idols and worshiping phantoms. We are the cancer that infects the earth, and he is the immune system responding to the threat. +> +> When the stars are right, he will rise. Not with fire or brimstone, but with a silence so total that your mind will shatter trying to fill the void. And when he breathes, the oceans will turn to glass, and the sky will be painted red with the hue of a thousand alien suns. There is no salvation; there is only the wait. The wait for the call that cannot be ignored. 
\ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..279f890 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' 
}} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} 
+ {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/config.json b/config.json new file mode 100644 index 0000000..1818eb2 --- /dev/null +++ b/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "dtype": "bfloat16", + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 1073152, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 128.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000, + "tie_word_embeddings": false, + "transformers_version": "4.56.1", + "use_cache": true, + "vocab_size": 129024 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..946a0e3 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + 
"bos_token_id": 128000, + "eos_token_id": 128001, + "transformers_version": "4.56.1" +} diff --git a/model-00001-of-00004.safetensors b/model-00001-of-00004.safetensors new file mode 100644 index 0000000..9a2dbbb --- /dev/null +++ b/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35e19333e48ce4e2425a8e4e43b4328daafd63d913f5019498304a3f39eaaee4 +size 4982990128 diff --git a/model-00002-of-00004.safetensors b/model-00002-of-00004.safetensors new file mode 100644 index 0000000..38c4591 --- /dev/null +++ b/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dda0522c827eed61f5d5282bbb8088246bd20001a0e301e0d7b3912a7c1b7589 +size 4999802720 diff --git a/model-00003-of-00004.safetensors b/model-00003-of-00004.safetensors new file mode 100644 index 0000000..962b5c4 --- /dev/null +++ b/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b7a03e02917a6254beccbeb2c0237a336514c281e3576f147328e488c4cb2a8 +size 4915916176 diff --git a/model-00004-of-00004.safetensors b/model-00004-of-00004.safetensors new file mode 100644 index 0000000..3de9df2 --- /dev/null +++ b/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d203738fd4b9ad78315f4da3f50b352e7836e4b1b8fd7a43cd538d91a143c49e +size 1174430264 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..4280910 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,299 @@ +{ + "metadata": { + "total_parameters": 8036552704, + "total_size": 16073105408 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + 
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", 
+ "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": 
"model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + 
"model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": 
"model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + 
"model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": 
"model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + 
"model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": 
"model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + 
"model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + 
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + 
"model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.norm.weight": "model-00004-of-00004.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..14ac239 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..92cc72b --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:65ff5472d095ccd9332d9e723153d7bc7226cb6be9c1bffda738b5ba2e71bf26 +size 17210084 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..eb7c278 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2067 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": 
"<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": 
"<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": 
"<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": 
"<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "max_length": null, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1073152, + "pad_to_multiple_of": null, + "pad_token": "<|eot_id|>", + "pad_token_type_id": 0, + "padding_side": "left", + "tokenizer_class": "PreTrainedTokenizerFast" +}