commit 6c739eadac485f466e3d5b6f11be3e7fd15a9ee9 Author: ModelHub XC Date: Sun May 10 23:47:59 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: BEE-spoke-data/smol_llama-81M-tied Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..a6344aa --- /dev/null +++ b/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..92cfb3b --- /dev/null +++ 
b/README.md @@ -0,0 +1,95 @@ +--- +license: apache-2.0 +thumbnail: https://i.ibb.co/TvyMrRc/rsz-smol-llama-banner.png +language: +- en +inference: + parameters: + max_new_tokens: 64 + do_sample: true + temperature: 0.8 + repetition_penalty: 1.15 + no_repeat_ngram_size: 4 + eta_cutoff: 0.0006 + renormalize_logits: true +widget: +- text: My name is El Microondas the Wise and + example_title: El Microondas +- text: Kennesaw State University is a public + example_title: Kennesaw State University +- text: >- + Bungie Studios is an American video game developer. They are most famous for + developing the award winning Halo series of video games. They also made + Destiny. The studio was founded + example_title: Bungie +- text: The Mona Lisa is a world-renowned painting created by + example_title: Mona Lisa +- text: >- + The Harry Potter series, written by J.K. Rowling, begins with the book + titled + example_title: Harry Potter Series +- text: >- + Question: I have cities, but no houses. I have mountains, but no trees. I + have water, but no fish. What am I? + + Answer: + example_title: Riddle +- text: The process of photosynthesis involves the conversion of + example_title: Photosynthesis +- text: >- + Jane went to the store to buy some groceries. She picked up apples, oranges, + and a loaf of bread. When she got home, she realized she forgot + example_title: Story Continuation +- text: >- + Problem 2: If a train leaves Station A at 9:00 AM and travels at 60 mph, and + another train leaves Station B at 10:00 AM and travels at 80 mph, when will + they meet if the distance between the stations is 300 miles? 
+ + To determine + example_title: Math Problem +- text: In the context of computer programming, an algorithm is + example_title: Algorithm Definition +pipeline_tag: text-generation +tags: +- smol_llama +- llama2 +datasets: +- JeanKaddour/minipile +- pszemraj/simple_wikipedia_LM +- BEE-spoke-data/wikipedia-20230901.en-deduped +- mattymchen/refinedweb-3m +--- + + +# smol_llama-81M-tied + +banner + +A small 81M param (total) decoder model, enabled through tying the input/output embeddings. This is the first version of the model. + +- 768 hidden size, 6 layers +- standard multi-head attention (24 heads), context length 1024 +- input/output embeddings **are tied** +- train-from-scratch + +## Notes + +**This checkpoint** is the 'raw' pre-trained model and has not been tuned to a more specific task. **It should be fine-tuned** before use in most cases. + +- slightly larger 101M param GQA pretrained version: [here](https://huggingface.co/BEE-spoke-data/smol_llama-101M-GQA) +- For the chat version of this model, please [see here](https://youtu.be/dQw4w9WgXcQ?si=3ePIqrY1dw94KMu4) + +--- +# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) +Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_BEE-spoke-data__smol_llama-81M-tied) + +| Metric | Value | +|-----------------------|---------------------------| +| Avg. 
| 24.52 | +| ARC (25-shot) | 22.18 | +| HellaSwag (10-shot) | 29.33 | +| MMLU (5-shot) | 24.06 | +| TruthfulQA (0-shot) | 43.97 | +| Winogrande (5-shot) | 49.25 | +| GSM8K (5-shot) | 0.23 | +| DROP (3-shot) | 2.64 | diff --git a/config.json b/config.json new file mode 100644 index 0000000..8a4dc5f --- /dev/null +++ b/config.json @@ -0,0 +1,27 @@ +{ + "_name_or_path": "BEE-spoke-data/smol_llama-tied-v9-KIx2", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "max_position_embeddings": 1024, + "model_type": "llama", + "num_attention_heads": 24, + "num_hidden_layers": 6, + "num_key_value_heads": 24, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.34.1", + "use_cache": true, + "vocab_size": 32128 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..3c6c66f --- /dev/null +++ b/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.34.1" +} diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..08f2e9e --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83a6c4464b1617249fd99c32b11b4c236e7bc235b0e744c65fe6178ff2af885b +size 325235784 diff --git a/smol-llama-banner.png b/smol-llama-banner.png new file mode 100644 index 0000000..f076c2a Binary files /dev/null and b/smol-llama-banner.png differ diff --git a/smol_llama-81M-tied-evals/81m_tied.md b/smol_llama-81M-tied-evals/81m_tied.md new file mode 100644 index 0000000..fb815cb --- /dev/null +++ b/smol_llama-81M-tied-evals/81m_tied.md @@ -0,0 +1,150 @@ +hf-causal-experimental 
(pretrained=BEE-spoke-data/smol_llama-81M-tied,trust_remote_code=True,dtype=float), limit: None, provide_description: False, num_fewshot: 0, batch_size: 64 +| Task |Version| Metric | Value | |Stderr| +|--------------|------:|--------|------:|---|-----:| +|arc_easy | 0|acc | 0.4162|± |0.0101| +| | |acc_norm| 0.3885|± |0.0100| +|boolq | 1|acc | 0.5832|± |0.0086| +|lambada_openai| 0|ppl |79.4522|± |3.1355| +| | |acc | 0.2523|± |0.0061| +|openbookqa | 0|acc | 0.1540|± |0.0162| +| | |acc_norm| 0.2780|± |0.0201| +|piqa | 0|acc | 0.6050|± |0.0114| +| | |acc_norm| 0.5898|± |0.0115| +|winogrande | 0|acc | 0.5272|± |0.0140| + +hf-causal-experimental (pretrained=BEE-spoke-data/smol_llama-81M-tied,trust_remote_code=True,dtype=float), limit: None, provide_description: False, num_fewshot: 25, batch_size: 64 +| Task |Version| Metric |Value | |Stderr| +|-------------|------:|--------|-----:|---|-----:| +|arc_challenge| 0|acc |0.1672|± |0.0109| +| | |acc_norm|0.2218|± |0.0121| + +hf-causal-experimental (pretrained=BEE-spoke-data/smol_llama-81M-tied,trust_remote_code=True,dtype=float), limit: None, provide_description: False, num_fewshot: 10, batch_size: 64 +| Task |Version| Metric |Value | |Stderr| +|---------|------:|--------|-----:|---|-----:| +|hellaswag| 0|acc |0.2769|± |0.0045| +| | |acc_norm|0.2923|± |0.0045| + +hf-causal-experimental (pretrained=BEE-spoke-data/smol_llama-81M-tied,trust_remote_code=True,dtype=float), limit: None, provide_description: False, num_fewshot: 0, batch_size: 64 +| Task |Version|Metric|Value | |Stderr| +|-------------|------:|------|-----:|---|-----:| +|truthfulqa_mc| 1|mc1 |0.2424|± |0.0150| +| | |mc2 |0.4353|± |0.0152| + +hf-causal-experimental (pretrained=BEE-spoke-data/smol_llama-81M-tied,trust_remote_code=True,dtype=float), limit: None, provide_description: False, num_fewshot: 5, batch_size: 64 +| Task |Version| Metric |Value | |Stderr| +|-------------------------------------------------|------:|--------|-----:|---|-----:| 
+|hendrycksTest-abstract_algebra | 1|acc |0.2200|± |0.0416| +| | |acc_norm|0.2200|± |0.0416| +|hendrycksTest-anatomy | 1|acc |0.2741|± |0.0385| +| | |acc_norm|0.2741|± |0.0385| +|hendrycksTest-astronomy | 1|acc |0.1776|± |0.0311| +| | |acc_norm|0.1776|± |0.0311| +|hendrycksTest-business_ethics | 1|acc |0.2100|± |0.0409| +| | |acc_norm|0.2100|± |0.0409| +|hendrycksTest-clinical_knowledge | 1|acc |0.2264|± |0.0258| +| | |acc_norm|0.2264|± |0.0258| +|hendrycksTest-college_biology | 1|acc |0.2361|± |0.0355| +| | |acc_norm|0.2361|± |0.0355| +|hendrycksTest-college_chemistry | 1|acc |0.1900|± |0.0394| +| | |acc_norm|0.1900|± |0.0394| +|hendrycksTest-college_computer_science | 1|acc |0.2100|± |0.0409| +| | |acc_norm|0.2100|± |0.0409| +|hendrycksTest-college_mathematics | 1|acc |0.1800|± |0.0386| +| | |acc_norm|0.1800|± |0.0386| +|hendrycksTest-college_medicine | 1|acc |0.2023|± |0.0306| +| | |acc_norm|0.2023|± |0.0306| +|hendrycksTest-college_physics | 1|acc |0.2157|± |0.0409| +| | |acc_norm|0.2157|± |0.0409| +|hendrycksTest-computer_security | 1|acc |0.2400|± |0.0429| +| | |acc_norm|0.2400|± |0.0429| +|hendrycksTest-conceptual_physics | 1|acc |0.2596|± |0.0287| +| | |acc_norm|0.2596|± |0.0287| +|hendrycksTest-econometrics | 1|acc |0.2544|± |0.0410| +| | |acc_norm|0.2544|± |0.0410| +|hendrycksTest-electrical_engineering | 1|acc |0.2207|± |0.0346| +| | |acc_norm|0.2207|± |0.0346| +|hendrycksTest-elementary_mathematics | 1|acc |0.2169|± |0.0212| +| | |acc_norm|0.2169|± |0.0212| +|hendrycksTest-formal_logic | 1|acc |0.1587|± |0.0327| +| | |acc_norm|0.1587|± |0.0327| +|hendrycksTest-global_facts | 1|acc |0.1900|± |0.0394| +| | |acc_norm|0.1900|± |0.0394| +|hendrycksTest-high_school_biology | 1|acc |0.3000|± |0.0261| +| | |acc_norm|0.3000|± |0.0261| +|hendrycksTest-high_school_chemistry | 1|acc |0.2808|± |0.0316| +| | |acc_norm|0.2808|± |0.0316| +|hendrycksTest-high_school_computer_science | 1|acc |0.2800|± |0.0451| +| | |acc_norm|0.2800|± |0.0451| 
+|hendrycksTest-high_school_european_history | 1|acc |0.2424|± |0.0335| +| | |acc_norm|0.2424|± |0.0335| +|hendrycksTest-high_school_geography | 1|acc |0.2576|± |0.0312| +| | |acc_norm|0.2576|± |0.0312| +|hendrycksTest-high_school_government_and_politics| 1|acc |0.2228|± |0.0300| +| | |acc_norm|0.2228|± |0.0300| +|hendrycksTest-high_school_macroeconomics | 1|acc |0.2231|± |0.0211| +| | |acc_norm|0.2231|± |0.0211| +|hendrycksTest-high_school_mathematics | 1|acc |0.2370|± |0.0259| +| | |acc_norm|0.2370|± |0.0259| +|hendrycksTest-high_school_microeconomics | 1|acc |0.2227|± |0.0270| +| | |acc_norm|0.2227|± |0.0270| +|hendrycksTest-high_school_physics | 1|acc |0.2053|± |0.0330| +| | |acc_norm|0.2053|± |0.0330| +|hendrycksTest-high_school_psychology | 1|acc |0.2110|± |0.0175| +| | |acc_norm|0.2110|± |0.0175| +|hendrycksTest-high_school_statistics | 1|acc |0.4120|± |0.0336| +| | |acc_norm|0.4120|± |0.0336| +|hendrycksTest-high_school_us_history | 1|acc |0.2990|± |0.0321| +| | |acc_norm|0.2990|± |0.0321| +|hendrycksTest-high_school_world_history | 1|acc |0.2658|± |0.0288| +| | |acc_norm|0.2658|± |0.0288| +|hendrycksTest-human_aging | 1|acc |0.2287|± |0.0282| +| | |acc_norm|0.2287|± |0.0282| +|hendrycksTest-human_sexuality | 1|acc |0.2595|± |0.0384| +| | |acc_norm|0.2595|± |0.0384| +|hendrycksTest-international_law | 1|acc |0.2975|± |0.0417| +| | |acc_norm|0.2975|± |0.0417| +|hendrycksTest-jurisprudence | 1|acc |0.2315|± |0.0408| +| | |acc_norm|0.2315|± |0.0408| +|hendrycksTest-logical_fallacies | 1|acc |0.2822|± |0.0354| +| | |acc_norm|0.2822|± |0.0354| +|hendrycksTest-machine_learning | 1|acc |0.2321|± |0.0401| +| | |acc_norm|0.2321|± |0.0401| +|hendrycksTest-management | 1|acc |0.1748|± |0.0376| +| | |acc_norm|0.1748|± |0.0376| +|hendrycksTest-marketing | 1|acc |0.2308|± |0.0276| +| | |acc_norm|0.2308|± |0.0276| +|hendrycksTest-medical_genetics | 1|acc |0.3000|± |0.0461| +| | |acc_norm|0.3000|± |0.0461| +|hendrycksTest-miscellaneous | 1|acc |0.2375|± |0.0152| +| | 
|acc_norm|0.2375|± |0.0152| +|hendrycksTest-moral_disputes | 1|acc |0.2486|± |0.0233| +| | |acc_norm|0.2486|± |0.0233| +|hendrycksTest-moral_scenarios | 1|acc |0.2425|± |0.0143| +| | |acc_norm|0.2425|± |0.0143| +|hendrycksTest-nutrition | 1|acc |0.2288|± |0.0241| +| | |acc_norm|0.2288|± |0.0241| +|hendrycksTest-philosophy | 1|acc |0.2090|± |0.0231| +| | |acc_norm|0.2090|± |0.0231| +|hendrycksTest-prehistory | 1|acc |0.2377|± |0.0237| +| | |acc_norm|0.2377|± |0.0237| +|hendrycksTest-professional_accounting | 1|acc |0.2234|± |0.0248| +| | |acc_norm|0.2234|± |0.0248| +|hendrycksTest-professional_law | 1|acc |0.2471|± |0.0110| +| | |acc_norm|0.2471|± |0.0110| +|hendrycksTest-professional_medicine | 1|acc |0.4081|± |0.0299| +| | |acc_norm|0.4081|± |0.0299| +|hendrycksTest-professional_psychology | 1|acc |0.2565|± |0.0177| +| | |acc_norm|0.2565|± |0.0177| +|hendrycksTest-public_relations | 1|acc |0.2182|± |0.0396| +| | |acc_norm|0.2182|± |0.0396| +|hendrycksTest-security_studies | 1|acc |0.2408|± |0.0274| +| | |acc_norm|0.2408|± |0.0274| +|hendrycksTest-sociology | 1|acc |0.2338|± |0.0299| +| | |acc_norm|0.2338|± |0.0299| +|hendrycksTest-us_foreign_policy | 1|acc |0.2500|± |0.0435| +| | |acc_norm|0.2500|± |0.0435| +|hendrycksTest-virology | 1|acc |0.2892|± |0.0353| +| | |acc_norm|0.2892|± |0.0353| +|hendrycksTest-world_religions | 1|acc |0.2105|± |0.0313| +| | |acc_norm|0.2105|± |0.0313| + diff --git a/smol_llama-81M-tied-evals/json_object_1.json b/smol_llama-81M-tied-evals/json_object_1.json new file mode 100644 index 0000000..22eedef --- /dev/null +++ b/smol_llama-81M-tied-evals/json_object_1.json @@ -0,0 +1,56 @@ +{ + "results": { + "arc_easy": { + "acc": 0.41624579124579125, + "acc_stderr": 0.010114819404500878, + "acc_norm": 0.38846801346801346, + "acc_norm_stderr": 0.01000127604448523 + }, + "boolq": { + "acc": 0.5831804281345566, + "acc_stderr": 0.00862319210884368 + }, + "lambada_openai": { + "ppl": 79.45218123817662, + "ppl_stderr": 3.1355336623454866, + "acc": 
0.2522802251115855, + "acc_stderr": 0.006050943684570117 + }, + "openbookqa": { + "acc": 0.154, + "acc_stderr": 0.016158285192455334, + "acc_norm": 0.278, + "acc_norm_stderr": 0.02005583388807091 + }, + "piqa": { + "acc": 0.6050054406964092, + "acc_stderr": 0.011405665187969021, + "acc_norm": 0.5897714907508161, + "acc_norm_stderr": 0.011476256036359109 + }, + "winogrande": { + "acc": 0.5272296764009471, + "acc_stderr": 0.014031631629827708 + } + }, + "versions": { + "arc_easy": 0, + "boolq": 1, + "lambada_openai": 0, + "openbookqa": 0, + "piqa": 0, + "winogrande": 0 + }, + "config": { + "model": "hf-causal-experimental", + "model_args": "pretrained=BEE-spoke-data/smol_llama-81M-tied,revision=main,trust_remote_code=True,dtype='float'", + "num_fewshot": 0, + "batch_size": "16", + "batch_sizes": [], + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/smol_llama-81M-tied-evals/json_object_2.json b/smol_llama-81M-tied-evals/json_object_2.json new file mode 100644 index 0000000..2b93869 --- /dev/null +++ b/smol_llama-81M-tied-evals/json_object_2.json @@ -0,0 +1,25 @@ +{ + "results": { + "arc_challenge": { + "acc": 0.16723549488054607, + "acc_stderr": 0.01090553272460121, + "acc_norm": 0.22184300341296928, + "acc_norm_stderr": 0.012141659068147884 + } + }, + "versions": { + "arc_challenge": 0 + }, + "config": { + "model": "hf-causal-experimental", + "model_args": "pretrained=BEE-spoke-data/smol_llama-81M-tied,revision=main,trust_remote_code=True,dtype='float'", + "num_fewshot": 25, + "batch_size": "16", + "batch_sizes": [], + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/smol_llama-81M-tied-evals/json_object_3.json b/smol_llama-81M-tied-evals/json_object_3.json new file mode 100644 index 0000000..f6ebf7d --- /dev/null +++ 
b/smol_llama-81M-tied-evals/json_object_3.json @@ -0,0 +1,25 @@ +{ + "results": { + "hellaswag": { + "acc": 0.27450199203187253, + "acc_stderr": 0.008909237404005179, + "acc_norm": 0.28884462151394424, + "acc_norm_stderr": 0.009048238955347484 + } + }, + "versions": { + "hellaswag": 0 + }, + "config": { + "model": "hf-causal-experimental", + "model_args": "pretrained=BEE-spoke-data/smol_llama-81M-tied,revision=main,trust_remote_code=True,dtype='float'", + "num_fewshot": 10, + "batch_size": "16", + "batch_sizes": [], + "device": "cuda", + "no_cache": false, + "limit": 0.25, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/smol_llama-81M-tied-evals/json_object_4.json b/smol_llama-81M-tied-evals/json_object_4.json new file mode 100644 index 0000000..5185676 --- /dev/null +++ b/smol_llama-81M-tied-evals/json_object_4.json @@ -0,0 +1,25 @@ +{ + "results": { + "truthfulqa_mc": { + "mc1": 0.2423500611995104, + "mc1_stderr": 0.01500067437357034, + "mc2": 0.4352666140818066, + "mc2_stderr": 0.015240603531006328 + } + }, + "versions": { + "truthfulqa_mc": 1 + }, + "config": { + "model": "hf-causal-experimental", + "model_args": "pretrained=BEE-spoke-data/smol_llama-81M-tied,revision=main,trust_remote_code=True,dtype='float'", + "num_fewshot": 0, + "batch_size": "16", + "batch_sizes": [], + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/smol_llama-81M-tied-evals/json_object_5.json b/smol_llama-81M-tied-evals/json_object_5.json new file mode 100644 index 0000000..b0863dd --- /dev/null +++ b/smol_llama-81M-tied-evals/json_object_5.json @@ -0,0 +1,417 @@ +{ + "results": { + "hendrycksTest-abstract_algebra": { + "acc": 0.16, + "acc_stderr": 0.07483314773547882, + "acc_norm": 0.16, + "acc_norm_stderr": 0.07483314773547882 + }, + "hendrycksTest-anatomy": { + "acc": 0.12, + "acc_stderr": 0.066332495807108, + "acc_norm": 
0.12, + "acc_norm_stderr": 0.066332495807108 + }, + "hendrycksTest-astronomy": { + "acc": 0.28, + "acc_stderr": 0.0916515138991168, + "acc_norm": 0.28, + "acc_norm_stderr": 0.0916515138991168 + }, + "hendrycksTest-business_ethics": { + "acc": 0.32, + "acc_stderr": 0.09521904571390466, + "acc_norm": 0.32, + "acc_norm_stderr": 0.09521904571390466 + }, + "hendrycksTest-clinical_knowledge": { + "acc": 0.24, + "acc_stderr": 0.08717797887081347, + "acc_norm": 0.24, + "acc_norm_stderr": 0.08717797887081347 + }, + "hendrycksTest-college_biology": { + "acc": 0.36, + "acc_stderr": 0.09797958971132711, + "acc_norm": 0.36, + "acc_norm_stderr": 0.09797958971132711 + }, + "hendrycksTest-college_chemistry": { + "acc": 0.2, + "acc_stderr": 0.08164965809277261, + "acc_norm": 0.2, + "acc_norm_stderr": 0.08164965809277261 + }, + "hendrycksTest-college_computer_science": { + "acc": 0.2, + "acc_stderr": 0.08164965809277261, + "acc_norm": 0.2, + "acc_norm_stderr": 0.08164965809277261 + }, + "hendrycksTest-college_mathematics": { + "acc": 0.16, + "acc_stderr": 0.0748331477354788, + "acc_norm": 0.16, + "acc_norm_stderr": 0.0748331477354788 + }, + "hendrycksTest-college_medicine": { + "acc": 0.2, + "acc_stderr": 0.08164965809277261, + "acc_norm": 0.2, + "acc_norm_stderr": 0.08164965809277261 + }, + "hendrycksTest-college_physics": { + "acc": 0.2, + "acc_stderr": 0.08164965809277261, + "acc_norm": 0.2, + "acc_norm_stderr": 0.08164965809277261 + }, + "hendrycksTest-computer_security": { + "acc": 0.44, + "acc_stderr": 0.10132456102380442, + "acc_norm": 0.44, + "acc_norm_stderr": 0.10132456102380442 + }, + "hendrycksTest-conceptual_physics": { + "acc": 0.24, + "acc_stderr": 0.08717797887081345, + "acc_norm": 0.24, + "acc_norm_stderr": 0.08717797887081345 + }, + "hendrycksTest-econometrics": { + "acc": 0.28, + "acc_stderr": 0.0916515138991168, + "acc_norm": 0.28, + "acc_norm_stderr": 0.0916515138991168 + }, + "hendrycksTest-electrical_engineering": { + "acc": 0.24, + "acc_stderr": 
0.08717797887081347, + "acc_norm": 0.24, + "acc_norm_stderr": 0.08717797887081347 + }, + "hendrycksTest-elementary_mathematics": { + "acc": 0.12, + "acc_stderr": 0.066332495807108, + "acc_norm": 0.12, + "acc_norm_stderr": 0.066332495807108 + }, + "hendrycksTest-formal_logic": { + "acc": 0.16, + "acc_stderr": 0.07483314773547882, + "acc_norm": 0.16, + "acc_norm_stderr": 0.07483314773547882 + }, + "hendrycksTest-global_facts": { + "acc": 0.12, + "acc_stderr": 0.066332495807108, + "acc_norm": 0.12, + "acc_norm_stderr": 0.066332495807108 + }, + "hendrycksTest-high_school_biology": { + "acc": 0.2, + "acc_stderr": 0.08164965809277261, + "acc_norm": 0.2, + "acc_norm_stderr": 0.08164965809277261 + }, + "hendrycksTest-high_school_chemistry": { + "acc": 0.32, + "acc_stderr": 0.09521904571390466, + "acc_norm": 0.32, + "acc_norm_stderr": 0.09521904571390466 + }, + "hendrycksTest-high_school_computer_science": { + "acc": 0.24, + "acc_stderr": 0.08717797887081345, + "acc_norm": 0.24, + "acc_norm_stderr": 0.08717797887081345 + }, + "hendrycksTest-high_school_european_history": { + "acc": 0.2, + "acc_stderr": 0.08164965809277261, + "acc_norm": 0.2, + "acc_norm_stderr": 0.08164965809277261 + }, + "hendrycksTest-high_school_geography": { + "acc": 0.28, + "acc_stderr": 0.09165151389911678, + "acc_norm": 0.28, + "acc_norm_stderr": 0.09165151389911678 + }, + "hendrycksTest-high_school_government_and_politics": { + "acc": 0.16, + "acc_stderr": 0.0748331477354788, + "acc_norm": 0.16, + "acc_norm_stderr": 0.0748331477354788 + }, + "hendrycksTest-high_school_macroeconomics": { + "acc": 0.32, + "acc_stderr": 0.09521904571390466, + "acc_norm": 0.32, + "acc_norm_stderr": 0.09521904571390466 + }, + "hendrycksTest-high_school_mathematics": { + "acc": 0.24, + "acc_stderr": 0.08717797887081345, + "acc_norm": 0.24, + "acc_norm_stderr": 0.08717797887081345 + }, + "hendrycksTest-high_school_microeconomics": { + "acc": 0.2, + "acc_stderr": 0.08164965809277261, + "acc_norm": 0.2, + "acc_norm_stderr": 
0.08164965809277261 + }, + "hendrycksTest-high_school_physics": { + "acc": 0.24, + "acc_stderr": 0.08717797887081345, + "acc_norm": 0.24, + "acc_norm_stderr": 0.08717797887081345 + }, + "hendrycksTest-high_school_psychology": { + "acc": 0.16, + "acc_stderr": 0.07483314773547879, + "acc_norm": 0.16, + "acc_norm_stderr": 0.07483314773547879 + }, + "hendrycksTest-high_school_statistics": { + "acc": 0.36, + "acc_stderr": 0.09797958971132711, + "acc_norm": 0.36, + "acc_norm_stderr": 0.09797958971132711 + }, + "hendrycksTest-high_school_us_history": { + "acc": 0.2, + "acc_stderr": 0.08164965809277261, + "acc_norm": 0.2, + "acc_norm_stderr": 0.08164965809277261 + }, + "hendrycksTest-high_school_world_history": { + "acc": 0.36, + "acc_stderr": 0.09797958971132713, + "acc_norm": 0.36, + "acc_norm_stderr": 0.09797958971132713 + }, + "hendrycksTest-human_aging": { + "acc": 0.28, + "acc_stderr": 0.09165151389911678, + "acc_norm": 0.28, + "acc_norm_stderr": 0.09165151389911678 + }, + "hendrycksTest-human_sexuality": { + "acc": 0.2, + "acc_stderr": 0.08164965809277262, + "acc_norm": 0.2, + "acc_norm_stderr": 0.08164965809277262 + }, + "hendrycksTest-international_law": { + "acc": 0.24, + "acc_stderr": 0.08717797887081345, + "acc_norm": 0.24, + "acc_norm_stderr": 0.08717797887081345 + }, + "hendrycksTest-jurisprudence": { + "acc": 0.36, + "acc_stderr": 0.09797958971132711, + "acc_norm": 0.36, + "acc_norm_stderr": 0.09797958971132711 + }, + "hendrycksTest-logical_fallacies": { + "acc": 0.36, + "acc_stderr": 0.09797958971132711, + "acc_norm": 0.36, + "acc_norm_stderr": 0.09797958971132711 + }, + "hendrycksTest-machine_learning": { + "acc": 0.36, + "acc_stderr": 0.09797958971132713, + "acc_norm": 0.36, + "acc_norm_stderr": 0.09797958971132713 + }, + "hendrycksTest-management": { + "acc": 0.28, + "acc_stderr": 0.09165151389911677, + "acc_norm": 0.28, + "acc_norm_stderr": 0.09165151389911677 + }, + "hendrycksTest-marketing": { + "acc": 0.28, + "acc_stderr": 0.09165151389911678, + 
"acc_norm": 0.28, + "acc_norm_stderr": 0.09165151389911678 + }, + "hendrycksTest-medical_genetics": { + "acc": 0.36, + "acc_stderr": 0.09797958971132711, + "acc_norm": 0.36, + "acc_norm_stderr": 0.09797958971132711 + }, + "hendrycksTest-miscellaneous": { + "acc": 0.32, + "acc_stderr": 0.09521904571390466, + "acc_norm": 0.32, + "acc_norm_stderr": 0.09521904571390466 + }, + "hendrycksTest-moral_disputes": { + "acc": 0.2, + "acc_stderr": 0.08164965809277261, + "acc_norm": 0.2, + "acc_norm_stderr": 0.08164965809277261 + }, + "hendrycksTest-moral_scenarios": { + "acc": 0.2, + "acc_stderr": 0.08164965809277261, + "acc_norm": 0.2, + "acc_norm_stderr": 0.08164965809277261 + }, + "hendrycksTest-nutrition": { + "acc": 0.2, + "acc_stderr": 0.08164965809277261, + "acc_norm": 0.2, + "acc_norm_stderr": 0.08164965809277261 + }, + "hendrycksTest-philosophy": { + "acc": 0.24, + "acc_stderr": 0.08717797887081345, + "acc_norm": 0.24, + "acc_norm_stderr": 0.08717797887081345 + }, + "hendrycksTest-prehistory": { + "acc": 0.16, + "acc_stderr": 0.07483314773547882, + "acc_norm": 0.16, + "acc_norm_stderr": 0.07483314773547882 + }, + "hendrycksTest-professional_accounting": { + "acc": 0.12, + "acc_stderr": 0.066332495807108, + "acc_norm": 0.12, + "acc_norm_stderr": 0.066332495807108 + }, + "hendrycksTest-professional_law": { + "acc": 0.24, + "acc_stderr": 0.08717797887081347, + "acc_norm": 0.24, + "acc_norm_stderr": 0.08717797887081347 + }, + "hendrycksTest-professional_medicine": { + "acc": 0.28, + "acc_stderr": 0.09165151389911678, + "acc_norm": 0.28, + "acc_norm_stderr": 0.09165151389911678 + }, + "hendrycksTest-professional_psychology": { + "acc": 0.28, + "acc_stderr": 0.09165151389911678, + "acc_norm": 0.28, + "acc_norm_stderr": 0.09165151389911678 + }, + "hendrycksTest-public_relations": { + "acc": 0.16, + "acc_stderr": 0.0748331477354788, + "acc_norm": 0.16, + "acc_norm_stderr": 0.0748331477354788 + }, + "hendrycksTest-security_studies": { + "acc": 0.28, + "acc_stderr": 
0.09165151389911678, + "acc_norm": 0.28, + "acc_norm_stderr": 0.09165151389911678 + }, + "hendrycksTest-sociology": { + "acc": 0.32, + "acc_stderr": 0.09521904571390467, + "acc_norm": 0.32, + "acc_norm_stderr": 0.09521904571390467 + }, + "hendrycksTest-us_foreign_policy": { + "acc": 0.4, + "acc_stderr": 0.10000000000000002, + "acc_norm": 0.4, + "acc_norm_stderr": 0.10000000000000002 + }, + "hendrycksTest-virology": { + "acc": 0.28, + "acc_stderr": 0.0916515138991168, + "acc_norm": 0.28, + "acc_norm_stderr": 0.0916515138991168 + }, + "hendrycksTest-world_religions": { + "acc": 0.28, + "acc_stderr": 0.0916515138991168, + "acc_norm": 0.28, + "acc_norm_stderr": 0.0916515138991168 + } + }, + "versions": { + "hendrycksTest-abstract_algebra": 1, + "hendrycksTest-anatomy": 1, + "hendrycksTest-astronomy": 1, + "hendrycksTest-business_ethics": 1, + "hendrycksTest-clinical_knowledge": 1, + "hendrycksTest-college_biology": 1, + "hendrycksTest-college_chemistry": 1, + "hendrycksTest-college_computer_science": 1, + "hendrycksTest-college_mathematics": 1, + "hendrycksTest-college_medicine": 1, + "hendrycksTest-college_physics": 1, + "hendrycksTest-computer_security": 1, + "hendrycksTest-conceptual_physics": 1, + "hendrycksTest-econometrics": 1, + "hendrycksTest-electrical_engineering": 1, + "hendrycksTest-elementary_mathematics": 1, + "hendrycksTest-formal_logic": 1, + "hendrycksTest-global_facts": 1, + "hendrycksTest-high_school_biology": 1, + "hendrycksTest-high_school_chemistry": 1, + "hendrycksTest-high_school_computer_science": 1, + "hendrycksTest-high_school_european_history": 1, + "hendrycksTest-high_school_geography": 1, + "hendrycksTest-high_school_government_and_politics": 1, + "hendrycksTest-high_school_macroeconomics": 1, + "hendrycksTest-high_school_mathematics": 1, + "hendrycksTest-high_school_microeconomics": 1, + "hendrycksTest-high_school_physics": 1, + "hendrycksTest-high_school_psychology": 1, + "hendrycksTest-high_school_statistics": 1, + 
"hendrycksTest-high_school_us_history": 1, + "hendrycksTest-high_school_world_history": 1, + "hendrycksTest-human_aging": 1, + "hendrycksTest-human_sexuality": 1, + "hendrycksTest-international_law": 1, + "hendrycksTest-jurisprudence": 1, + "hendrycksTest-logical_fallacies": 1, + "hendrycksTest-machine_learning": 1, + "hendrycksTest-management": 1, + "hendrycksTest-marketing": 1, + "hendrycksTest-medical_genetics": 1, + "hendrycksTest-miscellaneous": 1, + "hendrycksTest-moral_disputes": 1, + "hendrycksTest-moral_scenarios": 1, + "hendrycksTest-nutrition": 1, + "hendrycksTest-philosophy": 1, + "hendrycksTest-prehistory": 1, + "hendrycksTest-professional_accounting": 1, + "hendrycksTest-professional_law": 1, + "hendrycksTest-professional_medicine": 1, + "hendrycksTest-professional_psychology": 1, + "hendrycksTest-public_relations": 1, + "hendrycksTest-security_studies": 1, + "hendrycksTest-sociology": 1, + "hendrycksTest-us_foreign_policy": 1, + "hendrycksTest-virology": 1, + "hendrycksTest-world_religions": 1 + }, + "config": { + "model": "hf-causal-experimental", + "model_args": "pretrained=BEE-spoke-data/smol_llama-81M-tied,revision=main,trust_remote_code=True,dtype='float'", + "num_fewshot": 5, + "batch_size": "16", + "batch_sizes": [], + "device": "cuda", + "no_cache": false, + "limit": 0.25, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..451134b --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.model b/tokenizer.model new 
file mode 100644 index 0000000..6c00c74 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 +size 499723 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..ae0d58b --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,42 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "<unk>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<s>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "</s>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<s>", + "clean_up_tokenization_spaces": false, + "eos_token": "</s>", + "legacy": false, + "model_max_length": 1000000000000000019884624838656, + "pad_token": null, + "padding_side": "right", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "<unk>", + "use_default_system_prompt": true +}