commit eb000cd44c5e2800c4065342673cc817d00dcd48 Author: ModelHub XC Date: Sun May 17 08:34:18 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: afrideva/verysmol_llama-v11-KIx2-GGUF Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..9bee17b --- /dev/null +++ b/.gitattributes @@ -0,0 +1,42 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +verysmol_llama-v11-kix2.fp16.gguf filter=lfs diff=lfs merge=lfs -text 
+verysmol_llama-v11-kix2.q2_k.gguf filter=lfs diff=lfs merge=lfs -text +verysmol_llama-v11-kix2.q3_k_m.gguf filter=lfs diff=lfs merge=lfs -text +verysmol_llama-v11-kix2.q4_k_m.gguf filter=lfs diff=lfs merge=lfs -text +verysmol_llama-v11-kix2.q5_k_m.gguf filter=lfs diff=lfs merge=lfs -text +verysmol_llama-v11-kix2.q6_k.gguf filter=lfs diff=lfs merge=lfs -text +verysmol_llama-v11-kix2.q8_0.gguf filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..3737781 --- /dev/null +++ b/README.md @@ -0,0 +1,166 @@ +--- +base_model: BEE-spoke-data/verysmol_llama-v11-KIx2 +datasets: +- BEE-spoke-data/knowledge-inoc-concat-v1 +inference: false +license: apache-2.0 +metrics: +- accuracy +model_creator: BEE-spoke-data +model_name: verysmol_llama-v11-KIx2 +pipeline_tag: text-generation +quantized_by: afrideva +tags: +- generated_from_trainer +- gguf +- ggml +- quantized +- q2_k +- q3_k_m +- q4_k_m +- q5_k_m +- q6_k +- q8_0 +widget: +- example_title: El Microondas + text: My name is El Microondas the Wise and +- example_title: Kennesaw State University + text: Kennesaw State University is a public +- example_title: Bungie + text: Bungie Studios is an American video game developer. They are most famous for + developing the award winning Halo series of video games. They also made Destiny. + The studio was founded +- example_title: Mona Lisa + text: The Mona Lisa is a world-renowned painting created by +- example_title: Harry Potter Series + text: The Harry Potter series, written by J.K. Rowling, begins with the book titled +- example_title: Riddle + text: 'Question: I have cities, but no houses. I have mountains, but no trees. I + have water, but no fish. What am I? + + Answer:' +- example_title: Photosynthesis + text: The process of photosynthesis involves the conversion of +- example_title: Story Continuation + text: Jane went to the store to buy some groceries. She picked up apples, oranges, + and a loaf of bread. 
When she got home, she realized she forgot +- example_title: Math Problem + text: 'Problem 2: If a train leaves Station A at 9:00 AM and travels at 60 mph, + and another train leaves Station B at 10:00 AM and travels at 80 mph, when will + they meet if the distance between the stations is 300 miles? + + To determine' +- example_title: Algorithm Definition + text: In the context of computer programming, an algorithm is +--- +# BEE-spoke-data/verysmol_llama-v11-KIx2-GGUF + +Quantized GGUF model files for [verysmol_llama-v11-KIx2](https://huggingface.co/BEE-spoke-data/verysmol_llama-v11-KIx2) from [BEE-spoke-data](https://huggingface.co/BEE-spoke-data) + + +| Name | Quant method | Size | +| ---- | ---- | ---- | +| [verysmol_llama-v11-kix2.fp16.gguf](https://huggingface.co/afrideva/verysmol_llama-v11-KIx2-GGUF/resolve/main/verysmol_llama-v11-kix2.fp16.gguf) | fp16 | 116.89 MB | +| [verysmol_llama-v11-kix2.q2_k.gguf](https://huggingface.co/afrideva/verysmol_llama-v11-KIx2-GGUF/resolve/main/verysmol_llama-v11-kix2.q2_k.gguf) | q2_k | 30.14 MB | +| [verysmol_llama-v11-kix2.q3_k_m.gguf](https://huggingface.co/afrideva/verysmol_llama-v11-KIx2-GGUF/resolve/main/verysmol_llama-v11-kix2.q3_k_m.gguf) | q3_k_m | 33.71 MB | +| [verysmol_llama-v11-kix2.q4_k_m.gguf](https://huggingface.co/afrideva/verysmol_llama-v11-KIx2-GGUF/resolve/main/verysmol_llama-v11-kix2.q4_k_m.gguf) | q4_k_m | 38.34 MB | +| [verysmol_llama-v11-kix2.q5_k_m.gguf](https://huggingface.co/afrideva/verysmol_llama-v11-KIx2-GGUF/resolve/main/verysmol_llama-v11-kix2.q5_k_m.gguf) | q5_k_m | 43.21 MB | +| [verysmol_llama-v11-kix2.q6_k.gguf](https://huggingface.co/afrideva/verysmol_llama-v11-KIx2-GGUF/resolve/main/verysmol_llama-v11-kix2.q6_k.gguf) | q6_k | 48.39 MB | +| [verysmol_llama-v11-kix2.q8_0.gguf](https://huggingface.co/afrideva/verysmol_llama-v11-KIx2-GGUF/resolve/main/verysmol_llama-v11-kix2.q8_0.gguf) | q8_0 | 62.45 MB | + + + +## Original Model Card: + + +# verysmol_llama-v11-KIx2 + +## Model description 
+ +This model is a fine-tuned version of v10 (refinedweb-3m dedup), further trained for 2 epochs on the KI dataset. + +It achieves the following results on the evaluation set: +- Loss: 2.8876 +- Accuracy: 0.4502 + +--- + +## evals + +`hf-causal-experimental (pretrained=pszemraj/verysmol_llama-v11-KIx2,revision=main,trust_remote_code=True,dtype='float'), limit: None, provide_description: False, num_fewshot: 0, batch_size: 16` + +| Task |Version| Metric | Value | |Stderr| +|--------------|------:|--------|-------:|---|-----:| +|arc_easy | 0|acc | 0.4024|± |0.0101| +| | |acc_norm| 0.3788|± |0.0100| +|boolq | 1|acc | 0.6199|± |0.0085| +|lambada_openai| 0|ppl |111.9939|± |4.6906| +| | |acc | 0.2354|± |0.0059| +|openbookqa | 0|acc | 0.1440|± |0.0157| +| | |acc_norm| 0.2760|± |0.0200| +|piqa | 0|acc | 0.5713|± |0.0115| +| | |acc_norm| 0.5664|± |0.0116| +|winogrande | 0|acc | 0.5201|± |0.0140| + +| Task |Version| Metric |Value | |Stderr| +|-------------|------:|--------|-----:|---|-----:| +|arc_challenge| 0|acc |0.1971|± |0.0116| +| | |acc_norm|0.2278|± |0.0123| + +| Task |Version| Metric |Value | |Stderr| +|---------|------:|--------|-----:|---|-----:| +|hellaswag| 0|acc |0.2618|± |0.0088| +| | |acc_norm|0.2797|± |0.0090| + +| Task |Version|Metric|Value | |Stderr| +|-------------|------:|------|-----:|---|-----:| +|truthfulqa_mc| 1|mc1 |0.2509|± |0.0152| +| | |mc2 |0.4492|± |0.0156| + +--- + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 0.00014 +- train_batch_size: 16 +- eval_batch_size: 16 +- seed: 17514 +- gradient_accumulation_steps: 8 +- total_train_batch_size: 128 +- optimizer: Adam with betas=(0.9,0.95) and epsilon=1e-06 +- lr_scheduler_type: inverse_sqrt +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 2.0 + +### Training results + +| Training Loss | Epoch | Step | Validation Loss | Accuracy | +|:-------------:|:-----:|:----:|:---------------:|:--------:| +| 3.0681 | 0.03 | 150 | 
3.0689 | 0.4259 | +| 3.0113 | 0.07 | 300 | 3.0433 | 0.4278 | +| 2.9468 | 0.1 | 450 | 3.0362 | 0.4288 | +| 3.0162 | 0.13 | 600 | 3.0148 | 0.4326 | +| 2.9531 | 0.17 | 750 | 3.0012 | 0.4341 | +| 2.9282 | 0.2 | 900 | 2.9923 | 0.4358 | +| 2.9485 | 0.23 | 1050 | 2.9845 | 0.4357 | +| 2.9365 | 0.27 | 1200 | 2.9749 | 0.4375 | + +... + +| Training Loss | Epoch | Step | Validation Loss | Accuracy | +|:-------------:|:-----:|:----:|:---------------:|:--------:| +| 2.8215 | 1.7 | 7650 | 2.8943 | 0.4496 | +| 2.7714 | 1.74 | 7800 | 2.8914 | 0.4501 | +| 2.8132 | 1.77 | 7950 | 2.8913 | 0.4500 | +| 2.8505 | 1.8 | 8100 | 2.8906 | 0.4502 | +| 2.8294 | 1.84 | 8250 | 2.8901 | 0.4502 | +| 2.7977 | 1.87 | 8400 | 2.8891 | 0.4499 | +| 2.7501 | 1.9 | 8550 | 2.8878 | 0.4505 | +| 2.8038 | 1.94 | 8700 | 2.8883 | 0.4504 | +| 2.7547 | 1.97 | 8850 | 2.8876 | 0.4502 | + +--- \ No newline at end of file diff --git a/verysmol_llama-v11-kix2.fp16.gguf b/verysmol_llama-v11-kix2.fp16.gguf new file mode 100644 index 0000000..3b7f7bb --- /dev/null +++ b/verysmol_llama-v11-kix2.fp16.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:585514af8dd1b3ebdbe872d037cccb9f20ad3154fcdba97f78465f31eec38a69 +size 116887136 diff --git a/verysmol_llama-v11-kix2.q2_k.gguf b/verysmol_llama-v11-kix2.q2_k.gguf new file mode 100644 index 0000000..f22391f --- /dev/null +++ b/verysmol_llama-v11-kix2.q2_k.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7143227c398f6a91811e547a950b91114548ef61a5f660edd67cdeea70fbf992 +size 30142592 diff --git a/verysmol_llama-v11-kix2.q3_k_m.gguf b/verysmol_llama-v11-kix2.q3_k_m.gguf new file mode 100644 index 0000000..2f6c6be --- /dev/null +++ b/verysmol_llama-v11-kix2.q3_k_m.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b32f962dc227eecff7ba6fbd0b635b8bc0b1c251e8ff301b12cf3e78386613b5 +size 33713792 diff --git a/verysmol_llama-v11-kix2.q4_k_m.gguf b/verysmol_llama-v11-kix2.q4_k_m.gguf new file mode 100644 
index 0000000..975d5a4 --- /dev/null +++ b/verysmol_llama-v11-kix2.q4_k_m.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f56be6776d3c72aff306f15508c75e46a5cde4fc18be2524a4f687ede723d59 +size 38335616 diff --git a/verysmol_llama-v11-kix2.q5_k_m.gguf b/verysmol_llama-v11-kix2.q5_k_m.gguf new file mode 100644 index 0000000..3e1b540 --- /dev/null +++ b/verysmol_llama-v11-kix2.q5_k_m.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5514092af53831a58a4a37d91462812fd7b14b8cf376c87533fef8cd710a51f +size 43209856 diff --git a/verysmol_llama-v11-kix2.q6_k.gguf b/verysmol_llama-v11-kix2.q6_k.gguf new file mode 100644 index 0000000..6f2f1ce --- /dev/null +++ b/verysmol_llama-v11-kix2.q6_k.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9ac03ad21c9e3d897742e8a1527999d6a8df1b6daf65271d95f83bf2e7f9a0a +size 48388736 diff --git a/verysmol_llama-v11-kix2.q8_0.gguf b/verysmol_llama-v11-kix2.q8_0.gguf new file mode 100644 index 0000000..c239c67 --- /dev/null +++ b/verysmol_llama-v11-kix2.q8_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f93d783e2e79791faeacf54d465fac9cc0467aca9682179f7c820980a3ff1d2 +size 62451328