commit bb65363a10b1e7ce55ccfcadb257fd76dfca465b Author: ModelHub XC Date: Tue Apr 14 01:38:04 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: afrideva/smol_llama-101M-GQA-python-GGUF Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..550a018 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,42 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +smol_llama-101m-gqa-python.fp16.gguf filter=lfs diff=lfs merge=lfs -text 
+smol_llama-101m-gqa-python.q2_k.gguf filter=lfs diff=lfs merge=lfs -text +smol_llama-101m-gqa-python.q3_k_m.gguf filter=lfs diff=lfs merge=lfs -text +smol_llama-101m-gqa-python.q4_k_m.gguf filter=lfs diff=lfs merge=lfs -text +smol_llama-101m-gqa-python.q5_k_m.gguf filter=lfs diff=lfs merge=lfs -text +smol_llama-101m-gqa-python.q6_k.gguf filter=lfs diff=lfs merge=lfs -text +smol_llama-101m-gqa-python.q8_0.gguf filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..534101e --- /dev/null +++ b/README.md @@ -0,0 +1,273 @@ +--- +base_model: BEE-spoke-data/smol_llama-101M-GQA-python +datasets: +- BEE-spoke-data/pypi_clean-deduped +inference: false +language: +- en +license: apache-2.0 +metrics: +- accuracy +model_creator: BEE-spoke-data +model_name: smol_llama-101M-GQA-python +pipeline_tag: text-generation +quantized_by: afrideva +source_model: BEE-spoke-data/smol_llama-101M-GQA +tags: +- python +- codegen +- markdown +- smol_llama +- gguf +- ggml +- quantized +- q2_k +- q3_k_m +- q4_k_m +- q5_k_m +- q6_k +- q8_0 +widget: +- example_title: Add Numbers Function + text: "def add_numbers(a, b):\n return\n" +- example_title: Car Class + text: "class Car:\n def __init__(self, make, model):\n self.make = make\n + \ self.model = model\n\n def display_car(self):\n" +- example_title: Pandas DataFrame + text: 'import pandas as pd + + data = {''Name'': [''Tom'', ''Nick'', ''John''], ''Age'': [20, 21, 19]} + + df = pd.DataFrame(data).convert_dtypes() + + # eda + + ' +- example_title: Factorial Function + text: "def factorial(n):\n if n == 0:\n return 1\n else:\n" +- example_title: Fibonacci Function + text: "def fibonacci(n):\n if n <= 0:\n raise ValueError(\"Incorrect input\")\n + \ elif n == 1:\n return 0\n elif n == 2:\n return 1\n else:\n" +- example_title: Matplotlib Plot + text: 'import matplotlib.pyplot as plt + + import numpy as np + + x = np.linspace(0, 10, 100) + + # simple plot + + ' +- example_title: Reverse String 
Function + text: "def reverse_string(s:str) -> str:\n return\n" +- example_title: Palindrome Function + text: "def is_palindrome(word:str) -> bool:\n return\n" +- example_title: Bubble Sort Function + text: "def bubble_sort(lst: list):\n n = len(lst)\n for i in range(n):\n for + j in range(0, n-i-1):\n" +- example_title: Binary Search Function + text: "def binary_search(arr, low, high, x):\n if high >= low:\n mid = + (high + low) // 2\n if arr[mid] == x:\n return mid\n elif + arr[mid] > x:\n" +--- +# BEE-spoke-data/smol_llama-101M-GQA-python-GGUF + +Quantized GGUF model files for [smol_llama-101M-GQA-python](https://huggingface.co/BEE-spoke-data/smol_llama-101M-GQA-python) from [BEE-spoke-data](https://huggingface.co/BEE-spoke-data) + + +| Name | Quant method | Size | +| ---- | ---- | ---- | +| [smol_llama-101m-gqa-python.fp16.gguf](https://huggingface.co/afrideva/smol_llama-101M-GQA-python-GGUF/resolve/main/smol_llama-101m-gqa-python.fp16.gguf) | fp16 | 203.28 MB | +| [smol_llama-101m-gqa-python.q2_k.gguf](https://huggingface.co/afrideva/smol_llama-101M-GQA-python-GGUF/resolve/main/smol_llama-101m-gqa-python.q2_k.gguf) | q2_k | 50.93 MB | +| [smol_llama-101m-gqa-python.q3_k_m.gguf](https://huggingface.co/afrideva/smol_llama-101M-GQA-python-GGUF/resolve/main/smol_llama-101m-gqa-python.q3_k_m.gguf) | q3_k_m | 57.06 MB | +| [smol_llama-101m-gqa-python.q4_k_m.gguf](https://huggingface.co/afrideva/smol_llama-101M-GQA-python-GGUF/resolve/main/smol_llama-101m-gqa-python.q4_k_m.gguf) | q4_k_m | 65.41 MB | +| [smol_llama-101m-gqa-python.q5_k_m.gguf](https://huggingface.co/afrideva/smol_llama-101M-GQA-python-GGUF/resolve/main/smol_llama-101m-gqa-python.q5_k_m.gguf) | q5_k_m | 74.34 MB | +| [smol_llama-101m-gqa-python.q6_k.gguf](https://huggingface.co/afrideva/smol_llama-101M-GQA-python-GGUF/resolve/main/smol_llama-101m-gqa-python.q6_k.gguf) | q6_k | 83.83 MB | +| 
[smol_llama-101m-gqa-python.q8_0.gguf](https://huggingface.co/afrideva/smol_llama-101M-GQA-python-GGUF/resolve/main/smol_llama-101m-gqa-python.q8_0.gguf) | q8_0 | 108.35 MB | + + + +## Original Model Card: +# smol_llama-101M-GQA: python + + + Open In Colab + + +> 400MB of buzz: pure Python programming nectar! 🍯 + +This model is the general pre-trained checkpoint `BEE-spoke-data/smol_llama-101M-GQA` trained on a deduped version of `pypi` for +1 epoch. Play with the model in [this demo space](https://huggingface.co/spaces/BEE-spoke-data/beecoder-playground). + +- Its architecture is the same as the base, with some new Python-related tokens added to the vocabulary prior to training. +- It can generate basic Python code and markdown in README style, but will struggle with harder planning/reasoning tasks. +- This is an experiment to test the abilities of smol-sized models in code generation, meaning **both** their capabilities and limitations. + +Use with care & understand that there may be some bugs 🐛 still to be worked out. + +## Usage + +📌 Be sure to note: + +1. The model uses the "slow" llama2 tokenizer. Set `use_fast=False` when loading the tokenizer. +2. Use `transformers` library version 4.33.3 due to a known issue in version 4.34.1 (_at time of writing_). + +> Which llama2 tokenizer the API widget uses is an age-old mystery, and may cause minor whitespace issues (widget only). 
+ +To install the necessary packages and load the model: + +```python +# Install necessary packages +# pip install transformers==4.33.3 accelerate sentencepiece + +from transformers import AutoTokenizer, AutoModelForCausalLM + +# Load the tokenizer and model +tokenizer = AutoTokenizer.from_pretrained( + "BEE-spoke-data/smol_llama-101M-GQA-python", + use_fast=False, +) +model = AutoModelForCausalLM.from_pretrained( + "BEE-spoke-data/smol_llama-101M-GQA-python", + device_map="auto", +) + +# The model can now be used like any other decoder model +``` + +### Longer code-gen example + + +Below is a quick script that can be used as a reference/starting point for writing your own, better one :) + + + +
+🔥 Unleash the Power of Code Generation! Click to Reveal the Magic! 🔮 + +Are you ready to witness the incredible possibilities of code generation? 🚀. Brace yourself for an exceptional journey into the world of artificial intelligence and programming. Observe a script that will change the way you create and finalize code. + +This script provides entry to a planet where machines can write code with remarkable precision and imagination. + +```python +""" +simple script for testing model(s) designed to generate/complete code + +See details/args with the below. + python textgen_inference_code.py --help +""" +import logging +import random +import time +from pathlib import Path + +import fire +import torch +from transformers import AutoModelForCausalLM, AutoTokenizer + +logging.basicConfig(format="%(levelname)s - %(message)s", level=logging.INFO) + + +class Timer: + """ + Basic timer utility. + """ + + def __enter__(self): + + self.start_time = time.perf_counter() + return self + + def __exit__(self, exc_type, exc_value, traceback): + + self.end_time = time.perf_counter() + self.elapsed_time = self.end_time - self.start_time + logging.info(f"Elapsed time: {self.elapsed_time:.4f} seconds") + + +def load_model(model_name, use_fast=False): + """ util for loading model and tokenizer""" + logging.info(f"Loading model: {model_name}") + tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=use_fast) + model = AutoModelForCausalLM.from_pretrained( + model_name, torch_dtype="auto", device_map="auto" + ) + model = torch.compile(model) + return tokenizer, model + + +def run_inference(prompt, model, tokenizer, max_new_tokens: int = 256): + """ + run_inference + + Args: + prompt (TYPE): Description + model (TYPE): Description + tokenizer (TYPE): Description + max_new_tokens (int, optional): Description + + Returns: + TYPE: Description + """ + logging.info(f"Running inference with max_new_tokens={max_new_tokens} ...") + with Timer() as timer: + inputs = tokenizer(prompt, 
return_tensors="pt").to(model.device) + outputs = model.generate( + **inputs, + max_new_tokens=max_new_tokens, + min_new_tokens=8, + renormalize_logits=True, + no_repeat_ngram_size=8, + repetition_penalty=1.04, + num_beams=4, + early_stopping=True, + ) + text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0] + logging.info(f"Output text:\n\n{text}") + return text + + +def main( + model_name="BEE-spoke-data/smol_llama-101M-GQA-python", + prompt:str=None, + use_fast=False, + n_tokens: int = 256, +): + """Summary + + Args: + model_name (str, optional): Description + prompt (None, optional): specify the prompt directly (default: random choice from list) + n_tokens (int, optional): max new tokens to generate + """ + logging.info(f"Inference with:\t{model_name}, max_new_tokens:{n_tokens}") + + if prompt is None: + prompt_list = [ + ''' + def print_primes(n: int): + """ + Print all primes between 1 and n + """''', + "def quantum_analysis(", + "def sanitize_filenames(target_dir:str, recursive:False, extension", + ] + prompt = random.SystemRandom().choice(prompt_list) + + logging.info(f"Using prompt:\t{prompt}") + + tokenizer, model = load_model(model_name, use_fast=use_fast) + + run_inference(prompt, model, tokenizer, n_tokens) + + +if __name__ == "__main__": + fire.Fire(main) +``` + +Wowoweewa!! It can create some file cleaning utilities. + + +
+ + +--- \ No newline at end of file diff --git a/smol_llama-101m-gqa-python.fp16.gguf b/smol_llama-101m-gqa-python.fp16.gguf new file mode 100644 index 0000000..8f7e70b --- /dev/null +++ b/smol_llama-101m-gqa-python.fp16.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:debf34a79927d28eeab3efd9a64810d86e7ecf48c3a3f1e0b2aad2bb93ff77df +size 203277120 diff --git a/smol_llama-101m-gqa-python.q2_k.gguf b/smol_llama-101m-gqa-python.q2_k.gguf new file mode 100644 index 0000000..2962fe6 --- /dev/null +++ b/smol_llama-101m-gqa-python.q2_k.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5e51b709846aa1dcb54c7660c280756a6ec5ccfb34a9b5c3b7edb5c5e16e48b +size 50931296 diff --git a/smol_llama-101m-gqa-python.q3_k_m.gguf b/smol_llama-101m-gqa-python.q3_k_m.gguf new file mode 100644 index 0000000..8c98148 --- /dev/null +++ b/smol_llama-101m-gqa-python.q3_k_m.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7728750c790ca4c053f8faf4f0c892825b3c27df5855a495596d9de38c1ccb5c +size 57062240 diff --git a/smol_llama-101m-gqa-python.q4_k_m.gguf b/smol_llama-101m-gqa-python.q4_k_m.gguf new file mode 100644 index 0000000..f7c5bb9 --- /dev/null +++ b/smol_llama-101m-gqa-python.q4_k_m.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:485a4e4de7c3486f21deb016f0f537a6911f9d868addaa08087a9397be5565cc +size 65405024 diff --git a/smol_llama-101m-gqa-python.q5_k_m.gguf b/smol_llama-101m-gqa-python.q5_k_m.gguf new file mode 100644 index 0000000..bffa9e7 --- /dev/null +++ b/smol_llama-101m-gqa-python.q5_k_m.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:573fe4b705cd04c771d48358c7af5fd0779cea46d418cac15d9a209e024080bf +size 74338400 diff --git a/smol_llama-101m-gqa-python.q6_k.gguf b/smol_llama-101m-gqa-python.q6_k.gguf new file mode 100644 index 0000000..3b4a3ce --- /dev/null +++ b/smol_llama-101m-gqa-python.q6_k.gguf @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:038737ba9a30bbd3cf3a20359759a5768405b99a12d768be0fb4456be1d23a1f +size 83830112 diff --git a/smol_llama-101m-gqa-python.q8_0.gguf b/smol_llama-101m-gqa-python.q8_0.gguf new file mode 100644 index 0000000..5b684c7 --- /dev/null +++ b/smol_llama-101m-gqa-python.q8_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9528e5a32b6fe346f37932a45d256e3670b26c4153d0b48f3763ec18ae09594 +size 108352352