commit 72085588e444599bce100c42fa055e8786179938 Author: ModelHub XC Date: Wed May 6 07:30:36 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: Flexan/MohammedSabry-biinduct-1b-baseline-GGUF Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..3116402 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,49 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +biinduct-1b-baseline.f16.gguf filter=lfs diff=lfs merge=lfs -text 
+biinduct-1b-baseline.IQ3_M.gguf filter=lfs diff=lfs merge=lfs -text +biinduct-1b-baseline.IQ3_S.gguf filter=lfs diff=lfs merge=lfs -text +biinduct-1b-baseline.IQ4_XS.gguf filter=lfs diff=lfs merge=lfs -text +biinduct-1b-baseline.Q2_K.gguf filter=lfs diff=lfs merge=lfs -text +biinduct-1b-baseline.Q3_K_L.gguf filter=lfs diff=lfs merge=lfs -text +biinduct-1b-baseline.Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text +biinduct-1b-baseline.Q3_K_S.gguf filter=lfs diff=lfs merge=lfs -text +biinduct-1b-baseline.Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text +biinduct-1b-baseline.Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text +biinduct-1b-baseline.Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text +biinduct-1b-baseline.Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text +biinduct-1b-baseline.Q6_K.gguf filter=lfs diff=lfs merge=lfs -text +biinduct-1b-baseline.Q8_0.gguf filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..9395d1f --- /dev/null +++ b/README.md @@ -0,0 +1,194 @@ +--- +base_model: MohammedSabry/biinduct-1b-baseline +library_name: transformers +pipeline_tag: text-generation +language: +- en +tags: +- causal-lm +- biinduct +- pretraining +- matched-compute +- the-pile +- 1b +- baseline +--- + +# GGUF Files for biinduct-1b-baseline + +These are the GGUF files for [MohammedSabry/biinduct-1b-baseline](https://huggingface.co/MohammedSabry/biinduct-1b-baseline). 
+ +## Downloads + +| GGUF Link | Quantization | Description | +| ---- | ----- | ----------- | +| [Download](https://huggingface.co/Flexan/MohammedSabry-biinduct-1b-baseline-GGUF/resolve/main/biinduct-1b-baseline.Q2_K.gguf) | Q2_K | Lowest quality | +| [Download](https://huggingface.co/Flexan/MohammedSabry-biinduct-1b-baseline-GGUF/resolve/main/biinduct-1b-baseline.Q3_K_S.gguf) | Q3_K_S | | +| [Download](https://huggingface.co/Flexan/MohammedSabry-biinduct-1b-baseline-GGUF/resolve/main/biinduct-1b-baseline.IQ3_S.gguf) | IQ3_S | I-quant, preferable over Q3_K_S | +| [Download](https://huggingface.co/Flexan/MohammedSabry-biinduct-1b-baseline-GGUF/resolve/main/biinduct-1b-baseline.IQ3_M.gguf) | IQ3_M | I-quant | +| [Download](https://huggingface.co/Flexan/MohammedSabry-biinduct-1b-baseline-GGUF/resolve/main/biinduct-1b-baseline.Q3_K_M.gguf) | Q3_K_M | | +| [Download](https://huggingface.co/Flexan/MohammedSabry-biinduct-1b-baseline-GGUF/resolve/main/biinduct-1b-baseline.Q3_K_L.gguf) | Q3_K_L | | +| [Download](https://huggingface.co/Flexan/MohammedSabry-biinduct-1b-baseline-GGUF/resolve/main/biinduct-1b-baseline.IQ4_XS.gguf) | IQ4_XS | I-quant | +| [Download](https://huggingface.co/Flexan/MohammedSabry-biinduct-1b-baseline-GGUF/resolve/main/biinduct-1b-baseline.Q4_K_S.gguf) | Q4_K_S | Fast with good performance | +| [Download](https://huggingface.co/Flexan/MohammedSabry-biinduct-1b-baseline-GGUF/resolve/main/biinduct-1b-baseline.Q4_K_M.gguf) | Q4_K_M | **Recommended:** Perfect mix of speed and performance | +| [Download](https://huggingface.co/Flexan/MohammedSabry-biinduct-1b-baseline-GGUF/resolve/main/biinduct-1b-baseline.Q5_K_S.gguf) | Q5_K_S | | +| [Download](https://huggingface.co/Flexan/MohammedSabry-biinduct-1b-baseline-GGUF/resolve/main/biinduct-1b-baseline.Q5_K_M.gguf) | Q5_K_M | | +| [Download](https://huggingface.co/Flexan/MohammedSabry-biinduct-1b-baseline-GGUF/resolve/main/biinduct-1b-baseline.Q6_K.gguf) | Q6_K | Very good quality | +| 
[Download](https://huggingface.co/Flexan/MohammedSabry-biinduct-1b-baseline-GGUF/resolve/main/biinduct-1b-baseline.Q8_0.gguf) | Q8_0 | Best quality | +| [Download](https://huggingface.co/Flexan/MohammedSabry-biinduct-1b-baseline-GGUF/resolve/main/biinduct-1b-baseline.f16.gguf) | f16 | Unquantized 16-bit (half-precision) weights, don't bother; use a quant | + +## Note from Flexan + +I provide GGUFs and quantizations of publicly available models that do not have a GGUF equivalent available yet, +usually for models **I deem interesting and wish to try out**. + +If there are some quants missing that you'd like me to add, you may request one in the community tab. +If you want to request a public model to be converted, you can also request that in the community tab. +If you have questions regarding this model, please refer to [the original model repo](https://huggingface.co/MohammedSabry/biinduct-1b-baseline). + +You can find more info about me and what I do [here](https://huggingface.co/Flexan/Flexan). + +# Bi-Induct 1B Baseline + +This repository contains the **Bi-Induct 1B Baseline** checkpoint from *Induction Signatures Are Not Enough: A Matched-Compute Study of Load-Bearing Structure in In-Context Learning*. + +This release corresponds to the **1B** setting in the paper and is a **research checkpoint** intended for studying matched-compute pretraining, induction-style curricula, and in-context learning behavior. It is **not** instruction-tuned, alignment-tuned, or safety-tuned. + +## Variant + +Natural-only pretraining baseline with no synthetic copy snippets. 
+ +## Model overview + +- Architecture: decoder-only Transformer +- Positional encoding: RoPE (`theta=10000`) +- Normalization: pre-norm residual blocks +- MLP: SwiGLU +- Attention: grouped-query / grouped key-value attention +- Precision: bfloat16 training +- Context length: 1024 +- Embeddings: untied input/output embeddings + +## Model specification + +| Field | Value | +|---|---:| +| Parameters (paper label) | 1B | +| Layers | 30 | +| Hidden size | 1,536 | +| Intermediate / MLP size | 6,144 | +| Head dimension | 64 | +| Attention heads | 24 | +| KV heads | 6 | + +## Training data + +All checkpoints in this family were pretrained on the **deduplicated version of The Pile** in streaming / shuffled mode. A stable MD5-based hash was used to create a fixed held-out evaluation slice, with **0.2% of the corpus** reserved for evaluation (roughly **0.4B tokens**). Tokenized sequences were truncated to **1024 tokens**. + +For the Bi-Induct variants, synthetic snippets were interleaved on top of the natural stream: + +- **Induction**: `[S || SEP || S]` +- **Anti-Induction**: `[S || SEP || reverse(S)]` +- **Balanced**: each injection randomly chooses induction or anti-induction + +The main cross-scale experiments used **span length L = 20** and **initial mix ratio m0 = 50%**, linearly annealed to zero over the full training budget. + +## Training recipe + +- Optimizer: AdamW (`beta1=0.9`, `beta2=0.999`, weight decay `0.1`) +- Learning rate: peak `1e-3` +- Schedule: `3%` linear warmup, then cosine decay +- Update size: `2^16` tokens per update +- Token budget: approximately `20N` tokens (where `N` is the parameter count), following the Chinchilla-style rule of thumb +- Comparison protocol: iso-FLOPs across curricula at each scale + +## Evaluation summary for the 1B family + +The table below summarizes the main results at this scale. Standard LM benchmarks are evaluated **3-shot** and Todd et al. function-style probes are evaluated **10-shot** with **HITS@1**. 
+ +| Variant | Standard LM ICL composite ↑ | Todd-style ICL composite ↑ | Held-out PPL ↓ | +|---|---:|---:|---:| +| Baseline | 24.2 ± 0.5 | 20.0 ± 1.3 | 14.1 | +| Induction | 23.9 ± 0.5 | 15.2 ± 1.1 | 14.9 | +| Anti-Induction | 23.6 ± 0.4 | 14.7 ± 1.2 | 14.9 | +| Balanced | 24.3 ± 0.3 | 14.9 ± 1.1 | 14.9 | + +**This checkpoint:** **Baseline**. + +## Benchmarks included + +### Standard LM benchmarks +- MMLU +- Winogrande +- CommonSenseQA +- PIQA +- HellaSwag +- TriviaQA-Wiki +- BBH (CoT) +- OpenBookQA +- ARC-Challenge +- GPQA +- GSM-8K +- MathQA +- BoolQ +- LAMBADA + +### Todd et al. function-style probes +- alphabetically first 3 +- alphabetically first 5 +- alphabetically last 3 +- alphabetically last 5 +- capitalize +- capitalize first letter +- capitalize last letter +- choose first of 3 +- choose first of 5 +- choose last of 3 +- choose last of 5 +- choose middle of 3 +- choose middle of 5 +- lowercase first letter +- lowercase last letter +- next capital letter +- next item +- prev item +- word length + +## Example usage + +```python +from transformers import AutoTokenizer, AutoModelForCausalLM + +repo_id = "MohammedSabry/biinduct-1b-baseline" + +tokenizer = AutoTokenizer.from_pretrained(repo_id) +model = AutoModelForCausalLM.from_pretrained(repo_id) + +prompt = "The capital of France is" +inputs = tokenizer(prompt, return_tensors="pt") +outputs = model.generate(**inputs, max_new_tokens=20) +print(tokenizer.decode(outputs[0], skip_special_tokens=True)) +``` + +## Limitations + +- These are research checkpoints, not production chat models. +- They were designed to study the relationship between induction-style telemetry and load-bearing ICL behavior under matched compute. +- The synthetic interventions are intentionally lightweight and token-level; results should not be interpreted as ruling out richer data-rewrite strategies. 
+- Because Bi-Induct replaces a fraction of natural data under iso-FLOPs, some trade-offs may reflect natural-text displacement in addition to mechanistic redundancy. + +## Citation + +If you use this model, please cite: + +```bibtex +@misc{sabry2026inductionsignaturesenoughmatchedcompute, + title={Induction Signatures Are Not Enough: A Matched-Compute Study of Load-Bearing Structure in In-Context Learning}, + author={Mohammed Sabry and Anya Belz}, + year={2026}, + eprint={2509.22947}, + archivePrefix={arXiv}, + primaryClass={cs.CL}, + url={https://arxiv.org/abs/2509.22947}, +} +``` \ No newline at end of file diff --git a/biinduct-1b-baseline.IQ3_M.gguf b/biinduct-1b-baseline.IQ3_M.gguf new file mode 100644 index 0000000..ad44de4 --- /dev/null +++ b/biinduct-1b-baseline.IQ3_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de9d8a5fcff51ecea080e90b9dc0a64ce393fe4f716e866622bd24548cc2affd +size 519045472 diff --git a/biinduct-1b-baseline.IQ3_S.gguf b/biinduct-1b-baseline.IQ3_S.gguf new file mode 100644 index 0000000..4866f6e --- /dev/null +++ b/biinduct-1b-baseline.IQ3_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:262e75afcf4af979394ecce83250b82761a7b7fe8660050017c0d2a90198e70f +size 505885024 diff --git a/biinduct-1b-baseline.IQ4_XS.gguf b/biinduct-1b-baseline.IQ4_XS.gguf new file mode 100644 index 0000000..5004af1 --- /dev/null +++ b/biinduct-1b-baseline.IQ4_XS.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cab613a899da130c71e1f21b2d74db65e0dac0178bef788c50e13a1301698c89 +size 619948384 diff --git a/biinduct-1b-baseline.Q2_K.gguf b/biinduct-1b-baseline.Q2_K.gguf new file mode 100644 index 0000000..0de13a3 --- /dev/null +++ b/biinduct-1b-baseline.Q2_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b785c75f64fb4cf35f68d45ddcb987f316543e11b5aa647353b6e88048198cf9 +size 434399584 diff --git a/biinduct-1b-baseline.Q3_K_L.gguf 
b/biinduct-1b-baseline.Q3_K_L.gguf new file mode 100644 index 0000000..aa37e4f --- /dev/null +++ b/biinduct-1b-baseline.Q3_K_L.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bac9c7625064e6f7d5d42a2a802d1b3225579d81b074c9a741f4c2a5684da763 +size 599335264 diff --git a/biinduct-1b-baseline.Q3_K_M.gguf b/biinduct-1b-baseline.Q3_K_M.gguf new file mode 100644 index 0000000..ff6a1c6 --- /dev/null +++ b/biinduct-1b-baseline.Q3_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6ebe54b4d4631f126fb67ca961aab2c6dbba2dd3d742405c2318d429b399b41 +size 554213728 diff --git a/biinduct-1b-baseline.Q3_K_S.gguf b/biinduct-1b-baseline.Q3_K_S.gguf new file mode 100644 index 0000000..59483df --- /dev/null +++ b/biinduct-1b-baseline.Q3_K_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:727091579c680f83576f089bd6ddadccbd83de82f49194c05cc6a004e4e35109 +size 503534944 diff --git a/biinduct-1b-baseline.Q4_K_M.gguf b/biinduct-1b-baseline.Q4_K_M.gguf new file mode 100644 index 0000000..7bd6702 --- /dev/null +++ b/biinduct-1b-baseline.Q4_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9af438f5917372b71abc4dd1236d023152017ff0396ac99d4ebde7499a405fd +size 682558816 diff --git a/biinduct-1b-baseline.Q4_K_S.gguf b/biinduct-1b-baseline.Q4_K_S.gguf new file mode 100644 index 0000000..189bf0a --- /dev/null +++ b/biinduct-1b-baseline.Q4_K_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:190641eedb4ae8efed86e3930d0160ec7dd4260de554ba1211b291a5edf7564c +size 650201440 diff --git a/biinduct-1b-baseline.Q5_K_M.gguf b/biinduct-1b-baseline.Q5_K_M.gguf new file mode 100644 index 0000000..77ef8da --- /dev/null +++ b/biinduct-1b-baseline.Q5_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:beaf6c22801dbb720398d2da44024c7559021707967bc030b57ad22abe306660 +size 799442272 diff --git a/biinduct-1b-baseline.Q5_K_S.gguf 
b/biinduct-1b-baseline.Q5_K_S.gguf new file mode 100644 index 0000000..614854a --- /dev/null +++ b/biinduct-1b-baseline.Q5_K_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbd195c089cde5471c28c2b12b824ad6ece50a57a7ad45688e438af86433173d +size 780798304 diff --git a/biinduct-1b-baseline.Q6_K.gguf b/biinduct-1b-baseline.Q6_K.gguf new file mode 100644 index 0000000..33e2ea3 --- /dev/null +++ b/biinduct-1b-baseline.Q6_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14d185ea7ea9cbf4b6994d6cda466582c3874e268b88e3b2b1777a48ee2065ac +size 923630944 diff --git a/biinduct-1b-baseline.Q8_0.gguf b/biinduct-1b-baseline.Q8_0.gguf new file mode 100644 index 0000000..dd2f7b3 --- /dev/null +++ b/biinduct-1b-baseline.Q8_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79da03e7a558e39ab43931787f8eee92dd9b9286ed0e2b9c456bc21dcf10c24c +size 1195994464 diff --git a/biinduct-1b-baseline.f16.gguf b/biinduct-1b-baseline.f16.gguf new file mode 100644 index 0000000..7c6b779 --- /dev/null +++ b/biinduct-1b-baseline.f16.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adc356d4d635f19d09a60de9ffd0d9d638a31ac0e6ab2f895aa6ebf7afd91d73 +size 2250304864