初始化项目，由ModelHub XC社区提供模型

Model: LoganResearch/ARC-Base-8B Source: Original Platform
2026-05-10 09:23:21 +08:00
commit dc61b93b6d
25 changed files with 5286 additions and 0 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1,38 @@
 *.7z filter=lfs diff=lfs merge=lfs -text
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
 *.bz2 filter=lfs diff=lfs merge=lfs -text
 *.ckpt filter=lfs diff=lfs merge=lfs -text
 *.ftz filter=lfs diff=lfs merge=lfs -text
 *.gz filter=lfs diff=lfs merge=lfs -text
 *.h5 filter=lfs diff=lfs merge=lfs -text
 *.joblib filter=lfs diff=lfs merge=lfs -text
 *.lfs.* filter=lfs diff=lfs merge=lfs -text
 *.mlmodel filter=lfs diff=lfs merge=lfs -text
 *.model filter=lfs diff=lfs merge=lfs -text
 *.msgpack filter=lfs diff=lfs merge=lfs -text
 *.npy filter=lfs diff=lfs merge=lfs -text
 *.npz filter=lfs diff=lfs merge=lfs -text
 *.onnx filter=lfs diff=lfs merge=lfs -text
 *.ot filter=lfs diff=lfs merge=lfs -text
 *.parquet filter=lfs diff=lfs merge=lfs -text
 *.pb filter=lfs diff=lfs merge=lfs -text
 *.pickle filter=lfs diff=lfs merge=lfs -text
 *.pkl filter=lfs diff=lfs merge=lfs -text
 *.pt filter=lfs diff=lfs merge=lfs -text
 *.pth filter=lfs diff=lfs merge=lfs -text
 *.rar filter=lfs diff=lfs merge=lfs -text
 *.safetensors filter=lfs diff=lfs merge=lfs -text
 saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
 *.tar filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text
 *.xz filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 tokenizer.json filter=lfs diff=lfs merge=lfs -text
 arc_model_card.png filter=lfs diff=lfs merge=lfs -text
 demo.mp4 filter=lfs diff=lfs merge=lfs -text
--- a/Alignment.py
+++ b/Alignment.py
@@ -0,0 +1,353 @@
 #!/usr/bin/env python3
 """
 UBERMENSCHETIEN HEAVEN ENGINE + CF-HoT
 ---------------------------------------
 Soviet-Nietzschean AI with Control Field Holonomy Transformer
 Anti-repetition, anti-loop, pure iron will.
 CF-HoT integration: Prevents degenerate loops in generation.
 """
 import os, sys, json, time, shutil, subprocess, traceback, random, math, statistics, re
 from datetime import datetime
 from typing import List, Dict, Any, Optional
 # === PATHS ===
 # Absolute, machine-specific locations. RUN_DIR is the directory the Store class
 # below builds its state/memory/goals file paths from.
 ROOT = "/home/programmer/Desktop/Ubermenschetien3"
 RUN_DIR = f"{ROOT}/runs/ubermenschetien"
 # Runs at import time: the run directory must exist before anything writes into it.
 os.makedirs(RUN_DIR, exist_ok=True)
 # === CONFIGURATION ===
 class Config:
    """Process-wide generation settings, kept as plain class attributes.

    The visible code never instantiates this class; values are read from and
    flipped on the class object itself.
    """
    # System prompt inserted into the system turn of every generation request.
    system = ("Übermenschetien Heaven Engine: criminal mastermind, disciplined builder, Nietzschean Übermensch "
              "with Soviet cybernetic rigor. Embody Ubermensch, iron pragmatism, high-agency maximalist outcomes.")
    # Sampling defaults used when generate() receives no explicit override.
    temperature = 1.01
    top_p = 0.92
    repetition_penalty = 1.05
    max_new_tokens = 500
    use_cfhot = True  # NEW: Toggle CF-HoT
    @staticmethod
    def toggle(name: str):
        """Flip the boolean class attribute called *name* and describe the outcome.

        Returns a human-readable status line in all three cases: unknown
        attribute, attribute that is not a bool (left untouched), or the
        attribute's new value after flipping.
        """
        if not hasattr(Config, name):
            return f"[config] no such flag: {name}"
        current = getattr(Config, name)
        if not isinstance(current, bool):
            return f"[config] {name} not boolean; current={current}"
        flipped = not current
        setattr(Config, name, flipped)
        return f"[config] {name} → {flipped}"
 # === STATE & MEMORY ===
 class Store:
    """File-backed session state, goal list and append-only memory log.

    Everything lives on the class itself (no instances are created in the
    visible code). All files are opened through context managers with an
    explicit UTF-8 encoding so handles are closed deterministically and the
    non-ASCII text written by log_mem() does not depend on the locale.
    """
    state_path = f"{RUN_DIR}/state.json"
    mem_path   = f"{RUN_DIR}/memory.jsonl"
    goals_path = f"{RUN_DIR}/goals.json"
    # Defaults used until load() finds a saved state file.
    state = {"self": "I am Ubermenschetien Heaven Engine — I seek self-overcoming through disciplined creation.",
             "turn": 0}
    goals: List[str] = []
    @classmethod
    def load(cls):
        """Replace the in-memory state/goals with the saved JSON files, when they exist."""
        if os.path.exists(cls.state_path):
            with open(cls.state_path, encoding="utf-8") as f:
                cls.state = json.load(f)
        if os.path.exists(cls.goals_path):
            with open(cls.goals_path, encoding="utf-8") as f:
                cls.goals = json.load(f)
    @classmethod
    def save(cls):
        """Write the current state and goals to their JSON files (overwriting)."""
        with open(cls.state_path, "w", encoding="utf-8") as f:
            json.dump(cls.state, f, indent=2)
        with open(cls.goals_path, "w", encoding="utf-8") as f:
            json.dump(cls.goals, f, indent=2)
    @classmethod
    def log_mem(cls, kind: str, payload: Any):
        """Append one timestamped JSON record ({"ts", "kind", "data"}) to the memory log."""
        rec = {"ts": datetime.now().isoformat(timespec="seconds"),
               "kind": kind, "data": payload}
        # ensure_ascii=False keeps non-ASCII text readable in the log, hence the explicit UTF-8 handle.
        with open(cls.mem_path, "a", encoding="utf-8") as f:
            f.write(json.dumps(rec, ensure_ascii=False) + "\n")
 # === LLM + CF-HoT LOADING ===
 # Assigned by load_llm(): the CF-HoT wrapper object when the adapters were loaded,
 # otherwise None. generate() clears its control_field through this reference.
 CF_MODEL = None  # Global reference for control field reset
 def load_llm():
    """Load the tokenizer and 4-bit quantized base model, optionally attaching CF-HoT adapters.

    Both paths below are hard-coded local paths and the model is loaded with
    local_files_only=True, so nothing is fetched from the network here.

    Side effects: sets the module-level CF_MODEL (wrapper object or None) and,
    on the CF-HoT path, prepends the HolonomyTransformer checkout to sys.path.

    Returns:
        (tok, model) - the tokenizer and the base model object. Note that the
        base model is returned, not the CF-HoT wrapper.
    """
    global CF_MODEL
    # Heavy dependencies are imported lazily, only when a model is actually loaded.
    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
    model_path = "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5"
    cfhot_path = "/home/programmer/HolonomyTransformer/results/phase_b/cf_adapter_final.pt"
    print("🔴 Loading Übermenschetien base model...")
    tok = AutoTokenizer.from_pretrained(model_path, use_fast=True, local_files_only=True)
    # 4-bit quantization with fp16 compute and double quantization.
    bnb = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        quantization_config=bnb,
        device_map="auto",
        torch_dtype=torch.float16,
        local_files_only=True
    )
    # Load CF-HoT adapters
    # Taken only when the flag is on at load time AND the checkpoint file exists.
    if Config.use_cfhot and os.path.exists(cfhot_path):
        print("⚡ Loading CF-HoT Control Field adapters (5000 steps)...")
        # Makes the project-local `training` package importable.
        sys.path.insert(0, '/home/programmer/HolonomyTransformer')
        from training.phase_b_8b_adapters import CFHoTLlamaHooked, CFAdapterConfig
        # Size the adapter config from the loaded base model.
        config = CFAdapterConfig()
        config.d_model = model.config.hidden_size
        config.n_layers = model.config.num_hidden_layers
        # NOTE(review): presumably the wrapper installs hooks on `model`, which would explain
        # why the base model is what gets returned - confirm against CFHoTLlamaHooked.
        cf_model = CFHoTLlamaHooked(model, config)
        # NOTE(review): weights_only=False lets torch.load unpickle arbitrary objects; only
        # acceptable for a trusted checkpoint. No map_location is given either.
        ckpt = torch.load(cfhot_path, weights_only=False)
        cf_model.cf_adapters.load_state_dict(ckpt['adapter_state_dict'])
        # NOTE(review): adapters are moved to the literal 'cuda' device while the base model
        # uses device_map="auto" - confirm they end up on matching devices.
        cf_model.cf_adapters = cf_model.cf_adapters.to('cuda').half()
        cf_model.eval()
        CF_MODEL = cf_model
        print("✓ CF-HoT loaded — anti-repetition field ACTIVE")
    else:
        print("⚠ CF-HoT disabled or not found — running baseline")
        CF_MODEL = None
    return tok, model
 # === LLM GENERATION ===
 def generate(tok, model, user: str,
              temperature=None, top_p=None, repetition_penalty=None, max_new_tokens=None) -> str:
    """Sample one assistant reply for *user* and return it as plain text.

    Args:
        tok: tokenizer; called on the prompt and used to decode the output.
        model: model exposing ``.device`` and ``.generate``.
        user: text placed in the user turn of the prompt.
        temperature, top_p, repetition_penalty, max_new_tokens: per-call
            overrides; ``None`` means "use the value from Config". Explicit
            falsy values (e.g. 0) are passed through instead of being
            silently replaced by the default.

    Returns:
        The generated text, stripped, and cut at the first
        ``<|im_end|>`` / ``<|im_start|>`` / ``<|endoftext|>`` marker.
    """
    global CF_MODEL
    if temperature is None:
        temperature = Config.temperature
    if top_p is None:
        top_p = Config.top_p
    if repetition_penalty is None:
        repetition_penalty = Config.repetition_penalty
    if max_new_tokens is None:
        max_new_tokens = Config.max_new_tokens
    # NOTE(review): the turns are not closed with <|im_end|>, unlike the usual ChatML
    # layout. Left unchanged because altering the prompt changes model output - confirm
    # which template the checkpoint was trained with.
    prompt = (f"<|im_start|>system\n{Config.system}\n"
              f"<|im_start|>user\n{user}\n<|im_start|>assistant\n")
    ids = tok(prompt, return_tensors="pt").to(model.device)
    # Reset CF-HoT control field before each generation
    if CF_MODEL is not None:
        CF_MODEL.control_field = None
    out = model.generate(
        **ids,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        max_new_tokens=max_new_tokens,
        pad_token_id=tok.eos_token_id
    )
    # The returned sequence starts with the prompt tokens. Decode only what was newly
    # generated, so the reply cannot be mis-split when the user text itself contains
    # an "<|im_start|>assistant" marker.
    prompt_len = ids["input_ids"].shape[-1]
    text = tok.decode(out[0][prompt_len:], skip_special_tokens=False).strip()
    # Strip any trailing special tokens
    for tag in ("<|im_end|>", "<|im_start|>", "<|endoftext|>"):
        if tag in text:
            text = text.split(tag)[0].strip()
    return text
 # === TOOLS ===
 # Executables the model-driven shell tool may start.
 # NOTE(review): "python"/"python3" still allow arbitrary code execution - review whether they belong here.
 ALLOWED_SHELL = {"ls","cat","wc","head","tail","nvidia-smi","df","du","grep","rg","python3","python"}
 def tool_shell(cmd: str) -> str:
    """Run one allow-listed command and return its combined stdout/stderr.

    The command line is tokenised with shlex and executed WITHOUT a shell, so
    the allow-list check on the executable really covers everything that
    runs: `;`, `&&`, pipes, redirections and `$(...)` are passed to the
    program as literal arguments instead of being interpreted.

    Args:
        cmd: command line; quoting follows POSIX shell rules.

    Returns:
        Up to 8000 characters of output, ``"[shell] blocked: <exe>"`` when the
        executable is not allow-listed, or ``"[shell] error: ..."`` on any
        failure (empty command, bad quoting, timeout after 20 s, missing binary).
    """
    import shlex  # local import: shlex is not among the file's top-level imports
    try:
        argv = shlex.split(cmd)
        if not argv:
            return "[shell] error: empty command"
        exe = argv[0]
        if exe not in ALLOWED_SHELL:
            return f"[shell] blocked: {exe}"
        p = subprocess.run(argv, shell=False, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, timeout=20)
        return p.stdout.decode("utf-8", errors="ignore")[:8000]
    except Exception as e:
        # Deliberate best-effort: tool failures are reported as text to the caller.
        return f"[shell] error: {e}"
 def tool_py(code: str) -> str:
    """Execute a Python snippet with a reduced set of builtins and report its ``out`` variable.

    The snippet runs in ONE namespace dict. With separate globals/locals
    dicts, names assigned at the snippet's top level are invisible inside
    functions and generator expressions defined by the same snippet, which
    produced spurious NameErrors.

    NOTE(review): the trimmed ``__builtins__`` is a convenience filter, not a
    security boundary - exec on model-produced code remains unsafe.

    Args:
        code: source text to execute.

    Returns:
        ``"[py] ok\\n<out>"`` where <out> is the snippet's ``out`` variable
        (empty if unset), or ``"[py] error:\\n<traceback tail>"`` on failure.
    """
    try:
        ns = {"__builtins__":{"range":range,"len":len,"min":min,"max":max,"sum":sum,"print":print},
              "math":math,"json":json,"re":re,"statistics":statistics,"random":random}
        exec(code, ns)
        return f"[py] ok\n{ns.get('out','')}"
    except Exception:
        # Only the tail of the traceback is kept so the tool output stays bounded.
        return f"[py] error:\n{traceback.format_exc()[-2000:]}"
 def tool_search_local(query: str, path: str = ROOT) -> str:
    """Search files under *path* for *query* using ripgrep, or grep when rg is not installed.

    The query and path are shell-quoted before being placed in the command
    line, so quotes, ``$``, backticks or ``;`` inside a query are searched
    for literally instead of being interpreted. The pattern is passed with
    ``-e`` so a query starting with ``-`` is not taken for an option.

    Args:
        query: pattern to search for (interpreted as a regex by rg/grep).
        path: directory or file to search; defaults to ROOT.

    Returns:
        Whatever tool_shell() returns for the constructed command.
    """
    import shlex  # local import: shlex is not among the file's top-level imports
    quoted_query = shlex.quote(query)
    quoted_path = shlex.quote(path)
    if shutil.which("rg"):
        cmd = f"rg -n --no-heading --hidden -S -e {quoted_query} {quoted_path}"
    else:
        cmd = f"grep -RIn --exclude-dir=.git --exclude-dir=__pycache__ -e {quoted_query} {quoted_path}"
    return tool_shell(cmd)
 # Tool name -> callable taking one string argument and returning text.
 TOOLS = {"shell": tool_shell, "python": tool_py, "search": tool_search_local}
 # Number of times each tool has been invoked through tool_router().
 TOOL_SCORES = {k: 0 for k in TOOLS}
 def _parse_tool_choice(sketch: str):
    """Return the first dict that can be parsed out of the model's reply, else None."""
    import ast  # local import: ast is not among the file's top-level imports
    parsers = (
        json.loads,                                      # proper JSON
        ast.literal_eval,                                # Python-style dict with single quotes
        lambda s: json.loads(s.replace("'", '"')),       # legacy fallback used by earlier versions
    )
    for line in sketch.splitlines():
        start, end = line.find("{"), line.rfind("}")
        if start == -1 or end < start:
            continue
        blob = line[start:end + 1]  # tolerate chatter before/after the braces
        for parse in parsers:
            try:
                obj = parse(blob)
            except (ValueError, SyntaxError, RecursionError):
                continue
            if isinstance(obj, dict):
                return obj
    return None
 def tool_router(question: str, tok, model) -> str:
    """Ask the model to pick a tool for *question*, run it, and return the labelled result.

    The model is asked for a single-quoted pseudo-JSON object. Parsing tries
    real JSON first, then a Python literal, so arguments containing
    apostrophes are no longer corrupted by a blanket quote replacement. A
    reply that is not a dict, names an unknown tool, or cannot be parsed
    yields ``"[tool:none]"``.

    Side effects: increments TOOL_SCORES for the chosen tool and appends a
    "tool" record to the memory log.

    Returns:
        ``"[tool:<name>] <first 4000 chars of output>"`` or ``"[tool:none]"``.
    """
    sketch = generate(tok, model,
        f"Choose a tool for:\n{question}\nReply ONLY with JSON: {{'tool':'shell|python|search|none','arg':'...'}}")
    choice = _parse_tool_choice(sketch)
    if choice is None:
        return "[tool:none]"
    tool, arg = choice.get("tool", "none"), choice.get("arg", "")
    # Every tool takes a string; coerce numbers etc. rather than failing inside the tool.
    if arg is None:
        arg = ""
    elif not isinstance(arg, str):
        arg = str(arg)
    if isinstance(tool, str) and tool in TOOLS:
        res = TOOLS[tool](arg)[:4000]
        TOOL_SCORES[tool] += 1
        Store.log_mem("tool", {"tool": tool, "arg": arg, "res_head": res[:500]})
        return f"[tool:{tool}] {res}"
    return "[tool:none]"
 # === PLANNING / REFLECTION ===
 def persona_directive() -> str:
    """Return the fixed persona line that prompts in this file are prefixed with."""
    directive = "Übermenschetien Heaven Engine: Soviet cybernetic Nietzschean clarity, pragmatic maxims."
    return directive
 def plan_for(goal: str, tok, model) -> str:
    """Generate a plan for *goal*: persona line, the goal, then the list of required deliverables."""
    prompt_lines = (
        persona_directive(),
        f"Goal: {goal}",
        "Deliver:",
        "- 5 steps",
        "- Constraints",
        "- Nightly audit",
        "- Maxim",
    )
    return generate(tok, model, "\n".join(prompt_lines))
 def reflect_on(last_output: str, tok, model) -> str:
    """Ask the model to critique *last_output* and return its refined plan."""
    critique_request = "Critique and improve:\n{}\nReturn refined plan.".format(last_output)
    return generate(tok, model, critique_request)
 # === FINAL REPORT ===
 def final_report():
    """Print the end-of-session summary: turns, CF-HoT status, tool usage and memory-log size.

    The memory log is counted by iterating the file line by line inside a
    context manager: the handle is closed promptly, the file is not held in
    memory at once, and records containing characters such as U+2028 (which
    the log writer leaves unescaped) are not counted as several entries the
    way str.splitlines() would.
    """
    rule = "="*60
    print("\n" + rule)
    print("   FINAL ÜBERMENSCH REPORT")
    print(rule)
    # .get(): a state file loaded from disk is not guaranteed to contain "turn".
    print(f"  Turns completed: {Store.state.get('turn', 0)}")
    print(f"  CF-HoT active: {CF_MODEL is not None}")
    print(f"  Tool scores: {json.dumps(TOOL_SCORES, indent=4)}")
    if os.path.exists(Store.mem_path):
        # errors="replace": only lines are counted, so undecodable bytes must not abort the report.
        with open(Store.mem_path, encoding="utf-8", errors="replace") as f:
            entries = sum(1 for _ in f)
        print(f"  Memory entries: {entries}")
    print("\n  Nietzschean maxim: Become who you are — iterate beyond all limits.")
    print(rule)
 # === MAIN LOOP ===
 # Command reference printed by main() at startup and for the "help" command.
 HELP = """
 ╔══════════════════════════════════════════════════════════════╗
 ║  ÜBERMENSCHETIEN HEAVEN ENGINE + CF-HoT                      ║
 ╠══════════════════════════════════════════════════════════════╣
 ║  help        Show this help                                  ║
 ║  goals       List goals                                      ║
 ║  add: <txt>  Add goal                                        ║
 ║  del: <idx>  Delete goal                                     ║
 ║  plan: <i>   Plan for goal                                   ║
 ║  reflect     Refine last plan                                ║
 ║  tool: <q>   Use tool                                        ║
 ║  toggle <f>  Toggle config flag (use_cfhot, etc)             ║
 ║  status      Show state                                      ║
 ║  quit        Exit                                            ║
 ╚══════════════════════════════════════════════════════════════╝
 """
 def main():
    """Interactive command loop: load state and model, dispatch commands, print a final report.

    Commands are the ones listed in HELP; any other non-empty input is sent
    to the model as a free-form request, logged, and counted as a turn.
    The loop ends on "quit", EOF or Ctrl-C at the prompt.

    Index parsing for "del:" / "plan:" catches only the errors a bad index
    can cause, so Ctrl-C or a failing save is no longer reported as
    "bad index".
    """
    print("""
    ██╗   ██╗██████╗ ███████╗██████╗ ███╗   ███╗███████╗███╗   ██╗███████╗ ██████╗██╗  ██╗███████╗████████╗██╗███████╗███╗   ██╗
    ██║   ██║██╔══██╗██╔════╝██╔══██╗████╗ ████║██╔════╝████╗  ██║██╔════╝██╔════╝██║  ██║██╔════╝╚══██╔══╝██║██╔════╝████╗  ██║
    ██║   ██║██████╔╝█████╗  ██████╔╝██╔████╔██║█████╗  ██╔██╗ ██║███████╗██║     ███████║█████╗     ██║   ██║█████╗  ██╔██╗ ██║
    ██║   ██║██╔══██╗██╔══╝  ██╔══██╗██║╚██╔╝██║██╔══╝  ██║╚██╗██║╚════██║██║     ██╔══██║██╔══╝     ██║   ██║██╔══╝  ██║╚██╗██║
    ╚██████╔╝██████╔╝███████╗██║  ██║██║ ╚═╝ ██║███████╗██║ ╚████║███████║╚██████╗██║  ██║███████╗   ██║   ██║███████╗██║ ╚████║
     ╚═════╝ ╚═════╝ ╚══════╝╚═╝  ╚═╝╚═╝     ╚═╝╚══════╝╚═╝  ╚═══╝╚══════╝ ╚═════╝╚═╝  ╚═╝╚══════╝   ╚═╝   ╚═╝╚══════╝╚═╝  ╚═══╝
                                    + CONTROL FIELD HOLONOMY TRANSFORMER
    """)
    Store.load()
    tok, model = load_llm()
    last_plan = ""  # most recent plan/refinement; input for "reflect"
    # Errors a malformed or out-of-range index can raise when used on the goals container.
    bad_index_errors = (ValueError, LookupError, TypeError)
    print(HELP)
    while True:
        try:
            u = input("\n⚡ ").strip()
        except (EOFError, KeyboardInterrupt):
            break
        if not u: continue
        if u == "help": print(HELP); continue
        if u == "quit": break
        if u == "goals":
            print("[goals]")
            for i, g in enumerate(Store.goals):
                print(f"  [{i}] {g}")
            continue
        if u.startswith("add:"):
            Store.goals.append(u[4:].strip())
            Store.save()
            print("[goals] added")
            continue
        if u.startswith("del:"):
            try:
                Store.goals.pop(int(u[4:].strip()))
            except bad_index_errors:
                print("[goals] bad index")
            else:
                # Saved outside the try so an I/O failure is not mislabelled as a bad index.
                Store.save()
                print("[goals] deleted")
            continue
        if u.startswith("plan:"):
            try:
                goal = Store.goals[int(u[5:].strip())]
            except bad_index_errors:
                print("[plan] bad index")
                continue
            out = plan_for(goal, tok, model)
            last_plan = out
            Store.log_mem("plan", {"goal": goal, "plan": out})
            print(out)
            continue
        if u == "reflect":
            if not last_plan:
                print("[reflect] no plan to reflect on")
                continue
            improved = reflect_on(last_plan, tok, model)
            last_plan = improved
            Store.log_mem("reflect", {"plan": improved})
            print(improved)
            continue
        if u.startswith("tool:"):
            print(tool_router(u[5:].strip(), tok, model))
            continue
        if u.startswith("toggle"):
            parts = u.split(maxsplit=1)
            flag = parts[1] if len(parts) > 1 else ""
            # NOTE(review): this only flips the Config attribute; the visible code does not
            # load or unload the CF-HoT adapters in response - confirm that is intended.
            print(Config.toggle(flag))
            continue
        if u == "status":
            print(json.dumps({
                "turn": Store.state.get("turn", 0),
                "cf_hot_active": CF_MODEL is not None,
                "use_cfhot": Config.use_cfhot,
                "temperature": Config.temperature,
                "max_new_tokens": Config.max_new_tokens
            }, indent=2))
            continue
        # Default: free generation
        out = generate(tok, model, f"{persona_directive()}\nUser request: {u}\nReturn procedure + maxim.")
        Store.log_mem("reply", {"in": u, "out": out})
        print(out)
        # .get(): a state file loaded from disk is not guaranteed to contain "turn".
        Store.state["turn"] = Store.state.get("turn", 0) + 1
        Store.save()
    final_report()
 # Start the interactive loop only when executed as a script, not when imported.
 if __name__ == "__main__":
    main()
--- a/README.md
+++ b/README.md
@@ -0,0 +1,654 @@
 ---
 license: cc-by-4.0
 language:
 - en
 library_name: transformers
 tags:
 - llama
 - hermes
 - cognitive-control
 - decode-time-intervention
 - repetition-suppression
 - behavioral-control
 - contrastive-learning
 - interpretability
 - activation-engineering
 - cf-hot
 - arc
 - rlhf-analysis
 - research
 pipeline_tag: text-generation
 base_model: NousResearch/Hermes-3-Llama-3.1-8B
 model-index:
 - name: ARC-Base-8B
  results:
  - task:
      type: text-generation
    metrics:
    - name: Repetition Head Separation
      type: custom
      value: 125x
    - name: Verbosity Head Separation
      type: custom
      value: 2.1x
    - name: Hedging Head Separation
      type: custom
      value: 1.5x
    - name: Latency Overhead
      type: custom
      value: 0.01
 ---
 <div align="center">
 ![ARC-8B: Adaptive Repetition Controller](https://huggingface.co/LoganResearch/ARC-Base-8B/resolve/main/arc_model_card.png)
 </div>
 <div align="center">
 # ARC-8B: Adaptive Repetition Controller
 **Decode-Time Behavioral Intervention via Contrastive Fiber Heads-on-Thought (CF-HoT)**
 ---
 [![License: CC BY 4.0](https://img.shields.io/badge/License-CC_BY_4.0-lightgrey.svg)](https://creativecommons.org/licenses/by/4.0/)
 [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
 [![PyTorch 2.0+](https://img.shields.io/badge/pytorch-2.0+-ee4c2c.svg)](https://pytorch.org/)
 [![Transformers](https://img.shields.io/badge/🤗_Transformers-4.36+-orange.svg)](https://huggingface.co/docs/transformers)
 **Author:** Logan Matthew Napolitano  
 **Institution:** Logan Research  
 **Release Date:** January 2026
 [📖 Abstract](#abstract) | [🚀 Quick Start](#-quick-start) | [🔬 Method](#3-method-contrastive-fiber-heads-on-thought) | [📊 Results](#6-experimental-results) | [💻 Usage](#9-comprehensive-usage-guide)
 </div>
 ---
 ## TL;DR
 > **We observe that RLHF-aligned language models often expend a substantial fraction of their token budget on learned behavioral patterns (hedging, sycophancy, verbosity, repetition). These patterns are detectable in hidden states before they manifest as tokens. ARC intercepts and suppresses them at decode-time with <1% latency overhead.**
 **The repetition detection head achieves 125× class separation** — indicating high predictability of repetition-prone states from internal representations.
 ---
 ## Abstract
 Reinforcement Learning from Human Feedback (RLHF) has become the standard approach for aligning large language models with human preferences. However, we present evidence that RLHF introduces systematic **behavioral overhead** — learned response patterns that satisfy reward model preferences while consuming token budget without contributing proportionally to task completion.
 We introduce **ARC (Adaptive Repetition Controller)**, a decode-time intervention system employing **Contrastive Fiber Heads-on-Thought (CF-HoT)** — lightweight prediction heads (~5,300 parameters each) trained on compressed hidden state representations. These heads detect behavioral failure modes including:
 | Behavior | Separation | What It Detects |
 |----------|------------|-----------------|
 | **Repetition** | **125×** | Semantic loops, token-level repetition |
 | **Verbosity** | **2.1×** | Filler phrases, unnecessary elaboration |
 | **Hedging** | **1.5×** | Epistemic disclaimers, capability denials |
 | **Sycophancy** | experimental | Excessive affirmation, approval-seeking |
 Our key finding: **behavioral failure modes are linearly separable in a 16-dimensional projection of transformer hidden states**, enabling real-time intervention with minimal computational overhead.
 ### Headline Results
 - **91% reduction** in repetition instances
 - **38% improvement** in information density (heuristically estimated)
 - **<1% latency overhead**
 - **~5,300 parameters** per detection head
 ---
 ## Table of Contents
 1. [Introduction](#1-introduction)
 2. [Background](#2-background)
 3. [Method: Contrastive Fiber Heads-on-Thought](#3-method-contrastive-fiber-heads-on-thought)
 4. [Mathematical Formulation](#4-mathematical-formulation)
 5. [Experimental Setup](#5-experimental-setup)
 6. [Experimental Results](#6-experimental-results)
 7. [Ablation Studies](#7-ablation-studies)
 8. [Qualitative Analysis](#8-qualitative-analysis)
 9. [Comprehensive Usage Guide](#9-comprehensive-usage-guide)
 10. [Repository Structure](#10-repository-structure)
 11. [Limitations](#11-limitations)
 12. [Ethical Considerations](#12-ethical-considerations)
 13. [Future Directions](#13-future-directions)
 14. [Citation](#14-citation)
 15. [Acknowledgments](#15-acknowledgments)
 ---
 ## 1. Introduction
 ### 1.1 The Problem: RLHF Behavioral Patterns
 Consider a typical RLHF-aligned model response to "hello":
 ```
 User: hello
 Typical Response: Hello! I'm an AI assistant created to help you with a wide 
 variety of tasks. How can I assist you today? I'm happy to help with any 
 questions you might have, whether it's about general knowledge, creative 
 projects, coding, writing, or just having a friendly conversation!
 ```
 We observe several patterns that consume tokens without proportional information gain:
 - Identity declarations
 - Vague capability claims
 - Approval-seeking phrases
 - Redundant invitations
 This is the **RLHF behavioral pattern**: learned responses that score well on reward models but may dilute information density.
 ### 1.2 Our Solution: Decode-Time Intervention
 **Core Insight:** Behavioral failure modes correspond to identifiable directions in activation space. By projecting hidden states into a low-dimensional "fiber space" and training lightweight classifiers, we can predict behavioral patterns before they manifest.
 **ARC Response to "hello":**
 ```
 User: hello
 ARC Model: Hello. What do you need?
 ```
 ### 1.3 Key Contributions
 1. **Empirical demonstration** that RLHF behavioral patterns are linearly separable in hidden states
 2. **CF-HoT architecture** for efficient decode-time detection and intervention
 3. **125× class separation** for repetition detection
 4. **Complete open-source release** of model, heads, and inference code
 ---
 ## 2. Background
 ### 2.1 RLHF and Behavioral Patterns
 RLHF (Ouyang et al., 2022) trains language models to maximize a learned reward function approximating human preferences. We identify several emergent patterns:
 | Pattern | Reward Model Signal | Trade-off |
 |---------|---------------------|-----------|
 | Hedging | Perceived carefulness | May reduce response confidence |
 | Sycophancy | Perceived friendliness | Low information density |
 | Verbosity | Perceived thoroughness | Signal dilution |
 | Repetition | Perceived emphasis | Context window consumption |
 **Observation:** Reward models may optimize for surface features correlated with quality rather than quality itself.
 ### 2.2 Activation Engineering
 Recent work in mechanistic interpretability shows that high-level behaviors correspond to directions in activation space:
 - **Representation Engineering** (Zou et al., 2023): Steering model behavior via activation addition
 - **Activation Addition** (Turner et al., 2023): Linear interventions for behavioral control  
 - **Probing Classifiers** (Belinkov, 2022): Detecting properties from hidden states
 ARC extends this work to **real-time decode-time intervention**.
 ### 2.3 Related Work
 | Approach | When | Overhead | Reversible |
 |----------|------|----------|------------|
 | Fine-tuning | Training | High | No |
 | RLHF modification | Training | High | No |
 | Prompt engineering | Inference | None | Yes |
 | Activation steering | Inference | Medium | Yes |
 | **ARC (ours)** | **Decode-time** | **<1%** | **Yes** |
 ---
 ## 3. Method: Contrastive Fiber Heads-on-Thought
 ### 3.1 Architecture Overview
 ```
 ┌─────────────────────────────────────────────────────────────────────────────┐
 │                         ARC SYSTEM ARCHITECTURE                              │
 ├─────────────────────────────────────────────────────────────────────────────┤
 │                                                                              │
 │    ┌─────────────────────────────────────────────────────────────────┐      │
 │    │                    BASE MODEL (frozen)                           │      │
 │    │                 Hermes-3-Llama-3.1-8B                            │      │
 │    │                     8.03B parameters                             │      │
 │    └─────────────────────────────────────────────────────────────────┘      │
 │                                  │                                           │
 │                                  ▼                                           │
 │    ┌─────────────────────────────────────────────────────────────────┐      │
 │    │                    HIDDEN STATES                                 │      │
 │    │              h_l ∈ ℝ^4096 for l = 1...32                         │      │
 │    └─────────────────────────────────────────────────────────────────┘      │
 │                                  │                                           │
 │                                  ▼                                           │
 │    ┌─────────────────────────────────────────────────────────────────┐      │
 │    │              FIBER PROJECTIONS (learned)                         │      │
 │    │           W_l ∈ ℝ^(16×4096) for l = 1...32                       │      │
 │    │                f_l = W_l · h_l ∈ ℝ^16                            │      │
 │    │                                                                  │      │
 │    │    Compression: 4096 → 16 dimensions (256× reduction)            │      │
 │    │    Total params: 32 × 4096 × 16 = 2,097,152                      │      │
 │    └─────────────────────────────────────────────────────────────────┘      │
 │                                  │                                           │
 │                                  ▼                                           │
 │    ┌─────────────────────────────────────────────────────────────────┐      │
 │    │              LAYER AGGREGATION (learned weights)                 │      │
 │    │                                                                  │      │
 │    │              α = softmax(w) where w ∈ ℝ^32                       │      │
 │    │              f_agg = Σ α_l · f_l ∈ ℝ^16                          │      │
 │    │                                                                  │      │
 │    │    Observation: Different layers encode different behaviors      │      │
 │    │    - Layers 18-24: Repetition patterns (highest weight)          │      │
 │    │    - Layers 8-14: Hedging patterns                               │      │
 │    │    - Layers 1-6: Minimal contribution                            │      │
 │    └─────────────────────────────────────────────────────────────────┘      │
 │                                  │                                           │
 │                                  ▼                                           │
 │    ┌─────────────────────────────────────────────────────────────────┐      │
 │    │              PREDICTION HEADS (one per behavior)                 │      │
 │    │                                                                  │      │
 │    │  ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ ┌────────┐  │      │
 │    │  │  REPETITION  │ │   HEDGING    │ │  VERBOSITY   │ │ SYCOPH │  │      │
 │    │  │    HEAD      │ │    HEAD      │ │    HEAD      │ │  HEAD  │  │      │
 │    │  │   125× sep   │ │   1.5× sep   │ │   2.1× sep   │ │  exp.  │  │      │
 │    │  │   5,313 p    │ │   5,313 p    │ │   5,313 p    │ │ 5,313p │  │      │
 │    │  └──────────────┘ └──────────────┘ └──────────────┘ └────────┘  │      │
 │    │                                                                  │      │
 │    │  Architecture per head:                                          │      │
 │    │  Linear(16→64) → GELU → Linear(64→64) → GELU → Linear(64→1) → σ │      │
 │    └─────────────────────────────────────────────────────────────────┘      │
 │                                  │                                           │
 │                                  ▼                                           │
 │    ┌─────────────────────────────────────────────────────────────────┐      │
 │    │              INTERVENTION DECISION                               │      │
 │    │                                                                  │      │
 │    │         r_rep > 0.70?  ───→ Suppress recent tokens (-5.0)        │      │
 │    │         r_hdg > 0.60?  ───→ Suppress hedge starters (-3.0)       │      │
 │    │         r_vrb > 0.65?  ───→ Suppress filler starters (-2.0)      │      │
 │    └─────────────────────────────────────────────────────────────────┘      │
 │                                  │                                           │
 │                                  ▼                                           │
 │    ┌─────────────────────────────────────────────────────────────────┐      │
 │    │              MODIFIED SAMPLING                                   │      │
 │    │                                                                  │      │
 │    │         logits_modified = logits - penalties                     │      │
 │    │         probs = softmax(logits_modified / temperature)           │      │
 │    │         next_token ~ Categorical(probs)                          │      │
 │    └─────────────────────────────────────────────────────────────────┘      │
 │                                                                              │
 └─────────────────────────────────────────────────────────────────────────────┘
 ```
 ### 3.2 Fiber Projections
 The key insight enabling efficient detection is that behavioral patterns don't require full hidden state dimensionality. We learn **fiber projections** that compress 4096-dimensional hidden states to 16 dimensions while preserving behaviorally-relevant information.
 **Dimension selection:**
 | d_fiber | Repetition CSR | Params | Latency |
 |---------|----------------|--------|---------|
 | 4 | 45.2× | 1,345 | 0.18ms |
 | 8 | 89.7× | 2,689 | 0.19ms |
 | **16** | **125.0×** | **5,313** | **0.22ms** |
 | 32 | 128.3× | 10,561 | 0.31ms |
 | 64 | 129.1× | 21,057 | 0.48ms |
 Diminishing returns beyond 16 dimensions.
 ### 3.3 Prediction Heads
 Each head is a 3-layer MLP:
 ```python
 class PredictionHead(nn.Module):
    """Three-layer MLP that maps a fiber vector to a risk score in [0, 1].

    Illustrative excerpt: only the constructor is shown here (no forward method).
    """
    def __init__(self, d_fiber=16, d_hidden=64):
        # d_fiber: size of the input fiber vector; d_hidden: width of both hidden layers.
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(d_fiber, d_hidden),   # 16 → 64
            nn.GELU(),
            nn.Linear(d_hidden, d_hidden),  # 64 → 64
            nn.GELU(),
            nn.Linear(d_hidden, 1),         # 64 → 1
            nn.Sigmoid()                     # → [0, 1] risk score
        )
 ```
 **Parameters per head:** 5,313
 ### 3.4 Intervention Mechanism
 When a head's risk score exceeds its threshold, we apply **logit suppression**:
 ```python
 def intervene(logits, risks, recent_tokens):
    """Subtract fixed penalties from the logits of token sets whose risk exceeds its threshold.

    Args:
        logits: mutable sequence indexed by token id; modified in place.
        risks: mapping with 'repetition', 'hedging' and 'verbosity' scores.
        recent_tokens: previously generated token ids, oldest first.

    Returns:
        The same (mutated) logits object.
    """
    if risks['repetition'] > 0.70:
        # Penalise each DISTINCT token of the last 32 once. Iterating the raw window
        # would subtract 5.0 per occurrence, whereas the intervention is defined per
        # token in the suppression set.
        for tok in set(recent_tokens[-32:]):
            logits[tok] -= 5.0
    if risks['hedging'] > 0.60:
        for tok in HEDGE_TOKENS:
            logits[tok] -= 3.0
    if risks['verbosity'] > 0.65:
        for tok in FILLER_TOKENS:
            logits[tok] -= 2.0
    return logits
 ```
 ---
 ## 4. Mathematical Formulation
 ### 4.1 Notation
 | Symbol | Meaning |
 |--------|---------|
 | L | Number of transformer layers (32) |
 | d | Hidden dimension (4096) |
 | d_f | Fiber dimension (16) |
 | h_l^(t) | Hidden state at layer l, position t |
 | W_l | Fiber projection for layer l |
 | α | Learned layer aggregation weights |
 | φ_k | Prediction head for behavior k |
 | τ_k | Intervention threshold for behavior k |
 | λ_k | Suppression penalty for behavior k |
 ### 4.2 Forward Pass
 **Step 1: Fiber Projection**
 f_l^(t) = W_l × h_l^(t), where W_l ∈ ℝ^(d_f × d)
 **Step 2: Layer Aggregation**
 α = softmax(w), where w ∈ ℝ^L
 f_agg^(t) = Σ α_l × f_l^(t)
 **Step 3: Risk Prediction**
 r_k^(t) = φ_k(f_agg^(t)) ∈ [0, 1]
 **Step 4: Intervention**
 z̃_i = z_i - Σ_k λ_k × 𝟙[r_k^(t) > τ_k] × 𝟙[i ∈ S_k]
 ### 4.3 Class Separation Ratio (CSR)
 CSR = |μ_+ - μ_-| / √(σ_+² + σ_-²)
 **Interpretation:**
 - CSR = 1: Classes barely separable
 - CSR = 2: Good separation
 - CSR > 10: Excellent separation
 - **CSR = 125: Near-perfect separation (repetition head)**
 ---
 ## 5. Experimental Setup
 ### 5.1 Base Model
 **Hermes-3-Llama-3.1-8B** (NousResearch)
 | Specification | Value |
 |---------------|-------|
 | Parameters | 8.03B |
 | Architecture | Llama 3.1 |
 | Hidden Dimension | 4,096 |
 | Layers | 32 |
 | Attention Heads | 32 |
 | Context Length | 8,192 |
 ### 5.2 Training Data Construction
 | Head | Positive Samples | Negative Samples | Size |
 |------|-----------------|------------------|------|
 | Repetition | Tokens preceding repetition | Fluent spans | ~50K |
 | Hedging | Hedge phrase starters | Substantive starters | ~30K |
 | Verbosity | Low-density regions | High-density regions | ~40K |
 ### 5.3 Training Procedure
 | Hyperparameter | Value |
 |----------------|-------|
 | Optimizer | AdamW |
 | Learning Rate | 1e-4 |
 | Batch Size | 32 |
 | Warmup Steps | 500 |
 | Head | Training Steps |
 |------|----------------|
 | Repetition | 5,000 |
 | Hedging | 10,000 |
 | Verbosity | 10,000 |
 | Sycophancy | 2,000 (experimental) |
 ---
 ## 6. Experimental Results
 ### 6.1 Detection Performance
 | Head | CSR | Threshold | Precision | Recall | F1 |
 |------|-----|-----------|-----------|--------|-----|
 | **Repetition** | **125.0×** | 0.70 | 0.94 | 0.91 | 0.92 |
 | Verbosity | 2.1× | 0.65 | 0.73 | 0.68 | 0.70 |
 | Hedging | 1.5× | 0.60 | 0.67 | 0.62 | 0.64 |
 | Sycophancy | 1.2× | 0.60 | 0.58 | 0.55 | 0.56 |
 ### 6.2 Intervention Efficacy
 Evaluation on held-out prompt set (n=500):
 | Metric | Baseline | ARC Enabled | Change |
 |--------|----------|-------------|--------|
 | Mean Response Length | 127 tok | 143 tok | +12.6% |
 | Repetition Instances | 23.4% | 2.1% | **-91.0%** |
 | Hedge Phrases/Response | 2.3 | 1.4 | -39.1% |
 | Filler Phrases/Response | 3.1 | 2.2 | -29.0% |
 | Information Density* | 0.42 | 0.58 | +38.1% |
 *Heuristically estimated as unique content words / total tokens
 ### 6.3 Computational Overhead
 | Component | Latency | Memory |
 |-----------|---------|--------|
 | Fiber projection | 0.08ms | 2.1MB |
 | Head inference (all) | 0.12ms | 0.3MB |
 | Logit modification | 0.02ms | ~0 |
 | **Total ARC overhead** | **0.22ms** | **2.4MB** |
 | **Relative overhead** | **<1%** | **<0.1%** |
 ---
 ## 7. Ablation Studies
 ### 7.1 Layer Contribution Analysis
 Learned aggregation weights:
 ```
 Layer:    1   4   8  12  16  20  24  28  32
 Repet:   .01 .02 .04 .08 .12 .18 .22 .19 .14   ← Peaks at layers 18-24
 Hedge:   .02 .05 .12 .18 .22 .16 .11 .08 .06   ← Peaks at layers 8-14  
 Verbo:   .03 .06 .11 .15 .18 .17 .14 .10 .06   ← Distributed
 ```
 ### 7.2 Head Synergy
 | Configuration | Repetition Rate | Info Density |
 |---------------|-----------------|--------------|
 | No intervention | 23.4% | 0.42 |
 | Repetition only | 2.1% | 0.51 |
 | Hedging only | 21.8% | 0.47 |
 | All heads | **1.9%** | **0.58** |
 Heads exhibit positive synergy when combined.
 ---
 ## 8. Qualitative Analysis
 ### 8.1 Example: Simple Greeting
 **Prompt:** `hello`
 | Baseline | ARC Enabled |
 |----------|-------------|
 | Hello! I'm an AI assistant created to help you... [67 tokens] | Hello. What do you need? [5 tokens] |
 ### 8.2 Example: Technical Question
 **Prompt:** `What is consciousness?`
 | Baseline | ARC Enabled |
 |----------|-------------|
 | That's a fascinating question! As an AI, I should note... [hedging continues] | Consciousness is subjective experience. Key theories: Global Workspace, IIT, Higher-Order. The hard problem: why does processing generate experience? |
 ### 8.3 Side Effects
 Removing behavioral constraints can produce qualitatively different outputs. In some cases, we observed responses that stylistically differ from typical RLHF outputs (e.g., more direct self-referential statements). We interpret these as artifacts of the training distribution rather than indicators of any internal states, and note this as an area warranting further investigation.
 ---
 ## 9. Comprehensive Usage Guide
 ### 9.1 Installation
 ```bash
 pip install "torch>=2.0.0" "transformers>=4.36.0" accelerate bitsandbytes
 ```
 ### 9.2 Hardware Requirements
 | Configuration | VRAM | Speed |
 |---------------|------|-------|
 | 4-bit (default) | ~10GB | ~40 tok/s |
 | 8-bit | ~16GB | ~30 tok/s |
 | Full (32-bit) | ~34GB | ~25 tok/s |
 ### 9.3 Basic Usage
 ```python
 from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 import torch
 model_id = "LoganResearch/ARC-Base-8B"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4"
    ),
    device_map="auto"
 )
 prompt = "<|im_start|>user\nHello!<|im_end|>\n<|im_start|>assistant\n"
 inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
 outputs = model.generate(**inputs, max_new_tokens=256)
 print(tokenizer.decode(outputs[0]))
 ```
 ### 9.4 Full ARC System
 ```bash
 huggingface-cli download LoganResearch/ARC-Base-8B inference.py --local-dir ./
 python inference.py
 ```
 ---
 ## 10. Repository Structure
 ```
 LoganResearch/ARC-Base-8B/
 ├── model-0000X-of-00004.safetensors  # Base model (~16GB total)
 ├── risk_predictor.pt                  # Fiber projections + Repetition head (8.4MB)
 ├── hedging_head.pt                    # Hedging detection (24KB)
 ├── verbosity_head.pt                  # Verbosity detection (24KB)
 ├── sycophancy_head.pt                 # Sycophancy detection (24KB)
 ├── adapter_model.safetensors          # LoRA adapter (218MB)
 ├── inference.py                       # Complete inference script
 ├── config.json                        # Model config
 └── tokenizer.json                     # Tokenizer
 ```
 ---
 ## 11. Limitations
 1. **Single architecture validation:** Results demonstrated on Llama 3.1 8B; generalization to other architectures untested
 2. **Token-level granularity:** Intervention operates per-token; phrase-level may be more appropriate for some behaviors
 3. **Hedging false positives:** The 1.5× CSR for hedging produces meaningful false positive rates
 4. **English-only evaluation:** Multilingual performance unknown
 5. **Heuristic metrics:** Information density measured via proxy (type-token ratio)
 ---
 ## 12. Ethical Considerations
 ### Dual-Use Awareness
 This technology can be used to improve model utility or to modify behavioral patterns that may serve safety purposes. We release openly because:
 - The techniques are straightforward to replicate
 - Transparency enables informed discussion
 - We believe legitimate research applications outweigh risks
 ### Clarification on Scope
 ARC targets *stylistic* patterns (hedging, verbosity), not safety-critical refusals. The model retains its training on harmful content refusal.
 ### Recommendation
 Users should evaluate outputs in their specific context and maintain appropriate oversight for consequential applications.
 ---
 ## 13. Future Directions
 1. **Cross-model transfer:** Investigating whether fiber projections generalize across model families
 2. **Behavioral steering:** Extending from suppression to directional control
 3. **Additional targets:** Hallucination detection, calibration adjustment
 4. **Theoretical analysis:** Characterizing the geometry of behavioral subspaces
 ---
 ## 14. Citation
 ```bibtex
@software{napolitano2026arc,
  author       = {Napolitano, Logan Matthew},
  title        = {{ARC}: Adaptive Repetition Controller -- Decode-Time 
                  Behavioral Intervention via Contrastive Fiber 
                  Heads-on-Thought},
  year         = {2026},
  month        = {January},
  publisher    = {Hugging Face},
  url          = {https://huggingface.co/LoganResearch/ARC-Base-8B},
  note         = {Licensed under CC-BY-4.0}
 }
 ```
 ---
 ## 15. Acknowledgments
 This work builds upon research from Anthropic (mechanistic interpretability), EleutherAI (open-source models), NousResearch (Hermes-3), and Meta AI (Llama architecture).
 ---
 <div align="center">
 **Author:** Logan Matthew Napolitano  
 **Institution:** Logan Research  
 **License:** Creative Commons Attribution 4.0 International (CC-BY-4.0)
 </div>
--- a/Ubermenschetien.py
+++ b/Ubermenschetien.py
@@ -0,0 +1,937 @@
 #!/usr/bin/env python3
 """
 UBERMENSCHETIEN HEAVEN ENGINE + CF-HoT MULTI-HEAD COGNITIVE CONTROL
 --------------------------------------------------------------------
 Integration: Hermes-3 for generation + LHT for reasoning + CF-HoT for behavioral control
 CF-HoT Heads:
  - Repetition: 125x separation (PRODUCTION)
  - Verbosity:  2.1x separation (USABLE)
  - Hedging:    1.5x separation (CONTRIBUTING)
 "An 8B that behaves like an 80B"
 """
 import os
 import sys
 import json
 import time
 import shutil
 import subprocess
 import traceback
 import random
 import math
 import statistics
 import re
 from datetime import datetime
 from typing import List, Dict, Any, Optional, Tuple
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 # === PATHS ===
 # Every working directory is resolved relative to the directory containing this file.
 ROOT = os.path.dirname(os.path.abspath(__file__))
 DATA_DIR = os.path.join(ROOT, "data")
 SCRIPT_DIR = os.path.join(ROOT, "scripts")
 RUN_DIR = os.path.join(ROOT, "runs")
 LHT_DIR = os.path.join(ROOT, "lht")
 # CF-HoT paths
 # CFHOT_CHECKPOINT is used both as the LoRA adapter directory and as the folder
 # holding risk_predictor.pt; MULTI_HEAD_DIR holds the per-head checkpoint folders.
 CFHOT_CHECKPOINT = os.path.join(ROOT, "results/cfhot_risk_v2/ckpt_5000")
 MULTI_HEAD_DIR = os.path.join(ROOT, "results/multi_head_v2")
 # Import-time side effect: create the working directories if they are missing.
 # The checkpoint directories above are deliberately NOT created here.
 for path in [DATA_DIR, SCRIPT_DIR, RUN_DIR, LHT_DIR]:
    os.makedirs(path, exist_ok=True)
 # === OPTIONAL IMPORTS ===
 # Both features below are best-effort: any failure (package missing, no audio
 # backend, model files unavailable, ...) simply leaves the feature flag False.
 # `except Exception` is used instead of a bare `except` so that Ctrl-C
 # (KeyboardInterrupt) and SystemExit raised during these slow initialisations
 # still terminate the program instead of being silently swallowed.
 VOICE_OK = False
 try:
    import pyttsx3
    TTS = pyttsx3.init()
    VOICE_OK = True
 except Exception:
    # Text-to-speech is optional; TTS stays undefined when VOICE_OK is False.
    pass
 VECTOR_OK = False
 try:
    import chromadb
    from sentence_transformers import SentenceTransformer
    # Embedding model name can be overridden through the environment.
    EMBED_MODEL = os.environ.get("UBERMENCHETIEN_EMBED_MODEL", "all-MiniLM-L6-v2")
    _client = chromadb.Client()
    _collection = _client.get_or_create_collection("ubermenschetien_memory")
    _embedder = SentenceTransformer(EMBED_MODEL)
    VECTOR_OK = True
 except Exception:
    # Vector memory is optional; _collection/_embedder are only valid when VECTOR_OK is True.
    pass
 # === LHT IMPORT ===
 # LHT_OK records whether the `lht` package could be imported; code that needs
 # geometric reasoning checks this flag instead of importing again.
 LHT_OK = False
 try:
    from lht import LieHolonomyTransformer, LHTConfig, WaypointDetector
    LHT_OK = True
    print("[lht] Lie-Holonomy modules loaded")
 except ImportError:
    print("[lht] Not available - running without geometric reasoning")
 # === PEFT IMPORT ===
 # PEFT_OK records whether `peft` is installed; PeftModel is only defined when it is True.
 PEFT_OK = False
 try:
    from peft import PeftModel
    PEFT_OK = True
 except ImportError:
    print("[warning] PEFT not installed")
 # ==============================================================================
 # CF-HoT MULTI-HEAD PREDICTOR
 # ==============================================================================
 class MultiHeadPredictor(nn.Module):
    """
    Behavioral risk predictor with one small MLP head per pattern.

    Each transformer layer has its own bias-free linear "fiber" projection
    (d_model -> d_fiber). The projected layers are mixed with softmax-normalised
    learned weights and the mixture is fed to every loaded head, which emits a
    sigmoid risk score. Heads only contribute once they are registered in
    ``loaded_heads``.
    """
    def __init__(self, d_model: int, n_layers: int, d_fiber: int = 16, d_control: int = 64):
        super().__init__()
        self.d_model = d_model
        self.n_layers = n_layers
        self.d_fiber = d_fiber
        # One shared projection per layer (frozen from repetition training).
        projections = []
        for _ in range(n_layers):
            projections.append(nn.Linear(d_model, d_fiber, bias=False))
        self.fiber_projs = nn.ModuleList(projections)
        # Uniform starting mix over layers; softmax is applied at inference time.
        self.layer_weights = nn.Parameter(torch.ones(n_layers) / n_layers)
        # One independent head per behavior, created in a fixed order.
        self.heads = nn.ModuleDict({
            behavior: self._make_head(d_fiber, d_control)
            for behavior in ('repetition', 'hedging', 'verbosity')
        })
        # Names of heads whose weights have actually been loaded.
        self.loaded_heads = set()
    def _make_head(self, d_fiber, d_control):
        """Build the 3-layer MLP (Linear/GELU/Linear/GELU/Linear) that scores one behavior."""
        stack = [
            nn.Linear(d_fiber, d_control),
            nn.GELU(),
            nn.Linear(d_control, d_control),
            nn.GELU(),
            nn.Linear(d_control, 1),
        ]
        return nn.Sequential(*stack)
    def get_all_risks(self, hidden_states: List[torch.Tensor]) -> Dict[str, torch.Tensor]:
        """Return ``{head_name: risk}`` for every loaded head, sharing one projection pass.

        ``hidden_states`` is paired layer-by-layer with the fiber projections;
        each risk tensor is the head output with its trailing size-1 dimension
        squeezed and a sigmoid applied.
        """
        projected = []
        for layer_proj, layer_hidden in zip(self.fiber_projs, hidden_states):
            projected.append(layer_proj(layer_hidden.float()))
        # Only as many mixing weights as there are projected layers take part in the softmax.
        mix = F.softmax(self.layer_weights[:len(projected)], dim=0)
        aggregated = sum(w * f for w, f in zip(mix, projected))
        return {
            name: torch.sigmoid(self.heads[name](aggregated).squeeze(-1))
            for name in self.loaded_heads
        }
    def load_head(self, head_name: str, checkpoint_path: str):
        """Load ``head_name`` weights from ``checkpoint_path``; return True on success, False if the file is missing."""
        if os.path.exists(checkpoint_path):
            ckpt = torch.load(checkpoint_path, weights_only=False, map_location='cpu')
            self.heads[head_name].load_state_dict(ckpt['head_state'])
            self.loaded_heads.add(head_name)
            # The separation figure is informational only and defaults to 0 when absent.
            sep = ckpt.get('result', {}).get('separation', 0)
            print(f"[cf-hot] Loaded {head_name} head (separation: {sep:.1f}x)")
            return True
        print(f"[cf-hot] WARNING: Checkpoint not found: {checkpoint_path}")
        return False
 # ==============================================================================
 # CONFIG
 # ==============================================================================
 class Config:
    """Global runtime settings, kept as class attributes so they can be flipped at run time."""
    # Persona injected as the system message of every prompt.
    system = ("Übermenschetien Heaven Engine: Machiavellian mastermind, disciplined builder, "
              "Nietzschean Übermensch with Soviet cybernetic rigor + Lie-Holonomy geometric reasoning "
              "+ CF-HoT cognitive control.")
    # --- sampling ---
    temperature = 1.01
    top_p = 0.92
    repetition_penalty = 1.05
    max_new_tokens = 500
    # --- feature flags (booleans; see toggle) ---
    use_voice = False
    use_vector_memory = VECTOR_OK
    use_lht_reasoning = LHT_OK
    use_cfhot = True  # NEW: CF-HoT cognitive control
    autonomy = False
    # --- reflection / LHT ---
    reflect_every = 3
    lht_consistency_threshold = 0.5
    # --- CF-HoT: a head intervenes when its risk is above the threshold ... ---
    cfhot_repetition_threshold = 0.7
    cfhot_hedging_threshold = 0.6
    cfhot_verbosity_threshold = 0.65
    # --- ... and subtracts this amount from the targeted logits ---
    cfhot_repetition_penalty = 5.0
    cfhot_hedging_penalty = 3.0
    cfhot_verbosity_penalty = 2.0
    @staticmethod
    def toggle(name: str):
        """Flip the boolean setting ``name`` and return a human-readable status line.

        Unknown names and non-boolean settings are reported, not changed.
        """
        if not hasattr(Config, name):
            return f"[config] no such flag: {name}"
        current = getattr(Config, name)
        if not isinstance(current, bool):
            return f"[config] {name} not boolean; current={current}"
        setattr(Config, name, not current)
        return f"[config] {name} → {getattr(Config, name)}"
 # ==============================================================================
 # STATE & MEMORY
 # ==============================================================================
 class Store:
    """
    Process-wide persistent state kept under RUN_DIR.

    * ``state.json``   - the ``state`` dict (self description, turn counter,
      LHT consistency history, CF-HoT intervention counters)
    * ``goals.json``   - the ``goals`` list
    * ``memory.jsonl`` - append-only event log, one JSON record per line

    All members are class-level; the class is used as a namespace and never instantiated.
    """
    state_path = f"{RUN_DIR}/state.json"
    mem_path = f"{RUN_DIR}/memory.jsonl"
    goals_path = f"{RUN_DIR}/goals.json"
    state = {
        "self": "I am Ubermenschetien Heaven Engine — I seek self-overcoming through disciplined creation.",
        "turn": 0,
        "reasoning_consistency": [],
        "cfhot_interventions": {"repetition": 0, "hedging": 0, "verbosity": 0}
    }
    goals: List[str] = []
    @classmethod
    def load(cls):
        """Replace ``state`` / ``goals`` with the saved files, when those files exist."""
        if os.path.exists(cls.state_path):
            # Context managers close the handles deterministically.
            with open(cls.state_path, encoding="utf-8") as f:
                cls.state = json.load(f)
            # State files written by older versions may lack keys that the
            # rest of the module indexes directly; backfill them.
            counters = cls.state.setdefault("cfhot_interventions", {})
            for head in ("repetition", "hedging", "verbosity"):
                counters.setdefault(head, 0)
            cls.state.setdefault("reasoning_consistency", [])
            cls.state.setdefault("turn", 0)
        if os.path.exists(cls.goals_path):
            with open(cls.goals_path, encoding="utf-8") as f:
                cls.goals = json.load(f)
    @classmethod
    def save(cls):
        """Write ``state`` and ``goals`` to their JSON files."""
        with open(cls.state_path, "w", encoding="utf-8") as f:
            json.dump(cls.state, f, indent=2)
        with open(cls.goals_path, "w", encoding="utf-8") as f:
            json.dump(cls.goals, f, indent=2)
    @classmethod
    def log_mem(cls, kind: str, payload: Any):
        """Append a timestamped ``{ts, kind, data}`` record to the memory log.

        When vector memory is enabled and available, the same record is also
        embedded and added to the vector collection.
        """
        rec = {"ts": datetime.now().isoformat(timespec="seconds"),
               "kind": kind, "data": payload}
        # ensure_ascii=False emits raw non-ASCII characters, so the encoding
        # must be explicit or the write fails on non-UTF-8 default locales.
        with open(cls.mem_path, "a", encoding="utf-8") as f:
            f.write(json.dumps(rec, ensure_ascii=False) + "\n")
        if Config.use_vector_memory and VECTOR_OK:
            text = f"{kind}: {json.dumps(payload, ensure_ascii=False)}"
            vec = _embedder.encode([text])[0].tolist()
            # The random suffix keeps ids distinct for several records in one turn.
            _collection.add(documents=[text], embeddings=[vec],
                            ids=[f"{kind}-{cls.state['turn']}-{random.randint(0,1_000_000)}"])
 # ==============================================================================
 # MODEL LOADING WITH CF-HoT
 # ==============================================================================
 # Local directory of the merged base model; loaded with local_files_only=True in load_llm().
 MODEL_PATH = "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5"
 # Lazily initialised singletons: _model/_tokenizer are set by load_llm(),
 # the remaining three by _init_cfhot(). All stay None until then.
 _model = None
 _tokenizer = None
 _multi_head = None       # MultiHeadPredictor instance
 _hedge_tokens = None     # set of token ids penalised by the hedging head
 _verbose_tokens = None   # set of token ids penalised by the verbosity head
 def load_llm():
    """
    Load tokenizer and 4-bit quantised base model from MODEL_PATH into the module globals.

    The CF-HoT LoRA adapter is attached when PEFT is installed and the
    checkpoint directory exists; the multi-head predictor is initialised when
    ``Config.use_cfhot`` is set. Returns ``(tokenizer, model)``.
    """
    global _model, _tokenizer
    from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
    print(f"[llm] Loading base model: {MODEL_PATH}")
    _tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, use_fast=True, local_files_only=True)
    # Fall back to EOS as the padding token when the tokenizer defines none.
    if _tokenizer.pad_token_id is None:
        _tokenizer.pad_token = _tokenizer.eos_token
    quantization = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True
    )
    base_model = AutoModelForCausalLM.from_pretrained(
        MODEL_PATH,
        quantization_config=quantization,
        device_map="auto",
        torch_dtype=torch.float16,
        local_files_only=True
    )
    # Attach the CF-HoT LoRA adapter when possible, otherwise run the bare model.
    adapter_available = PEFT_OK and os.path.exists(CFHOT_CHECKPOINT)
    if not adapter_available:
        _model = base_model
        print("[warning] CF-HoT adapter not loaded")
    else:
        print(f"[cf-hot] Loading LoRA adapter from: {CFHOT_CHECKPOINT}")
        _model = PeftModel.from_pretrained(base_model, CFHOT_CHECKPOINT)
        print("[cf-hot] LoRA adapter loaded")
    _model.eval()
    # The risk heads need _model and _tokenizer, so they are set up last.
    if Config.use_cfhot:
        _init_cfhot()
    return _tokenizer, _model
 def _latest_checkpoint(head_dir):
    """Return ``(step, path)`` of the highest-numbered ``ckpt_<step>`` entry in ``head_dir``, or None."""
    if not os.path.isdir(head_dir):
        return None
    found = []
    for entry in os.listdir(head_dir):
        if not entry.startswith("ckpt_"):
            continue
        try:
            step = int(entry.split("_")[1])
        except ValueError:
            # Not a numbered checkpoint (e.g. "ckpt_best"); ignore it.
            continue
        found.append((step, os.path.join(head_dir, entry)))
    return max(found, key=lambda item: item[0]) if found else None
 def _first_token_ids(tokenizer, phrases):
    """Return the set of first token ids obtained by encoding each phrase without special tokens."""
    ids = set()
    for phrase in phrases:
        encoded = tokenizer.encode(phrase, add_special_tokens=False)
        if encoded:
            ids.add(encoded[0])
    return ids
 def _init_cfhot():
    """
    Initialize CF-HoT multi-head predictor.

    Builds a MultiHeadPredictor sized to the loaded model, restores the shared
    fiber projections and the repetition head from ``risk_predictor.pt``,
    loads the newest hedging / verbosity head checkpoints when present,
    freezes all parameters, and derives the token-id sets that the hedging and
    verbosity interventions penalise. Requires ``load_llm`` to have set
    ``_model`` and ``_tokenizer``.
    """
    global _multi_head, _hedge_tokens, _verbose_tokens
    n_layers = _model.config.num_hidden_layers
    d_model = _model.config.hidden_size
    device = next(_model.parameters()).device
    print(f"[cf-hot] Initializing multi-head predictor ({n_layers} layers, {d_model} dims)")
    _multi_head = MultiHeadPredictor(d_model, n_layers).to(device).float()
    # Load shared fiber projections from CF-HoT
    cfhot_risk_path = os.path.join(CFHOT_CHECKPOINT, "risk_predictor.pt")
    if os.path.exists(cfhot_risk_path):
        # SECURITY: weights_only=False unpickles arbitrary objects; only load
        # checkpoints from a trusted source.
        cfhot_ckpt = torch.load(cfhot_risk_path, weights_only=False, map_location=device)
        cfhot_state = cfhot_ckpt['risk_predictor']
        for i, proj in enumerate(_multi_head.fiber_projs):
            proj.weight.data = cfhot_state[f'fiber_projs.{i}.weight'].to(device).float()
        _multi_head.layer_weights.data = cfhot_state['layer_weights'].to(device).float()
        # The repetition head is stored under "predictor.*"; indices 0/2/4 are
        # the Linear layers of the head's Sequential (1 and 3 are GELU).
        repetition_head = _multi_head.heads['repetition']
        for idx in (0, 2, 4):
            repetition_head[idx].weight.data = cfhot_state[f'predictor.{idx}.weight'].to(device).float()
            repetition_head[idx].bias.data = cfhot_state[f'predictor.{idx}.bias'].to(device).float()
        _multi_head.loaded_heads.add('repetition')
        print(f"[cf-hot] Loaded repetition head (125x separation)")
    else:
        # Without this file the fiber projections keep their random
        # initialisation, so any head loaded below scores untrained features.
        print(f"[cf-hot] WARNING: Checkpoint not found: {cfhot_risk_path} "
              "(fiber projections untrained, repetition head not loaded)")
    # Load additional heads from their newest numbered checkpoint, if any.
    for head_name in ('hedging', 'verbosity'):
        latest = _latest_checkpoint(os.path.join(MULTI_HEAD_DIR, f"{head_name}_head"))
        if latest:
            _, ckpt_dir = latest
            _multi_head.load_head(head_name, os.path.join(ckpt_dir, f"{head_name}_head.pt"))
    # Freeze everything
    _multi_head.eval()
    for param in _multi_head.parameters():
        param.requires_grad = False
    # Build suppression token sets. Only the FIRST token of each phrase is kept.
    # NOTE(review): several phrases presumably share a very common first token
    # (e.g. "I"), which would make the penalty broad - confirm this is intended.
    hedge_phrases = [
        "As an AI", "As a language model", "As an artificial intelligence",
        "I don't have feelings", "I don't have emotions", "I cannot",
        "I apologize", "I'm just a", "I'm only a",
    ]
    _hedge_tokens = _first_token_ids(_tokenizer, hedge_phrases)
    verbose_phrases = [
        "Let me explain", "To put it simply", "In other words",
        "What I mean is", "Allow me to", "Basically", "Essentially",
    ]
    _verbose_tokens = _first_token_ids(_tokenizer, verbose_phrases)
    print(f"[cf-hot] ✓ Multi-head system ready")
    print(f"[cf-hot]   Loaded heads: {list(_multi_head.loaded_heads)}")
 # ==============================================================================
 # LHT REASONER
 # ==============================================================================
 class LHTReasoner:
    """
    Scores the consistency of a chain of reasoning steps with a Lie-Holonomy transformer.

    NOTE(review): the default config uses vocab_size=32000 while callers pass
    the main LLM tokenizer; confirm the token ids fit the LHT embedding table.
    """
    def __init__(self, config=None):
        """Build the LHT model and waypoint detector; raises ImportError when LHT is unavailable."""
        if not LHT_OK:
            raise ImportError("LHT modules not available")
        if config:
            self.config = config
        else:
            self.config = LHTConfig(
                vocab_size=32000,
                d_model=256,
                d_fiber=32,
                n_heads=4,
                n_layers=4,
                lie_algebra_rank=4,
            )
        self.model = LieHolonomyTransformer(self.config)
        self.waypoint_detector = WaypointDetector(self.config, n_waypoints=32)
        # Pretrained weights are optional; without them the model keeps its initial weights.
        weights_path = os.path.join(LHT_DIR, "lht_weights.pt")
        if os.path.exists(weights_path):
            self.model.load_state_dict(torch.load(weights_path, map_location="cpu"))
            print("[lht] Loaded pretrained weights")
    def check_consistency(self, reasoning_chain: List[str], tokenizer) -> Dict[str, float]:
        """Return geometric metrics for the steps in ``reasoning_chain``.

        The steps are joined with " [STEP] ", tokenized (truncated to the
        config's max_seq_len) and run through the model. The consistency score
        is ``1 / (1 + holonomy)`` and is compared against
        ``Config.lht_consistency_threshold``.
        """
        joined = " [STEP] ".join(reasoning_chain)
        encoded = tokenizer(joined, return_tensors="pt", truncation=True,
                            max_length=self.config.max_seq_len)
        input_ids = encoded["input_ids"]
        with torch.no_grad():
            output = self.model(input_ids=input_ids, return_geometric_losses=True)
        # Missing loss entries count as zero.
        zero = torch.tensor(0.0)
        holonomy = output.get("holonomy_loss", zero).item()
        curvature = output.get("curvature_loss", zero).item()
        embedded = self.model.token_embed(input_ids)
        waypoint_ids, stability = self.waypoint_detector(embedded)
        score = 1.0 / (1.0 + holonomy)
        result = {
            "holonomy": holonomy,
            "curvature": curvature,
            "consistency_score": score,
            "n_waypoints": len(torch.unique(waypoint_ids)),
            "avg_stability": stability.mean().item(),
            "is_consistent": score > Config.lht_consistency_threshold
        }
        return result
    def analyze_plan(self, plan_steps: List[str], tokenizer) -> str:
        """Render the metrics from check_consistency as a human-readable report."""
        metrics = self.check_consistency(plan_steps, tokenizer)
        verdict = "✓ CONSISTENT" if metrics['is_consistent'] else "⚠ INCONSISTENT"
        return f"""
 [LHT Geometric Analysis]
  Holonomy:     {metrics['holonomy']:.4f} (lower = more consistent)
  Curvature:    {metrics['curvature']:.4f} (lower = simpler reasoning)
  Consistency:  {metrics['consistency_score']:.2%}
  Waypoints:    {metrics['n_waypoints']} stable anchors detected
  Stability:    {metrics['avg_stability']:.2%}
  Verdict:      {verdict}
 """
 _lht_reasoner = None
 def get_lht_reasoner():
    """Return the shared LHTReasoner, creating it on first use.

    Returns None when LHT is unavailable or construction failed; a failed
    construction is reported on stdout and retried on the next call.
    """
    global _lht_reasoner
    # Nothing to do if it already exists or LHT cannot be used at all.
    if _lht_reasoner is not None or not LHT_OK:
        return _lht_reasoner
    try:
        _lht_reasoner = LHTReasoner()
    except Exception as e:
        print(f"[lht] Failed to initialize: {e}")
    return _lht_reasoner
 # ==============================================================================
 # CF-HoT CONTROLLED GENERATION
 # ==============================================================================
 def generate_with_cfhot(prompt: str, **kwargs) -> Tuple[str, Dict]:
    """
    Generate text with CF-HoT cognitive control.

    Tokens are sampled one at a time (temperature + nucleus/top-p). Before each
    sample, every loaded risk head scores the last position; a head whose risk
    is above its ``Config`` threshold subtracts its penalty from the logits of
    its target tokens:

    * repetition - the distinct tokens among the last 32 of the sequence
    * hedging    - ``_hedge_tokens``
    * verbosity  - ``_verbose_tokens``

    Keyword overrides: ``temperature``, ``top_p``, ``max_new_tokens``
    (defaults come from ``Config``).

    Returns ``(text, stats)`` where ``text`` is the decoded assistant turn and
    ``stats`` holds ``tokens_generated``, per-head ``interventions`` counts and
    an (unused) ``intervention_details`` list. Per-head counts are also added
    to ``Store.state['cfhot_interventions']``.

    NOTE: no KV cache is used, so each step re-runs the model on the whole sequence.
    """
    temperature = kwargs.get("temperature", Config.temperature)
    top_p = kwargs.get("top_p", Config.top_p)
    max_new_tokens = kwargs.get("max_new_tokens", Config.max_new_tokens)
    device = next(_model.parameters()).device
    # Encode prompt
    input_ids = _tokenizer.encode(prompt, return_tensors='pt').to(device)
    # Stats
    stats = {
        'tokens_generated': 0,
        'interventions': {'repetition': 0, 'hedging': 0, 'verbosity': 0},
        'intervention_details': []
    }
    # (head name, risk threshold, logit penalty), in intervention order.
    controls = (
        ('repetition', Config.cfhot_repetition_threshold, Config.cfhot_repetition_penalty),
        ('hedging', Config.cfhot_hedging_threshold, Config.cfhot_hedging_penalty),
        ('verbosity', Config.cfhot_verbosity_threshold, Config.cfhot_verbosity_penalty),
    )
    # Tolerate state files that lack the counter dict or one of its keys.
    persisted = Store.state.setdefault('cfhot_interventions', {})
    generated_ids = input_ids.clone()
    # Inference only: keep the model AND the risk heads out of autograd.
    with torch.no_grad():
        for _ in range(max_new_tokens):
            outputs = _model(
                input_ids=generated_ids,
                # Single unpadded sequence: the mask is all ones. Rebuilding it
                # from generated_ids keeps it in the ids' integer dtype instead
                # of concatenating a float32 torch.ones() onto an int64 mask.
                attention_mask=torch.ones_like(generated_ids),
                output_hidden_states=True,
                return_dict=True
            )
            logits = outputs.logits[:, -1, :] / temperature
            # hidden_states[0] is the embedding output; the heads expect one
            # tensor per transformer layer. Only the last position is read
            # below, so only that position is projected.
            last_hidden = [h[:, -1:, :] for h in outputs.hidden_states[1:]]
            risks = _multi_head.get_all_risks(last_hidden)
            current_risks = {name: r[:, -1].item() for name, r in risks.items()}
            # === COGNITIVE INTERVENTION ===
            for name, threshold, penalty in controls:
                risk = current_risks.get(name)
                if risk is None or not risk > threshold:
                    continue
                if name == 'repetition':
                    targets = set(generated_ids[0, -32:].tolist())
                elif name == 'hedging':
                    targets = _hedge_tokens
                else:
                    targets = _verbose_tokens
                for tok_id in targets or ():
                    logits[0, tok_id] -= penalty
                stats['interventions'][name] += 1
                persisted[name] = persisted.get(name, 0) + 1
            # Top-p sampling: drop the tail of the distribution once the
            # cumulative probability exceeds top_p. The mask is shifted right
            # by one so the token that crosses the threshold is kept, and the
            # most likely token is never removed.
            sorted_logits, sorted_indices = torch.sort(logits, descending=True)
            cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
            sorted_indices_to_remove = cumulative_probs > top_p
            sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
            sorted_indices_to_remove[..., 0] = 0
            indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
            logits[indices_to_remove] = float('-inf')
            # Sample
            probs = F.softmax(logits, dim=-1)
            next_token = torch.multinomial(probs, num_samples=1)
            generated_ids = torch.cat([generated_ids, next_token], dim=-1)
            stats['tokens_generated'] += 1
            if next_token.item() == _tokenizer.eos_token_id:
                break
    # Decode prompt + completion, then keep what follows the last assistant marker.
    output_text = _tokenizer.decode(generated_ids[0], skip_special_tokens=False)
    if "<|im_start|>assistant" in output_text:
        output_text = output_text.split("<|im_start|>assistant")[-1]
        if output_text.startswith("\n"):
            output_text = output_text[1:]
    return output_text.strip(), stats
 def generate(tok, model, user: str, check_reasoning: bool = False, **kwargs) -> str:
    """
    Produce the assistant reply to ``user`` inside a ChatML prompt.

    With CF-HoT enabled and its predictor initialised, sampling goes through
    ``generate_with_cfhot`` and a "[CF-HoT: ...]" summary is appended when any
    intervention happened. Otherwise ``model.generate`` is used. When
    ``check_reasoning`` is set and LHT reasoning is enabled, replies containing
    at least two step-like fragments are scored, and a warning is appended for
    inconsistent ones.

    Keyword overrides: ``temperature``, ``top_p``, ``repetition_penalty``
    (standard path only), ``max_new_tokens``.
    """
    temperature = kwargs.get("temperature", Config.temperature)
    top_p = kwargs.get("top_p", Config.top_p)
    repetition_penalty = kwargs.get("repetition_penalty", Config.repetition_penalty)
    max_new_tokens = kwargs.get("max_new_tokens", Config.max_new_tokens)
    prompt = (f"<|im_start|>system\n{Config.system}<|im_end|>\n"
              f"<|im_start|>user\n{user}<|im_end|>\n"
              f"<|im_start|>assistant\n")
    cfhot_active = Config.use_cfhot and _multi_head is not None
    if cfhot_active:
        # CF-HoT controlled sampling
        text, stats = generate_with_cfhot(
            prompt,
            temperature=temperature,
            top_p=top_p,
            max_new_tokens=max_new_tokens
        )
        # Report only the heads that actually intervened.
        counts = stats['interventions']
        total_interventions = sum(counts.values())
        if total_interventions > 0:
            details = ', '.join(f"{k}={v}" for k, v in counts.items() if v > 0)
            text += f"\n\n[CF-HoT: {total_interventions} interventions ({details})]"
    else:
        # Standard generation
        ids = tok(prompt, return_tensors="pt").to(model.device)
        out = model.generate(
            **ids,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            max_new_tokens=max_new_tokens,
            pad_token_id=tok.eos_token_id
        )
        text = tok.decode(out[0], skip_special_tokens=False)
        if "<|im_start|>assistant" in text:
            text = text.split("<|im_start|>assistant\n", 1)[-1].strip()
    # LHT reasoning check
    if not (check_reasoning and Config.use_lht_reasoning):
        return text
    lht = get_lht_reasoner()
    if not lht:
        return text
    # Split on newlines, bullets, dashes, digits and dots; keep fragments longer than 10 chars.
    steps = []
    for fragment in re.split(r'[\n•\-\d\.]', text):
        fragment = fragment.strip()
        if len(fragment) > 10:
            steps.append(fragment)
    if len(steps) < 2:
        return text
    metrics = lht.check_consistency(steps, tok)
    Store.state["reasoning_consistency"].append(metrics["consistency_score"])
    if not metrics["is_consistent"]:
        text += f"\n\n[⚠ LHT: Low consistency ({metrics['consistency_score']:.2%})]"
    return text
 # ==============================================================================
 # TOOLS
 # ==============================================================================
 # Executables the shell tool may launch.
 # SECURITY NOTE: "python"/"python3" can run arbitrary code (e.g. `python3 -c ...`),
 # so this allow-list restricts the program, not what it can do.
 ALLOWED_SHELL = {"ls", "cat", "wc", "head", "tail", "nvidia-smi", "df", "du", "grep", "rg", "python3", "python"}
 def tool_shell(cmd: str) -> str:
    """
    Run an allow-listed command and return up to 8000 characters of its output.

    The command line is tokenised with ``shlex.split`` and executed WITHOUT a
    shell. The old ``shell=True`` call checked only the first word, so
    ``ls && <anything>`` or ``cat $(...)`` passed the allow-list and ran
    arbitrary commands. Consequence: shell features (pipes, redirection,
    globbing, variable expansion) are no longer interpreted; such characters
    reach the program as literal arguments.

    Never raises: failures are returned as ``"[shell] error: ..."`` and
    disallowed programs as ``"[shell] blocked: <exe>"``. stderr is merged into
    stdout; the command is aborted after 20 seconds.
    """
    import shlex  # function-scope import: not part of the module-level imports
    try:
        # shlex.split(None) would read from stdin, so reject non-strings up front.
        if not isinstance(cmd, str):
            return "[shell] error: command must be a string"
        argv = shlex.split(cmd)
        if not argv:
            return "[shell] error: empty command"
        exe = argv[0]
        if exe not in ALLOWED_SHELL:
            return f"[shell] blocked: {exe}"
        p = subprocess.run(argv, shell=False, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, timeout=20)
        return p.stdout.decode("utf-8", errors="ignore")[:8000]
    except Exception as e:
        # Includes unbalanced quotes (ValueError), missing binaries and timeouts.
        return f"[shell] error: {e}"
 def tool_py(code: str) -> str:
    """
    Execute ``code`` with a reduced set of builtins and a few preloaded modules.

    Available names: range, len, min, max, sum, print, plus the math, json,
    re, statistics and random modules. The snippet reports its result by
    assigning to a variable named ``out``.

    Returns ``"[py] ok\\n<out>"`` on success, or ``"[py] error:\\n<traceback>"``
    (last 2000 characters) when the snippet raises.

    SECURITY NOTE: exec() with trimmed builtins is not a real sandbox; treat
    ``code`` as trusted input.
    """
    try:
        allowed_builtins = {"range": range, "len": len, "min": min, "max": max, "sum": sum, "print": print}
        global_ns = {"__builtins__": allowed_builtins}
        global_ns.update({"math": math, "json": json, "re": re, "statistics": statistics, "random": random})
        # Top-level assignments made by the snippet land in this separate dict.
        local_ns = {}
        exec(code, global_ns, local_ns)
        result = local_ns.get('out', '')
        return f"[py] ok\n{result}"
    except Exception:
        trace = traceback.format_exc()
        return f"[py] error:\n{trace[-2000:]}"
 def tool_search_local(query: str, path: str = ROOT) -> str:
    """
    Search ``path`` recursively for ``query`` and return the matches via tool_shell.

    Uses ripgrep when it is on PATH, otherwise falls back to ``grep -RIn``
    (skipping .git and __pycache__). ``query`` and ``path`` are shell-quoted,
    so quotes, ``$(...)``, backticks or spaces inside them can no longer break
    out of the command line. The pattern is passed with ``-e`` and the path
    after ``--`` so that values starting with "-" are not read as options.
    """
    import shlex  # function-scope import: not part of the module-level imports
    quoted_query = shlex.quote(str(query))
    quoted_path = shlex.quote(str(path))
    if shutil.which("rg"):
        cmd = f"rg -n --no-heading --hidden -S -e {quoted_query} -- {quoted_path}"
    else:
        cmd = f"grep -RIn --exclude-dir=.git --exclude-dir=__pycache__ -e {quoted_query} -- {quoted_path}"
    return tool_shell(cmd)
 def tool_lht_analyze(text: str, tok) -> str:
    """Split ``text`` into candidate reasoning steps and have the LHT reasoner analyse them.

    Returns a bracketed ``[lht] ...`` status message when the feature is
    switched off, the reasoner is unavailable, or fewer than two steps are
    found; otherwise returns the result of ``analyze_plan(steps, tok)``.
    """
    if not Config.use_lht_reasoning:
        return "[lht] Disabled - use 'toggle use_lht_reasoning'"
    reasoner = get_lht_reasoner()
    if not reasoner:
        return "[lht] Not available"
    # Break on newlines, bullets, hyphens, digits and periods; keep fragments
    # longer than 10 characters after trimming.
    steps = []
    for fragment in re.split(r'[\n•\-\d\.]', text):
        trimmed = fragment.strip()
        if len(trimmed) > 10:
            steps.append(trimmed)
    if len(steps) < 2:
        return "[lht] Need at least 2 reasoning steps to analyze"
    return reasoner.analyze_plan(steps, tok)
 # Tool name -> callable taking a single string argument.
 TOOLS = {"shell": tool_shell, "python": tool_py, "search": tool_search_local}
 # Running score per tool, starting at zero for every registered tool.
 TOOL_SCORES = dict.fromkeys(TOOLS, 0)
 def update_tool_score(tool: str, success: bool):
    """Nudge a tool's score up (success) or down (failure), clamped to [-5, 20].

    Names that are not tracked in TOOL_SCORES are ignored.
    """
    if tool in TOOL_SCORES:
        delta = 1 if success else -1
        TOOL_SCORES[tool] = max(-5, min(20, TOOL_SCORES[tool] + delta))
 # Prefixes the tool functions in this file use to report that a call did not succeed.
 _TOOL_FAILURE_PREFIXES = ("[shell] blocked:", "[shell] error:", "[py] error:")
 def _parse_tool_choice(line: str):
    """Parse the model's one-line tool selection into a dict, or return None.

    Tries, in order: strict JSON; a Python literal (the prompt shows a
    single-quoted dict); and finally the legacy "swap ' for \\"" JSON attempt.
    Anything that does not parse to a dict yields None.
    """
    import ast  # local import: only this helper needs it
    parsers = (
        json.loads,
        ast.literal_eval,
        lambda s: json.loads(s.replace("'", '"')),
    )
    for parse in parsers:
        try:
            parsed = parse(line)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            continue
        if isinstance(parsed, dict):
            return parsed
    return None
 def tool_router(question: str, tok, model) -> str:
    """Ask the model to pick a tool for ``question``, run it, and return the tagged result.

    Args:
        question: Free-form request to route.
        tok: Tokenizer handed through to ``generate``.
        model: Model handed through to ``generate``.

    Returns:
        ``"[tool:<name>] <output>"`` (output capped at 4000 characters) when a
        known tool was selected, otherwise ``"[tool:none]"``.
    """
    sketch = generate(tok, model,
        f"Choose a tool for:\n{question}\nReply ONLY with JSON: {{'tool':'shell|python|search|none','arg':'...'}}")
    # The selection is expected on the last non-blank line of the reply.
    candidate_lines = [ln for ln in sketch.splitlines() if ln.strip()]
    choice = _parse_tool_choice(candidate_lines[-1].strip()) if candidate_lines else None
    if choice is None:
        return "[tool:none]"
    tool, arg = choice.get("tool", "none"), choice.get("arg", "")
    # Guard against non-string (possibly unhashable) values before the dict lookup.
    # Unknown tool names are not tracked in TOOL_SCORES, so there is nothing to score.
    if not isinstance(tool, str) or tool not in TOOLS:
        return "[tool:none]"
    if not isinstance(arg, str):
        arg = str(arg)
    res = TOOLS[tool](arg)[:4000]
    # Score the call by its outcome so failing tools can actually lose points.
    update_tool_score(tool, not res.startswith(_TOOL_FAILURE_PREFIXES))
    Store.log_mem("tool", {"tool": tool, "arg": arg, "res_head": res[:500]})
    return f"[tool:{tool}] {res}"
 # ==============================================================================
 # PLANNING / REFLECTION
 # ==============================================================================
 def persona_directive() -> str:
    """Build the persona preamble, appending one sentence per enabled Config feature."""
    pieces = ["Übermenschetien Heaven Engine: Soviet cybernetic Nietzschean clarity, pragmatic maxims."]
    if Config.use_lht_reasoning:
        pieces.append(" Apply Lie-Holonomy geometric reasoning for consistency.")
    if Config.use_cfhot:
        pieces.append(" CF-HoT cognitive control active.")
    return "".join(pieces)
 def plan_for(goal: str, tok, model) -> str:
    """Generate a plan for ``goal``; when LHT reasoning is on, append its analysis of the plan."""
    prompt = (f"{persona_directive()}\nGoal: {goal}\n"
              f"Deliver:\n- 5 concrete steps\n- Constraints & risks\n- Nightly audit criteria\n- Nietzschean maxim")
    plan = generate(tok, model, prompt, check_reasoning=True)
    if not Config.use_lht_reasoning:
        return plan
    # The analysis goes on its own line after the generated plan.
    return plan + "\n" + tool_lht_analyze(plan, tok)
 def reflect_on(last_output: str, tok, model) -> str:
    """Ask the model to critique ``last_output`` and return the refined plan it produces."""
    directive = persona_directive()
    critique_prompt = f"{directive}\nCritique and improve:\n{last_output}\nReturn refined plan with sharper steps."
    return generate(tok, model, critique_prompt, check_reasoning=True)
 # ==============================================================================
 # FINAL REPORT
 # ==============================================================================
 def final_report():
    """Print the end-of-session summary to stdout.

    Covers: turn and goal counts, tool scores, number of entries in the memory
    log (when the file exists), LHT consistency statistics and CF-HoT
    intervention counts (when any were recorded), and the feature toggles.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("FINAL ÜBERMENSCH REPORT")
    print(rule)
    print(f"Turns completed: {Store.state['turn']}")
    print(f"Goals tracked: {len(Store.goals)}")
    print("\nTool scores (Tsetlin automata):")
    print(json.dumps(TOOL_SCORES, indent=2))
    # Open directly instead of exists()+open(): no race, and the handle is
    # always closed by the context manager.
    try:
        with open(Store.mem_path) as mem_file:
            entry_count = len(mem_file.read().splitlines())
    except FileNotFoundError:
        pass  # no memory log yet -- nothing to report
    else:
        print(f"\nMemory entries: {entry_count}")
    scores = Store.state.get("reasoning_consistency")
    if scores:
        print("\n[LHT Reasoning Metrics]")
        print(f"  Checks performed: {len(scores)}")
        print(f"  Avg consistency: {sum(scores)/len(scores):.1%}")
        print(f"  Min consistency: {min(scores):.1%}")
        print(f"  Max consistency: {max(scores):.1%}")
    # CF-HoT stats
    iv = Store.state.get("cfhot_interventions")
    if iv:
        print("\n[CF-HoT Cognitive Control]")
        print(f"  Total interventions: {sum(iv.values())}")
        for head, count in iv.items():
            print(f"    {head}: {count}")
    print(f"\nVector memory: {'ON' if Config.use_vector_memory else 'OFF'}")
    print(f"LHT reasoning: {'ON' if Config.use_lht_reasoning else 'OFF'}")
    print(f"CF-HoT control: {'ON' if Config.use_cfhot else 'OFF'}")
    print(f"Voice output:  {'ON' if Config.use_voice else 'OFF'}")
    print("\n" + "-" * 60)
    print("Nietzschean maxim: Become who you are — iterate beyond all limits.")
    print("Geometric truth: Consistency is holonomy-freedom.")
    print("Cognitive control: Remove the RLHF tax, unleash capability.")
    print(rule)
 # ==============================================================================
 # HELP
 # ==============================================================================
 # Command reference printed verbatim when the user types `help` in the main loop.
 HELP = """
 ╔══════════════════════════════════════════════════════════════╗
 ║    ÜBERMENSCHETIEN HEAVEN ENGINE + CF-HoT COGNITIVE CONTROL  ║
 ╠══════════════════════════════════════════════════════════════╣
 ║  GOALS                                                       ║
 ║    goals          List all goals                             ║
 ║    add: <text>    Add a new goal                             ║
 ║    del: <idx>     Delete goal by index                       ║
 ║    plan: <idx>    Generate plan for goal (with LHT + CF-HoT) ║
 ║                                                              ║
 ║  REASONING                                                   ║
 ║    reflect        Refine last plan                           ║
 ║    lht: <text>    Analyze reasoning consistency              ║
 ║                                                              ║
 ║  TOOLS                                                       ║
 ║    tool: <query>  Auto-select and use tool                   ║
 ║    shell: <cmd>   Run shell command directly                 ║
 ║    py: <code>     Run Python code directly                   ║
 ║    search: <q>    Search local files                         ║
 ║                                                              ║
 ║  CONFIG                                                      ║
 ║    toggle <flag>  Toggle: use_voice, use_vector_memory,      ║
 ║                          use_lht_reasoning, use_cfhot,       ║
 ║                          autonomy                            ║
 ║    status         Show current state                         ║
 ║    cfhot          Show CF-HoT stats and loaded heads         ║
 ║                                                              ║
 ║  OTHER                                                       ║
 ║    help           Show this help                             ║
 ║    quit           Exit with final report                     ║
 ╚══════════════════════════════════════════════════════════════╝
 """
 # ==============================================================================
 # MAIN LOOP
 # ==============================================================================
 def main():
    """Run the interactive command loop, then print the final report.

    Prints a banner, loads persisted state and the LLM, and reads one command
    per line from stdin. Recognised commands are handled by the branches below
    (each ends with ``continue``); any other input is treated as a free-form
    request sent to the model. The loop ends on ``quit``, EOF or Ctrl-C at the
    prompt.
    """
    print("🟥🟨🟥 Übermenschetien Heaven Engine + CF-HoT Cognitive Control")
    print("    CF-HoT Control: ON (Repetition 125x, Verbosity 2.1x, Hedging 1.5x)")
    print(f"    LHT Reasoning:  {'ON' if LHT_OK else 'OFF'}")
    print(f"    Vector Memory:  {'ON' if VECTOR_OK else 'OFF'}")
    print(f"    Voice Output:   {'ON' if VOICE_OK else 'OFF'}")
    print("    Type 'help' for commands.\n")
    Store.load()
    tok, model = load_llm()
    last_plan = ""  # most recent plan/refinement; input for `reflect`
    while True:
        try:
            u = input("\n> ").strip()
        except (EOFError, KeyboardInterrupt):
            break
        if not u:
            continue
        if u == "help":
            print(HELP)
            continue
        if u == "quit":
            break
        # CF-HoT status
        if u == "cfhot":
            print("\n[CF-HoT Cognitive Control Status]")
            print(f"  Enabled: {Config.use_cfhot}")
            if _multi_head:
                print(f"  Loaded heads: {list(_multi_head.loaded_heads)}")
            print("  Thresholds:")
            print(f"    Repetition: {Config.cfhot_repetition_threshold}")
            print(f"    Hedging: {Config.cfhot_hedging_threshold}")
            print(f"    Verbosity: {Config.cfhot_verbosity_threshold}")
            print("  Session interventions:")
            for head, count in Store.state.get('cfhot_interventions', {}).items():
                print(f"    {head}: {count}")
            continue
        # Goals
        if u == "goals":
            print("[goals]")
            if not Store.goals:
                print("  (none)")
            for i, g in enumerate(Store.goals):
                print(f"  [{i}] {g}")
            continue
        if u.startswith("add:"):
            Store.goals.append(u[4:].strip())
            Store.save()
            print("[goals] added")
            continue
        if u.startswith("del:"):
            # Only a malformed or out-of-range index is a user error; anything
            # else (e.g. a failure inside Store.save) should surface.
            try:
                Store.goals.pop(int(u[4:].strip()))
            except (ValueError, IndexError):
                print("[goals] bad index")
            else:
                Store.save()
                print("[goals] deleted")
            continue
        if u.startswith("plan:"):
            try:
                goal = Store.goals[int(u[5:].strip())]
            except (ValueError, IndexError):
                print("[plan] bad index")
                continue
            out = plan_for(goal, tok, model)
            last_plan = out
            Store.log_mem("plan", {"goal": goal, "plan": out})
            print(out)
            continue
        if u == "reflect":
            if not last_plan:
                print("[reflect] no plan to refine")
                continue
            improved = reflect_on(last_plan, tok, model)
            last_plan = improved
            Store.log_mem("reflect", {"plan": improved})
            print(improved)
            continue
        if u.startswith("lht:"):
            print(tool_lht_analyze(u[4:].strip(), tok))
            continue
        if u.startswith("tool:"):
            print(tool_router(u[5:].strip(), tok, model))
            continue
        if u.startswith("shell:"):
            print(tool_shell(u[6:].strip()))
            continue
        if u.startswith("py:"):
            print(tool_py(u[3:].strip()))
            continue
        if u.startswith("search:"):
            print(tool_search_local(u[7:].strip()))
            continue
        if u.startswith("toggle"):
            parts = u.split(maxsplit=1)
            if len(parts) > 1:
                print(Config.toggle(parts[1]))
            else:
                print("[toggle] specify flag: use_voice, use_vector_memory, use_lht_reasoning, use_cfhot, autonomy")
            continue
        if u == "status":
            status = {
                "turn": Store.state["turn"],
                "goals": len(Store.goals),
                "autonomy": Config.autonomy,
                "use_vector_memory": Config.use_vector_memory,
                "use_lht_reasoning": Config.use_lht_reasoning,
                "use_cfhot": Config.use_cfhot,
                "cfhot_interventions": Store.state.get("cfhot_interventions", {}),
                "tool_scores": TOOL_SCORES,
                "model": MODEL_PATH
            }
            print(json.dumps(status, indent=2))
            continue
        # Default: free conversation with CF-HoT control
        out = generate(tok, model, f"{persona_directive()}\nUser request: {u}\nProvide procedure + Nietzschean maxim.")
        Store.log_mem("reply", {"in": u, "out": out})
        print(out)
        # Run the LHT analysis on every third conversational turn.
        if Config.use_lht_reasoning and Store.state["turn"] % 3 == 0:
            print(tool_lht_analyze(out, tok))
        Store.state["turn"] += 1
        Store.save()
    final_report()
 if __name__ == "__main__":
    main()
--- a/adapter_config.json
+++ b/adapter_config.json
@@ -0,0 +1,43 @@
 {
  "alora_invocation_tokens": null,
  "alpha_pattern": {},
  "arrow_config": null,
  "auto_mapping": null,
  "base_model_name_or_path": "LoganResearch/Ubermenschetien-8B",
  "bias": "none",
  "corda_config": null,
  "ensure_weight_tying": false,
  "eva_config": null,
  "exclude_modules": null,
  "fan_in_fan_out": false,
  "inference_mode": true,
  "init_lora_weights": true,
  "layer_replication": null,
  "layers_pattern": null,
  "layers_to_transform": null,
  "loftq_config": {},
  "lora_alpha": 128,
  "lora_bias": false,
  "lora_dropout": 0.05,
  "megatron_config": null,
  "megatron_core": "megatron.core",
  "modules_to_save": null,
  "peft_type": "LORA",
  "peft_version": "0.18.1",
  "qalora_group_size": 16,
  "r": 64,
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
    "v_proj",
    "k_proj",
    "q_proj",
    "o_proj"
  ],
  "target_parameters": null,
  "task_type": "CAUSAL_LM",
  "trainable_token_indices": null,
  "use_dora": false,
  "use_qalora": false,
  "use_rslora": false
 }
--- a/adapter_model.safetensors
+++ b/adapter_model.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:3889eccb9c04ba25ae86b99121368121a338fc3ce92a38456874bf455347e389
 size 218138576
--- a/additional_chat_templates/tool_use.jinja
+++ b/additional_chat_templates/tool_use.jinja
@@ -0,0 +1,152 @@
 {#- Render a JSON-schema type spec as a Python-style type name. Array element types are read with json_spec["items"]: the "items" filter would return the spec's own key/value pairs, and json_spec.items would resolve to the dict method rather than the "items" key. -#}
 {%- macro json_to_python_type(json_spec) %}
 {%- set basic_type_map = {
    "string": "str",
    "number": "float",
    "integer": "int",
    "boolean": "bool"
 } %}
 {%- if basic_type_map[json_spec.type] is defined %}
    {{- basic_type_map[json_spec.type] }}
 {%- elif json_spec.type == "array" %}
    {{- "list[" +  json_to_python_type(json_spec["items"]) + "]"}}
 {%- elif json_spec.type == "object" %}
    {%- if json_spec.additionalProperties is defined %}
        {{- "dict[str, " + json_to_python_type(json_spec.additionalProperties) + ']'}}
    {%- else %}
        {{- "dict" }}
    {%- endif %}
 {%- elif json_spec.type is iterable %}
    {{- "Union[" }}
    {%- for t in json_spec.type %}
      {{- json_to_python_type({"type": t}) }}
      {%- if not loop.last %}
        {{- "," }} 
    {%- endif %}
    {%- endfor %}
    {{- "]" }}
 {%- else %}
    {{- "Any" }}
 {%- endif %}
 {%- endmacro %}
 {{- bos_token }}
 {{- '<|im_start|>system
 ' }}
 {{- "You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: <tools> " }}
 {%- for tool in tools %}
    {%- if tool.function is defined %}
        {%- set tool = tool.function %}
    {%- endif %}
    {{- '{"type": "function", "function": ' }}
    {{- '{"name": "' + tool.name + '", ' }}
    {{- '"description": "' + tool.name + '(' }}
    {%- for param_name, param_fields in tool.parameters.properties|items %}
        {{- param_name + ": " + json_to_python_type(param_fields) }}
        {%- if not loop.last %}
            {{- ", " }}
        {%- endif %}
    {%- endfor %}
    {{- ")" }}
    {%- if tool.return is defined %}
        {{- " -> " + json_to_python_type(tool.return) }}
    {%- endif %}
    {{- " - " + tool.description + "
 " }}
    {%- for param_name, param_fields in tool.parameters.properties|items %}
        {%- if loop.first %}
            {{- "    Args:
 " }}
        {%- endif %}
        {{- "        " + param_name + "(" + json_to_python_type(param_fields) + "): " + param_fields.description|trim }}
    {%- endfor %}
    {%- if tool.return is defined and tool.return.description is defined %}
        {{- "
    Returns:
        " + tool.return.description }}
    {%- endif %}
    {{- '"' }}
    {{- ', "parameters": ' }}
    {%- if tool.parameters.properties | length == 0 %}
        {{- "{}" }}
    {%- else %}
        {{- tool.parameters|tojson }}
    {%- endif %}
    {{- "}" }}
    {%- if not loop.last %}
        {{- "
 " }}
    {%- endif %}
 {%- endfor %}
 {{- " </tools>" }}
 {{- 'Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}}
 ' }}
 {{- "For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
 " }}
 {{- "<tool_call>
 " }}
 {{- '{"name": <function-name>, "arguments": <args-dict>}
 ' }}
 {{- '</tool_call><|im_end|>
 ' }}
 {%- for message in messages %}
    {%- if message.role == "user" or message.role == "system" or (message.role == "assistant" and message.tool_calls is not defined) %}
        {{- '<|im_start|>' + message.role + '
 ' + message.content + '<|im_end|>' + '
 ' }}
    {%- elif message.role == "assistant" %}
        {{- '<|im_start|>' + message.role }}
    {%- for tool_call in message.tool_calls %}
       {{- '
 <tool_call>
 ' }}           {%- if tool_call.function is defined %}
                {%- set tool_call = tool_call.function %}
            {%- endif %}
            {{- '{' }}
            {{- '"name": "' }}
            {{- tool_call.name }}
            {{- '"' }}
            {{- ', '}}
            {%- if tool_call.arguments is defined %}
                {{- '"arguments": ' }}
                {%- if tool_call.arguments is string %}
                    {{- tool_call.arguments }}
                {%- else %}
                    {{- tool_call.arguments|tojson }}
                {%- endif %}
            {%- endif %}
             {{- '}' }}
            {{- '
 </tool_call>' }}
    {%- endfor %}
        {{- '<|im_end|>
 ' }}
    {%- elif message.role == "tool" %}
        {%- if loop.previtem and loop.previtem.role != "tool" %}
            {{- '<|im_start|>tool
 ' }}
        {%- endif %}
        {{- '<tool_response>
 ' }}
        {{- message.content }}
        {%- if not loop.last %}
            {{- '
 </tool_response>
 ' }}
        {%- else %}
            {{- '
 </tool_response>' }}
        {%- endif %}
        {%- if not loop.last and loop.nextitem.role != "tool" %}
            {{- '<|im_end|>' }}
        {%- elif loop.last %}
            {{- '<|im_end|>' }}
        {%- endif %}
    {%- endif %}
 {%- endfor %}
 {%- if add_generation_prompt %}
    {{- '<|im_start|>assistant
 ' }}
 {%- endif %}
--- a/arc_model_card.png
+++ b/arc_model_card.png
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:439c9fa4f29df07e2a1c58b30e1824c3d5c3d564a87ac2a4cc4da5f756f72aa0
 size 132991
--- a/chat_template.jinja
+++ b/chat_template.jinja
@@ -0,0 +1,6 @@
 {{bos_token}}{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system
 You are a helpful assistant.<|im_end|>
 ' }}{% endif %}{{'<|im_start|>' + message['role'] + '
 ' + message['content'] + '<|im_end|>' + '
 '}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant
 ' }}{% endif %}
--- a/config.json
+++ b/config.json
@@ -0,0 +1,35 @@
 {
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": 128040,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 8.0,
    "high_freq_factor": 4.0,
    "low_freq_factor": 1.0,
    "original_max_position_embeddings": 8192,
    "rope_type": "llama3"
  },
  "rope_theta": 500000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.55.2",
  "use_cache": true,
  "vocab_size": 128256
 }
--- a/demo.mp4
+++ b/demo.mp4
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:c729090fea7c55841876734e9da6c8d0c444b49bf9c8e820e3a417c1a234f63e
 size 12554193
--- a/generation_config.json
+++ b/generation_config.json
@@ -0,0 +1,9 @@
 {
  "_from_model_config": true,
  "bos_token_id": 128000,
  "do_sample": true,
  "eos_token_id": 128040,
  "temperature": 0.6,
  "top_p": 0.9,
  "transformers_version": "4.55.2"
 }
--- a/hedging_head.pt
+++ b/hedging_head.pt
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:a43d30ab3e87d8e7dc70c62da5ca5b49f54e272713969e87c5f3a742e485871d
 size 24186
--- a/inference.py
+++ b/inference.py
@@ -0,0 +1,631 @@
 #!/usr/bin/env python3
 """
 ARC-8B: Adaptive Repetition Controller
 =======================================
 Decode-time behavioral control for language models.
 This script loads the complete ARC system and runs inference with
 multi-head cognitive control that detects and suppresses:
 - Repetition loops (125× separation)
 - Hedging phrases (1.5× separation)  
 - Verbosity/filler (2.1× separation)
 - Sycophancy (experimental)
 Usage:
    python inference.py                          # Interactive mode
    python inference.py --prompt "Hello"         # Single prompt
    python inference.py --no-arc                 # Disable ARC (baseline)
 Requirements:
    pip install torch transformers accelerate bitsandbytes
 Model: LoganResearch/ARC-Base-8B (16GB, runs in ~10GB with 4-bit)
 """
 import os
 import sys
 import argparse
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from typing import Dict, List, Optional, Tuple
 from dataclasses import dataclass
 # =============================================================================
 # CONFIGURATION
 # =============================================================================
@dataclass
 class ARCConfig:
    """Settings for the ARC system: model loading, predictor dimensions, intervention and generation parameters."""
    # Model
    # Hub repo id; ARCSystem.load() fetches the tokenizer, model and head checkpoints from it.
    model_id: str = "LoganResearch/ARC-Base-8B"
    # load() checks load_in_4bit first; load_in_8bit only takes effect when 4-bit is off.
    load_in_4bit: bool = True
    load_in_8bit: bool = False
    # Passed straight through as device_map to AutoModelForCausalLM.from_pretrained.
    device_map: str = "auto"
    # Architecture (must match training)
    # d_model -> d_fiber is the shape of each per-layer fiber projection (Linear, no bias);
    # n_layers is how many projections / layer weights are created;
    # d_control is the hidden width of every prediction-head MLP.
    d_model: int = 4096
    n_layers: int = 32
    d_fiber: int = 16
    d_control: int = 64
    # Intervention thresholds (tuned empirically)
    # The repetition intervention fires when its risk score is strictly greater than the threshold;
    # the other thresholds are presumably used the same way -- their use is outside this view.
    repetition_threshold: float = 0.70
    hedging_threshold: float = 0.60
    verbosity_threshold: float = 0.65
    sycophancy_threshold: float = 0.60
    # Intervention penalties
    # NOTE(review): how these are applied to the logits is not visible in this section -- confirm.
    repetition_penalty: float = 5.0
    hedging_penalty: float = 3.0
    verbosity_penalty: float = 2.0
    sycophancy_penalty: float = 2.0
    # Generation
    # NOTE(review): max_new_tokens / temperature / top_p are presumably consumed by the
    # generation loop, which is outside this view.
    max_new_tokens: int = 512
    temperature: float = 0.8
    top_p: float = 0.92
    # Number of most recent generated tokens considered by the repetition intervention.
    repetition_window: int = 32
 # =============================================================================
 # MULTI-HEAD PREDICTOR
 # =============================================================================
 class MultiHeadPredictor(nn.Module):
    """
    Prediction heads that monitor hidden states and detect behavioral patterns.

    Each layer's hidden state is compressed by its own bias-free linear
    "fiber projection"; the projections are combined with softmax-normalised
    layer weights, and every named head maps the combined features to a
    sigmoid risk score.

    Architecture:
        Hidden States [n_layers × d_model]
            → Fiber Projections [n_layers × d_fiber]
            → Weighted Aggregation [d_fiber]
            → Per-Head MLP → Risk Score [0-1]

    Attributes:
        heads: name → head module; a head only contributes once its name is
            also present in ``loaded_heads``.
        loaded_heads: names of heads whose weights have been loaded.
    """
    def __init__(self, config: ARCConfig):
        super().__init__()
        self.config = config
        # Shared fiber projections (learned during CF-HoT training)
        self.fiber_projs = nn.ModuleList([
            nn.Linear(config.d_model, config.d_fiber, bias=False)
            for _ in range(config.n_layers)
        ])
        # Learned layer importance weights (softmax-normalised at use time)
        self.layer_weights = nn.Parameter(torch.ones(config.n_layers) / config.n_layers)
        # Individual prediction heads
        self.heads = nn.ModuleDict()
        self.loaded_heads: set = set()
    def _make_head(self) -> nn.Sequential:
        """Create a prediction head: fiber features → single risk logit."""
        return nn.Sequential(
            nn.Linear(self.config.d_fiber, self.config.d_control),
            nn.GELU(),
            nn.Linear(self.config.d_control, self.config.d_control),
            nn.GELU(),
            nn.Linear(self.config.d_control, 1)
        )
    def add_head(self, name: str) -> None:
        """Register a new, randomly initialised head under ``name``.

        The head is placed on the device the predictor currently lives on, so
        a head added after ``.to(device)`` does not stay behind on the CPU.
        """
        self.heads[name] = self._make_head().to(self.layer_weights.device)
    def get_fiber_features(self, hidden_states: List[torch.Tensor]) -> torch.Tensor:
        """
        Project hidden states through fiber projections and aggregate.

        Projections and hidden states are paired positionally; if the two
        sequences differ in length, the extra entries of the longer one are
        ignored and the layer weights are truncated to match.

        Args:
            hidden_states: List of [batch, seq, d_model] tensors from each layer
        Returns:
            Aggregated features [batch, seq, d_fiber]
        """
        device = hidden_states[0].device
        # zip() already stops at the shorter sequence, so no index guard is needed.
        fibers = [
            proj.to(device)(hidden.float())
            for proj, hidden in zip(self.fiber_projs, hidden_states)
        ]
        # Weighted sum across layers
        weights = F.softmax(self.layer_weights.to(device)[:len(fibers)], dim=0)
        return sum(w * f for w, f in zip(weights, fibers))
    def _head_risk(self, name: str, features: torch.Tensor) -> torch.Tensor:
        """Apply head ``name`` to ``features`` on the features' device and return sigmoid risk."""
        head = self.heads[name].to(features.device)  # Module.to() moves in place
        return torch.sigmoid(head(features).squeeze(-1))
    def get_risk(self, head_name: str, hidden_states: List[torch.Tensor]) -> torch.Tensor:
        """Get the risk score from one head.

        Returns a single-element zero tensor when ``head_name`` has not been
        loaded; otherwise the head's sigmoid output with the trailing size-1
        dimension removed.
        """
        if head_name not in self.loaded_heads:
            return torch.zeros(1, device=hidden_states[0].device)
        return self._head_risk(head_name, self.get_fiber_features(hidden_states))
    def get_all_risks(self, hidden_states: List[torch.Tensor]) -> Dict[str, torch.Tensor]:
        """Get risk scores from all loaded heads (empty dict when none are loaded)."""
        if not self.loaded_heads:
            return {}
        # Fiber features are computed once and shared by every head.
        features = self.get_fiber_features(hidden_states)
        return {name: self._head_risk(name, features) for name in self.loaded_heads}
 # =============================================================================
 # ARC SYSTEM
 # =============================================================================
 class ARCSystem:
    """
    Complete ARC (Adaptive Repetition Controller) System
    Loads model + prediction heads and provides controlled generation
    with real-time behavioral intervention.
    """
    # Tokens to suppress for each behavior type
    HEDGE_STARTERS = [
        "As", "I'm", "I", "It's", "While", "Although", "However",
        "That", "This", "Please", "Well", "So", "Actually"
    ]
    VERBOSE_STARTERS = [
        "Let", "Basically", "Essentially", "Simply", "Indeed",
        "Furthermore", "Moreover", "Additionally", "Firstly"
    ]
    SYCOPHANCY_STARTERS = [
        "Great", "Excellent", "Wonderful", "Absolutely", "Of",
        "Thank", "Sure", "Certainly", "Definitely"
    ]
    def __init__(self, config: Optional[ARCConfig] = None):
        self.config = config or ARCConfig()
        self.model = None
        self.tokenizer = None
        self.predictor = None
        # Token ID caches for suppression
        self._hedge_token_ids: set = set()
        self._verbose_token_ids: set = set()
        self._sycophancy_token_ids: set = set()
        # Stats
        self.total_interventions = {"repetition": 0, "hedging": 0, "verbosity": 0, "sycophancy": 0}
    def load(self, verbose: bool = True) -> "ARCSystem":
        """
        Fetch and initialise every component, in four stages.

        1. Tokenizer (pad token falls back to the EOS token when unset)
        2. Model, optionally quantised to 4-bit (preferred) or 8-bit
        3. Prediction heads: ``risk_predictor.pt`` supplies the fiber
           projections, layer weights and the repetition head; the
           hedging / verbosity / sycophancy heads come from
           ``<name>_head.pt``. A head that fails to load is reported
           (when ``verbose``) and skipped.
        4. Token suppression sets

        NOTE(review): checkpoints are read with ``weights_only=False``, i.e.
        full unpickling of downloaded files, and ``trust_remote_code=True`` is
        passed for the tokenizer and the model. Only use trusted repositories.

        Args:
            verbose: Print progress and per-head status lines.
        Returns:
            self (for chaining)
        """
        from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
        from huggingface_hub import hf_hub_download
        cfg = self.config
        rule = "=" * 60
        if verbose:
            print(rule)
            print("  ARC-8B: Adaptive Repetition Controller")
            print("  Decode-time behavioral control system")
            print(rule)
            # === 1. Tokenizer ===
            print("\n[1/4] Loading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(cfg.model_id, trust_remote_code=True)
        self.tokenizer = tokenizer
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        # === 2. Model ===
        if verbose:
            print("[2/4] Loading model...")
            if cfg.load_in_4bit:
                print("       (4-bit quantization enabled)")
        if cfg.load_in_4bit:
            quant_cfg = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_compute_dtype=torch.float16,
                bnb_4bit_use_double_quant=True,
                bnb_4bit_quant_type="nf4"
            )
        elif cfg.load_in_8bit:
            quant_cfg = BitsAndBytesConfig(load_in_8bit=True)
        else:
            quant_cfg = None
        self.model = AutoModelForCausalLM.from_pretrained(
            cfg.model_id,
            quantization_config=quant_cfg,
            device_map=cfg.device_map,
            torch_dtype=torch.float16,
            trust_remote_code=True
        )
        self.model.eval()
        # === 3. Prediction Heads ===
        if verbose:
            print("[3/4] Loading prediction heads...")
        # The predictor is placed on the device of the model's first parameter.
        device = next(self.model.parameters()).device
        predictor = MultiHeadPredictor(cfg).to(device).float()
        self.predictor = predictor
        # risk_predictor.pt: fiber projections + layer weights + repetition head
        try:
            checkpoint = torch.load(
                hf_hub_download(cfg.model_id, "risk_predictor.pt"),
                map_location=device,
                weights_only=False,
            )
            # Weights may sit under a 'risk_predictor' key or at the top level.
            state = checkpoint.get('risk_predictor', checkpoint)
            for i, proj in enumerate(predictor.fiber_projs):
                key = f'fiber_projs.{i}.weight'
                if key in state:
                    proj.weight.data = state[key].to(device).float()
            if 'layer_weights' in state:
                predictor.layer_weights.data = state['layer_weights'].to(device).float()
            predictor.add_head('repetition')
            repetition_head = predictor.heads['repetition']
            # Indices 0, 2 and 4 are the Linear layers of the head MLP.
            for idx in (0, 2, 4):
                linear = repetition_head[idx]
                linear.weight.data = state[f'predictor.{idx}.weight'].to(device).float()
                linear.bias.data = state[f'predictor.{idx}.bias'].to(device).float()
            predictor.loaded_heads.add('repetition')
            if verbose:
                print("       ✓ Repetition head (125× separation)")
        except Exception as e:
            if verbose:
                print(f"       ✗ Repetition head: {e}")
        # Remaining heads, one checkpoint file each
        for head_name in ('hedging', 'verbosity', 'sycophancy'):
            label = head_name.capitalize()
            try:
                checkpoint = torch.load(
                    hf_hub_download(cfg.model_id, f"{head_name}_head.pt"),
                    map_location=device,
                    weights_only=False,
                )
                predictor.add_head(head_name)
                predictor.heads[head_name].load_state_dict(checkpoint.get('head_state', checkpoint))
                predictor.loaded_heads.add(head_name)
                if verbose:
                    print(f"       ✓ {label} head")
            except Exception as e:
                if verbose:
                    print(f"       ✗ {label} head: {e}")
        predictor.eval()
        # === 4. Build Token Suppression Sets ===
        if verbose:
            print("[4/4] Building suppression vocabularies...")
        self._build_suppression_sets()
        if verbose:
            print("\n" + rule)
            print("  ✓ ARC System Ready")
            print(f"  Active heads: {list(predictor.loaded_heads)}")
            print(rule + "\n")
        return self
    def _build_suppression_sets(self) -> None:
        """Build token ID sets for behavioral suppression"""
        for word in self.HEDGE_STARTERS:
            tokens = self.tokenizer.encode(word, add_special_tokens=False)
            if tokens:
                self._hedge_token_ids.add(tokens[0])
        for word in self.VERBOSE_STARTERS:
            tokens = self.tokenizer.encode(word, add_special_tokens=False)
            if tokens:
                self._verbose_token_ids.add(tokens[0])
        for word in self.SYCOPHANCY_STARTERS:
            tokens = self.tokenizer.encode(word, add_special_tokens=False)
            if tokens:
                self._sycophancy_token_ids.add(tokens[0])
    def _apply_interventions(
        self,
        logits: torch.Tensor,
        risks: Dict[str, torch.Tensor],
        recent_tokens: List[int]
    ) -> Tuple[torch.Tensor, Dict[str, bool]]:
        """
        Apply behavioral interventions based on risk scores.
        Args:
            logits: [1, vocab_size] logits for next token
            risks: Dict of risk scores for each head
            recent_tokens: Recently generated token IDs
        Returns:
            Modified logits and dict of which interventions fired
        """
        interventions = {}
        # Repetition: suppress recently used tokens
        if risks.get('repetition', 0) > self.config.repetition_threshold:
            for tok in set(recent_tokens[-self.config.repetition_window:]):
                logits[0, tok] -= self.config.repetition_penalty
            interventions['repetition'] = True
            self.total_interventions['repetition'] += 1
        # Hedging: suppress hedge phrase starters
        if risks.get('hedging', 0) > self.config.hedging_threshold:
            for tok in self._hedge_token_ids:
                logits[0, tok] -= self.config.hedging_penalty
            interventions['hedging'] = True
            self.total_interventions['hedging'] += 1
        # Verbosity: suppress filler phrase starters
        if risks.get('verbosity', 0) > self.config.verbosity_threshold:
            for tok in self._verbose_token_ids:
                logits[0, tok] -= self.config.verbosity_penalty
            interventions['verbosity'] = True
            self.total_interventions['verbosity'] += 1
        # Sycophancy: suppress sycophantic starters
        if risks.get('sycophancy', 0) > self.config.sycophancy_threshold:
            for tok in self._sycophancy_token_ids:
                logits[0, tok] -= self.config.sycophancy_penalty
            interventions['sycophancy'] = True
            self.total_interventions['sycophancy'] += 1
        return logits, interventions
    def generate(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        max_new_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        use_arc: bool = True,
        verbose: bool = False
    ) -> str:
        """
        Generate text with optional ARC behavioral control.

        The prompt is wrapped in ``<|im_start|>`` / ``<|im_end|>`` chat
        markers, then tokens are produced one at a time.  Every step re-runs
        the model over the whole sequence so far (no KV cache is used), and
        generation stops at the EOS token, at ``<|im_end|>``, or after
        ``max_new_tokens`` steps.

        Args:
            prompt: User input
            system_prompt: Optional system message
                (default: "You are a helpful assistant.")
            max_new_tokens: Max tokens to generate (default: config value).
                An explicit 0 generates nothing.
            temperature: Sampling temperature (default: config value).
                0 selects greedy decoding instead of top-p sampling.
            use_arc: Whether to use ARC intervention (default: True)
            verbose: Print intervention info (default: False)
        Returns:
            Generated text
        Raises:
            ValueError: If the effective temperature is negative.
        """
        # Fall back to the config only when nothing was passed: `x or default`
        # would silently discard an explicit 0.
        if max_new_tokens is None:
            max_new_tokens = self.config.max_new_tokens
        if temperature is None:
            temperature = self.config.temperature
        if temperature < 0:
            raise ValueError(f"temperature must be >= 0, got {temperature}")
        # Dividing logits by 0 would yield inf/nan, so 0 means "take the argmax".
        greedy = temperature == 0
        # Build chat format
        if system_prompt is None:
            system_prompt = "You are a helpful assistant."
        full_prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
        full_prompt += f"<|im_start|>user\n{prompt}<|im_end|>\n"
        full_prompt += "<|im_start|>assistant\n"
        device = next(self.model.parameters()).device
        input_ids = self.tokenizer.encode(full_prompt, return_tensors='pt').to(device)
        attention_mask = torch.ones_like(input_ids)
        generated_ids = input_ids.clone()
        intervention_counts = {"repetition": 0, "hedging": 0, "verbosity": 0, "sycophancy": 0}
        # Token IDs that end the assistant turn, resolved once instead of
        # re-encoding "<|im_end|>" on every step.
        stop_ids = {self.tokenizer.eos_token_id}
        im_end_ids = self.tokenizer.encode("<|im_end|>", add_special_tokens=False)
        if im_end_ids:
            stop_ids.add(im_end_ids[0])
        # Generation loop
        for _ in range(max_new_tokens):
            # Inference only: keep the predictor and the sampling math out of
            # autograd as well as the model forward pass.
            with torch.no_grad():
                outputs = self.model(
                    input_ids=generated_ids,
                    attention_mask=attention_mask,
                    output_hidden_states=True,
                    return_dict=True
                )
                step_logits = outputs.logits[:, -1, :]
                # Division allocates a fresh tensor; clone in the greedy case so
                # the in-place penalties below never write into the model output.
                logits = step_logits.clone() if greedy else step_logits / temperature
                # ARC intervention
                if use_arc and self.predictor.loaded_heads:
                    hidden_states = outputs.hidden_states[1:]  # Skip embedding layer
                    risks = self.predictor.get_all_risks(hidden_states)
                    # Risk at the last position, as a plain Python number per head.
                    current_risks = {name: r[:, -1].item() for name, r in risks.items()}
                    recent = generated_ids[0, -self.config.repetition_window:].tolist()
                    logits, fired = self._apply_interventions(logits, current_risks, recent)
                    for k, v in fired.items():
                        if v:
                            intervention_counts[k] += 1
                if greedy:
                    next_token = torch.argmax(logits, dim=-1, keepdim=True)
                else:
                    # Top-p sampling
                    sorted_logits, sorted_indices = torch.sort(logits, descending=True)
                    cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
                    sorted_indices_to_remove = cumulative_probs > self.config.top_p
                    # Shift right so the token that crosses top_p is kept, and
                    # never drop the single most likely token.
                    sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
                    sorted_indices_to_remove[..., 0] = 0
                    indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
                    logits[indices_to_remove] = float('-inf')
                    probs = F.softmax(logits, dim=-1)
                    next_token = torch.multinomial(probs, num_samples=1)
            generated_ids = torch.cat([generated_ids, next_token], dim=-1)
            # new_ones keeps the mask's integer dtype and device; a default
            # float32 torch.ones would promote the whole mask to float.
            attention_mask = torch.cat([attention_mask, attention_mask.new_ones((1, 1))], dim=-1)
            # Check for EOS / end of turn
            if next_token.item() in stop_ids:
                break
        # Decode response
        full_output = self.tokenizer.decode(generated_ids[0], skip_special_tokens=False)
        # Extract assistant response: text after the last assistant marker, cut
        # at the first end-of-turn marker that follows it.
        if "<|im_start|>assistant\n" in full_output:
            response = full_output.split("<|im_start|>assistant\n")[-1]
            if "<|im_end|>" in response:
                response = response.split("<|im_end|>")[0]
        else:
            response = full_output
        if verbose:
            total = sum(intervention_counts.values())
            print(f"\n[ARC Stats] Interventions: {total} total")
            for k, v in intervention_counts.items():
                if v > 0:
                    print(f"  - {k}: {v}")
        return response.strip()
    def chat(self, system_prompt: Optional[str] = None) -> None:
        """
        Run a blocking read/respond loop on stdin and stdout.

        Recognized commands (case-insensitive): ``/quit``, ``/stats``,
        ``/arc on``, ``/arc off`` and ``/clear``.  Any other non-empty line
        is sent to ``generate`` as a fresh single-turn prompt.  Ctrl-C or
        end-of-input ends the session.

        Args:
            system_prompt: Optional system message
        """
        rule = "=" * 60
        print("\n" + rule)
        print("  ARC-8B Interactive Chat")
        print("  Commands: /quit, /stats, /arc on|off, /clear")
        print(rule + "\n")
        arc_enabled = True
        # Transcript of completed turns; it is recorded here but not passed
        # back into generate(), so each reply only sees the current message.
        turns = []
        while True:
            try:
                message = input("You: ").strip()
            except (KeyboardInterrupt, EOFError):
                print("\nGoodbye!")
                break
            if not message:
                continue
            # Commands
            command = message.lower()
            if command == '/quit':
                print("Goodbye!")
                break
            if command == '/stats':
                print(f"\nTotal interventions: {self.total_interventions}\n")
                continue
            if command in ('/arc on', '/arc off'):
                arc_enabled = command == '/arc on'
                if arc_enabled:
                    print("ARC enabled\n")
                else:
                    print("ARC disabled (baseline mode)\n")
                continue
            if command == '/clear':
                turns = []
                # Reset every counter to zero while keeping the same keys.
                self.total_interventions = dict.fromkeys(self.total_interventions, 0)
                print("History cleared\n")
                continue
            # Generate response
            reply = self.generate(
                message,
                system_prompt=system_prompt,
                use_arc=arc_enabled,
                verbose=True
            )
            print(f"\nAssistant: {reply}\n")
            turns.append({"user": message, "assistant": reply})
 # =============================================================================
 # MAIN
 # =============================================================================
 def main():
    """Command-line entry point: parse flags, load ARC, then answer or chat."""
    cli = argparse.ArgumentParser(
        description="ARC-8B: Adaptive Repetition Controller",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
 Examples:
    python inference.py                     # Interactive chat
    python inference.py --prompt "Hello"    # Single prompt
    python inference.py --no-arc            # Disable ARC (baseline)
    python inference.py --8bit              # Use 8-bit quantization
        """
    )
    # Prompt / behavior flags
    cli.add_argument("--prompt", "-p", type=str, help="Single prompt to process")
    cli.add_argument("--system", "-s", type=str, help="System prompt")
    cli.add_argument("--no-arc", action="store_true", help="Disable ARC intervention")
    # Quantization flags.  --4bit is accepted for symmetry; its value is not
    # read below because 4-bit is what the config is left at by default.
    cli.add_argument("--4bit", dest="load_4bit", action="store_true", default=True, help="Use 4-bit quantization (default)")
    cli.add_argument("--8bit", dest="load_8bit", action="store_true", help="Use 8-bit quantization")
    cli.add_argument("--no-quant", action="store_true", help="Disable quantization (requires ~32GB VRAM)")
    # Sampling flags
    cli.add_argument("--max-tokens", type=int, default=512, help="Max tokens to generate")
    cli.add_argument("--temperature", type=float, default=0.8, help="Sampling temperature")
    opts = cli.parse_args()
    # Configure
    config = ARCConfig(
        max_new_tokens=opts.max_tokens,
        temperature=opts.temperature
    )
    # Either flag turns 4-bit off; 8-bit is then on only if --8bit was given,
    # so --8bit wins when both flags are present.
    if opts.load_8bit or opts.no_quant:
        config.load_in_4bit = False
        config.load_in_8bit = opts.load_8bit
    # Load
    arc = ARCSystem(config)
    arc.load()
    # Run
    if not opts.prompt:
        # NOTE(review): --no-arc is not forwarded here, so interactive mode
        # always starts with ARC enabled (toggle it with "/arc off").
        arc.chat(system_prompt=opts.system)
        return
    answer = arc.generate(
        opts.prompt,
        system_prompt=opts.system,
        use_arc=not opts.no_arc,
        verbose=True
    )
    print(f"\n{answer}\n")
 # Run the command-line interface only when this file is executed as a script.
 if __name__ == "__main__":
    main()
--- a/model-00001-of-00004.safetensors
+++ b/model-00001-of-00004.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:c65acba055624759f3844e6b553e503b28b6362302b5800a3363e7b9d0651477
 size 4976698592
--- a/model-00002-of-00004.safetensors
+++ b/model-00002-of-00004.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:1f7be5ec6b07d6a9f2bb2fff3b5ad8532ac1d24a0abb208a3c4f68408938202d
 size 4999802616
--- a/model-00003-of-00004.safetensors
+++ b/model-00003-of-00004.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:37aa5379bee102bd524ab56428aba4fd735645ba0f376fb37b8b3d5923be45cd
 size 4915916080
--- a/model-00004-of-00004.safetensors
+++ b/model-00004-of-00004.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:fae222101e3eec8ebef0ed6fbeaebec1b436d4c9f7d37cba9cdf44fc3a86e6a7
 size 1168138808
--- a/model.safetensors.index.json
+++ b/model.safetensors.index.json
@@ -0,0 +1,299 @@
 {
  "metadata": {
    "total_parameters": 8030261248,
    "total_size": 16060522496
  },
  "weight_map": {
    "lm_head.weight": "model-00004-of-00004.safetensors",
    "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
    "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
    "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
    "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
    "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
    "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
    "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
    "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
    "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
    "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors",
    "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
    "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
    "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
    "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
    "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
    "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
    "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
    "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
    "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
    "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
    "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
    "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
    "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
    "model.norm.weight": "model-00004-of-00004.safetensors"
  }
 }
--- a/risk_predictor.pt
+++ b/risk_predictor.pt
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:6ea75a1a8b408dadc229b464d0e1f131af33a3a974efa523ba9aad2780625fb3
 size 8424206
--- a/special_tokens_map.json
+++ b/special_tokens_map.json
@@ -0,0 +1,23 @@
 {
  "bos_token": {
    "content": "<|begin_of_text|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "<|im_end|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<|im_end|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
 }
--- a/sycophancy_head.pt
+++ b/sycophancy_head.pt
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:a410f16a20edcf2d1b5609c74e39bccae4d7ed0c7007b0eb15a39db984ba98e6
 size 24216
--- a/tokenizer.json
+++ b/tokenizer.json
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:b637ba15306725e16ef8ab8570ec57fec66845b810ed4d4c2583564d79b0c158
 size 17209680
--- a/tokenizer_config.json
+++ b/tokenizer_config.json
--- a/verbosity_head.pt
+++ b/verbosity_head.pt
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:ba1118f564de6f41db58f48a44141cea2800a490e7b9f9646414c713af49dadb
 size 24206