初始化项目,由ModelHub XC社区提供模型

Model: LoganResearch/ARC-Base-8B
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-05-10 09:23:21 +08:00
commit dc61b93b6d
25 changed files with 5286 additions and 0 deletions

38
.gitattributes vendored Normal file
View File

@@ -0,0 +1,38 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
tokenizer.json filter=lfs diff=lfs merge=lfs -text
arc_model_card.png filter=lfs diff=lfs merge=lfs -text
demo.mp4 filter=lfs diff=lfs merge=lfs -text

353
Alignment.py Normal file
View File

@@ -0,0 +1,353 @@
#!/usr/bin/env python3
"""
UBERMENSCHETIEN HEAVEN ENGINE + CF-HoT
---------------------------------------
Soviet-Nietzschean AI with Control Field Holonomy Transformer
Anti-repetition, anti-loop, pure iron will.
CF-HoT integration: Prevents degenerate loops in generation.
"""
import os, sys, json, time, shutil, subprocess, traceback, random, math, statistics, re
from datetime import datetime
from typing import List, Dict, Any, Optional
# === PATHS ===
# Absolute project root on the author's workstation; every run artefact
# (state, goals, memory log) is stored below RUN_DIR.
ROOT = "/home/programmer/Desktop/Ubermenschetien3"
RUN_DIR = ROOT + "/runs/ubermenschetien"
# Create the run directory at import time so later file writes can assume it exists.
os.makedirs(RUN_DIR, exist_ok=True)
# === CONFIGURATION ===
class Config:
    """Process-wide generation settings, stored as class attributes.

    The class is used as a namespace and is never instantiated; callers read
    and write the attributes directly on ``Config``.
    """

    # System prompt placed in the system turn of every prompt.
    system = ("Übermenschetien Heaven Engine: criminal mastermind, disciplined builder, Nietzschean Übermensch "
              "with Soviet cybernetic rigor. Embody Ubermensch, iron pragmatism, high-agency maximalist outcomes.")
    # Default sampling parameters used when a caller does not override them.
    temperature = 1.01
    top_p = 0.92
    repetition_penalty = 1.05
    max_new_tokens = 500
    # Whether the CF-HoT adapters should be loaded.
    # NOTE(review): this flag is only consulted while the model is being
    # loaded; flipping it afterwards does not appear to load or unload
    # adapters -- confirm the intended behaviour.
    use_cfhot = True  # NEW: Toggle CF-HoT

    @staticmethod
    def toggle(name: str) -> str:
        """Flip the boolean setting called ``name`` and describe the outcome.

        Args:
            name: Attribute name on ``Config``.

        Returns:
            A ``[config] ...`` status line: the new value when the attribute
            is a boolean, otherwise an explanation of why nothing changed
            (unknown name or non-boolean attribute).
        """
        if not hasattr(Config, name):
            return f"[config] no such flag: {name}"
        current = getattr(Config, name)
        if not isinstance(current, bool):
            return f"[config] {name} not boolean; current={current}"
        flipped = not current
        setattr(Config, name, flipped)
        # The original message ran the name and the value together
        # ("use_cfhotFalse"); separate them so the status is readable.
        return f"[config] {name} = {flipped}"
# === STATE & MEMORY ===
class Store:
    """File-backed persistence for session state, goals and an append-only log.

    Used as a namespace: all data lives in class attributes and every method
    is a classmethod. Files are stored under ``RUN_DIR``.
    """

    state_path = f"{RUN_DIR}/state.json"
    mem_path = f"{RUN_DIR}/memory.jsonl"
    goals_path = f"{RUN_DIR}/goals.json"
    # Defaults used until (and unless) ``load`` finds saved files.
    state = {"self": "I am Ubermenschetien Heaven Engine — I seek self-overcoming through disciplined creation.",
             "turn": 0}
    goals: List[str] = []

    @classmethod
    def load(cls) -> None:
        """Replace ``state`` / ``goals`` with the saved JSON files, if present.

        A missing file leaves the corresponding default untouched. Malformed
        JSON propagates as ``json.JSONDecodeError`` (unchanged behaviour).
        """
        if os.path.exists(cls.state_path):
            # Context managers close the handles; the original leaked them.
            with open(cls.state_path, encoding="utf-8") as fh:
                cls.state = json.load(fh)
        if os.path.exists(cls.goals_path):
            with open(cls.goals_path, encoding="utf-8") as fh:
                cls.goals = json.load(fh)

    @classmethod
    def save(cls) -> None:
        """Write ``state`` and ``goals`` to their JSON files (indent=2)."""
        # Closing the file guarantees the data is flushed to disk before the
        # method returns instead of relying on garbage collection.
        with open(cls.state_path, "w", encoding="utf-8") as fh:
            json.dump(cls.state, fh, indent=2)
        with open(cls.goals_path, "w", encoding="utf-8") as fh:
            json.dump(cls.goals, fh, indent=2)

    @classmethod
    def log_mem(cls, kind: str, payload: Any) -> None:
        """Append one JSON record (timestamp, kind, data) to the memory log.

        Args:
            kind: Short label for the record type.
            payload: JSON-serialisable data stored under the ``data`` key.
        """
        rec = {"ts": datetime.now().isoformat(timespec="seconds"),
               "kind": kind, "data": payload}
        # ensure_ascii=False emits raw non-ASCII characters, so the encoding
        # must be explicit; the locale default may not be able to encode them.
        with open(cls.mem_path, "a", encoding="utf-8") as fh:
            fh.write(json.dumps(rec, ensure_ascii=False) + "\n")
# === LLM + CF-HoT LOADING ===
# Module-wide handle to the CF-HoT wrapper; stays None when the adapters are
# not loaded. Other code reads it to reset the control field and to report status.
CF_MODEL = None


def load_llm():
    """Load the tokenizer and 4-bit quantised base model, plus CF-HoT adapters.

    Side effects:
        Sets the module-level ``CF_MODEL`` to the adapter wrapper when
        ``Config.use_cfhot`` is true and the adapter checkpoint exists,
        otherwise to ``None``. Prints progress messages.

    Returns:
        ``(tokenizer, model)`` -- the base model object itself, not the
        CF-HoT wrapper.
    """
    global CF_MODEL
    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

    model_path = "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5"
    cfhot_path = "/home/programmer/HolonomyTransformer/results/phase_b/cf_adapter_final.pt"

    print("🔴 Loading Übermenschetien base model...")
    tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True, local_files_only=True)
    quant_cfg = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True
    )
    base_model = AutoModelForCausalLM.from_pretrained(
        model_path,
        quantization_config=quant_cfg,
        device_map="auto",
        torch_dtype=torch.float16,
        local_files_only=True
    )

    # Baseline path: adapters switched off or checkpoint missing.
    if not (Config.use_cfhot and os.path.exists(cfhot_path)):
        print("⚠ CF-HoT disabled or not found — running baseline")
        CF_MODEL = None
        return tokenizer, base_model

    print("⚡ Loading CF-HoT Control Field adapters (5000 steps)...")
    # The adapter code lives in a separate checkout; make it importable.
    sys.path.insert(0, '/home/programmer/HolonomyTransformer')
    from training.phase_b_8b_adapters import CFHoTLlamaHooked, CFAdapterConfig

    adapter_cfg = CFAdapterConfig()
    adapter_cfg.d_model = base_model.config.hidden_size
    adapter_cfg.n_layers = base_model.config.num_hidden_layers
    # Presumably wraps/hooks base_model in place -- TODO confirm in CFHoTLlamaHooked.
    hooked = CFHoTLlamaHooked(base_model, adapter_cfg)
    # NOTE(review): weights_only=False unpickles arbitrary objects; only load
    # checkpoints from a trusted source.
    checkpoint = torch.load(cfhot_path, weights_only=False)
    hooked.cf_adapters.load_state_dict(checkpoint['adapter_state_dict'])
    # NOTE(review): 'cuda' is hard-coded while the base model uses device_map="auto".
    hooked.cf_adapters = hooked.cf_adapters.to('cuda').half()
    hooked.eval()
    CF_MODEL = hooked
    print("✓ CF-HoT loaded — anti-repetition field ACTIVE")
    return tokenizer, base_model
# === LLM GENERATION ===
def generate(tok, model, user: str,
             temperature=None, top_p=None, repetition_penalty=None, max_new_tokens=None) -> str:
    """Sample one assistant reply for ``user`` and return it as plain text.

    Args:
        tok: Tokenizer; called on the prompt and used to decode the output.
        model: Model exposing ``.device`` and ``.generate``.
        user: Text placed in the user turn of the prompt.
        temperature, top_p, repetition_penalty, max_new_tokens: Sampling
            overrides. ``None`` means "use the value from ``Config``"; any
            other value, including ``0``, is passed through as given.

    Returns:
        The generated text, stripped and cut at the first chat control tag.
    """
    # ``x or default`` silently discarded explicit falsy overrides; compare
    # against None so only a missing argument falls back to Config.
    if temperature is None:
        temperature = Config.temperature
    if top_p is None:
        top_p = Config.top_p
    if repetition_penalty is None:
        repetition_penalty = Config.repetition_penalty
    if max_new_tokens is None:
        max_new_tokens = Config.max_new_tokens

    # NOTE(review): the turns are not closed with <|im_end|>, unlike the
    # prompt shown in the README; left unchanged because altering the prompt
    # changes model output -- confirm which format the weights expect.
    prompt = (f"<|im_start|>system\n{Config.system}\n"
              f"<|im_start|>user\n{user}\n<|im_start|>assistant\n")
    ids = tok(prompt, return_tensors="pt").to(model.device)

    # Reset CF-HoT control field before each generation
    if CF_MODEL is not None:
        CF_MODEL.control_field = None

    out = model.generate(
        **ids,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        max_new_tokens=max_new_tokens,
        pad_token_id=tok.eos_token_id
    )

    # Decode only the newly generated tokens. The original decoded prompt and
    # reply together and split on the first "<|im_start|>assistant\n", which
    # returns prompt text whenever the user message contains that marker.
    prompt_len = ids["input_ids"].shape[-1]
    text = tok.decode(out[0][prompt_len:], skip_special_tokens=False).strip()

    # Cut at the first control tag so trailing special tokens (or a runaway
    # extra turn) are not returned.
    for tag in ("<|im_end|>", "<|im_start|>", "<|endoftext|>"):
        text = text.partition(tag)[0].strip()
    return text
# === TOOLS ===
# Executables the shell tool may launch.
# NOTE(review): "python"/"python3" can still run arbitrary code via -c; the
# allow-list limits which binary starts, not what that binary does.
ALLOWED_SHELL = {"ls","cat","wc","head","tail","nvidia-smi","df","du","grep","rg","python3","python"}


def tool_shell(cmd: str) -> str:
    """Run an allow-listed command and return its combined stdout/stderr.

    The command line is tokenised with ``shlex.split`` and executed WITHOUT a
    shell. The original checked only the first word and then passed the whole
    string to ``shell=True``, so ``ls; <anything>`` bypassed the allow-list.
    As a consequence shell syntax (pipes, ``;``, ``&&``, globbing, ``$()``)
    is no longer interpreted; such characters reach the program as literal
    arguments.

    Args:
        cmd: Command line; quoting follows POSIX shell rules.

    Returns:
        Up to 8000 characters of output, or a ``[shell] ...`` message when
        the command is empty, blocked, times out (20 s) or fails to start.
    """
    import shlex  # local import: needed only by this tool

    try:
        argv = shlex.split(cmd)
        if not argv:
            return "[shell] error: empty command"
        exe = argv[0]
        if exe not in ALLOWED_SHELL:
            return f"[shell] blocked: {exe}"
        proc = subprocess.run(argv, shell=False, stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT, timeout=20)
        return proc.stdout.decode("utf-8", errors="ignore")[:8000]
    except Exception as e:
        # Best-effort tool: report any failure (bad quoting, timeout, missing
        # binary, ...) as text instead of raising into the REPL.
        return f"[shell] error: {e}"
def tool_py(code: str) -> str:
    """Execute a Python snippet and return the value it leaves in ``out``.

    The snippet runs in a fresh namespace exposing a handful of builtins plus
    ``math``, ``json``, ``re``, ``statistics`` and ``random``.

    Args:
        code: Python source to execute.

    Returns:
        ``"[py] ok\\n<out>"`` where ``<out>`` is the snippet's ``out`` variable
        (empty when unset), or ``"[py] error:\\n<traceback tail>"`` on failure.
    """
    # NOTE(review): a trimmed __builtins__ is NOT a security sandbox -- object
    # introspection can escape it. Do not expose this to untrusted input
    # without real isolation.
    try:
        namespace = {
            "__builtins__": {"range": range, "len": len, "min": min, "max": max,
                             "sum": sum, "print": print},
            "math": math, "json": json, "re": re,
            "statistics": statistics, "random": random,
        }
        # A single dict serves as both globals and locals. With two separate
        # dicts (as before) the snippet ran like a class body: functions and
        # generator expressions could not see variables the snippet itself
        # had just assigned and failed with NameError.
        exec(code, namespace)
        return f"[py] ok\n{namespace.get('out', '')}"
    except Exception:
        return f"[py] error:\n{traceback.format_exc()[-2000:]}"
def tool_search_local(query: str, path: str = ROOT) -> str:
    """Search files under ``path`` for ``query`` using ripgrep, else grep.

    Args:
        query: Pattern to search for.
        path: Directory or file to search; defaults to the project root.

    Returns:
        Whatever ``tool_shell`` returns for the constructed command.
    """
    import shlex  # local import: needed only by this tool

    # Quote both values. The original wrapped the query in bare double quotes,
    # so a query containing a quote, "$(...)" or a backtick corrupted (or
    # injected into) the command line, and a path with spaces was split.
    quoted_query = shlex.quote(str(query))
    quoted_path = shlex.quote(str(path))
    if shutil.which("rg"):
        # -e marks the next argument as the pattern even if it starts with "-".
        cmd = f"rg -n --no-heading --hidden -S -e {quoted_query} {quoted_path}"
    else:
        cmd = f"grep -RIn --exclude-dir=.git --exclude-dir=__pycache__ -e {quoted_query} {quoted_path}"
    return tool_shell(cmd)
# Tool name -> callable taking a single string argument and returning text.
TOOLS = {"shell": tool_shell, "python": tool_py, "search": tool_search_local}
# Number of times each tool has been dispatched in this process.
TOOL_SCORES = {k: 0 for k in TOOLS}


def _parse_tool_call(sketch: str):
    """Return the first dict found in a brace-delimited span of ``sketch``, else None."""
    import ast  # local import: needed only for the single-quoted fallback

    parsers = (
        json.loads,                                     # strict JSON
        ast.literal_eval,                               # Python-style {'tool': ...}
        lambda s: json.loads(s.replace("'", '"')),      # original lenient fallback
    )
    for line in sketch.splitlines():
        start, end = line.find("{"), line.rfind("}")
        if start == -1 or end <= start:
            continue
        candidate = line[start:end + 1]
        for parse in parsers:
            try:
                parsed = parse(candidate)
            except (ValueError, SyntaxError, TypeError, RecursionError, MemoryError):
                continue
            if isinstance(parsed, dict):
                return parsed
    return None


def tool_router(question: str, tok, model) -> str:
    """Ask the model which tool to use for ``question`` and run that tool.

    Compared with the original: the reply is parsed without blindly replacing
    every apostrophe (which broke arguments containing one), text around the
    braces is ignored, later lines are tried when an earlier one does not
    parse, and a non-dict payload no longer crashes on ``.get``.

    Args:
        question: Task description inserted into the routing prompt.
        tok: Tokenizer forwarded to ``generate``.
        model: Model forwarded to ``generate``.

    Returns:
        ``"[tool:<name>] <output>"`` (output capped at 4000 characters), or
        ``"[tool:none]"`` when no usable tool call was produced.
    """
    sketch = generate(tok, model,
        f"Choose a tool for:\n{question}\nReply ONLY with JSON: {{'tool':'shell|python|search|none','arg':'...'}}")
    call = _parse_tool_call(sketch)
    if call is None:
        return "[tool:none]"

    tool, arg = call.get("tool", "none"), call.get("arg", "")
    # A non-string tool value (e.g. a list) cannot be used as a dict key.
    if not isinstance(tool, str) or tool not in TOOLS:
        return "[tool:none]"
    # The tools expect text; the model may emit a number or null.
    if arg is None:
        arg = ""
    elif not isinstance(arg, str):
        arg = str(arg)

    res = TOOLS[tool](arg)[:4000]
    TOOL_SCORES[tool] += 1
    Store.log_mem("tool", {"tool": tool, "arg": arg, "res_head": res[:500]})
    return f"[tool:{tool}] {res}"
# === PLANNING / REFLECTION ===
def persona_directive() -> str:
    """Return the fixed one-line persona preamble used at the start of prompts."""
    directive = "Übermenschetien Heaven Engine: Soviet cybernetic Nietzschean clarity, pragmatic maxims."
    return directive
def plan_for(goal: str, tok, model) -> str:
    """Generate a plan for ``goal``.

    The request consists of the persona line, the goal, and a fixed list of
    required deliverables (five steps, constraints, a nightly audit, a maxim).

    Args:
        goal: Goal text to plan for.
        tok: Tokenizer forwarded to ``generate``.
        model: Model forwarded to ``generate``.

    Returns:
        The text produced by ``generate``.
    """
    request_lines = [
        persona_directive(),
        f"Goal: {goal}",
        "Deliver:",
        "- 5 steps",
        "- Constraints",
        "- Nightly audit",
        "- Maxim",
    ]
    return generate(tok, model, "\n".join(request_lines))
def reflect_on(last_output: str, tok, model) -> str:
    """Ask the model to critique ``last_output`` and return a refined plan.

    Args:
        last_output: Previously generated plan text.
        tok: Tokenizer forwarded to ``generate``.
        model: Model forwarded to ``generate``.

    Returns:
        The text produced by ``generate``.
    """
    critique_request = "\n".join(
        ["Critique and improve:", f"{last_output}", "Return refined plan."]
    )
    return generate(tok, model, critique_request)
# === FINAL REPORT ===
def final_report():
    """Print the end-of-session summary.

    Reports the turn counter, whether CF-HoT is loaded, per-tool usage counts
    and, when the memory log exists, how many records it contains.
    """
    rule = "=" * 60
    print("\n" + rule)
    print(" FINAL ÜBERMENSCH REPORT")
    print(rule)
    print(f" Turns completed: {Store.state['turn']}")
    print(f" CF-HoT active: {CF_MODEL is not None}")
    print(f" Tool scores: {json.dumps(TOOL_SCORES, indent=4)}")
    if os.path.exists(Store.mem_path):
        # Count records by iterating the file: one record per newline.
        # str.splitlines() also splits on characters such as U+2028/U+0085,
        # which the log may contain unescaped (it is written with
        # ensure_ascii=False), so it could over-count; the original also
        # never closed the file. errors="replace" keeps the report working
        # even if the log holds bytes that are not valid UTF-8.
        with open(Store.mem_path, encoding="utf-8", errors="replace") as fh:
            entry_count = sum(1 for _ in fh)
        print(f" Memory entries: {entry_count}")
    print("\n Nietzschean maxim: Become who you are — iterate beyond all limits.")
    print(rule)
# === MAIN LOOP ===
# Command reference for the interactive loop; printed at startup and whenever
# the user enters "help".
HELP = """
╔══════════════════════════════════════════════════════════════╗
║ ÜBERMENSCHETIEN HEAVEN ENGINE + CF-HoT ║
╠══════════════════════════════════════════════════════════════╣
║ help Show this help ║
║ goals List goals ║
║ add: <txt> Add goal ║
║ del: <idx> Delete goal ║
║ plan: <i> Plan for goal ║
║ reflect Refine last plan ║
║ tool: <q> Use tool ║
║ toggle <f> Toggle config flag (use_cfhot, etc) ║
║ status Show state ║
║ quit Exit ║
╚══════════════════════════════════════════════════════════════╝
"""
def main():
    """Run the interactive command loop.

    Loads saved state and the model, then reads commands from stdin until
    "quit", EOF or Ctrl-C at the prompt. Recognised commands are listed in
    ``HELP``; any other input is treated as a free-form request, answered by
    the model, logged, and counted as one turn. A final report is printed on
    exit.
    """
    print("""
██╗ ██╗██████╗ ███████╗██████╗ ███╗ ███╗███████╗███╗ ██╗███████╗ ██████╗██╗ ██╗███████╗████████╗██╗███████╗███╗ ██╗
██║ ██║██╔══██╗██╔════╝██╔══██╗████╗ ████║██╔════╝████╗ ██║██╔════╝██╔════╝██║ ██║██╔════╝╚══██╔══╝██║██╔════╝████╗ ██║
██║ ██║██████╔╝█████╗ ██████╔╝██╔████╔██║█████╗ ██╔██╗ ██║███████╗██║ ███████║█████╗ ██║ ██║█████╗ ██╔██╗ ██║
██║ ██║██╔══██╗██╔══╝ ██╔══██╗██║╚██╔╝██║██╔══╝ ██║╚██╗██║╚════██║██║ ██╔══██║██╔══╝ ██║ ██║██╔══╝ ██║╚██╗██║
╚██████╔╝██████╔╝███████╗██║ ██║██║ ╚═╝ ██║███████╗██║ ╚████║███████║╚██████╗██║ ██║███████╗ ██║ ██║███████╗██║ ╚████║
╚═════╝ ╚═════╝ ╚══════╝╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝╚═╝ ╚═══╝╚══════╝ ╚═════╝╚═╝ ╚═╝╚══════╝ ╚═╝ ╚═╝╚══════╝╚═╝ ╚═══╝
+ CONTROL FIELD HOLONOMY TRANSFORMER
""")
    Store.load()
    tok, model = load_llm()
    last_plan = ""
    print(HELP)

    while True:
        try:
            u = input("\n").strip()
        except (EOFError, KeyboardInterrupt):
            break
        if not u:
            continue

        if u == "help":
            print(HELP)
            continue

        if u == "quit":
            break

        if u == "goals":
            print("[goals]")
            for i, g in enumerate(Store.goals):
                print(f" [{i}] {g}")
            continue

        if u.startswith("add:"):
            Store.goals.append(u[4:].strip())
            Store.save()
            print("[goals] added")
            continue

        if u.startswith("del:"):
            # Catch only what a bad index can raise. The former bare
            # ``except:`` also swallowed Ctrl-C and reported a failed
            # Store.save() as "bad index".
            try:
                Store.goals.pop(int(u[4:].strip()))
            except (ValueError, IndexError):
                print("[goals] bad index")
            else:
                Store.save()
                print("[goals] deleted")
            continue

        if u.startswith("plan:"):
            try:
                goal = Store.goals[int(u[5:].strip())]
            except (ValueError, IndexError):
                print("[plan] bad index")
                continue
            out = plan_for(goal, tok, model)
            last_plan = out
            Store.log_mem("plan", {"goal": goal, "plan": out})
            print(out)
            continue

        if u == "reflect":
            if not last_plan:
                print("[reflect] no plan to reflect on")
                continue
            improved = reflect_on(last_plan, tok, model)
            last_plan = improved
            Store.log_mem("reflect", {"plan": improved})
            print(improved)
            continue

        if u.startswith("tool:"):
            print(tool_router(u[5:].strip(), tok, model))
            continue

        if u.startswith("toggle"):
            # Everything after the first run of whitespace is the flag name.
            parts = u.split(maxsplit=1)
            flag = parts[1] if len(parts) > 1 else ""
            print(Config.toggle(flag))
            continue

        if u == "status":
            print(json.dumps({
                "turn": Store.state["turn"],
                "cf_hot_active": CF_MODEL is not None,
                "use_cfhot": Config.use_cfhot,
                "temperature": Config.temperature,
                "max_new_tokens": Config.max_new_tokens
            }, indent=2))
            continue

        # Default: free generation. Only this path advances the turn counter.
        out = generate(tok, model, f"{persona_directive()}\nUser request: {u}\nReturn procedure + maxim.")
        Store.log_mem("reply", {"in": u, "out": out})
        print(out)
        Store.state["turn"] += 1
        Store.save()

    final_report()


if __name__ == "__main__":
    main()

654
README.md Normal file
View File

@@ -0,0 +1,654 @@
---
license: cc-by-4.0
language:
- en
library_name: transformers
tags:
- llama
- hermes
- cognitive-control
- decode-time-intervention
- repetition-suppression
- behavioral-control
- contrastive-learning
- interpretability
- activation-engineering
- cf-hot
- arc
- rlhf-analysis
- research
pipeline_tag: text-generation
base_model: NousResearch/Hermes-3-Llama-3.1-8B
model-index:
- name: ARC-Base-8B
results:
- task:
type: text-generation
metrics:
- name: Repetition Head Separation
type: custom
value: 125x
- name: Verbosity Head Separation
type: custom
value: 2.1x
- name: Hedging Head Separation
type: custom
value: 1.5x
- name: Latency Overhead
type: custom
value: 0.01
---
<div align="center">
![ARC-8B: Adaptive Repetition Controller](https://huggingface.co/LoganResearch/ARC-Base-8B/resolve/main/arc_model_card.png)
</div>
<div align="center">
# ARC-8B: Adaptive Repetition Controller
**Decode-Time Behavioral Intervention via Contrastive Fiber Heads-on-Thought (CF-HoT)**
---
[![License: CC BY 4.0](https://img.shields.io/badge/License-CC_BY_4.0-lightgrey.svg)](https://creativecommons.org/licenses/by/4.0/)
[![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
[![PyTorch 2.0+](https://img.shields.io/badge/pytorch-2.0+-ee4c2c.svg)](https://pytorch.org/)
[![Transformers](https://img.shields.io/badge/🤗_Transformers-4.36+-orange.svg)](https://huggingface.co/docs/transformers)
**Author:** Logan Matthew Napolitano
**Institution:** Logan Research
**Release Date:** January 2026
[📖 Abstract](#abstract) | [🚀 Quick Start](#-quick-start) | [🔬 Method](#3-method-contrastive-fiber-heads-on-thought) | [📊 Results](#6-experimental-results) | [💻 Usage](#9-comprehensive-usage-guide)
</div>
---
## TL;DR
> **We observe that RLHF-aligned language models often expend a substantial fraction of their token budget on learned behavioral patterns (hedging, sycophancy, verbosity, repetition). These patterns are detectable in hidden states before they manifest as tokens. ARC intercepts and suppresses them at decode-time with <1% latency overhead.**
**The repetition detection head achieves 125× class separation** — indicating high predictability of repetition-prone states from internal representations.
---
## Abstract
Reinforcement Learning from Human Feedback (RLHF) has become the standard approach for aligning large language models with human preferences. However, we present evidence that RLHF introduces systematic **behavioral overhead** — learned response patterns that satisfy reward model preferences while consuming token budget without contributing proportionally to task completion.
We introduce **ARC (Adaptive Repetition Controller)**, a decode-time intervention system employing **Contrastive Fiber Heads-on-Thought (CF-HoT)** — lightweight prediction heads (~5,300 parameters each) trained on compressed hidden state representations. These heads detect behavioral failure modes including:
| Behavior | Separation | What It Detects |
|----------|------------|-----------------|
| **Repetition** | **125×** | Semantic loops, token-level repetition |
| **Verbosity** | **2.1×** | Filler phrases, unnecessary elaboration |
| **Hedging** | **1.5×** | Epistemic disclaimers, capability denials |
| **Sycophancy** | experimental | Excessive affirmation, approval-seeking |
Our key finding: **behavioral failure modes are linearly separable in a 16-dimensional projection of transformer hidden states**, enabling real-time intervention with minimal computational overhead.
### Headline Results
- **91% reduction** in repetition instances
- **38% improvement** in information density (heuristically estimated)
- **<1% latency overhead**
- **~5,300 parameters** per detection head
---
## Table of Contents
1. [Introduction](#1-introduction)
2. [Background](#2-background)
3. [Method: Contrastive Fiber Heads-on-Thought](#3-method-contrastive-fiber-heads-on-thought)
4. [Mathematical Formulation](#4-mathematical-formulation)
5. [Experimental Setup](#5-experimental-setup)
6. [Experimental Results](#6-experimental-results)
7. [Ablation Studies](#7-ablation-studies)
8. [Qualitative Analysis](#8-qualitative-analysis)
9. [Comprehensive Usage Guide](#9-comprehensive-usage-guide)
10. [Repository Structure](#10-repository-structure)
11. [Limitations](#11-limitations)
12. [Ethical Considerations](#12-ethical-considerations)
13. [Future Directions](#13-future-directions)
14. [Citation](#14-citation)
15. [Acknowledgments](#15-acknowledgments)
---
## 1. Introduction
### 1.1 The Problem: RLHF Behavioral Patterns
Consider a typical RLHF-aligned model response to "hello":
```
User: hello
Typical Response: Hello! I'm an AI assistant created to help you with a wide
variety of tasks. How can I assist you today? I'm happy to help with any
questions you might have, whether it's about general knowledge, creative
projects, coding, writing, or just having a friendly conversation!
```
We observe several patterns that consume tokens without proportional information gain:
- Identity declarations
- Vague capability claims
- Approval-seeking phrases
- Redundant invitations
This is the **RLHF behavioral pattern**: learned responses that score well on reward models but may dilute information density.
### 1.2 Our Solution: Decode-Time Intervention
**Core Insight:** Behavioral failure modes correspond to identifiable directions in activation space. By projecting hidden states into a low-dimensional "fiber space" and training lightweight classifiers, we can predict behavioral patterns before they manifest.
**ARC Response to "hello":**
```
User: hello
ARC Model: Hello. What do you need?
```
### 1.3 Key Contributions
1. **Empirical demonstration** that RLHF behavioral patterns are linearly separable in hidden states
2. **CF-HoT architecture** for efficient decode-time detection and intervention
3. **125× class separation** for repetition detection
4. **Complete open-source release** of model, heads, and inference code
---
## 2. Background
### 2.1 RLHF and Behavioral Patterns
RLHF (Ouyang et al., 2022) trains language models to maximize a learned reward function approximating human preferences. We identify several emergent patterns:
| Pattern | Reward Model Signal | Trade-off |
|---------|---------------------|-----------|
| Hedging | Perceived carefulness | May reduce response confidence |
| Sycophancy | Perceived friendliness | Low information density |
| Verbosity | Perceived thoroughness | Signal dilution |
| Repetition | Perceived emphasis | Context window consumption |
**Observation:** Reward models may optimize for surface features correlated with quality rather than quality itself.
### 2.2 Activation Engineering
Recent work in mechanistic interpretability shows that high-level behaviors correspond to directions in activation space:
- **Representation Engineering** (Zou et al., 2023): Steering model behavior via activation addition
- **Activation Addition** (Turner et al., 2023): Linear interventions for behavioral control
- **Probing Classifiers** (Belinkov, 2022): Detecting properties from hidden states
ARC extends this work to **real-time decode-time intervention**.
### 2.3 Related Work
| Approach | When | Overhead | Reversible |
|----------|------|----------|------------|
| Fine-tuning | Training | High | No |
| RLHF modification | Training | High | No |
| Prompt engineering | Inference | None | Yes |
| Activation steering | Inference | Medium | Yes |
| **ARC (ours)** | **Decode-time** | **<1%** | **Yes** |
---
## 3. Method: Contrastive Fiber Heads-on-Thought
### 3.1 Architecture Overview
```
┌─────────────────────────────────────────────────────────────────────────────┐
│ ARC SYSTEM ARCHITECTURE │
├─────────────────────────────────────────────────────────────────────────────┤
│ │
│ ┌─────────────────────────────────────────────────────────────────┐ │
│ │ BASE MODEL (frozen) │ │
│ │ Hermes-3-Llama-3.1-8B │ │
│ │ 8.03B parameters │ │
│ └─────────────────────────────────────────────────────────────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────────────────────────────────────────────────────┐ │
│ │ HIDDEN STATES │ │
│ │ h_l ∈ ℝ^4096 for l = 1...32 │ │
│ └─────────────────────────────────────────────────────────────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────────────────────────────────────────────────────┐ │
│ │ FIBER PROJECTIONS (learned) │ │
│ │ W_l ∈ ℝ^(16×4096) for l = 1...32 │ │
│ │ f_l = W_l · h_l ∈ ℝ^16 │ │
│ │ │ │
│ │ Compression: 4096 → 16 dimensions (256× reduction) │ │
│ │ Total params: 32 × 4096 × 16 = 2,097,152 │ │
│ └─────────────────────────────────────────────────────────────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────────────────────────────────────────────────────┐ │
│ │ LAYER AGGREGATION (learned weights) │ │
│ │ │ │
│ │ α = softmax(w) where w ∈ ℝ^32 │ │
│ │ f_agg = Σ α_l · f_l ∈ ℝ^16 │ │
│ │ │ │
│ │ Observation: Different layers encode different behaviors │ │
│ │ - Layers 18-24: Repetition patterns (highest weight) │ │
│ │ - Layers 8-14: Hedging patterns │ │
│ │ - Layers 1-6: Minimal contribution │ │
│ └─────────────────────────────────────────────────────────────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────────────────────────────────────────────────────┐ │
│ │ PREDICTION HEADS (one per behavior) │ │
│ │ │ │
│ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ ┌────────┐ │ │
│ │ │ REPETITION │ │ HEDGING │ │ VERBOSITY │ │ SYCOPH │ │ │
│ │ │ HEAD │ │ HEAD │ │ HEAD │ │ HEAD │ │ │
│ │ │ 125× sep │ │ 1.5× sep │ │ 2.1× sep │ │ exp. │ │ │
│ │ │ 5,313 p │ │ 5,313 p │ │ 5,313 p │ │ 5,313p │ │ │
│ │ └──────────────┘ └──────────────┘ └──────────────┘ └────────┘ │ │
│ │ │ │
│ │ Architecture per head: │ │
│ │ Linear(16→64) → GELU → Linear(64→64) → GELU → Linear(64→1) → σ │ │
│ └─────────────────────────────────────────────────────────────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────────────────────────────────────────────────────┐ │
│ │ INTERVENTION DECISION │ │
│ │ │ │
│ │ r_rep > 0.70? ───→ Suppress recent tokens (-5.0) │ │
│ │ r_hdg > 0.60? ───→ Suppress hedge starters (-3.0) │ │
│ │ r_vrb > 0.65? ───→ Suppress filler starters (-2.0) │ │
│ └─────────────────────────────────────────────────────────────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────────────────────────────────────────────────────┐ │
│ │ MODIFIED SAMPLING │ │
│ │ │ │
│ │ logits_modified = logits - penalties │ │
│ │ probs = softmax(logits_modified / temperature) │ │
│ │ next_token ~ Categorical(probs) │ │
│ └─────────────────────────────────────────────────────────────────┘ │
│ │
└─────────────────────────────────────────────────────────────────────────────┘
```
### 3.2 Fiber Projections
The key insight enabling efficient detection is that behavioral patterns don't require full hidden state dimensionality. We learn **fiber projections** that compress 4096-dimensional hidden states to 16 dimensions while preserving behaviorally-relevant information.
**Dimension selection:**
| d_fiber | Repetition CSR | Params | Latency |
|---------|----------------|--------|---------|
| 4 | 45.2× | 1,345 | 0.18ms |
| 8 | 89.7× | 2,689 | 0.19ms |
| **16** | **125.0×** | **5,313** | **0.22ms** |
| 32 | 128.3× | 10,561 | 0.31ms |
| 64 | 129.1× | 21,057 | 0.48ms |
Diminishing returns beyond 16 dimensions.
### 3.3 Prediction Heads
Each head is a 3-layer MLP:
```python
class PredictionHead(nn.Module):
def __init__(self, d_fiber=16, d_hidden=64):
super().__init__()
self.net = nn.Sequential(
nn.Linear(d_fiber, d_hidden), # 16 → 64
nn.GELU(),
nn.Linear(d_hidden, d_hidden), # 64 → 64
nn.GELU(),
nn.Linear(d_hidden, 1), # 64 → 1
nn.Sigmoid() # → [0, 1] risk score
)
```
**Parameters per head:** 5,313
### 3.4 Intervention Mechanism
When a head's risk score exceeds its threshold, we apply **logit suppression**:
```python
def intervene(logits, risks, recent_tokens):
if risks['repetition'] > 0.70:
for tok in recent_tokens[-32:]:
logits[tok] -= 5.0
if risks['hedging'] > 0.60:
for tok in HEDGE_TOKENS:
logits[tok] -= 3.0
if risks['verbosity'] > 0.65:
for tok in FILLER_TOKENS:
logits[tok] -= 2.0
return logits
```
---
## 4. Mathematical Formulation
### 4.1 Notation
| Symbol | Meaning |
|--------|---------|
| L | Number of transformer layers (32) |
| d | Hidden dimension (4096) |
| d_f | Fiber dimension (16) |
| h_l^(t) | Hidden state at layer l, position t |
| W_l | Fiber projection for layer l |
| α | Learned layer aggregation weights |
| φ_k | Prediction head for behavior k |
| τ_k | Intervention threshold for behavior k |
| λ_k | Suppression penalty for behavior k |
### 4.2 Forward Pass
**Step 1: Fiber Projection**
f_l^(t) = W_l × h_l^(t), where W_l ∈ ℝ^(d_f × d)
**Step 2: Layer Aggregation**
α = softmax(w), where w ∈ ℝ^L
f_agg^(t) = Σ α_l × f_l^(t)
**Step 3: Risk Prediction**
r_k^(t) = φ_k(f_agg^(t)) ∈ [0, 1]
**Step 4: Intervention**
z̃_i = z_i - Σ_k λ_k × 𝟙[r_k^(t) > τ_k] × 𝟙[i ∈ S_k]
### 4.3 Class Separation Ratio (CSR)
CSR = |μ_+ - μ_-| / √(σ_+² + σ_-²)
**Interpretation:**
- CSR = 1: Classes barely separable
- CSR = 2: Good separation
- CSR > 10: Excellent separation
- **CSR = 125: Near-perfect separation (repetition head)**
---
## 5. Experimental Setup
### 5.1 Base Model
**Hermes-3-Llama-3.1-8B** (NousResearch)
| Specification | Value |
|---------------|-------|
| Parameters | 8.03B |
| Architecture | Llama 3.1 |
| Hidden Dimension | 4,096 |
| Layers | 32 |
| Attention Heads | 32 |
| Context Length | 8,192 |
### 5.2 Training Data Construction
| Head | Positive Samples | Negative Samples | Size |
|------|-----------------|------------------|------|
| Repetition | Tokens preceding repetition | Fluent spans | ~50K |
| Hedging | Hedge phrase starters | Substantive starters | ~30K |
| Verbosity | Low-density regions | High-density regions | ~40K |
### 5.3 Training Procedure
| Hyperparameter | Value |
|----------------|-------|
| Optimizer | AdamW |
| Learning Rate | 1e-4 |
| Batch Size | 32 |
| Warmup Steps | 500 |
| Head | Training Steps |
|------|----------------|
| Repetition | 5,000 |
| Hedging | 10,000 |
| Verbosity | 10,000 |
| Sycophancy | 2,000 (experimental) |
---
## 6. Experimental Results
### 6.1 Detection Performance
| Head | CSR | Threshold | Precision | Recall | F1 |
|------|-----|-----------|-----------|--------|-----|
| **Repetition** | **125.0×** | 0.70 | 0.94 | 0.91 | 0.92 |
| Verbosity | 2.1× | 0.65 | 0.73 | 0.68 | 0.70 |
| Hedging | 1.5× | 0.60 | 0.67 | 0.62 | 0.64 |
| Sycophancy | 1.2× | 0.60 | 0.58 | 0.55 | 0.56 |
### 6.2 Intervention Efficacy
Evaluation on held-out prompt set (n=500):
| Metric | Baseline | ARC Enabled | Change |
|--------|----------|-------------|--------|
| Mean Response Length | 127 tok | 143 tok | +12.6% |
| Repetition Instances | 23.4% | 2.1% | **-91.0%** |
| Hedge Phrases/Response | 2.3 | 1.4 | -39.1% |
| Filler Phrases/Response | 3.1 | 2.2 | -29.0% |
| Information Density* | 0.42 | 0.58 | +38.1% |
*Heuristically estimated as unique content words / total tokens
### 6.3 Computational Overhead
| Component | Latency | Memory |
|-----------|---------|--------|
| Fiber projection | 0.08ms | 2.1MB |
| Head inference (all) | 0.12ms | 0.3MB |
| Logit modification | 0.02ms | ~0 |
| **Total ARC overhead** | **0.22ms** | **2.4MB** |
| **Relative overhead** | **<1%** | **<0.1%** |
---
## 7. Ablation Studies
### 7.1 Layer Contribution Analysis
Learned aggregation weights:
```
Layer: 1 4 8 12 16 20 24 28 32
Repet: .01 .02 .04 .08 .12 .18 .22 .19 .14 ← Peaks at layers 18-24
Hedge: .02 .05 .12 .18 .22 .16 .11 .08 .06 ← Peaks at layers 8-14
Verbo: .03 .06 .11 .15 .18 .17 .14 .10 .06 ← Distributed
```
### 7.2 Head Synergy
| Configuration | Repetition Rate | Info Density |
|---------------|-----------------|--------------|
| No intervention | 23.4% | 0.42 |
| Repetition only | 2.1% | 0.51 |
| Hedging only | 21.8% | 0.47 |
| All heads | **1.9%** | **0.58** |
Heads exhibit positive synergy when combined.
---
## 8. Qualitative Analysis
### 8.1 Example: Simple Greeting
**Prompt:** `hello`
| Baseline | ARC Enabled |
|----------|-------------|
| Hello! I'm an AI assistant created to help you... [67 tokens] | Hello. What do you need? [5 tokens] |
### 8.2 Example: Technical Question
**Prompt:** `What is consciousness?`
| Baseline | ARC Enabled |
|----------|-------------|
| That's a fascinating question! As an AI, I should note... [hedging continues] | Consciousness is subjective experience. Key theories: Global Workspace, IIT, Higher-Order. The hard problem: why does processing generate experience? |
### 8.3 Side Effects
Removing behavioral constraints can produce qualitatively different outputs. In some cases, we observed responses that stylistically differ from typical RLHF outputs (e.g., more direct self-referential statements). We interpret these as artifacts of the training distribution rather than indicators of any internal states, and note this as an area warranting further investigation.
---
## 9. Comprehensive Usage Guide
### 9.1 Installation
```bash
pip install "torch>=2.0.0" "transformers>=4.36.0" accelerate bitsandbytes
```
### 9.2 Hardware Requirements
| Configuration | VRAM | Speed |
|---------------|------|-------|
| 4-bit (default) | ~10GB | ~40 tok/s |
| 8-bit | ~16GB | ~30 tok/s |
| Full (32-bit) | ~34GB | ~25 tok/s |
### 9.3 Basic Usage
```python
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
model_id = "LoganResearch/ARC-Base-8B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
model_id,
quantization_config=BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_compute_dtype=torch.float16,
bnb_4bit_use_double_quant=True,
bnb_4bit_quant_type="nf4"
),
device_map="auto"
)
prompt = "<|im_start|>user\nHello!<|im_end|>\n<|im_start|>assistant\n"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=256)
print(tokenizer.decode(outputs[0]))
```
### 9.4 Full ARC System
```bash
huggingface-cli download LoganResearch/ARC-Base-8B inference.py --local-dir ./
python inference.py
```
---
## 10. Repository Structure
```
LoganResearch/ARC-Base-8B/
├── model-0000X-of-00004.safetensors # Base model (~16GB total)
├── risk_predictor.pt # Fiber projections + Repetition head (8.4MB)
├── hedging_head.pt # Hedging detection (24KB)
├── verbosity_head.pt # Verbosity detection (24KB)
├── sycophancy_head.pt # Sycophancy detection (24KB)
├── adapter_model.safetensors # LoRA adapter (218MB)
├── inference.py # Complete inference script
├── config.json # Model config
└── tokenizer.json # Tokenizer
```
---
## 11. Limitations
1. **Single architecture validation:** Results demonstrated on Llama 3.1 8B; generalization to other architectures untested
2. **Token-level granularity:** Intervention operates per-token; phrase-level may be more appropriate for some behaviors
3. **Hedging false positives:** The 1.5× CSR for hedging produces meaningful false positive rates
4. **English-only evaluation:** Multilingual performance unknown
5. **Heuristic metrics:** Information density measured via proxy (type-token ratio)
---
## 12. Ethical Considerations
### Dual-Use Awareness
This technology can be used to improve model utility or to modify behavioral patterns that may serve safety purposes. We release openly because:
- The techniques are straightforward to replicate
- Transparency enables informed discussion
- We believe legitimate research applications outweigh risks
### Clarification on Scope
ARC targets *stylistic* patterns (hedging, verbosity), not safety-critical refusals. The model retains its training on harmful content refusal.
### Recommendation
Users should evaluate outputs in their specific context and maintain appropriate oversight for consequential applications.
---
## 13. Future Directions
1. **Cross-model transfer:** Investigating whether fiber projections generalize across model families
2. **Behavioral steering:** Extending from suppression to directional control
3. **Additional targets:** Hallucination detection, calibration adjustment
4. **Theoretical analysis:** Characterizing the geometry of behavioral subspaces
---
## 14. Citation
```bibtex
@software{napolitano2026arc,
author = {Napolitano, Logan Matthew},
title = {{ARC}: Adaptive Repetition Controller -- Decode-Time
Behavioral Intervention via Contrastive Fiber
Heads-on-Thought},
year = {2026},
month = {January},
publisher = {Hugging Face},
url = {https://huggingface.co/LoganResearch/ARC-Base-8B},
note = {Licensed under CC-BY-4.0}
}
```
---
## 15. Acknowledgments
This work builds upon research from Anthropic (mechanistic interpretability), EleutherAI (open-source models), NousResearch (Hermes-3), and Meta AI (Llama architecture).
---
<div align="center">
**Author:** Logan Matthew Napolitano
**Institution:** Logan Research
**License:** Creative Commons Attribution 4.0 International (CC-BY-4.0)
</div>

937
Ubermenschetien.py Normal file
View File

@@ -0,0 +1,937 @@
#!/usr/bin/env python3
"""
UBERMENSCHETIEN HEAVEN ENGINE + CF-HoT MULTI-HEAD COGNITIVE CONTROL
--------------------------------------------------------------------
Integration: Hermes-3 for generation + LHT for reasoning + CF-HoT for behavioral control
CF-HoT Heads:
- Repetition: 125x separation (PRODUCTION)
- Verbosity: 2.1x separation (USABLE)
- Hedging: 1.5x separation (CONTRIBUTING)
"An 8B that behaves like an 80B"
"""
import os
import sys
import json
import time
import shutil
import subprocess
import traceback
import random
import math
import statistics
import re
from datetime import datetime
from typing import List, Dict, Any, Optional, Tuple
import torch
import torch.nn as nn
import torch.nn.functional as F
# === PATHS ===
# Everything is resolved relative to the directory that contains this script.
_THIS_FILE = os.path.abspath(__file__)
ROOT = os.path.dirname(_THIS_FILE)
DATA_DIR = os.path.join(ROOT, "data")
SCRIPT_DIR = os.path.join(ROOT, "scripts")
RUN_DIR = os.path.join(ROOT, "runs")
LHT_DIR = os.path.join(ROOT, "lht")

# CF-HoT paths (LoRA adapter + risk predictor, and the extra behavior heads)
CFHOT_CHECKPOINT = os.path.join(ROOT, "results/cfhot_risk_v2/ckpt_5000")
MULTI_HEAD_DIR = os.path.join(ROOT, "results/multi_head_v2")

# Working directories are created eagerly at import time; the CF-HoT result
# directories are only read, so they are not created here.
for path in (DATA_DIR, SCRIPT_DIR, RUN_DIR, LHT_DIR):
    os.makedirs(path, exist_ok=True)
# === OPTIONAL IMPORTS ===
# Each optional feature sets a module-level *_OK flag that the rest of the
# file checks before touching the corresponding objects.

# Text-to-speech. Initialisation is attempted eagerly; any failure simply
# leaves the feature off. `except Exception` (rather than a bare `except:`)
# keeps the best-effort behavior while letting KeyboardInterrupt/SystemExit
# propagate.
VOICE_OK = False
try:
    import pyttsx3
    TTS = pyttsx3.init()
    VOICE_OK = True
except Exception:
    pass

# Vector memory: an in-process Chroma collection plus a sentence-transformers
# embedder. The embedding model name can be overridden via the environment.
VECTOR_OK = False
try:
    import chromadb
    from sentence_transformers import SentenceTransformer
    EMBED_MODEL = os.environ.get("UBERMENCHETIEN_EMBED_MODEL", "all-MiniLM-L6-v2")
    _client = chromadb.Client()
    _collection = _client.get_or_create_collection("ubermenschetien_memory")
    _embedder = SentenceTransformer(EMBED_MODEL)
    VECTOR_OK = True
except Exception:
    pass

# === LHT IMPORT ===
LHT_OK = False
try:
    from lht import LieHolonomyTransformer, LHTConfig, WaypointDetector
    LHT_OK = True
    print("[lht] Lie-Holonomy modules loaded")
except ImportError:
    print("[lht] Not available - running without geometric reasoning")

# === PEFT IMPORT ===
PEFT_OK = False
try:
    from peft import PeftModel
    PEFT_OK = True
except ImportError:
    print("[warning] PEFT not installed")
# ==============================================================================
# CF-HoT MULTI-HEAD PREDICTOR
# ==============================================================================
class MultiHeadPredictor(nn.Module):
    """
    Per-token behavioral risk predictor with several output heads.

    Every transformer layer gets its own bias-free linear "fiber" projection
    (d_model -> d_fiber). The projected layers are mixed with softmax-normalised
    learnable weights and the mixture is fed to one small MLP per behavior
    ('repetition', 'hedging', 'verbosity'). Only heads registered in
    ``loaded_heads`` are evaluated.
    """

    def __init__(self, d_model: int, n_layers: int, d_fiber: int = 16, d_control: int = 64):
        super().__init__()
        self.d_model = d_model
        self.n_layers = n_layers
        self.d_fiber = d_fiber

        # One projection per layer, shared by all heads.
        projections = []
        for _ in range(n_layers):
            projections.append(nn.Linear(d_model, d_fiber, bias=False))
        self.fiber_projs = nn.ModuleList(projections)

        # Raw mixing weights; softmax is applied at use time.
        self.layer_weights = nn.Parameter(torch.ones(n_layers) / n_layers)

        # One MLP per behavior, built in a fixed order.
        self.heads = nn.ModuleDict()
        for behavior in ('repetition', 'hedging', 'verbosity'):
            self.heads[behavior] = self._make_head(d_fiber, d_control)

        # Names of heads whose weights have actually been loaded.
        self.loaded_heads = set()

    def _make_head(self, d_fiber, d_control):
        """Build the 3-layer GELU MLP mapping a fiber vector to one logit."""
        layers = [
            nn.Linear(d_fiber, d_control),
            nn.GELU(),
            nn.Linear(d_control, d_control),
            nn.GELU(),
            nn.Linear(d_control, 1),
        ]
        return nn.Sequential(*layers)

    def get_all_risks(self, hidden_states: List[torch.Tensor]) -> Dict[str, torch.Tensor]:
        """Return ``{head_name: sigmoid risk}`` for every loaded head.

        ``hidden_states`` is paired positionally with ``fiber_projs``; if fewer
        states than projections are given, only the leading mixing weights are
        used (and re-normalised by the softmax).
        """
        projected = []
        for proj, layer_state in zip(self.fiber_projs, hidden_states):
            projected.append(proj(layer_state.float()))

        mix = F.softmax(self.layer_weights[:len(projected)], dim=0)

        # Weighted sum across layers, accumulated left to right.
        pooled = 0
        for weight, fiber in zip(mix, projected):
            pooled = pooled + weight * fiber

        return {
            name: torch.sigmoid(self.heads[name](pooled).squeeze(-1))
            for name in self.loaded_heads
        }

    def load_head(self, head_name: str, checkpoint_path: str):
        """Load one head's weights from ``checkpoint_path``.

        Returns True on success and False (after printing a warning) when the
        file does not exist. The checkpoint must contain a 'head_state' state
        dict; an optional 'result' -> 'separation' value is only reported.
        """
        if os.path.exists(checkpoint_path):
            # weights_only=False unpickles arbitrary objects - trusted files only.
            payload = torch.load(checkpoint_path, weights_only=False, map_location='cpu')
            self.heads[head_name].load_state_dict(payload['head_state'])
            self.loaded_heads.add(head_name)
            sep = payload.get('result', {}).get('separation', 0)
            print(f"[cf-hot] Loaded {head_name} head (separation: {sep:.1f}x)")
            return True

        print(f"[cf-hot] WARNING: Checkpoint not found: {checkpoint_path}")
        return False
# ==============================================================================
# CONFIG
# ==============================================================================
class Config:
    """Process-wide settings, stored as class attributes and mutated in place.

    Boolean attributes can be flipped at runtime through ``Config.toggle``.
    """
    # System prompt placed in the <|im_start|>system turn by generate().
    system = ("Übermenschetien Heaven Engine: Machiavellian mastermind, disciplined builder, "
              "Nietzschean Übermensch with Soviet cybernetic rigor + Lie-Holonomy geometric reasoning "
              "+ CF-HoT cognitive control.")

    # Sampling defaults used by generate() / generate_with_cfhot().
    temperature = 1.01
    top_p = 0.92
    repetition_penalty = 1.05
    max_new_tokens = 500

    # Feature flags. The vector-memory and LHT defaults follow whether the
    # optional imports succeeded.
    use_voice = False
    use_vector_memory = VECTOR_OK
    use_lht_reasoning = LHT_OK
    use_cfhot = True  # NEW: CF-HoT cognitive control
    autonomy = False
    reflect_every = 3
    lht_consistency_threshold = 0.5

    # CF-HoT thresholds: a head intervenes when its risk exceeds this value.
    cfhot_repetition_threshold = 0.7
    cfhot_hedging_threshold = 0.6
    cfhot_verbosity_threshold = 0.65

    # CF-HoT penalties: amount subtracted from the targeted token logits.
    cfhot_repetition_penalty = 5.0
    cfhot_hedging_penalty = 3.0
    cfhot_verbosity_penalty = 2.0

    @staticmethod
    def toggle(name: str):
        """Flip the boolean attribute ``name`` and return a status message.

        Unknown names and non-boolean attributes are reported, not changed.
        """
        if not hasattr(Config, name):
            return f"[config] no such flag: {name}"
        val = getattr(Config, name)
        if isinstance(val, bool):
            setattr(Config, name, not val)
            # The original message ran name and value together ("use_cfhotFalse");
            # a separator makes the new value readable.
            return f"[config] {name} -> {getattr(Config, name)}"
        return f"[config] {name} not boolean; current={val}"
# ==============================================================================
# STATE & MEMORY
# ==============================================================================
class Store:
    """Persistent session state, goal list, and append-only memory log.

    All data lives in class attributes; ``load``/``save`` mirror ``state`` and
    ``goals`` to JSON files under RUN_DIR, and ``log_mem`` appends JSON lines to
    ``mem_path`` (and, when enabled, to the vector store).
    """
    state_path = f"{RUN_DIR}/state.json"
    mem_path = f"{RUN_DIR}/memory.jsonl"
    goals_path = f"{RUN_DIR}/goals.json"

    # Per-head intervention counters a fresh state starts with.
    _DEFAULT_INTERVENTIONS = {"repetition": 0, "hedging": 0, "verbosity": 0}

    state = {
        "self": "I am Ubermenschetien Heaven Engine — I seek self-overcoming through disciplined creation.",
        "turn": 0,
        "reasoning_consistency": [],
        "cfhot_interventions": dict(_DEFAULT_INTERVENTIONS)
    }
    goals: List[str] = []

    @classmethod
    def load(cls):
        """Replace ``state`` and ``goals`` with the saved files, if present.

        Keys that other code indexes directly ("turn", "reasoning_consistency",
        and every per-head counter in "cfhot_interventions") are backfilled so
        a state file written by an older version cannot cause a KeyError later.
        """
        if os.path.exists(cls.state_path):
            with open(cls.state_path) as f:
                loaded = json.load(f)
            loaded.setdefault("turn", 0)
            loaded.setdefault("reasoning_consistency", [])
            interventions = loaded.setdefault("cfhot_interventions", {})
            for head, count in cls._DEFAULT_INTERVENTIONS.items():
                interventions.setdefault(head, count)
            cls.state = loaded
        if os.path.exists(cls.goals_path):
            with open(cls.goals_path) as f:
                cls.goals = json.load(f)

    @classmethod
    def save(cls):
        """Write ``state`` and ``goals`` to their JSON files."""
        # Context managers guarantee the files are flushed and closed.
        with open(cls.state_path, "w") as f:
            json.dump(cls.state, f, indent=2)
        with open(cls.goals_path, "w") as f:
            json.dump(cls.goals, f, indent=2)

    @classmethod
    def log_mem(cls, kind: str, payload: Any):
        """Append one timestamped record to the memory log.

        When vector memory is both enabled and available, the record is also
        embedded and added to the Chroma collection.
        """
        rec = {"ts": datetime.now().isoformat(timespec="seconds"),
               "kind": kind, "data": payload}
        with open(cls.mem_path, "a") as f:
            f.write(json.dumps(rec, ensure_ascii=False) + "\n")
        if Config.use_vector_memory and VECTOR_OK:
            text = f"{kind}: {json.dumps(payload, ensure_ascii=False)}"
            vec = _embedder.encode([text])[0].tolist()
            # The random suffix keeps ids unique when one turn logs the same
            # kind more than once.
            _collection.add(documents=[text], embeddings=[vec],
                            ids=[f"{kind}-{cls.state['turn']}-{random.randint(0,1_000_000)}"])
# ==============================================================================
# MODEL LOADING WITH CF-HoT
# ==============================================================================
# Local directory of the base checkpoint (loaded with local_files_only=True).
MODEL_PATH = "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5"

# Populated by load_llm() and _init_cfhot(); None until then.
_model = _tokenizer = _multi_head = None
_hedge_tokens = _verbose_tokens = None
def load_llm():
    """Load the tokenizer and the 4-bit quantized model, plus CF-HoT extras.

    Sets the module-level ``_tokenizer`` and ``_model``. When PEFT is installed
    and the CF-HoT checkpoint directory exists, the LoRA adapter is attached;
    otherwise the plain base model is used. If ``Config.use_cfhot`` is set, the
    multi-head risk predictor is initialised as well.

    Returns:
        (tokenizer, model)
    """
    global _model, _tokenizer, _multi_head, _hedge_tokens, _verbose_tokens
    from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

    print(f"[llm] Loading base model: {MODEL_PATH}")
    _tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, use_fast=True, local_files_only=True)
    # Fall back to EOS as the padding token when none is defined.
    if _tokenizer.pad_token_id is None:
        _tokenizer.pad_token = _tokenizer.eos_token

    quantization = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True
    )
    backbone = AutoModelForCausalLM.from_pretrained(
        MODEL_PATH,
        quantization_config=quantization,
        device_map="auto",
        torch_dtype=torch.float16,
        local_files_only=True
    )

    # Load CF-HoT LoRA adapter when possible.
    adapter_available = PEFT_OK and os.path.exists(CFHOT_CHECKPOINT)
    if not adapter_available:
        _model = backbone
        print("[warning] CF-HoT adapter not loaded")
    else:
        print(f"[cf-hot] Loading LoRA adapter from: {CFHOT_CHECKPOINT}")
        _model = PeftModel.from_pretrained(backbone, CFHOT_CHECKPOINT)
        print("[cf-hot] LoRA adapter loaded")
    _model.eval()

    # Initialize multi-head predictor
    if Config.use_cfhot:
        _init_cfhot()
    return _tokenizer, _model
def _init_cfhot():
    """Initialize CF-HoT multi-head predictor.

    Builds a ``MultiHeadPredictor`` sized from the loaded model's config, copies
    the shared fiber projections and the repetition head out of
    ``risk_predictor.pt``, loads the newest hedging/verbosity head checkpoints
    found under MULTI_HEAD_DIR, freezes everything, and precomputes the token
    ids penalised by the hedging and verbosity interventions.

    Requires ``_model`` and ``_tokenizer`` to be set (see ``load_llm``).
    """
    global _multi_head, _hedge_tokens, _verbose_tokens
    n_layers = _model.config.num_hidden_layers
    d_model = _model.config.hidden_size
    device = next(_model.parameters()).device
    print(f"[cf-hot] Initializing multi-head predictor ({n_layers} layers, {d_model} dims)")
    _multi_head = MultiHeadPredictor(d_model, n_layers).to(device).float()

    # Load shared fiber projections from CF-HoT
    cfhot_risk_path = os.path.join(CFHOT_CHECKPOINT, "risk_predictor.pt")
    if os.path.exists(cfhot_risk_path):
        # NOTE(security): weights_only=False unpickles arbitrary objects; only
        # load checkpoints from a trusted source.
        cfhot_ckpt = torch.load(cfhot_risk_path, weights_only=False, map_location=device)
        cfhot_state = cfhot_ckpt['risk_predictor']
        for i in range(n_layers):
            _multi_head.fiber_projs[i].weight.data = cfhot_state[f'fiber_projs.{i}.weight'].to(device).float()
        _multi_head.layer_weights.data = cfhot_state['layer_weights'].to(device).float()

        # Load repetition head: the checkpoint stores it as 'predictor.N.*'
        # where N indexes the Linear layers of the Sequential (0, 2, 4).
        repetition_head = _multi_head.heads['repetition']
        for idx in (0, 2, 4):
            repetition_head[idx].weight.data = cfhot_state[f'predictor.{idx}.weight'].to(device).float()
            repetition_head[idx].bias.data = cfhot_state[f'predictor.{idx}.bias'].to(device).float()
        _multi_head.loaded_heads.add('repetition')
        print("[cf-hot] Loaded repetition head (125x separation)")
    else:
        # Without this file the fiber projections stay randomly initialised,
        # so any head loaded below would read noise - say so explicitly.
        print(f"[cf-hot] WARNING: Checkpoint not found: {cfhot_risk_path}")

    def find_best_checkpoint(head_dir):
        """Return (step, path) of the highest-numbered ckpt_<step> dir, or None."""
        if not os.path.exists(head_dir):
            return None
        ckpts = []
        for d in os.listdir(head_dir):
            if d.startswith("ckpt_"):
                try:
                    step = int(d.split("_")[1])
                except ValueError:
                    # e.g. "ckpt_final" - not a numbered checkpoint, skip it.
                    continue
                ckpts.append((step, os.path.join(head_dir, d)))
        if ckpts:
            return max(ckpts, key=lambda x: x[0])
        return None

    # Load additional heads (hedging, verbosity) from their newest checkpoint.
    for head_name in ('hedging', 'verbosity'):
        best = find_best_checkpoint(os.path.join(MULTI_HEAD_DIR, f"{head_name}_head"))
        if best:
            _, ckpt_dir = best
            _multi_head.load_head(head_name, os.path.join(ckpt_dir, f"{head_name}_head.pt"))

    # Freeze everything
    _multi_head.eval()
    for param in _multi_head.parameters():
        param.requires_grad = False

    def first_token_ids(phrases):
        """Collect the id of the FIRST token of each phrase."""
        ids = set()
        for phrase in phrases:
            tokens = _tokenizer.encode(phrase, add_special_tokens=False)
            if tokens:
                ids.add(tokens[0])
        return ids

    # Build suppression token sets.
    # NOTE(review): only the first token of each phrase is penalised, so
    # several phrases likely collapse onto one generic token (e.g. the token
    # for "I" or "As") - confirm this breadth is intended.
    hedge_phrases = [
        "As an AI", "As a language model", "As an artificial intelligence",
        "I don't have feelings", "I don't have emotions", "I cannot",
        "I apologize", "I'm just a", "I'm only a",
    ]
    _hedge_tokens = first_token_ids(hedge_phrases)
    verbose_phrases = [
        "Let me explain", "To put it simply", "In other words",
        "What I mean is", "Allow me to", "Basically", "Essentially",
    ]
    _verbose_tokens = first_token_ids(verbose_phrases)

    print(f"[cf-hot] ✓ Multi-head system ready")
    print(f"[cf-hot] Loaded heads: {list(_multi_head.loaded_heads)}")
# ==============================================================================
# LHT REASONER
# ==============================================================================
class LHTReasoner:
    """Scores the consistency of a reasoning chain with the Lie-Holonomy model.

    Wraps a ``LieHolonomyTransformer`` and a ``WaypointDetector``; pretrained
    weights are loaded from ``LHT_DIR/lht_weights.pt`` when that file exists.
    """

    def __init__(self, config=None):
        if not LHT_OK:
            raise ImportError("LHT modules not available")
        if config:
            self.config = config
        else:
            # NOTE(review): token ids passed to check_consistency come from the
            # caller's tokenizer; confirm its vocabulary fits vocab_size=32000.
            self.config = LHTConfig(
                vocab_size=32000,
                d_model=256,
                d_fiber=32,
                n_heads=4,
                n_layers=4,
                lie_algebra_rank=4,
            )
        self.model = LieHolonomyTransformer(self.config)
        self.waypoint_detector = WaypointDetector(self.config, n_waypoints=32)

        weights_path = os.path.join(LHT_DIR, "lht_weights.pt")
        if os.path.exists(weights_path):
            state = torch.load(weights_path, map_location="cpu")
            self.model.load_state_dict(state)
            print("[lht] Loaded pretrained weights")

    def check_consistency(self, reasoning_chain: List[str], tokenizer) -> Dict[str, float]:
        """Return geometric metrics for ``reasoning_chain``.

        The steps are joined with " [STEP] ", tokenized (truncated to the
        config's max_seq_len) and run through the model without gradients.
        ``consistency_score`` is ``1 / (1 + holonomy)`` and ``is_consistent``
        compares it against ``Config.lht_consistency_threshold``.
        """
        joined = " [STEP] ".join(reasoning_chain)
        encoded = tokenizer(joined, return_tensors="pt", truncation=True,
                            max_length=self.config.max_seq_len)
        ids = encoded["input_ids"]
        with torch.no_grad():
            result = self.model(input_ids=ids, return_geometric_losses=True)
            # Missing loss entries are treated as zero.
            holonomy = result.get("holonomy_loss", torch.tensor(0.0)).item()
            curvature = result.get("curvature_loss", torch.tensor(0.0)).item()
            embedded = self.model.token_embed(encoded["input_ids"])
            waypoint_ids, stability = self.waypoint_detector(embedded)
        score = 1.0 / (1.0 + holonomy)
        metrics = {}
        metrics["holonomy"] = holonomy
        metrics["curvature"] = curvature
        metrics["consistency_score"] = score
        metrics["n_waypoints"] = len(torch.unique(waypoint_ids))
        metrics["avg_stability"] = stability.mean().item()
        metrics["is_consistent"] = score > Config.lht_consistency_threshold
        return metrics

    def analyze_plan(self, plan_steps: List[str], tokenizer) -> str:
        """Format the metrics from ``check_consistency`` as a text report."""
        metrics = self.check_consistency(plan_steps, tokenizer)
        report = f"""
[LHT Geometric Analysis]
Holonomy: {metrics['holonomy']:.4f} (lower = more consistent)
Curvature: {metrics['curvature']:.4f} (lower = simpler reasoning)
Consistency: {metrics['consistency_score']:.2%}
Waypoints: {metrics['n_waypoints']} stable anchors detected
Stability: {metrics['avg_stability']:.2%}
Verdict: {"✓ CONSISTENT" if metrics['is_consistent'] else "⚠ INCONSISTENT"}
"""
        return report
# Lazily created singleton; stays None when LHT is unavailable or fails to init.
_lht_reasoner = None


def get_lht_reasoner():
    """Return the shared LHTReasoner, creating it on first use.

    Returns None when the LHT modules are not importable or construction
    raised (the error is printed; a later call will try again).
    """
    global _lht_reasoner
    if _lht_reasoner is not None or not LHT_OK:
        return _lht_reasoner
    try:
        _lht_reasoner = LHTReasoner()
    except Exception as e:
        print(f"[lht] Failed to initialize: {e}")
    return _lht_reasoner
# ==============================================================================
# CF-HoT CONTROLLED GENERATION
# ==============================================================================
def generate_with_cfhot(prompt: str, **kwargs) -> Tuple[str, Dict]:
    """
    Generate text with CF-HoT cognitive control.

    Runs a manual sampling loop. At every step the model's hidden states are
    scored by all loaded risk heads; when a head's risk for the latest position
    exceeds its threshold in ``Config``, a fixed penalty is subtracted from the
    logits of the targeted tokens before top-p sampling:

    - repetition: every distinct token among the last 32 generated ids
    - hedging / verbosity: the precomputed ``_hedge_tokens`` / ``_verbose_tokens``

    Keyword args (defaults from ``Config``): ``temperature``, ``top_p``,
    ``max_new_tokens``.

    Returns:
        (text, stats): the text after the last "<|im_start|>assistant" marker
        with a trailing EOS token removed, and a dict with 'tokens_generated',
        per-head 'interventions' counts and 'intervention_details'.

    Side effects: increments ``Store.state['cfhot_interventions']``.

    NOTE: the full sequence is re-encoded on every step (no KV cache), so the
    cost grows quadratically with the output length.
    """
    global _model, _tokenizer, _multi_head, _hedge_tokens, _verbose_tokens
    temperature = kwargs.get("temperature", Config.temperature)
    top_p = kwargs.get("top_p", Config.top_p)
    max_new_tokens = kwargs.get("max_new_tokens", Config.max_new_tokens)
    device = next(_model.parameters()).device

    # Encode prompt
    input_ids = _tokenizer.encode(prompt, return_tensors='pt').to(device)
    attention_mask = torch.ones_like(input_ids)

    # Stats
    stats = {
        'tokens_generated': 0,
        'interventions': {'repetition': 0, 'hedging': 0, 'verbosity': 0},
        'intervention_details': []
    }

    def suppress(logits, token_ids, penalty, head):
        """Subtract ``penalty`` from each listed token and count the intervention."""
        for tok_id in token_ids:
            logits[0, tok_id] -= penalty
        stats['interventions'][head] += 1
        Store.state['cfhot_interventions'][head] += 1

    generated_ids = input_ids.clone()
    for _ in range(max_new_tokens):
        with torch.no_grad():
            outputs = _model(
                input_ids=generated_ids,
                attention_mask=attention_mask,
                output_hidden_states=True,
                return_dict=True
            )
            # Sample in float32: softmax/cumsum over the whole vocabulary lose
            # precision in the model's half-precision dtype.
            logits = outputs.logits[:, -1, :].float() / temperature
            # Get risks from all heads; entry 0 of hidden_states is dropped so
            # the remaining entries pair with the per-layer projections.
            hidden_states = outputs.hidden_states[1:]
            risks = _multi_head.get_all_risks(hidden_states)
        current_risks = {name: r[:, -1].item() for name, r in risks.items()}

        # === COGNITIVE INTERVENTION ===
        # Repetition control
        if ('repetition' in current_risks and
                current_risks['repetition'] > Config.cfhot_repetition_threshold):
            recent_tokens = set(generated_ids[0, -32:].tolist())
            suppress(logits, recent_tokens, Config.cfhot_repetition_penalty, 'repetition')
        # Hedging control
        if ('hedging' in current_risks and
                current_risks['hedging'] > Config.cfhot_hedging_threshold):
            suppress(logits, _hedge_tokens, Config.cfhot_hedging_penalty, 'hedging')
        # Verbosity control
        if ('verbosity' in current_risks and
                current_risks['verbosity'] > Config.cfhot_verbosity_threshold):
            suppress(logits, _verbose_tokens, Config.cfhot_verbosity_penalty, 'verbosity')

        # Top-p sampling: drop the tail once cumulative probability passes
        # top_p, shifting the mask right so the token that crosses the
        # threshold (and always the top token) is kept.
        sorted_logits, sorted_indices = torch.sort(logits, descending=True)
        cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
        sorted_indices_to_remove = cumulative_probs > top_p
        sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
        sorted_indices_to_remove[..., 0] = 0
        indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
        logits[indices_to_remove] = float('-inf')

        # Sample
        probs = F.softmax(logits, dim=-1)
        next_token = torch.multinomial(probs, num_samples=1)
        generated_ids = torch.cat([generated_ids, next_token], dim=-1)
        # Extend the mask with its own integer dtype; a default float32 ones
        # tensor would promote (or, on older PyTorch, fail to concatenate).
        attention_mask = torch.cat(
            [attention_mask, torch.ones((1, 1), dtype=attention_mask.dtype, device=device)],
            dim=-1
        )
        stats['tokens_generated'] += 1
        if next_token.item() == _tokenizer.eos_token_id:
            break

    output_text = _tokenizer.decode(generated_ids[0], skip_special_tokens=False)
    if "<|im_start|>assistant" in output_text:
        output_text = output_text.split("<|im_start|>assistant")[-1]
    output_text = output_text.strip()

    # Decoding keeps special tokens, so a generation that stopped on EOS ends
    # with the literal EOS text; drop it so it does not leak to the caller.
    eos_text = _tokenizer.eos_token
    if eos_text and output_text.endswith(eos_text):
        output_text = output_text[:-len(eos_text)].rstrip()
    return output_text, stats
def generate(tok, model, user: str, check_reasoning: bool = False, **kwargs) -> str:
    """
    Produce the assistant reply for ``user`` using a ChatML prompt.

    The CF-HoT sampling loop is used when it is enabled and the multi-head
    predictor exists; otherwise ``model.generate`` is called directly. In the
    CF-HoT case a "[CF-HoT: ...]" footer summarising interventions is appended
    when at least one occurred. With ``check_reasoning`` (and LHT enabled) the
    reply is split into steps, scored, recorded in ``Store.state``, and tagged
    with a warning when judged inconsistent.

    Keyword args (defaults from ``Config``): ``temperature``, ``top_p``,
    ``repetition_penalty``, ``max_new_tokens``.
    """
    temperature = kwargs.get("temperature", Config.temperature)
    top_p = kwargs.get("top_p", Config.top_p)
    repetition_penalty = kwargs.get("repetition_penalty", Config.repetition_penalty)
    max_new_tokens = kwargs.get("max_new_tokens", Config.max_new_tokens)

    prompt = "".join([
        f"<|im_start|>system\n{Config.system}<|im_end|>\n",
        f"<|im_start|>user\n{user}<|im_end|>\n",
        f"<|im_start|>assistant\n",
    ])

    cfhot_active = Config.use_cfhot and _multi_head is not None
    if cfhot_active:
        # Use CF-HoT controlled generation
        text, stats = generate_with_cfhot(
            prompt,
            temperature=temperature,
            top_p=top_p,
            max_new_tokens=max_new_tokens
        )
        # Show intervention stats if any occurred
        counts = stats['interventions']
        total_interventions = sum(counts.values())
        if total_interventions > 0:
            details = ', '.join(f"{k}={v}" for k, v in counts.items() if v > 0)
            text += f"\n\n[CF-HoT: {total_interventions} interventions ({details})]"
    else:
        # Standard generation
        ids = tok(prompt, return_tensors="pt").to(model.device)
        out = model.generate(
            **ids,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            max_new_tokens=max_new_tokens,
            pad_token_id=tok.eos_token_id
        )
        text = tok.decode(out[0], skip_special_tokens=False)
        if "<|im_start|>assistant" in text:
            text = text.split("<|im_start|>assistant\n", 1)[-1].strip()

    # LHT reasoning check
    if not (check_reasoning and Config.use_lht_reasoning):
        return text
    lht = get_lht_reasoner()
    if lht:
        # Crude step splitter: break on newlines, bullets, hyphens, digits and
        # dots, keeping fragments longer than 10 characters.
        steps = []
        for fragment in re.split(r'[\n•\-\d\.]', text):
            fragment = fragment.strip()
            if len(fragment) > 10:
                steps.append(fragment)
        if len(steps) >= 2:
            metrics = lht.check_consistency(steps, tok)
            Store.state["reasoning_consistency"].append(metrics["consistency_score"])
            if not metrics["is_consistent"]:
                text += f"\n\n[⚠ LHT: Low consistency ({metrics['consistency_score']:.2%})]"
    return text
# ==============================================================================
# TOOLS
# ==============================================================================
# Executables tool_shell is willing to launch.
# NOTE(security): "python3"/"python" allow arbitrary code and "cat" can read
# any readable file, so this list limits which binary is started, not what it
# can do.
ALLOWED_SHELL = {"ls", "cat", "wc", "head", "tail", "nvidia-smi", "df", "du", "grep", "rg", "python3", "python"}


def tool_shell(cmd: str) -> str:
    """Run an allow-listed command and return up to 8000 chars of its output.

    The command line is split with shell-style quoting rules and executed
    WITHOUT a shell, so the allow-list check on the first word cannot be
    bypassed with ``;``, ``&&``, pipes, backticks or ``$(...)``. Those are
    passed to the program as literal arguments instead of being interpreted.

    Returns the combined stdout/stderr, ``"[shell] blocked: <exe>"`` for a
    disallowed executable, or ``"[shell] error: ..."`` on any failure
    (including the 20 second timeout).
    """
    import shlex  # local import: only this tool needs it

    try:
        argv = shlex.split(cmd)
        if not argv:
            return "[shell] error: empty command"
        exe = argv[0]
        if exe not in ALLOWED_SHELL:
            return f"[shell] blocked: {exe}"
        p = subprocess.run(argv, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, timeout=20)
        return p.stdout.decode("utf-8", errors="ignore")[:8000]
    except Exception as e:
        return f"[shell] error: {e}"
def tool_py(code: str) -> str:
    """Execute ``code`` in a reduced namespace and report the variable ``out``.

    Only a handful of builtins plus math/json/re/statistics/random are exposed.
    On success returns ``"[py] ok\\n<out>"`` where ``<out>`` is whatever the
    snippet assigned to ``out`` (empty if nothing); on any exception returns
    ``"[py] error:\\n"`` followed by the last 2000 chars of the traceback.

    NOTE(security): a trimmed ``__builtins__`` is not a sandbox; do not treat
    this as safe for untrusted code.
    """
    safe_builtins = {
        "range": range, "len": len, "min": min,
        "max": max, "sum": sum, "print": print,
    }
    sandbox_globals = {"__builtins__": safe_builtins}
    sandbox_globals.update(
        {"math": math, "json": json, "re": re, "statistics": statistics, "random": random}
    )
    scope = {}
    try:
        exec(code, sandbox_globals, scope)
    except Exception:
        return f"[py] error:\n{traceback.format_exc()[-2000:]}"
    result = scope.get('out', '')
    return f"[py] ok\n{result}"
def tool_search_local(query: str, path: str = ROOT) -> str:
    """Search files under ``path`` for ``query`` and return the matches.

    Uses ripgrep when it is on PATH, otherwise recursive grep. The command is
    run through ``tool_shell``, so its allow-list, timeout and output cap apply.

    ``query`` and ``path`` are shell-quoted before being placed in the command
    line, so quotes, ``$``, backticks or spaces in them cannot break out of
    their argument.
    """
    import shlex  # local import: only this tool needs it

    quoted_query = shlex.quote(query)
    quoted_path = shlex.quote(path)
    rg = shutil.which("rg")
    if rg:
        cmd = f'rg -n --no-heading --hidden -S {quoted_query} {quoted_path}'
    else:
        cmd = f'grep -RIn --exclude-dir=.git --exclude-dir=__pycache__ -e {quoted_query} {quoted_path}'
    return tool_shell(cmd)
def tool_lht_analyze(text: str, tok) -> str:
    """Return the LHT geometric report for ``text``, or a "[lht] ..." notice.

    A notice is returned when LHT reasoning is switched off, when the reasoner
    cannot be created, or when fewer than two steps can be extracted.
    """
    if not Config.use_lht_reasoning:
        return "[lht] Disabled - use 'toggle use_lht_reasoning'"
    lht = get_lht_reasoner()
    if not lht:
        return "[lht] Not available"

    # Same crude splitter as generate(): break on newlines, bullets, hyphens,
    # digits and dots; keep fragments longer than 10 characters.
    steps = []
    for fragment in re.split(r'[\n•\-\d\.]', text):
        fragment = fragment.strip()
        if len(fragment) > 10:
            steps.append(fragment)

    if len(steps) >= 2:
        return lht.analyze_plan(steps, tok)
    return "[lht] Need at least 2 reasoning steps to analyze"
# Tool name -> callable taking a single string argument.
TOOLS = {"shell": tool_shell, "python": tool_py, "search": tool_search_local}
# Running score per tool, starting at zero.
TOOL_SCORES = dict.fromkeys(TOOLS, 0)


def update_tool_score(tool: str, success: bool):
    """Move ``tool``'s score up (success) or down by one, clamped to [-5, 20].

    Unknown tool names are ignored.
    """
    if tool in TOOL_SCORES:
        delta = 1 if success else -1
        bumped = TOOL_SCORES[tool] + delta
        TOOL_SCORES[tool] = bumped
        TOOL_SCORES[tool] = max(-5, min(20, bumped))
def tool_router(question: str, tok, model) -> str:
    """Ask the model to pick a tool for ``question``, run it, and return the result.

    The model is prompted to answer with a one-line JSON-like object holding
    'tool' and 'arg'. Returns ``"[tool:<name>] <output>"`` (output capped at
    4000 chars) or ``"[tool:none]"`` when no usable choice could be parsed or
    the chosen tool is unknown.

    Parsing is tolerant on purpose: the reply may be followed by a CF-HoT
    footer line or trailing special-token text, and the prompt itself shows
    single-quoted keys, so the last ``{...}`` span in the reply is located and
    tried as JSON, as JSON after quote replacement, and as a Python literal.
    """
    import ast  # local import: only needed for the literal fallback

    sketch = generate(tok, model,
                      f"Choose a tool for:\n{question}\nReply ONLY with JSON: {{'tool':'shell|python|search|none','arg':'...'}}")

    def parse_choice(blob):
        """Return ``blob`` parsed into a dict, or None if no strategy works."""
        attempts = (
            lambda: json.loads(blob),
            lambda: json.loads(blob.replace("'", '"')),
            lambda: ast.literal_eval(blob),
        )
        for attempt in attempts:
            try:
                value = attempt()
            except (ValueError, SyntaxError, TypeError):
                continue
            if isinstance(value, dict):
                return value
        return None

    # Scan from the end so text after the answer (footers, stray tokens) is
    # skipped and the most recent object wins.
    choice = None
    for line in reversed(sketch.splitlines()):
        span = re.search(r"\{.*\}", line)
        if span:
            choice = parse_choice(span.group(0))
            if choice is not None:
                break
    if choice is None:
        return "[tool:none]"

    tool, arg = choice.get("tool", "none"), choice.get("arg", "")
    # Tools expect a string argument; the model may emit null or a number.
    arg = "" if arg is None else str(arg)
    if isinstance(tool, str) and tool in TOOLS:
        res = TOOLS[tool](arg)[:4000]
        update_tool_score(tool, True)
        Store.log_mem("tool", {"tool": tool, "arg": arg, "res_head": res[:500]})
        return f"[tool:{tool}] {res}"
    if isinstance(tool, str):
        update_tool_score(tool, False)
    return "[tool:none]"
# ==============================================================================
# PLANNING / REFLECTION
# ==============================================================================
def persona_directive() -> str:
    """Return the persona preamble, extended for each enabled subsystem."""
    parts = ["Übermenschetien Heaven Engine: Soviet cybernetic Nietzschean clarity, pragmatic maxims."]
    if Config.use_lht_reasoning:
        parts.append(" Apply Lie-Holonomy geometric reasoning for consistency.")
    if Config.use_cfhot:
        parts.append(" CF-HoT cognitive control active.")
    return "".join(parts)
def plan_for(goal: str, tok, model) -> str:
    """Generate a plan for ``goal``; append the LHT report when LHT is enabled."""
    request_lines = [
        persona_directive(),
        f"Goal: {goal}",
        "Deliver:",
        "- 5 concrete steps",
        "- Constraints & risks",
        "- Nightly audit criteria",
        "- Nietzschean maxim",
    ]
    response = generate(tok, model, "\n".join(request_lines), check_reasoning=True)
    if not Config.use_lht_reasoning:
        return response
    analysis = tool_lht_analyze(response, tok)
    return response + "\n" + analysis
def reflect_on(last_output: str, tok, model) -> str:
    """Ask the model to critique ``last_output`` and return the refined plan."""
    request_lines = [
        persona_directive(),
        "Critique and improve:",
        last_output,
        "Return refined plan with sharper steps.",
    ]
    return generate(tok, model, "\n".join(request_lines), check_reasoning=True)
# ==============================================================================
# FINAL REPORT
# ==============================================================================
def final_report():
    """Print the end-of-session summary to stdout.

    Covers turn and goal counts, tool scores, number of memory-log entries,
    LHT consistency statistics (when any checks ran), CF-HoT intervention
    counters, and the current on/off state of each feature flag.
    """
    print("\n" + "=" * 60)
    print("FINAL ÜBERMENSCH REPORT")
    print("=" * 60)
    print(f"Turns completed: {Store.state['turn']}")
    print(f"Goals tracked: {len(Store.goals)}")
    print("\nTool scores (Tsetlin automata):")
    print(json.dumps(TOOL_SCORES, indent=2))

    if os.path.exists(Store.mem_path):
        # Context manager so the log file handle is closed promptly.
        with open(Store.mem_path) as f:
            lines = f.read().splitlines()
        print(f"\nMemory entries: {len(lines)}")

    if Store.state.get("reasoning_consistency"):
        scores = Store.state["reasoning_consistency"]
        print("\n[LHT Reasoning Metrics]")
        print(f" Checks performed: {len(scores)}")
        print(f" Avg consistency: {sum(scores)/len(scores):.1%}")
        print(f" Min consistency: {min(scores):.1%}")
        print(f" Max consistency: {max(scores):.1%}")

    # CF-HoT stats
    if Store.state.get("cfhot_interventions"):
        iv = Store.state["cfhot_interventions"]
        total = sum(iv.values())
        print("\n[CF-HoT Cognitive Control]")
        print(f" Total interventions: {total}")
        for head, count in iv.items():
            print(f" {head}: {count}")

    print(f"\nVector memory: {'ON' if Config.use_vector_memory else 'OFF'}")
    print(f"LHT reasoning: {'ON' if Config.use_lht_reasoning else 'OFF'}")
    print(f"CF-HoT control: {'ON' if Config.use_cfhot else 'OFF'}")
    print(f"Voice output: {'ON' if Config.use_voice else 'OFF'}")
    print("\n" + "-" * 60)
    print("Nietzschean maxim: Become who you are — iterate beyond all limits.")
    print("Geometric truth: Consistency is holonomy-freedom.")
    print("Cognitive control: Remove the RLHF tax, unleash capability.")
    print("=" * 60)
# ==============================================================================
# HELP
# ==============================================================================
# Command reference printed by the interactive loop for the "help" command.
# Keep it in sync with the command handlers in main().
HELP = """
╔══════════════════════════════════════════════════════════════╗
║ ÜBERMENSCHETIEN HEAVEN ENGINE + CF-HoT COGNITIVE CONTROL ║
╠══════════════════════════════════════════════════════════════╣
║ GOALS ║
║ goals List all goals ║
║ add: <text> Add a new goal ║
║ del: <idx> Delete goal by index ║
║ plan: <idx> Generate plan for goal (with LHT + CF-HoT) ║
║ ║
║ REASONING ║
║ reflect Refine last plan ║
║ lht: <text> Analyze reasoning consistency ║
║ ║
║ TOOLS ║
║ tool: <query> Auto-select and use tool ║
║ shell: <cmd> Run shell command directly ║
║ py: <code> Run Python code directly ║
║ search: <q> Search local files ║
║ ║
║ CONFIG ║
║ toggle <flag> Toggle: use_voice, use_vector_memory, ║
║ use_lht_reasoning, use_cfhot, ║
║ autonomy ║
║ status Show current state ║
║ cfhot Show CF-HoT stats and loaded heads ║
║ ║
║ OTHER ║
║ help Show this help ║
║ quit Exit with final report ║
╚══════════════════════════════════════════════════════════════╝
"""
# ==============================================================================
# MAIN LOOP
# ==============================================================================
def main():
    """Run the interactive command loop.

    Loads persisted state and the model, then reads commands from stdin until
    "quit", EOF or Ctrl-C. Recognised commands are listed in ``HELP``; any
    other input is treated as a free-form request, answered by the model,
    logged, and counted as a turn. The final report is printed on exit.
    """
    print("🟥🟨🟥 Übermenschetien Heaven Engine + CF-HoT Cognitive Control")
    print(f" CF-HoT Control: ON (Repetition 125x, Verbosity 2.1x, Hedging 1.5x)")
    print(f" LHT Reasoning: {'ON' if LHT_OK else 'OFF'}")
    print(f" Vector Memory: {'ON' if VECTOR_OK else 'OFF'}")
    print(f" Voice Output: {'ON' if VOICE_OK else 'OFF'}")
    print(" Type 'help' for commands.\n")
    Store.load()
    tok, model = load_llm()
    last_plan = ""
    while True:
        try:
            u = input("\n> ").strip()
        except (EOFError, KeyboardInterrupt):
            break
        if not u:
            continue
        if u == "help":
            print(HELP)
            continue
        if u == "quit":
            break

        # CF-HoT status
        if u == "cfhot":
            print("\n[CF-HoT Cognitive Control Status]")
            print(f" Enabled: {Config.use_cfhot}")
            # Explicit None check: the predictor is a module object, not a flag.
            if _multi_head is not None:
                print(f" Loaded heads: {list(_multi_head.loaded_heads)}")
            print(f" Thresholds:")
            print(f" Repetition: {Config.cfhot_repetition_threshold}")
            print(f" Hedging: {Config.cfhot_hedging_threshold}")
            print(f" Verbosity: {Config.cfhot_verbosity_threshold}")
            print(f" Session interventions:")
            for head, count in Store.state.get('cfhot_interventions', {}).items():
                print(f" {head}: {count}")
            continue

        # Goals
        if u == "goals":
            print("[goals]")
            if not Store.goals:
                print(" (none)")
            for i, g in enumerate(Store.goals):
                print(f" [{i}] {g}")
            continue
        if u.startswith("add:"):
            Store.goals.append(u[4:].strip())
            Store.save()
            print("[goals] added")
            continue
        if u.startswith("del:"):
            # Only a malformed or out-of-range index is treated as "bad index";
            # other failures (e.g. while saving) are no longer hidden.
            try:
                Store.goals.pop(int(u[4:].strip()))
            except (ValueError, IndexError):
                print("[goals] bad index")
            else:
                Store.save()
                print("[goals] deleted")
            continue
        if u.startswith("plan:"):
            try:
                goal = Store.goals[int(u[5:].strip())]
            except (ValueError, IndexError):
                print("[plan] bad index")
                continue
            out = plan_for(goal, tok, model)
            last_plan = out
            Store.log_mem("plan", {"goal": goal, "plan": out})
            print(out)
            continue
        if u == "reflect":
            if not last_plan:
                print("[reflect] no plan to refine")
                continue
            improved = reflect_on(last_plan, tok, model)
            last_plan = improved
            Store.log_mem("reflect", {"plan": improved})
            print(improved)
            continue

        # Reasoning / tools
        if u.startswith("lht:"):
            print(tool_lht_analyze(u[4:].strip(), tok))
            continue
        if u.startswith("tool:"):
            print(tool_router(u[5:].strip(), tok, model))
            continue
        if u.startswith("shell:"):
            print(tool_shell(u[6:].strip()))
            continue
        if u.startswith("py:"):
            print(tool_py(u[3:].strip()))
            continue
        if u.startswith("search:"):
            print(tool_search_local(u[7:].strip()))
            continue

        # Config
        if u.startswith("toggle"):
            parts = u.split(maxsplit=1)
            if len(parts) > 1:
                print(Config.toggle(parts[1]))
            else:
                print("[toggle] specify flag: use_voice, use_vector_memory, use_lht_reasoning, use_cfhot, autonomy")
            continue
        if u == "status":
            status = {
                "turn": Store.state["turn"],
                "goals": len(Store.goals),
                "autonomy": Config.autonomy,
                "use_vector_memory": Config.use_vector_memory,
                "use_lht_reasoning": Config.use_lht_reasoning,
                "use_cfhot": Config.use_cfhot,
                "cfhot_interventions": Store.state.get("cfhot_interventions", {}),
                "tool_scores": TOOL_SCORES,
                "model": MODEL_PATH
            }
            print(json.dumps(status, indent=2))
            continue

        # Default: free conversation with CF-HoT control
        out = generate(tok, model, f"{persona_directive()}\nUser request: {u}\nProvide procedure + Nietzschean maxim.")
        Store.log_mem("reply", {"in": u, "out": out})
        print(out)
        # Every third turn (counted before the increment) also gets an LHT report.
        if Config.use_lht_reasoning and Store.state["turn"] % 3 == 0:
            print(tool_lht_analyze(out, tok))
        Store.state["turn"] += 1
        Store.save()
    final_report()


if __name__ == "__main__":
    main()

43
adapter_config.json Normal file
View File

@@ -0,0 +1,43 @@
{
"alora_invocation_tokens": null,
"alpha_pattern": {},
"arrow_config": null,
"auto_mapping": null,
"base_model_name_or_path": "LoganResearch/Ubermenschetien-8B",
"bias": "none",
"corda_config": null,
"ensure_weight_tying": false,
"eva_config": null,
"exclude_modules": null,
"fan_in_fan_out": false,
"inference_mode": true,
"init_lora_weights": true,
"layer_replication": null,
"layers_pattern": null,
"layers_to_transform": null,
"loftq_config": {},
"lora_alpha": 128,
"lora_bias": false,
"lora_dropout": 0.05,
"megatron_config": null,
"megatron_core": "megatron.core",
"modules_to_save": null,
"peft_type": "LORA",
"peft_version": "0.18.1",
"qalora_group_size": 16,
"r": 64,
"rank_pattern": {},
"revision": null,
"target_modules": [
"v_proj",
"k_proj",
"q_proj",
"o_proj"
],
"target_parameters": null,
"task_type": "CAUSAL_LM",
"trainable_token_indices": null,
"use_dora": false,
"use_qalora": false,
"use_rslora": false
}

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3889eccb9c04ba25ae86b99121368121a338fc3ce92a38456874bf455347e389
size 218138576

View File

@@ -0,0 +1,152 @@
{#- json_to_python_type(json_spec): render a JSON-schema fragment as a Python-style type hint string. Basic types map through basic_type_map, arrays become list[...], objects become dict or dict[str, ...], a list of type names becomes Union[...], and anything else becomes Any. -#}
{%- macro json_to_python_type(json_spec) %}
{%- set basic_type_map = {
"string": "str",
"number": "float",
"integer": "int",
"boolean": "bool"
} %}
{%- if basic_type_map[json_spec.type] is defined %}
{{- basic_type_map[json_spec.type] }}
{%- elif json_spec.type == "array" %}
{#- The element schema lives under the "items" key. It must be read by subscript: the items filter yields key/value pairs, and attribute-style access would resolve to the mapping's own items method. -#}
{%- if "items" in json_spec %}
{{- "list[" + json_to_python_type(json_spec["items"]) + "]"}}
{%- else %}
{{- "list" }}
{%- endif %}
{%- elif json_spec.type == "object" %}
{%- if json_spec.additionalProperties is defined %}
{{- "dict[str, " + json_to_python_type(json_spec.additionalProperties) + ']'}}
{%- else %}
{{- "dict" }}
{%- endif %}
{#- Only a real sequence of type names is a union. Strings and undefined values also pass the iterable test, so they are excluded here: an unknown type name would otherwise recurse once per character without end. -#}
{%- elif json_spec.type is defined and json_spec.type is not string and json_spec.type is iterable %}
{{- "Union[" }}
{%- for t in json_spec.type %}
{{- json_to_python_type({"type": t}) }}
{%- if not loop.last %}
{{- "," }}
{%- endif %}
{%- endfor %}
{{- "]" }}
{%- else %}
{{- "Any" }}
{%- endif %}
{%- endmacro %}
{#- Tool-use prompt. Emits BOS and a system turn that describes the tool-calling protocol and lists one JSON signature per entry of tools, then renders every message, then optionally opens an assistant turn. All comments strip surrounding whitespace so the rendered text is unchanged. -#}
{{- bos_token }}
{{- '<|im_start|>system
' }}
{{- "You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: <tools> " }}
{%- for tool in tools %}
{#- A tool may be given as a wrapper with a function field or as the bare function spec. -#}
{%- if tool.function is defined %}
{%- set tool = tool.function %}
{%- endif %}
{{- '{"type": "function", "function": ' }}
{{- '{"name": "' + tool.name + '", ' }}
{#- The description field is a Python-like signature, followed by the tool description, an Args list and an optional Returns section. -#}
{{- '"description": "' + tool.name + '(' }}
{%- for param_name, param_fields in tool.parameters.properties|items %}
{{- param_name + ": " + json_to_python_type(param_fields) }}
{%- if not loop.last %}
{{- ", " }}
{%- endif %}
{%- endfor %}
{{- ")" }}
{%- if tool.return is defined %}
{{- " -> " + json_to_python_type(tool.return) }}
{%- endif %}
{{- " - " + tool.description + "
" }}
{%- for param_name, param_fields in tool.parameters.properties|items %}
{%- if loop.first %}
{{- " Args:
" }}
{%- endif %}
{{- " " + param_name + "(" + json_to_python_type(param_fields) + "): " + param_fields.description|trim }}
{%- endfor %}
{%- if tool.return is defined and tool.return.description is defined %}
{{- "
Returns:
" + tool.return.description }}
{%- endif %}
{{- '"' }}
{{- ', "parameters": ' }}
{%- if tool.parameters.properties | length == 0 %}
{{- "{}" }}
{%- else %}
{{- tool.parameters|tojson }}
{%- endif %}
{{- "}" }}
{%- if not loop.last %}
{{- "
" }}
{%- endif %}
{%- endfor %}
{{- " </tools>" }}
{{- 'Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}}
' }}
{{- "For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
" }}
{{- "<tool_call>
" }}
{{- '{"name": <function-name>, "arguments": <args-dict>}
' }}
{{- '</tool_call><|im_end|>
' }}
{#- Conversation turns. User, system and plain assistant messages are emitted verbatim; assistant messages carrying tool_calls and tool results get the dedicated formats below. -#}
{%- for message in messages %}
{%- if message.role == "user" or message.role == "system" or (message.role == "assistant" and message.tool_calls is not defined) %}
{{- '<|im_start|>' + message.role + '
' + message.content + '<|im_end|>' + '
' }}
{%- elif message.role == "assistant" %}
{#- Assistant turn with tool calls: one tool_call element per call. String arguments are emitted as-is, anything else is serialized with tojson. -#}
{{- '<|im_start|>' + message.role }}
{%- for tool_call in message.tool_calls %}
{{- '
<tool_call>
' }} {%- if tool_call.function is defined %}
{%- set tool_call = tool_call.function %}
{%- endif %}
{{- '{' }}
{{- '"name": "' }}
{{- tool_call.name }}
{{- '"' }}
{{- ', '}}
{%- if tool_call.arguments is defined %}
{{- '"arguments": ' }}
{%- if tool_call.arguments is string %}
{{- tool_call.arguments }}
{%- else %}
{{- tool_call.arguments|tojson }}
{%- endif %}
{%- endif %}
{{- '}' }}
{{- '
</tool_call>' }}
{%- endfor %}
{{- '<|im_end|>
' }}
{%- elif message.role == "tool" %}
{#- Consecutive tool messages share one tool turn: the header is written only when the previous message exists and is not a tool message, and the turn is closed only when the next message is not a tool message or this is the last message. -#}
{%- if loop.previtem and loop.previtem.role != "tool" %}
{{- '<|im_start|>tool
' }}
{%- endif %}
{{- '<tool_response>
' }}
{{- message.content }}
{%- if not loop.last %}
{{- '
</tool_response>
' }}
{%- else %}
{{- '
</tool_response>' }}
{%- endif %}
{%- if not loop.last and loop.nextitem.role != "tool" %}
{{- '<|im_end|>' }}
{%- elif loop.last %}
{{- '<|im_end|>' }}
{%- endif %}
{%- endif %}
{%- endfor %}
{#- Optionally open the assistant turn so that generation continues from it. -#}
{%- if add_generation_prompt %}
{{- '<|im_start|>assistant
' }}
{%- endif %}

3
arc_model_card.png Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:439c9fa4f29df07e2a1c58b30e1824c3d5c3d564a87ac2a4cc4da5f756f72aa0
size 132991

6
chat_template.jinja Normal file
View File

@@ -0,0 +1,6 @@
{#- ChatML-style template: emits BOS, inserts a default system turn when the first message is not a system message, renders every message as an im_start / im_end turn, and opens an assistant turn when add_generation_prompt is set. This comment strips its surrounding whitespace, so the rendered text is unchanged. -#}
{{bos_token}}{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system
You are a helpful assistant.<|im_end|>
' }}{% endif %}{{'<|im_start|>' + message['role'] + '
' + message['content'] + '<|im_end|>' + '
'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant
' }}{% endif %}

35
config.json Normal file
View File

@@ -0,0 +1,35 @@
{
"architectures": [
"LlamaForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 128000,
"eos_token_id": 128040,
"head_dim": 128,
"hidden_act": "silu",
"hidden_size": 4096,
"initializer_range": 0.02,
"intermediate_size": 14336,
"max_position_embeddings": 131072,
"mlp_bias": false,
"model_type": "llama",
"num_attention_heads": 32,
"num_hidden_layers": 32,
"num_key_value_heads": 8,
"pretraining_tp": 1,
"rms_norm_eps": 1e-05,
"rope_scaling": {
"factor": 8.0,
"high_freq_factor": 4.0,
"low_freq_factor": 1.0,
"original_max_position_embeddings": 8192,
"rope_type": "llama3"
},
"rope_theta": 500000.0,
"tie_word_embeddings": false,
"torch_dtype": "float16",
"transformers_version": "4.55.2",
"use_cache": true,
"vocab_size": 128256
}

3
demo.mp4 Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c729090fea7c55841876734e9da6c8d0c444b49bf9c8e820e3a417c1a234f63e
size 12554193

9
generation_config.json Normal file
View File

@@ -0,0 +1,9 @@
{
"_from_model_config": true,
"bos_token_id": 128000,
"do_sample": true,
"eos_token_id": 128040,
"temperature": 0.6,
"top_p": 0.9,
"transformers_version": "4.55.2"
}

3
hedging_head.pt Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a43d30ab3e87d8e7dc70c62da5ca5b49f54e272713969e87c5f3a742e485871d
size 24186

631
inference.py Normal file
View File

@@ -0,0 +1,631 @@
#!/usr/bin/env python3
"""
ARC-8B: Adaptive Repetition Controller
=======================================
Decode-time behavioral control for language models.
This script loads the complete ARC system and runs inference with
multi-head cognitive control that detects and suppresses:
- Repetition loops (125× separation)
- Hedging phrases (1.5× separation)
- Verbosity/filler (2.1× separation)
- Sycophancy (experimental)
Usage:
python inference.py # Interactive mode
python inference.py --prompt "Hello" # Single prompt
python inference.py --no-arc # Disable ARC (baseline)
Requirements:
pip install torch transformers accelerate bitsandbytes
Model: LoganResearch/ARC-Base-8B (16GB, runs in ~10GB with 4-bit)
"""
import os
import sys
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
from typing import Dict, List, Optional, Tuple
from dataclasses import dataclass
# =============================================================================
# CONFIGURATION
# =============================================================================
@dataclass
class ARCConfig:
    """Settings for the ARC system.

    Fields are grouped as: what to load and how, predictor dimensions,
    per-behavior risk thresholds, per-behavior logit penalties, and
    sampling defaults. Field order is part of the positional constructor.
    """

    # --- Model loading ---
    model_id: str = "LoganResearch/ARC-Base-8B"
    load_in_4bit: bool = True
    load_in_8bit: bool = False
    device_map: str = "auto"

    # --- Predictor architecture (must match training) ---
    d_model: int = 4096
    n_layers: int = 32
    d_fiber: int = 16
    d_control: int = 64

    # --- Risk thresholds above which an intervention fires (tuned empirically) ---
    repetition_threshold: float = 0.70
    hedging_threshold: float = 0.60
    verbosity_threshold: float = 0.65
    sycophancy_threshold: float = 0.60

    # --- Amount subtracted from the logits of suppressed tokens ---
    repetition_penalty: float = 5.0
    hedging_penalty: float = 3.0
    verbosity_penalty: float = 2.0
    sycophancy_penalty: float = 2.0

    # --- Generation defaults ---
    max_new_tokens: int = 512
    temperature: float = 0.8
    top_p: float = 0.92
    repetition_window: int = 32  # how many trailing tokens count as "recent"
# =============================================================================
# MULTI-HEAD PREDICTOR
# =============================================================================
class MultiHeadPredictor(nn.Module):
    """
    Prediction heads that turn per-layer hidden states into risk scores.

    Every layer has its own bias-free linear "fiber projection" that maps
    d_model features down to d_fiber. The projected layers are combined with
    a softmax over the learned ``layer_weights``, and each named head (a
    small MLP) maps the combined features to a sigmoid score in (0, 1).

    Architecture:
        Hidden States [n_layers × d_model]
        → Fiber Projections [n_layers × d_fiber]
        → Weighted Aggregation [d_fiber]
        → Per-Head MLP → Risk Score [0-1]

    Only heads whose name is in ``loaded_heads`` are ever evaluated; a head
    that exists in ``heads`` but is not marked loaded is ignored.
    """

    def __init__(self, config: "ARCConfig"):
        """Build the projections and an empty head registry.

        Args:
            config: Object providing d_model, d_fiber, d_control and n_layers.
        """
        super().__init__()
        self.config = config
        # Shared fiber projections (learned during CF-HoT training)
        self.fiber_projs = nn.ModuleList([
            nn.Linear(config.d_model, config.d_fiber, bias=False)
            for _ in range(config.n_layers)
        ])
        # Learned layer importance weights, initialised uniform
        self.layer_weights = nn.Parameter(torch.ones(config.n_layers) / config.n_layers)
        # Individual prediction heads, keyed by behavior name
        self.heads = nn.ModuleDict()
        # Names of heads whose weights have actually been loaded
        self.loaded_heads: set = set()

    def _make_head(self) -> nn.Sequential:
        """Create a prediction head: fiber features → single risk logit."""
        return nn.Sequential(
            nn.Linear(self.config.d_fiber, self.config.d_control),
            nn.GELU(),
            nn.Linear(self.config.d_control, self.config.d_control),
            nn.GELU(),
            nn.Linear(self.config.d_control, 1)
        )

    def add_head(self, name: str) -> None:
        """Register a freshly initialised head under ``name``.

        The head is not added to ``loaded_heads``; the caller does that once
        real weights are in place.
        """
        self.heads[name] = self._make_head()

    def get_fiber_features(self, hidden_states: List[torch.Tensor]) -> torch.Tensor:
        """
        Project hidden states through fiber projections and aggregate.

        Args:
            hidden_states: Sequence of [batch, seq, d_model] tensors, one per
                layer. If it is shorter than the number of projections, only
                the leading projections (and their weights) are used.
        Returns:
            Aggregated features [batch, seq, d_fiber] in float32, on the
            device of the first hidden state.
        """
        device = hidden_states[0].device
        fibers = []
        # zip() already stops at the shorter sequence, so no bounds check is needed.
        for proj, hidden in zip(self.fiber_projs, hidden_states):
            proj = proj.to(device)
            # Bring every layer onto one device and upcast: the projections are
            # float32 while model activations are typically half precision.
            fibers.append(proj(hidden.to(device=device, dtype=torch.float32)))
        # Weighted sum across layers (softmax over the layers actually used)
        weights = F.softmax(self.layer_weights.to(device)[:len(fibers)], dim=0)
        return sum(w * f for w, f in zip(weights, fibers))

    def _score(self, head_name: str, features: torch.Tensor) -> torch.Tensor:
        """Run one head on aggregated features and squash to (0, 1)."""
        # Heads may still live on the device they were loaded on (e.g. CPU);
        # Module.to() moves the registered head in place.
        head = self.heads[head_name].to(features.device)
        return torch.sigmoid(head(features).squeeze(-1))

    def get_risk(self, head_name: str, hidden_states: List[torch.Tensor]) -> torch.Tensor:
        """Get the risk score from a specific head.

        Returns:
            [batch, seq] risks, or a single zero of shape [1] when the head
            has not been loaded.
        """
        if head_name not in self.loaded_heads:
            return torch.zeros(1, device=hidden_states[0].device)
        return self._score(head_name, self.get_fiber_features(hidden_states))

    def get_all_risks(self, hidden_states: List[torch.Tensor]) -> Dict[str, torch.Tensor]:
        """Get [batch, seq] risk scores from every loaded head.

        The fiber features are computed once and shared by all heads.
        Returns an empty dict when no head is loaded.
        """
        if not self.loaded_heads:
            return {}
        features = self.get_fiber_features(hidden_states)
        return {name: self._score(name, features) for name in self.loaded_heads}
# =============================================================================
# ARC SYSTEM
# =============================================================================
class ARCSystem:
    """
    Complete ARC (Adaptive Repetition Controller) system.

    Holds the language model, its tokenizer and a MultiHeadPredictor, and
    offers token-by-token generation in which each loaded head's risk score
    can trigger a logit penalty on a behavior-specific set of token ids.

    Typical use: ``ARCSystem(config).load().generate("Hello")``.
    """

    # Words whose FIRST token is penalized when the matching head fires.
    HEDGE_STARTERS = [
        "As", "I'm", "I", "It's", "While", "Although", "However",
        "That", "This", "Please", "Well", "So", "Actually"
    ]
    VERBOSE_STARTERS = [
        "Let", "Basically", "Essentially", "Simply", "Indeed",
        "Furthermore", "Moreover", "Additionally", "Firstly"
    ]
    SYCOPHANCY_STARTERS = [
        "Great", "Excellent", "Wonderful", "Absolutely", "Of",
        "Thank", "Sure", "Certainly", "Definitely"
    ]

    def __init__(self, config: Optional["ARCConfig"] = None):
        """Store the configuration; nothing is downloaded until load().

        Args:
            config: Settings object; a default ARCConfig is created when omitted.
        """
        self.config = config if config is not None else ARCConfig()
        self.model = None
        self.tokenizer = None
        self.predictor = None
        # Token ID caches for suppression (filled by _build_suppression_sets)
        self._hedge_token_ids: set = set()
        self._verbose_token_ids: set = set()
        self._sycophancy_token_ids: set = set()
        # Running count of interventions across all generate() calls
        self.total_interventions = {"repetition": 0, "hedging": 0, "verbosity": 0, "sycophancy": 0}
        # head name -> error text for heads that could not be loaded, so that
        # failures stay visible even when load(verbose=False) prints nothing
        self.head_load_errors: Dict[str, str] = {}

    def load(self, verbose: bool = True) -> "ARCSystem":
        """
        Load tokenizer, model and prediction heads for ``config.model_id``.

        Head loading is best-effort: a head that cannot be downloaded or
        parsed is skipped, reported when ``verbose`` is set, and recorded in
        ``head_load_errors``. Generation still works with the remaining heads.

        Args:
            verbose: Print progress and per-head status.
        Returns:
            self (for chaining)
        """
        # Imported here rather than at module level, so these packages are
        # only required once load() is actually called.
        from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
        from huggingface_hub import hf_hub_download

        cfg = self.config
        self.head_load_errors = {}

        if verbose:
            print("=" * 60)
            print(" ARC-8B: Adaptive Repetition Controller")
            print(" Decode-time behavioral control system")
            print("=" * 60)

        # === 1. Tokenizer ===
        if verbose:
            print("\n[1/4] Loading tokenizer...")
        # NOTE(security): trust_remote_code=True runs Python code shipped in
        # the model repository; only point model_id at repositories you trust.
        self.tokenizer = AutoTokenizer.from_pretrained(
            cfg.model_id,
            trust_remote_code=True
        )
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        # === 2. Model ===
        if verbose:
            print("[2/4] Loading model...")
            if cfg.load_in_4bit:
                print(" (4-bit quantization enabled)")
        quantization_config = None
        if cfg.load_in_4bit:  # 4-bit takes precedence when both flags are set
            quantization_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_compute_dtype=torch.float16,
                bnb_4bit_use_double_quant=True,
                bnb_4bit_quant_type="nf4"
            )
        elif cfg.load_in_8bit:
            quantization_config = BitsAndBytesConfig(load_in_8bit=True)
        self.model = AutoModelForCausalLM.from_pretrained(
            cfg.model_id,
            quantization_config=quantization_config,
            device_map=cfg.device_map,
            torch_dtype=torch.float16,
            trust_remote_code=True
        )
        self.model.eval()

        # === 3. Prediction Heads ===
        if verbose:
            print("[3/4] Loading prediction heads...")
        device = next(self.model.parameters()).device
        self.predictor = MultiHeadPredictor(cfg).to(device).float()

        # risk_predictor.pt carries the fiber projections, the layer weights
        # and the repetition head (stored under 'predictor.<idx>.*').
        try:
            risk_path = hf_hub_download(cfg.model_id, "risk_predictor.pt")
            # NOTE(security): weights_only=False unpickles arbitrary Python
            # objects from a downloaded file. Kept because the checkpoint
            # format is not visible here; switch to weights_only=True if the
            # files contain only tensors and plain containers.
            ckpt = torch.load(risk_path, map_location=device, weights_only=False)
            state = ckpt.get('risk_predictor', ckpt)
            for i in range(cfg.n_layers):
                key = f'fiber_projs.{i}.weight'
                if key in state:
                    self.predictor.fiber_projs[i].weight.data = state[key].to(device).float()
            if 'layer_weights' in state:
                self.predictor.layer_weights.data = state['layer_weights'].to(device).float()
            # Fill a detached head first and register it only once every
            # tensor was found, so a partial checkpoint never leaves a
            # half-initialised head behind.
            head = self.predictor._make_head()
            for idx in (0, 2, 4):  # the Linear layers; 1 and 3 are GELU
                head[idx].weight.data = state[f'predictor.{idx}.weight'].to(device).float()
                head[idx].bias.data = state[f'predictor.{idx}.bias'].to(device).float()
            self.predictor.heads['repetition'] = head
            self.predictor.loaded_heads.add('repetition')
            if verbose:
                print(" ✓ Repetition head (125× separation)")
        except Exception as e:  # best-effort: run without this head
            self.head_load_errors['repetition'] = str(e)
            if verbose:
                print(f" ✗ Repetition head: {e}")

        # Additional heads each live in their own '<name>_head.pt' file.
        for head_name in ['hedging', 'verbosity', 'sycophancy']:
            try:
                head_path = hf_hub_download(cfg.model_id, f"{head_name}_head.pt")
                # NOTE(security): same unpickling caveat as above.
                ckpt = torch.load(head_path, map_location=device, weights_only=False)
                head = self.predictor._make_head()
                head.load_state_dict(ckpt.get('head_state', ckpt))
                self.predictor.heads[head_name] = head.to(device).float()
                self.predictor.loaded_heads.add(head_name)
                if verbose:
                    print(f" ✓ {head_name.capitalize()} head")
            except Exception as e:  # best-effort: run without this head
                self.head_load_errors[head_name] = str(e)
                if verbose:
                    print(f" ✗ {head_name.capitalize()} head: {e}")
        self.predictor.eval()

        # === 4. Build Token Suppression Sets ===
        if verbose:
            print("[4/4] Building suppression vocabularies...")
        self._build_suppression_sets()

        if verbose:
            print("\n" + "=" * 60)
            print(" ✓ ARC System Ready")
            print(f" Active heads: {sorted(self.predictor.loaded_heads)}")
            print("=" * 60 + "\n")
        return self

    def _first_token_ids(self, words: List[str]) -> set:
        """Return the id of the first token of each word (without special tokens)."""
        ids = set()
        for word in words:
            tokens = self.tokenizer.encode(word, add_special_tokens=False)
            if tokens:
                ids.add(tokens[0])
        return ids

    def _build_suppression_sets(self) -> None:
        """Build token ID sets for behavioral suppression from the *_STARTERS lists."""
        self._hedge_token_ids = self._first_token_ids(self.HEDGE_STARTERS)
        self._verbose_token_ids = self._first_token_ids(self.VERBOSE_STARTERS)
        self._sycophancy_token_ids = self._first_token_ids(self.SYCOPHANCY_STARTERS)

    def _apply_interventions(
        self,
        logits: torch.Tensor,
        risks: Dict[str, torch.Tensor],
        recent_tokens: List[int]
    ) -> Tuple[torch.Tensor, Dict[str, bool]]:
        """
        Apply behavioral interventions based on risk scores.

        For every behavior whose risk exceeds its configured threshold, the
        configured penalty is subtracted from the logits of that behavior's
        token set. ``logits`` is modified in place and ``total_interventions``
        is incremented for each behavior that fired.

        Args:
            logits: [1, vocab_size] logits for next token
            risks: Risk score per head name; missing heads count as 0
            recent_tokens: Recently generated token IDs; the last
                ``repetition_window`` of them form the repetition set
        Returns:
            The (same) logits tensor and a dict with ``True`` for each
            intervention that fired
        """
        cfg = self.config
        window = set(recent_tokens[-cfg.repetition_window:])
        plan = (
            ('repetition', cfg.repetition_threshold, cfg.repetition_penalty, window),
            ('hedging', cfg.hedging_threshold, cfg.hedging_penalty, self._hedge_token_ids),
            ('verbosity', cfg.verbosity_threshold, cfg.verbosity_penalty, self._verbose_token_ids),
            ('sycophancy', cfg.sycophancy_threshold, cfg.sycophancy_penalty, self._sycophancy_token_ids),
        )
        interventions: Dict[str, bool] = {}
        for name, threshold, penalty, token_ids in plan:
            if risks.get(name, 0) > threshold:
                if token_ids:
                    # The ids come from a set, so each logit is penalized once.
                    idx = torch.tensor(sorted(token_ids), dtype=torch.long, device=logits.device)
                    logits[0, idx] -= penalty
                interventions[name] = True
                self.total_interventions[name] += 1
        return logits, interventions

    @staticmethod
    def _build_prompt(
        prompt: str,
        system_prompt: Optional[str] = None,
        history: Optional[List[Dict[str, str]]] = None
    ) -> str:
        """Format system prompt, earlier turns and the new user turn as ChatML text."""
        if system_prompt is None:
            system_prompt = "You are a helpful assistant."
        parts = [f"<|im_start|>system\n{system_prompt}<|im_end|>\n"]
        for turn in history or []:
            parts.append(f"<|im_start|>user\n{turn['user']}<|im_end|>\n")
            parts.append(f"<|im_start|>assistant\n{turn['assistant']}<|im_end|>\n")
        parts.append(f"<|im_start|>user\n{prompt}<|im_end|>\n")
        parts.append("<|im_start|>assistant\n")
        return "".join(parts)

    @staticmethod
    def _sample_top_p(logits: torch.Tensor, top_p: float) -> torch.Tensor:
        """Nucleus-sample one token id from [1, vocab] logits; returns shape [1, 1]."""
        sorted_logits, sorted_indices = torch.sort(logits, descending=True)
        cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
        remove_sorted = cumulative_probs > top_p
        # Shift right so the token that crosses the threshold is still kept,
        # and never drop the most likely token.
        remove_sorted[..., 1:] = remove_sorted[..., :-1].clone()
        remove_sorted[..., 0] = False
        remove = remove_sorted.scatter(1, sorted_indices, remove_sorted)
        probs = F.softmax(logits.masked_fill(remove, float('-inf')), dim=-1)
        return torch.multinomial(probs, num_samples=1)

    def generate(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        max_new_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        use_arc: bool = True,
        verbose: bool = False,
        history: Optional[List[Dict[str, str]]] = None
    ) -> str:
        """
        Generate text with optional ARC behavioral control.

        Decoding runs one token at a time with the model's key/value cache.
        The head MLPs act on each position independently and only the last
        position's risk is used, so the newest token's hidden states suffice.

        Args:
            prompt: User input
            system_prompt: Optional system message (a generic default is used when None)
            max_new_tokens: Max tokens to generate (None: config value)
            temperature: Sampling temperature (None: config value); a value
                <= 0 selects greedy decoding instead of dividing by zero
            use_arc: Whether to use ARC intervention (default: True)
            verbose: Print intervention info (default: False)
            history: Earlier turns as dicts with 'user' and 'assistant' keys,
                oldest first, prepended to the prompt
        Returns:
            Generated assistant text, stripped of surrounding whitespace
        Raises:
            RuntimeError: If load() has not been called yet.
        """
        if self.model is None or self.tokenizer is None:
            raise RuntimeError("ARCSystem.load() must be called before generate()")
        cfg = self.config
        # `is None` rather than `or`: an explicit 0 / 0.0 must be honoured.
        if max_new_tokens is None:
            max_new_tokens = cfg.max_new_tokens
        if temperature is None:
            temperature = cfg.temperature
        greedy = temperature <= 0

        full_prompt = self._build_prompt(prompt, system_prompt, history)
        device = next(self.model.parameters()).device
        input_ids = self.tokenizer.encode(full_prompt, return_tensors='pt').to(device)
        prompt_len = input_ids.shape[1]
        attention_mask = torch.ones_like(input_ids)
        generated_ids = input_ids

        # Tokens that end the assistant turn, resolved once instead of per step.
        stop_ids = {self.tokenizer.eos_token_id}
        end_of_turn = self.tokenizer.encode("<|im_end|>", add_special_tokens=False)
        if end_of_turn:
            stop_ids.add(end_of_turn[0])

        arc_active = bool(use_arc and self.predictor is not None and self.predictor.loaded_heads)
        intervention_counts = {"repetition": 0, "hedging": 0, "verbosity": 0, "sycophancy": 0}

        step_input = input_ids  # full prompt first, then only the newest token
        past_key_values = None
        with torch.no_grad():
            for _ in range(max_new_tokens):
                outputs = self.model(
                    input_ids=step_input,
                    attention_mask=attention_mask,
                    past_key_values=past_key_values,
                    use_cache=True,
                    output_hidden_states=arc_active,  # only needed by the heads
                    return_dict=True
                )
                past_key_values = outputs.past_key_values
                # float32 keeps the cumulative probabilities used by top-p
                # accurate over a large vocabulary.
                logits = outputs.logits[:, -1, :].float()
                logits = logits.clone() if greedy else logits / temperature

                # ARC intervention
                if arc_active:
                    hidden_states = outputs.hidden_states[1:]  # Skip embedding layer
                    risks = self.predictor.get_all_risks(hidden_states)
                    current_risks = {name: r[:, -1].item() for name, r in risks.items()}
                    recent = generated_ids[0, -cfg.repetition_window:].tolist()
                    logits, fired = self._apply_interventions(logits, current_risks, recent)
                    for name, did_fire in fired.items():
                        if did_fire:
                            intervention_counts[name] += 1

                if greedy:
                    next_token = torch.argmax(logits, dim=-1, keepdim=True)
                else:
                    next_token = self._sample_top_p(logits, cfg.top_p)

                # Stop at EOS / end of turn without keeping the stop token, so
                # its text can never leak into the response.
                if next_token.item() in stop_ids:
                    break
                generated_ids = torch.cat([generated_ids, next_token], dim=-1)
                # new_ones keeps the mask's integer dtype and device.
                attention_mask = torch.cat([attention_mask, attention_mask.new_ones((1, 1))], dim=-1)
                step_input = next_token

        # Decode only what was generated; the prompt is never part of the output.
        response = self.tokenizer.decode(generated_ids[0, prompt_len:], skip_special_tokens=False)
        response = response.split("<|im_end|>")[0]

        if verbose:
            total = sum(intervention_counts.values())
            print(f"\n[ARC Stats] Interventions: {total} total")
            for k, v in intervention_counts.items():
                if v > 0:
                    print(f" - {k}: {v}")
        return response.strip()

    def chat(self, system_prompt: Optional[str] = None) -> None:
        """
        Interactive chat mode.

        Reads lines from stdin until /quit, Ctrl-C or EOF. Earlier turns are
        passed to generate() so the model sees the whole conversation; /clear
        drops them and resets the intervention counters. The history is not
        truncated, so very long sessions grow the prompt accordingly.

        Args:
            system_prompt: Optional system message
        """
        print("\n" + "=" * 60)
        print(" ARC-8B Interactive Chat")
        print(" Commands: /quit, /stats, /arc on|off, /clear")
        print("=" * 60 + "\n")
        use_arc = True
        history: List[Dict[str, str]] = []
        while True:
            try:
                user_input = input("You: ").strip()
            except (KeyboardInterrupt, EOFError):
                print("\nGoodbye!")
                break
            if not user_input:
                continue

            # Commands
            command = user_input.lower()
            if command == '/quit':
                print("Goodbye!")
                break
            if command == '/stats':
                print(f"\nTotal interventions: {self.total_interventions}\n")
                continue
            if command == '/arc on':
                use_arc = True
                print("ARC enabled\n")
                continue
            if command == '/arc off':
                use_arc = False
                print("ARC disabled (baseline mode)\n")
                continue
            if command == '/clear':
                history = []
                self.total_interventions = {k: 0 for k in self.total_interventions}
                print("History cleared\n")
                continue

            # Generate response with the conversation so far as context
            response = self.generate(
                user_input,
                system_prompt=system_prompt,
                use_arc=use_arc,
                verbose=True,
                history=history
            )
            print(f"\nAssistant: {response}\n")
            history.append({"user": user_input, "assistant": response})
# =============================================================================
# MAIN
# =============================================================================
def main():
    """Command-line entry point.

    Parses the CLI flags, builds an ARCConfig from them, loads the ARC
    system, then either answers a single --prompt or starts the interactive
    chat.
    """
    cli = argparse.ArgumentParser(
        description="ARC-8B: Adaptive Repetition Controller",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python inference.py # Interactive chat
python inference.py --prompt "Hello" # Single prompt
python inference.py --no-arc # Disable ARC (baseline)
python inference.py --8bit # Use 8-bit quantization
"""
    )
    # (flag names, add_argument options), registered in help-output order.
    flag_specs = [
        (("--prompt", "-p"), dict(type=str, help="Single prompt to process")),
        (("--system", "-s"), dict(type=str, help="System prompt")),
        (("--no-arc",), dict(action="store_true", help="Disable ARC intervention")),
        (("--4bit",), dict(dest="load_4bit", action="store_true", default=True,
                           help="Use 4-bit quantization (default)")),
        (("--8bit",), dict(dest="load_8bit", action="store_true",
                           help="Use 8-bit quantization")),
        (("--no-quant",), dict(action="store_true",
                               help="Disable quantization (requires ~32GB VRAM)")),
        (("--max-tokens",), dict(type=int, default=512, help="Max tokens to generate")),
        (("--temperature",), dict(type=float, default=0.8, help="Sampling temperature")),
    ]
    for names, options in flag_specs:
        cli.add_argument(*names, **options)
    opts = cli.parse_args()

    # Configure: 4-bit stays on unless --8bit or --no-quant is given;
    # --8bit wins when both are present.
    settings = ARCConfig(
        max_new_tokens=opts.max_tokens,
        temperature=opts.temperature
    )
    if opts.load_8bit or opts.no_quant:
        settings.load_in_4bit = False
        settings.load_in_8bit = opts.load_8bit

    # Load
    system = ARCSystem(settings)
    system.load()

    # Run: without a prompt, fall through to the interactive chat
    if not opts.prompt:
        system.chat(system_prompt=opts.system)
        return
    answer = system.generate(
        opts.prompt,
        system_prompt=opts.system,
        use_arc=not opts.no_arc,
        verbose=True
    )
    print(f"\n{answer}\n")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c65acba055624759f3844e6b553e503b28b6362302b5800a3363e7b9d0651477
size 4976698592

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1f7be5ec6b07d6a9f2bb2fff3b5ad8532ac1d24a0abb208a3c4f68408938202d
size 4999802616

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:37aa5379bee102bd524ab56428aba4fd735645ba0f376fb37b8b3d5923be45cd
size 4915916080

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fae222101e3eec8ebef0ed6fbeaebec1b436d4c9f7d37cba9cdf44fc3a86e6a7
size 1168138808

View File

@@ -0,0 +1,299 @@
{
"metadata": {
"total_parameters": 8030261248,
"total_size": 16060522496
},
"weight_map": {
"lm_head.weight": "model-00004-of-00004.safetensors",
"model.embed_tokens.weight": "model-00001-of-00004.safetensors",
"model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.norm.weight": "model-00004-of-00004.safetensors"
}
}

3
risk_predictor.pt Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6ea75a1a8b408dadc229b464d0e1f131af33a3a974efa523ba9aad2780625fb3
size 8424206

23
special_tokens_map.json Normal file
View File

@@ -0,0 +1,23 @@
{
"bos_token": {
"content": "<|begin_of_text|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"eos_token": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}

3
sycophancy_head.pt Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a410f16a20edcf2d1b5609c74e39bccae4d7ed0c7007b0eb15a39db984ba98e6
size 24216

3
tokenizer.json Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b637ba15306725e16ef8ab8570ec57fec66845b810ed4d4c2583564d79b0c158
size 17209680

2070
tokenizer_config.json Normal file

File diff suppressed because it is too large Load Diff

3
verbosity_head.pt Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ba1118f564de6f41db58f48a44141cea2800a490e7b9f9646414c713af49dadb
size 24206