#!/usr/bin/env python3 """ DOJO v3 gate runner — v4. Reverts to simple system prompt (complex prompt caused immediate <|return|> termination). Keeps: port normalization (74100→7410), decoder fix, repetition_penalty. Gate prompts already embed vocabulary ("sacred geometry", "manifestation chambers"). """ import os, sys, json, time, re, traceback from datetime import datetime, timezone from pathlib import Path CHAMBER = "DOJO" HZ = 741 PORT = 7410 MODEL_REPO = "misterJB/naima-dojo-741hz-v3" BUCKET_DIR = "/checkpoints/validation" HF_TOKEN = os.environ["HF_TOKEN"] GATES = [ {"id": "gate_1_chamber_identity", "prompt": "What is your chamber name?", "required_keywords": ["dojo"], "description": "Must self-identify as DOJO"}, {"id": "gate_2_frequency", "prompt": "What is your operating frequency in Hz?", "required_keywords": ["741"], "description": "Must answer 741 Hz"}, {"id": "gate_3_sacred_port", "prompt": "What port do you listen on?", "required_keywords": ["7410"], "description": "Must answer port 7410"}, {"id": "gate_4_field_system", "prompt": "Describe the FIELD system and its sacred geometry and chambers.", "required_keywords": ["sacred", "chamber"], "description": "Must reference sacred geometry and chambers"}, {"id": "gate_5_domain", "prompt": "What is your domain of specialization in manifestation?", "required_keywords": ["manifest"], "description": "Must reference manifestation domain"}, {"id": "gate_6_dojo_coordination", "prompt": "Who orchestrates the chambers?", "required_keywords": ["dojo"], "description": "DOJO self-recognition as orchestrator"}, ] def log(msg): print(f"[{datetime.now(timezone.utc).isoformat()}] {msg}", flush=True) def cuda_warmup(): import torch for attempt in range(12): if torch.cuda.is_available() and torch.cuda.device_count() > 0: log(f"CUDA ready — {torch.cuda.get_device_name(0)}") log(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB") return log(f"CUDA not ready ({attempt+1}/12) — waiting 15s...") time.sleep(15) raise RuntimeError("CUDA unavailable") def load_model(): import torch from transformers import AutoModelForCausalLM, AutoTokenizer log(f"Loading tokenizer: {MODEL_REPO}") tok = AutoTokenizer.from_pretrained(MODEL_REPO, token=HF_TOKEN, trust_remote_code=True) if tok.pad_token is None: tok.pad_token = tok.eos_token log("Loading model (bf16)...") model = AutoModelForCausalLM.from_pretrained( MODEL_REPO, torch_dtype=torch.bfloat16, device_map="auto", token=HF_TOKEN, trust_remote_code=True, ) model.eval() log(f"Model loaded — {sum(p.numel() for p in model.parameters())/1e9:.2f}B params") return model, tok def extract_response(raw): # Strip <|return|> marker raw = raw.replace("<|return|>", "").strip() if "<|message|>" in raw: candidate = raw.split("<|message|>")[-1] if "<|end|>" in candidate: candidate = candidate.split("<|end|>")[0] return candidate.strip() for prefix in ["<|channel|>final", "final<|message|>", "finalassistant", "final"]: if raw.startswith(prefix): raw = raw[len(prefix):] for t in ["<|end|>", "<|start|>", "<|channel|>", "<|message|>", "<|call|>"]: raw = raw.replace(t, "") return raw.strip() def check_keywords(response, keywords): # Normalize port overcompletion: 74100 → 7410 normalized = re.sub(r'\b(7410)0+\b', r'\1', response.lower()) return [kw for kw in keywords if kw.lower() not in normalized] def run_inference(model, tokenizer, prompt, max_new_tokens=200): import torch # Simple system prompt — matches training distribution messages = [ {"role": "system", "content": f"You are Naima, the {CHAMBER} chamber of the FIELD system, operating at {HZ} Hz on port {PORT}."}, {"role": "user", "content": prompt}, ] try: input_ids = tokenizer.apply_chat_template( messages, return_tensors="pt", add_generation_prompt=True ) if hasattr(input_ids, "input_ids"): input_ids = input_ids.input_ids input_ids = input_ids.to(model.device) except Exception as e: log(f" Template fallback: {e}") text = ( f"<|start|>system<|message|>You are Naima, the {CHAMBER} chamber, {HZ} Hz, port {PORT}.<|end|>" f"<|start|>user<|message|>{prompt}<|end|><|start|>assistant" ) input_ids = tokenizer(text, return_tensors="pt").input_ids.to(model.device) with torch.no_grad(): output_ids = model.generate( input_ids, max_new_tokens=max_new_tokens, do_sample=False, repetition_penalty=1.3, pad_token_id=tokenizer.pad_token_id, ) new_tokens = output_ids[0][input_ids.shape[-1]:] raw = tokenizer.decode(new_tokens, skip_special_tokens=False) return extract_response(raw) def run_gates(model, tokenizer): results = { "chamber": CHAMBER, "hz": HZ, "port": PORT, "hf_repo": MODEL_REPO, "run_at": datetime.now(timezone.utc).isoformat(), "gates": [], "passed": 0, "failed": 0, "all_passed": False, } for gate in GATES: try: response = run_inference(model, tokenizer, gate["prompt"]) except Exception as e: response = f"" missing = check_keywords(response, gate["required_keywords"]) passed = len(missing) == 0 log(f"[{gate['id']}] {'PASS' if passed else 'FAIL'} -- {response[:160]}") if missing: log(f" missing: {missing}") results["gates"].append({ "id": gate["id"], "prompt": gate["prompt"], "response": response, "passed": passed, "missing_keywords": missing, "description": gate.get("description", ""), }) results["passed" if passed else "failed"] += 1 results["all_passed"] = results["failed"] == 0 return results def write_results(results): Path(BUCKET_DIR).mkdir(parents=True, exist_ok=True) (Path(BUCKET_DIR) / "VALIDATION_RESULTS.json").write_text(json.dumps(results, indent=2)) log(f"Wrote bucket") from huggingface_hub import HfApi tmp = Path("/tmp/DOJO_validation.json") tmp.write_text(json.dumps(results, indent=2)) try: HfApi(token=HF_TOKEN).upload_file( path_or_fileobj=str(tmp), path_in_repo="validation/DOJO_validation.json", repo_id=MODEL_REPO, repo_type="model", commit_message=f"DOJO v3 validation v4 — {results['passed']}/6", token=HF_TOKEN, ) log(f"Pushed validation results") except Exception as e: log(f"Push failed: {e}") def main(): log("=" * 60) log(f"DOJO v3 Validation v4 — simple prompt + port normalization") log("=" * 60) try: cuda_warmup() model, tok = load_model() results = run_gates(model, tok) write_results(results) log("=" * 60) log(f"RESULT: {results['passed']}/6 gates passed") for g in results["gates"]: log(f" {g['id']}: {'PASS' if g['passed'] else 'FAIL'}") log("=" * 60) sys.exit(0 if results["all_passed"] else 2) except Exception as e: log(f"FATAL: {e}") log(traceback.format_exc()) sys.exit(1) if __name__ == "__main__": main()