#!/usr/bin/env python3
"""Build a concept index and glossary for the VIBECODE-THEORY corpus."""

from __future__ import annotations

import json
import re
from collections import defaultdict
from dataclasses import dataclass
from pathlib import Path

# Corpus root: two directory levels above the directory holding this script.
ROOT = Path(__file__).resolve().parents[2]
# Generated artefacts are written next to this script.
OUT_DIR = Path(__file__).resolve().parent


@dataclass
class Document:
    """One markdown source file loaded by ``load_documents``."""

    # Three-digit paper number taken from the title heading, or the file stem
    # for files under ``allegorical/``.
    doc_id: str
    # Heading text (file stem when an allegorical file has no ``#`` heading).
    title: str
    # Location the text was read from.
    path: Path
    # File contents with CRLF line endings normalised to LF.
    text: str
    # Paper number named on a ``**Supersedes:** Paper NNN`` line, else None.
    supersedes: str | None


# Hand-curated concept catalogue. Each entry carries:
#   name             - display name, also searched as an alias
#   aliases          - lower-case phrases searched for in document text
#   introduced_in    - ``doc_id`` of the document that introduces the concept
#   status           - free-form status label copied into the outputs
#   related_concepts - names of other entries, used for the concept map edges
CONCEPTS = [
    {
        "name": "Vibe Coding",
        "aliases": ["vibe coding"],
        "introduced_in": "001",
        "status": "active",
        "related_concepts": ["Social-Cognitive Framework", "Mental Model Accuracy", "Meta-Skill Argument"],
    },
    {
        "name": "Social-Cognitive Framework",
        "aliases": ["social-cognitive framework", "vibe coding as social skill", "social-cognitive processes"],
        "introduced_in": "004",
        "status": "active",
        "related_concepts": ["Vibe Coding", "Mental Model Accuracy", "Adaptive Communication", "Collaboration Management"],
    },
    {
        "name": "Mental Model Accuracy",
        "aliases": ["mental model accuracy", "mental model"],
        "introduced_in": "001",
        "status": "active",
        "related_concepts": ["Social-Cognitive Framework", "Adaptive Communication", "Collaboration Management"],
    },
    {
        "name": "Adaptive Communication",
        "aliases": ["adaptive communication", "constraint calibration", "register matching"],
        "introduced_in": "001",
        "status": "active",
        "related_concepts": ["Social-Cognitive Framework", "Mental Model Accuracy", "Collaboration Management"],
    },
    {
        "name": "Collaboration Management",
        "aliases": ["collaboration management", "task decomposition", "trust calibration", "recovery"],
        "introduced_in": "001",
        "status": "active",
        "related_concepts": ["Social-Cognitive Framework", "Adaptive Communication", "Technical Foundation"],
    },
    {
        "name": "Technical Foundation",
        "aliases": ["technical foundation", "technical expertise"],
        "introduced_in": "001",
        "status": "active",
        "related_concepts": ["Vibe Coding", "Collaboration Management", "Meta-Skill Argument"],
    },
    {
        "name": "Neurodivergence Note",
        "aliases": ["neurodivergence note", "neurodivergence hypothesis"],
        "introduced_in": "001",
        "status": "open question",
        "related_concepts": ["Social-Cognitive Framework"],
    },
    {
        "name": "Shelf-Life Problem",
        "aliases": ["shelf-life problem", "shelf life problem"],
        "introduced_in": "003",
        "status": "active",
        "related_concepts": ["Meta-Skill Argument", "Infrastructure Threshold"],
    },
    {
        "name": "Meta-Skill Argument",
        "aliases": ["meta-skill argument", "meta-skill"],
        "introduced_in": "004",
        "status": "active",
        "related_concepts": ["Shelf-Life Problem", "Vibe Coding", "Social-Cognitive Framework"],
    },
    {
        "name": "Cognitive Surplus",
        "aliases": ["cognitive surplus", "surplus of cognition", "the cognitive surplus"],
        "introduced_in": "002",
        "status": "active",
        "related_concepts": ["Agricultural Parallel", "Cognition as a Commodity", "Automation Spiral"],
    },
    {
        "name": "Agricultural Parallel",
        "aliases": ["agricultural parallel", "agricultural analogy"],
        "introduced_in": "002",
        "status": "active",
        "related_concepts": ["Cognitive Surplus", "Green Revolution", "Feudal Internet", "Dependency Trap"],
    },
    {
        "name": "Dual Cognition Problem",
        "aliases": ["dual cognition problem", "the dual cognition problem"],
        "introduced_in": "002",
        "status": "active",
        "related_concepts": ["Cognitive Preference Shift", "Cognitive Atrophy", "Cognitive Surplus"],
    },
    {
        "name": "Cognitive Atrophy",
        "aliases": ["cognitive atrophy", "capability loss"],
        "introduced_in": "002",
        "status": "open question",
        "related_concepts": ["Dual Cognition Problem", "Cognitive Preference Shift", "Biological Ratchet"],
    },
    {
        "name": "Green Revolution",
        "aliases": ["green revolution"],
        "introduced_in": "002",
        "status": "active",
        "related_concepts": ["Agricultural Parallel", "Feudal Internet"],
    },
    {
        "name": "Feudal Internet",
        "aliases": ["feudal internet"],
        "introduced_in": "002",
        "status": "active",
        "related_concepts": ["Agricultural Parallel", "Dependency Trap", "Cognition as a Commodity"],
    },
    {
        "name": "Dependency Trap",
        "aliases": ["dependency trap", "future 3: the dependency trap"],
        "introduced_in": "002",
        "status": "active",
        "related_concepts": ["Feudal Internet", "Cognitive Atrophy", "Y2K Parallel"],
    },
    {
        "name": "Automation Spiral",
        "aliases": ["automation spiral"],
        "introduced_in": "003",
        "status": "active",
        "related_concepts": ["Cognitive Surplus", "Feedback Loop", "Master-Apprentice Parallel"],
    },
    {
        "name": "Cognitive Preference Shift",
        "aliases": ["cognitive preference shift", "preference shift"],
        "introduced_in": "003",
        "status": "active",
        "related_concepts": ["Dual Cognition Problem", "Cognitive Atrophy", "Biological Ratchet"],
    },
    {
        "name": "Cognition as a Commodity",
        "aliases": ["cognition as a commodity", "cognition-as-commodity framing"],
        "introduced_in": "005",
        "status": "active",
        "related_concepts": ["Cognitive Surplus", "Feudal Internet", "Information/Cognition Resource Hierarchy"],
    },
    {
        "name": "Y2K Parallel",
        "aliases": ["y2k parallel", "ai y2k moment", "y2k moment"],
        "introduced_in": "005",
        "status": "active",
        "related_concepts": ["Dependency Trap", "Infrastructure Threshold", "Cognitive Surplus"],
    },
    {
        "name": "Information/Cognition Resource Hierarchy",
        "aliases": ["information and cognition as resources", "resource hierarchy"],
        "introduced_in": "005",
        "status": "active",
        "related_concepts": ["Cognition as a Commodity", "Knowledge Unification"],
    },
    {
        "name": "Feedback Loop",
        "aliases": ["feedback loop"],
        "introduced_in": "006",
        "status": "active",
        "related_concepts": ["Automation Spiral", "Master-Apprentice Parallel", "Niche Construction"],
    },
    {
        "name": "Master-Apprentice Parallel",
        "aliases": ["master-apprentice parallel", "master-apprentice relationship"],
        "introduced_in": "006",
        "status": "active",
        "related_concepts": ["Feedback Loop", "Automation Spiral", "The Golem"],
    },
    {
        "name": "Niche Construction",
        "aliases": ["niche construction"],
        "introduced_in": "006",
        "status": "active",
        "related_concepts": ["Feedback Loop", "Recursion Observation"],
    },
    {
        "name": "Theological Thread",
        "aliases": ["theological thread"],
        "introduced_in": "006",
        "status": "active",
        "related_concepts": ["Prometheus", "Knowledge Unification", "Recursion Observation"],
    },
    {
        "name": "Recursion Observation",
        "aliases": ["recursion observation", "cosmological → biological → linguistic → computational"],
        "introduced_in": "006",
        "status": "open question",
        "related_concepts": ["Theological Thread", "Niche Construction", "Knowledge Unification"],
    },
    {
        "name": "Infrastructure Threshold",
        "aliases": ["infrastructure threshold"],
        "introduced_in": "007",
        "status": "active",
        "related_concepts": ["Biological Ratchet", "Premature Dependencies", "Y2K Parallel"],
    },
    {
        "name": "Premature Dependencies",
        "aliases": ["premature dependencies", "dependency waiting for its enabling technology"],
        "introduced_in": "007",
        "status": "active",
        "related_concepts": ["Infrastructure Threshold", "Biological Ratchet"],
    },
    {
        "name": "Biological Ratchet",
        "aliases": ["biological ratchet", "dependency ratchet", "ratchet thesis"],
        "introduced_in": "007",
        "status": "active",
        "related_concepts": ["Infrastructure Threshold", "Cognitive Preference Shift", "Knowledge Unification"],
    },
    {
        "name": "Dependency Chain",
        "aliases": ["dependency chain"],
        "introduced_in": "007",
        "status": "active",
        "related_concepts": ["Biological Ratchet", "Knowledge Unification", "Cheating Frame"],
    },
    {
        "name": "Knowledge Unification",
        "aliases": [
            "knowledge unification",
            "unification thesis",
            "unification of human knowledge",
            "the dependency chain as knowledge unification",
        ],
        "introduced_in": "008",
        "status": "active",
        "related_concepts": ["Dependency Chain", "Singularity as Compilation", "Integration Layer"],
    },
    {
        "name": "Singularity as Compilation",
        "aliases": ["singularity as compilation", "compilation not transcendence", "compilation, not transcendence"],
        "introduced_in": "008",
        "status": "active",
        "related_concepts": ["Knowledge Unification", "Integration Layer", "Cheating Frame"],
    },
    {
        "name": "Integration Layer",
        "aliases": ["integration layer"],
        "introduced_in": "008",
        "status": "active",
        "related_concepts": ["Knowledge Unification", "Singularity as Compilation", "Existential Purpose of the Chain"],
    },
    {
        "name": "Ship of Theseus Problem",
        "aliases": ["ship of theseus problem", "identity problem", "species identity problem", "the identity problem"],
        "introduced_in": "008",
        "status": "active",
        "related_concepts": ["Continuity Argument", "Identity Argument", "Pragmatic Argument"],
    },
    {
        "name": "Continuity Argument",
        "aliases": ["continuity argument", "the continuity argument"],
        "introduced_in": "008",
        "status": "active",
        "related_concepts": ["Ship of Theseus Problem", "Identity Argument", "Pragmatic Argument"],
    },
    {
        "name": "Identity Argument",
        "aliases": ["identity argument", "essentialist", "the identity argument"],
        "introduced_in": "008",
        "status": "active",
        "related_concepts": ["Ship of Theseus Problem", "Continuity Argument", "Pragmatic Argument"],
    },
    {
        "name": "Pragmatic Argument",
        "aliases": ["pragmatic argument", "the pragmatic argument"],
        "introduced_in": "008",
        "status": "active",
        "related_concepts": ["Ship of Theseus Problem", "Continuity Argument", "Identity Argument"],
    },
    {
        "name": "Cheating Frame",
        "aliases": ["did we cheat", "cheating frame"],
        "introduced_in": "008",
        "status": "active",
        "related_concepts": ["Dependency Chain", "Singularity as Compilation", "Existential Purpose of the Chain"],
    },
    {
        "name": "Existential Purpose of the Chain",
        "aliases": [
            "existential purpose of the dependency chain",
            "existential purpose",
            "the existential purpose of the dependency chain",
        ],
        "introduced_in": "008",
        "status": "active",
        "related_concepts": ["Integration Layer", "Cheating Frame", "Knowledge Unification"],
    },
    {
        "name": "Eve's Apple",
        "aliases": ["eve's apple"],
        "introduced_in": "eves-apple",
        "status": "reference allegory",
        "related_concepts": ["Cognitive Preference Shift", "Dependency Chain"],
    },
    {
        "name": "Pandora's Box",
        "aliases": ["pandora's box"],
        "introduced_in": "pandoras-box",
        "status": "reference allegory",
        "related_concepts": ["Dependency Chain", "Automation Spiral"],
    },
    {
        "name": "Prometheus",
        "aliases": ["prometheus"],
        "introduced_in": "prometheus",
        "status": "reference allegory",
        "related_concepts": ["Theological Thread", "Dependency Chain", "Cheating Frame"],
    },
    {
        "name": "Sorcerer's Apprentice",
        "aliases": ["sorcerer's apprentice"],
        "introduced_in": "sorcerers-apprentice",
        "status": "reference allegory",
        "related_concepts": ["Automation Spiral", "Feedback Loop", "Dependency Chain"],
    },
    {
        "name": "The Golem",
        "aliases": ["the golem", "golem"],
        "introduced_in": "the-golem",
        "status": "reference allegory",
        "related_concepts": ["Master-Apprentice Parallel", "Dependency Chain"],
    },
    {
        "name": "Faustian Bargain",
        "aliases": ["faustian bargain", "faust"],
        "introduced_in": "faust",
        "status": "reference allegory",
        "related_concepts": ["Feedback Loop", "Cognitive Preference Shift"],
    },
    {
        "name": "Icarus",
        "aliases": ["icarus"],
        "introduced_in": "icarus",
        "status": "reference allegory",
        "related_concepts": ["Shelf-Life Problem", "Infrastructure Threshold"],
    },
    {
        "name": "Tower of Babel",
        "aliases": ["tower of babel", "babel"],
        "introduced_in": "tower-of-babel",
        "status": "reference allegory",
        "related_concepts": ["Dependency Chain", "Knowledge Unification"],
    },
]


def clean_text(text: str) -> str:
    """Return *text* with Windows (CRLF) line endings converted to LF."""
    text = text.replace("\r\n", "\n")
    return text


def load_documents() -> dict[str, Document]:
    """Read the corpus from disk and return it keyed by ``doc_id``.

    Two sources are scanned, each in sorted path order:

    * ``ROOT/00*.md`` - numbered papers. A file is kept only if it has a
      ``# Paper NNN: Title`` heading; ``NNN`` becomes the ``doc_id``. An
      optional ``**Supersedes:** Paper NNN`` line fills ``supersedes``.
    * ``ROOT/allegorical/*.md`` - the file stem is the ``doc_id`` and the
      first ``#`` heading (or the stem) is the title.

    A later file with the same ``doc_id`` replaces an earlier one.
    """
    docs: dict[str, Document] = {}

    # NOTE(review): "00*.md" only matches names starting with "00", so a paper
    # numbered 010 or higher would not be picked up - confirm this is intended.
    for path in sorted(ROOT.glob("00*.md")):
        text = clean_text(path.read_text(encoding="utf-8"))
        title_match = re.search(r"^#\s+Paper\s+(\d{3}):\s*(.+)$", text, re.MULTILINE)
        supersedes_match = re.search(r"^\*\*Supersedes:\*\*\s*Paper\s+(\d{3})", text, re.MULTILINE)
        if not title_match:
            # Not a paper (no recognisable title heading): skip it.
            continue
        doc_id = title_match.group(1)
        docs[doc_id] = Document(
            doc_id=doc_id,
            title=title_match.group(2).strip(),
            path=path,
            text=text,
            supersedes=supersedes_match.group(1) if supersedes_match else None,
        )

    for path in sorted((ROOT / "allegorical").glob("*.md")):
        text = clean_text(path.read_text(encoding="utf-8"))
        title_match = re.search(r"^#\s+(.+)$", text, re.MULTILINE)
        doc_id = path.stem
        docs[doc_id] = Document(
            doc_id=doc_id,
            title=title_match.group(1).strip() if title_match else path.stem,
            path=path,
            text=text,
            supersedes=None,
        )

    return docs


def paragraphs(text: str) -> list[str]:
    """Split *text* on blank lines and return the non-empty, stripped chunks."""
    return [p.strip() for p in re.split(r"\n\s*\n", text) if p.strip()]


def sentences(text: str) -> list[str]:
    """Split *text* into sentences.

    Whitespace runs are collapsed to single spaces first; the text is then
    split after every ``.``, ``!`` or ``?`` that is followed by whitespace.
    """
    normalized = re.sub(r"\s+", " ", text.strip())
    return [s.strip() for s in re.split(r"(?<=[.!?])\s+", normalized) if s.strip()]


def alias_present(text: str, alias: str) -> bool:
    """Return True if *alias* occurs in *text* as a whole phrase.

    Matching is case-insensitive (both sides are lower-cased). The phrase must
    not be directly preceded or followed by a word character, so ``golem``
    does not match inside ``golems``.

    Lookarounds are used instead of ``\\b`` because ``\\b`` next to a
    non-word character (an alias ending in ``?`` or ``)``, say) requires a
    word character on the other side, which rejects genuine occurrences. For
    aliases that start and end with a word character the two forms are
    equivalent.

    A blank alias never matches.
    """
    if not alias.strip():
        return False
    pattern = r"(?<!\w)" + re.escape(alias.lower()) + r"(?!\w)"
    return re.search(pattern, text.lower()) is not None


def extract_section(text: str, heading: str) -> str:
    """Return the body under the markdown heading *heading*, or ``""``.

    The heading must be level 2 or deeper (``##``, ``###``, ...) and match
    *heading* exactly, ignoring case and trailing whitespace. The body runs up
    to the next heading of level 2 or deeper, or to the end of *text*, and is
    returned stripped.
    """
    pattern = rf"^##+\s+{re.escape(heading)}\s*$"
    match = re.search(pattern, text, re.MULTILINE | re.IGNORECASE)
    if not match:
        return ""
    start = match.end()
    next_heading = re.search(r"^##+\s+", text[start:], re.MULTILINE)
    end = start + next_heading.start() if next_heading else len(text)
    return text[start:end].strip()


def first_matching_sentence(doc: Document, aliases: list[str]) -> str:
    """Pick a definition sentence for a concept from *doc*.

    Strategy, in order:

    1. For each alias, look for a section whose heading is that alias and
       return its first sentence of at least 40 characters.
    2. Otherwise scan every paragraph of the document (skipping headings and
       the ``**Authors:**`` / ``**Date:**`` lines) and return the first
       sentence of at least 40 characters that mentions any alias.
    3. Otherwise return a fixed "not found" placeholder.
    """
    for alias in aliases:
        section = extract_section(doc.text, alias)
        if section:
            for paragraph in paragraphs(section):
                if paragraph.startswith("#"):
                    continue
                for sentence in sentences(paragraph):
                    # Very short sentences are skipped as definition candidates.
                    if len(sentence) >= 40:
                        return sentence

    for paragraph in paragraphs(doc.text):
        if paragraph.startswith(("#", "**Authors:**", "**Date:**")):
            continue
        for sentence in sentences(paragraph):
            if len(sentence) >= 40 and any(alias_present(sentence, alias) for alias in aliases):
                return sentence

    return "Definition sentence not found in source text."
def find_mentions(docs: dict[str, Document], aliases: list[str]) -> list[str]:
    """Return the ids of all documents whose text contains any of *aliases*.

    Ids come back in the iteration order of *docs*.
    """
    return [
        doc_id
        for doc_id, doc in docs.items()
        if any(alias_present(doc.text, alias) for alias in aliases)
    ]


def find_revisions(docs: dict[str, Document], concept: dict, mentions: list[str]) -> list[str]:
    """Return, sorted, the mentioning documents that supersede the concept's origin.

    A document counts as a revision when its ``supersedes`` field equals the
    concept's ``introduced_in`` id.
    """
    origin = concept["introduced_in"]
    return sorted(doc_id for doc_id in mentions if docs[doc_id].supersedes == origin)


def find_challenges(docs: dict[str, Document], concept: dict, mentions: list[str]) -> list[str]:
    """Return, sorted and de-duplicated, the documents that appear to challenge a concept.

    For every mentioning document other than the introducing one, a search
    text is assembled from its "Relationship to Prior Papers" section, its
    open-question sections and its first 3000 characters. The document is
    reported when that text contains both an alias (or the concept name) and
    one of the challenge keywords.
    """
    names = [concept["name"]] + concept["aliases"]
    origin = concept["introduced_in"]
    open_question_headings = (
        "Open Questions",
        "Open Questions for Paper 007",
        "Open Questions for Paper 009",
    )
    hits: set[str] = set()

    for doc_id in mentions:
        if doc_id == origin:
            continue
        body = docs[doc_id].text
        relationship = extract_section(body, "Relationship to Prior Papers")
        open_questions = "\n".join(extract_section(body, heading) for heading in open_question_headings)
        haystack = "\n".join((relationship, open_questions, body[:3000]))

        if not any(alias_present(haystack, name) for name in names):
            continue
        keyword = re.search(
            r"challenge|critic|rebuttal|unfalsif|weak|bounded|downgrade|unknown",
            haystack,
            re.IGNORECASE,
        )
        if keyword:
            hits.add(doc_id)

    return sorted(hits)


def mermaid_id(name: str) -> str:
    """Turn a concept name into a Mermaid node id.

    The name is lower-cased, every run of characters outside ``a-z0-9`` becomes
    one underscore, leading/trailing underscores are dropped and ``c_`` is
    prefixed.
    """
    slug = re.sub(r"[^a-z0-9]+", "_", name.lower())
    return "c_" + slug.strip("_")


def build_index() -> dict[str, list[dict]]:
    """Load the corpus and build one index record per entry in ``CONCEPTS``.

    Returns a dict with a single key, ``"concepts"``, holding the records in
    catalogue order. Raises ``KeyError`` if a concept's ``introduced_in``
    document was not loaded.
    """
    docs = load_documents()
    records: list[dict] = []

    for concept in CONCEPTS:
        origin = concept["introduced_in"]
        search_terms = [concept["name"]] + concept["aliases"]
        origin_doc = docs[origin]

        mentions = find_mentions(docs, search_terms)
        revised_in = find_revisions(docs, concept, mentions)
        challenged_in = find_challenges(docs, concept, mentions)
        # Plain references: every mention that is neither the origin nor a revision.
        referenced_in = [
            doc_id
            for doc_id in mentions
            if not (doc_id == origin or doc_id in revised_in)
        ]
        definition = first_matching_sentence(origin_doc, search_terms)

        record = {
            "name": concept["name"],
            "aliases": sorted(set(concept["aliases"])),
            "introduced_in": origin,
            "definition": definition,
            "revised_in": revised_in,
            "challenged_in": challenged_in,
            "referenced_in": referenced_in,
            "status": concept["status"],
            "related_concepts": concept["related_concepts"],
        }
        records.append(record)

    return {"concepts": records}


def write_glossary(index: dict[str, list[dict]]) -> None:
    """Write ``glossary.md`` to ``OUT_DIR``, one section per concept.

    Concepts are ordered by case-insensitive name. Empty lists are rendered
    as ``None``.
    """

    def listing(values: list[str]) -> str:
        # Comma-separated values, or the literal word "None" when empty.
        if values:
            return ", ".join(values)
        return "None"

    out = ["# VIBECODE-THEORY Glossary", ""]
    ordered = sorted(index["concepts"], key=lambda entry: entry["name"].lower())
    for entry in ordered:
        out.append(f"## {entry['name']}")
        out.append(f"Origin: {entry['introduced_in']}")
        out.append(f"Status: {entry['status']}")
        out.append(f"Aliases: {listing(entry['aliases'])}")
        out.append(entry["definition"])
        out.append(f"Revised in: {listing(entry['revised_in'])}")
        out.append(f"Challenged in: {listing(entry['challenged_in'])}")
        out.append(f"Referenced in: {listing(entry['referenced_in'])}")
        out.append(f"Related concepts: {listing(entry['related_concepts'])}")
        out.append("")

    target = OUT_DIR / "glossary.md"
    target.write_text("\n".join(out), encoding="utf-8")


def write_mermaid(index: dict[str, list[dict]]) -> None:
    """Write ``concept_map.mermaid`` to ``OUT_DIR``.

    Emits one node per concept, then one ``relates to`` edge per unordered
    pair of related concepts (the first direction encountered wins).
    """
    concepts = index["concepts"]
    out = ["graph TD"]

    for entry in concepts:
        label = entry["name"]
        out.append(f' {mermaid_id(label)}["{label}"]')

    drawn: set[tuple[str, str]] = set()
    for entry in concepts:
        source = entry["name"]
        for target in entry["related_concepts"]:
            # Sorting the pair makes A-B and B-A the same key.
            pair = tuple(sorted((source, target)))
            if pair not in drawn:
                drawn.add(pair)
                out.append(f" {mermaid_id(source)} -->|relates to| {mermaid_id(target)}")

    destination = OUT_DIR / "concept_map.mermaid"
    destination.write_text("\n".join(out) + "\n", encoding="utf-8")


def main() -> None:
    """Build the index, write all three output files and report what was written."""
    index = build_index()

    index_path = OUT_DIR / "index.json"
    index_path.write_text(json.dumps(index, indent=2) + "\n", encoding="utf-8")
    write_glossary(index)
    write_mermaid(index)

    print(f"Indexed {len(index['concepts'])} concepts.")
    print(f"Wrote {OUT_DIR / 'index.json'}")
    print(f"Wrote {OUT_DIR / 'glossary.md'}")
    print(f"Wrote {OUT_DIR / 'concept_map.mermaid'}")


if __name__ == "__main__":
    main()