Files
Mortdecai f654b30de9 docs: integration tools — cross-reference graph, concept index, research digest
Codex-built tooling: cross-reference graph, concept index with build script,
and research integrator that extracted 142 scholars, 175 bibliography items,
4 contradiction topics, and coverage maps for Paper 009 planning.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-03 08:31:20 -04:00

559 lines
21 KiB
Python

#!/usr/bin/env python3
"""Build a concept index and glossary for the VIBECODE-THEORY corpus."""
from __future__ import annotations
import json
import re
from collections import defaultdict
from dataclasses import dataclass
from pathlib import Path
# Corpus root: the directory two levels above the folder that holds this
# script. load_documents() globs the paper and allegory Markdown files from it.
ROOT = Path(__file__).resolve().parents[2]
# Output directory: the folder that holds this script. index.json, glossary.md
# and concept_map.mermaid are written here.
OUT_DIR = Path(__file__).resolve().parent
@dataclass
class Document:
    """One Markdown source file loaded by ``load_documents``."""

    # Lookup key: the three-digit number from a "# Paper NNN:" heading for
    # papers, or the file stem for files in the "allegorical" folder.
    doc_id: str
    # Heading text (for allegories without a heading, the file stem).
    title: str
    # Location of the file on disk.
    path: Path
    # Full file content after CRLF -> LF normalization.
    text: str
    # Paper number taken from a "**Supersedes:** Paper NNN" line, else None.
    supersedes: str | None
# Hand-maintained concept registry iterated by build_index(). Each entry has:
#   name             - canonical display name (also searched for as an alias)
#   aliases          - lowercase phrasings searched for in document text
#   introduced_in    - id of the originating document: a three-digit paper
#                      number or the file stem of an allegorical document
#   status           - free-form label copied into the index and the glossary
#   related_concepts - names of related entries; write_mermaid() draws one
#                      edge per unordered pair
CONCEPTS = [
    {
        "name": "Vibe Coding",
        "aliases": ["vibe coding"],
        "introduced_in": "001",
        "status": "active",
        "related_concepts": ["Social-Cognitive Framework", "Mental Model Accuracy", "Meta-Skill Argument"],
    },
    {
        "name": "Social-Cognitive Framework",
        "aliases": ["social-cognitive framework", "vibe coding as social skill", "social-cognitive processes"],
        "introduced_in": "004",
        "status": "active",
        "related_concepts": ["Vibe Coding", "Mental Model Accuracy", "Adaptive Communication", "Collaboration Management"],
    },
    {
        "name": "Mental Model Accuracy",
        "aliases": ["mental model accuracy", "mental model"],
        "introduced_in": "001",
        "status": "active",
        "related_concepts": ["Social-Cognitive Framework", "Adaptive Communication", "Collaboration Management"],
    },
    {
        "name": "Adaptive Communication",
        "aliases": ["adaptive communication", "constraint calibration", "register matching"],
        "introduced_in": "001",
        "status": "active",
        "related_concepts": ["Social-Cognitive Framework", "Mental Model Accuracy", "Collaboration Management"],
    },
    {
        "name": "Collaboration Management",
        "aliases": ["collaboration management", "task decomposition", "trust calibration", "recovery"],
        "introduced_in": "001",
        "status": "active",
        "related_concepts": ["Social-Cognitive Framework", "Adaptive Communication", "Technical Foundation"],
    },
    {
        "name": "Technical Foundation",
        "aliases": ["technical foundation", "technical expertise"],
        "introduced_in": "001",
        "status": "active",
        "related_concepts": ["Vibe Coding", "Collaboration Management", "Meta-Skill Argument"],
    },
    {
        "name": "Neurodivergence Note",
        "aliases": ["neurodivergence note", "neurodivergence hypothesis"],
        "introduced_in": "001",
        "status": "open question",
        "related_concepts": ["Social-Cognitive Framework"],
    },
    {
        "name": "Shelf-Life Problem",
        "aliases": ["shelf-life problem", "shelf life problem"],
        "introduced_in": "003",
        "status": "active",
        "related_concepts": ["Meta-Skill Argument", "Infrastructure Threshold"],
    },
    {
        "name": "Meta-Skill Argument",
        "aliases": ["meta-skill argument", "meta-skill"],
        "introduced_in": "004",
        "status": "active",
        "related_concepts": ["Shelf-Life Problem", "Vibe Coding", "Social-Cognitive Framework"],
    },
    {
        "name": "Cognitive Surplus",
        "aliases": ["cognitive surplus", "surplus of cognition", "the cognitive surplus"],
        "introduced_in": "002",
        "status": "active",
        "related_concepts": ["Agricultural Parallel", "Cognition as a Commodity", "Automation Spiral"],
    },
    {
        "name": "Agricultural Parallel",
        "aliases": ["agricultural parallel", "agricultural analogy"],
        "introduced_in": "002",
        "status": "active",
        "related_concepts": ["Cognitive Surplus", "Green Revolution", "Feudal Internet", "Dependency Trap"],
    },
    {
        "name": "Dual Cognition Problem",
        "aliases": ["dual cognition problem", "the dual cognition problem"],
        "introduced_in": "002",
        "status": "active",
        "related_concepts": ["Cognitive Preference Shift", "Cognitive Atrophy", "Cognitive Surplus"],
    },
    {
        "name": "Cognitive Atrophy",
        "aliases": ["cognitive atrophy", "capability loss"],
        "introduced_in": "002",
        "status": "open question",
        "related_concepts": ["Dual Cognition Problem", "Cognitive Preference Shift", "Biological Ratchet"],
    },
    {
        "name": "Green Revolution",
        "aliases": ["green revolution"],
        "introduced_in": "002",
        "status": "active",
        "related_concepts": ["Agricultural Parallel", "Feudal Internet"],
    },
    {
        "name": "Feudal Internet",
        "aliases": ["feudal internet"],
        "introduced_in": "002",
        "status": "active",
        "related_concepts": ["Agricultural Parallel", "Dependency Trap", "Cognition as a Commodity"],
    },
    {
        "name": "Dependency Trap",
        "aliases": ["dependency trap", "future 3: the dependency trap"],
        "introduced_in": "002",
        "status": "active",
        "related_concepts": ["Feudal Internet", "Cognitive Atrophy", "Y2K Parallel"],
    },
    {
        "name": "Automation Spiral",
        "aliases": ["automation spiral"],
        "introduced_in": "003",
        "status": "active",
        "related_concepts": ["Cognitive Surplus", "Feedback Loop", "Master-Apprentice Parallel"],
    },
    {
        "name": "Cognitive Preference Shift",
        "aliases": ["cognitive preference shift", "preference shift"],
        "introduced_in": "003",
        "status": "active",
        "related_concepts": ["Dual Cognition Problem", "Cognitive Atrophy", "Biological Ratchet"],
    },
    {
        "name": "Cognition as a Commodity",
        "aliases": ["cognition as a commodity", "cognition-as-commodity framing"],
        "introduced_in": "005",
        "status": "active",
        "related_concepts": ["Cognitive Surplus", "Feudal Internet", "Information/Cognition Resource Hierarchy"],
    },
    {
        "name": "Y2K Parallel",
        "aliases": ["y2k parallel", "ai y2k moment", "y2k moment"],
        "introduced_in": "005",
        "status": "active",
        "related_concepts": ["Dependency Trap", "Infrastructure Threshold", "Cognitive Surplus"],
    },
    {
        "name": "Information/Cognition Resource Hierarchy",
        "aliases": ["information and cognition as resources", "resource hierarchy"],
        "introduced_in": "005",
        "status": "active",
        "related_concepts": ["Cognition as a Commodity", "Knowledge Unification"],
    },
    {
        "name": "Feedback Loop",
        "aliases": ["feedback loop"],
        "introduced_in": "006",
        "status": "active",
        "related_concepts": ["Automation Spiral", "Master-Apprentice Parallel", "Niche Construction"],
    },
    {
        "name": "Master-Apprentice Parallel",
        "aliases": ["master-apprentice parallel", "master-apprentice relationship"],
        "introduced_in": "006",
        "status": "active",
        "related_concepts": ["Feedback Loop", "Automation Spiral", "The Golem"],
    },
    {
        "name": "Niche Construction",
        "aliases": ["niche construction"],
        "introduced_in": "006",
        "status": "active",
        "related_concepts": ["Feedback Loop", "Recursion Observation"],
    },
    {
        "name": "Theological Thread",
        "aliases": ["theological thread"],
        "introduced_in": "006",
        "status": "active",
        "related_concepts": ["Prometheus", "Knowledge Unification", "Recursion Observation"],
    },
    {
        "name": "Recursion Observation",
        "aliases": ["recursion observation", "cosmological → biological → linguistic → computational"],
        "introduced_in": "006",
        "status": "open question",
        "related_concepts": ["Theological Thread", "Niche Construction", "Knowledge Unification"],
    },
    {
        "name": "Infrastructure Threshold",
        "aliases": ["infrastructure threshold"],
        "introduced_in": "007",
        "status": "active",
        "related_concepts": ["Biological Ratchet", "Premature Dependencies", "Y2K Parallel"],
    },
    {
        "name": "Premature Dependencies",
        "aliases": ["premature dependencies", "dependency waiting for its enabling technology"],
        "introduced_in": "007",
        "status": "active",
        "related_concepts": ["Infrastructure Threshold", "Biological Ratchet"],
    },
    {
        "name": "Biological Ratchet",
        "aliases": ["biological ratchet", "dependency ratchet", "ratchet thesis"],
        "introduced_in": "007",
        "status": "active",
        "related_concepts": ["Infrastructure Threshold", "Cognitive Preference Shift", "Knowledge Unification"],
    },
    {
        "name": "Dependency Chain",
        "aliases": ["dependency chain"],
        "introduced_in": "007",
        "status": "active",
        "related_concepts": ["Biological Ratchet", "Knowledge Unification", "Cheating Frame"],
    },
    {
        "name": "Knowledge Unification",
        "aliases": ["knowledge unification", "unification thesis", "unification of human knowledge", "the dependency chain as knowledge unification"],
        "introduced_in": "008",
        "status": "active",
        "related_concepts": ["Dependency Chain", "Singularity as Compilation", "Integration Layer"],
    },
    {
        "name": "Singularity as Compilation",
        "aliases": ["singularity as compilation", "compilation not transcendence", "compilation, not transcendence"],
        "introduced_in": "008",
        "status": "active",
        "related_concepts": ["Knowledge Unification", "Integration Layer", "Cheating Frame"],
    },
    {
        "name": "Integration Layer",
        "aliases": ["integration layer"],
        "introduced_in": "008",
        "status": "active",
        "related_concepts": ["Knowledge Unification", "Singularity as Compilation", "Existential Purpose of the Chain"],
    },
    {
        "name": "Ship of Theseus Problem",
        "aliases": ["ship of theseus problem", "identity problem", "species identity problem", "the identity problem"],
        "introduced_in": "008",
        "status": "active",
        "related_concepts": ["Continuity Argument", "Identity Argument", "Pragmatic Argument"],
    },
    {
        "name": "Continuity Argument",
        "aliases": ["continuity argument", "the continuity argument"],
        "introduced_in": "008",
        "status": "active",
        "related_concepts": ["Ship of Theseus Problem", "Identity Argument", "Pragmatic Argument"],
    },
    {
        "name": "Identity Argument",
        "aliases": ["identity argument", "essentialist", "the identity argument"],
        "introduced_in": "008",
        "status": "active",
        "related_concepts": ["Ship of Theseus Problem", "Continuity Argument", "Pragmatic Argument"],
    },
    {
        "name": "Pragmatic Argument",
        "aliases": ["pragmatic argument", "the pragmatic argument"],
        "introduced_in": "008",
        "status": "active",
        "related_concepts": ["Ship of Theseus Problem", "Continuity Argument", "Identity Argument"],
    },
    {
        "name": "Cheating Frame",
        "aliases": ["did we cheat", "cheating frame"],
        "introduced_in": "008",
        "status": "active",
        "related_concepts": ["Dependency Chain", "Singularity as Compilation", "Existential Purpose of the Chain"],
    },
    {
        "name": "Existential Purpose of the Chain",
        "aliases": ["existential purpose of the dependency chain", "existential purpose", "the existential purpose of the dependency chain"],
        "introduced_in": "008",
        "status": "active",
        "related_concepts": ["Integration Layer", "Cheating Frame", "Knowledge Unification"],
    },
    {
        "name": "Eve's Apple",
        "aliases": ["eve's apple"],
        "introduced_in": "eves-apple",
        "status": "reference allegory",
        "related_concepts": ["Cognitive Preference Shift", "Dependency Chain"],
    },
    {
        "name": "Pandora's Box",
        "aliases": ["pandora's box"],
        "introduced_in": "pandoras-box",
        "status": "reference allegory",
        "related_concepts": ["Dependency Chain", "Automation Spiral"],
    },
    {
        "name": "Prometheus",
        "aliases": ["prometheus"],
        "introduced_in": "prometheus",
        "status": "reference allegory",
        "related_concepts": ["Theological Thread", "Dependency Chain", "Cheating Frame"],
    },
    {
        "name": "Sorcerer's Apprentice",
        "aliases": ["sorcerer's apprentice"],
        "introduced_in": "sorcerers-apprentice",
        "status": "reference allegory",
        "related_concepts": ["Automation Spiral", "Feedback Loop", "Dependency Chain"],
    },
    {
        "name": "The Golem",
        "aliases": ["the golem", "golem"],
        "introduced_in": "the-golem",
        "status": "reference allegory",
        "related_concepts": ["Master-Apprentice Parallel", "Dependency Chain"],
    },
    {
        "name": "Faustian Bargain",
        "aliases": ["faustian bargain", "faust"],
        "introduced_in": "faust",
        "status": "reference allegory",
        "related_concepts": ["Feedback Loop", "Cognitive Preference Shift"],
    },
    {
        "name": "Icarus",
        "aliases": ["icarus"],
        "introduced_in": "icarus",
        "status": "reference allegory",
        "related_concepts": ["Shelf-Life Problem", "Infrastructure Threshold"],
    },
    {
        "name": "Tower of Babel",
        "aliases": ["tower of babel", "babel"],
        "introduced_in": "tower-of-babel",
        "status": "reference allegory",
        "related_concepts": ["Dependency Chain", "Knowledge Unification"],
    },
]
def clean_text(text: str) -> str:
    """Return *text* with every CRLF pair replaced by a single LF.

    Lone carriage returns are left untouched.
    """
    return "\n".join(text.split("\r\n"))
def load_documents() -> dict[str, Document]:
    """Load the paper and allegory Markdown files found under ``ROOT``.

    Papers are the ``00*.md`` files directly inside ``ROOT``. A paper is kept
    only when it contains a ``# Paper NNN: Title`` heading, and it is keyed by
    that three-digit number; files without such a heading are skipped. Files
    in ``ROOT / "allegorical"`` are keyed by file stem and titled by their
    first level-one heading, falling back to the stem.

    Returns:
        Mapping of document id to ``Document``. Papers are inserted first and
        allegories second, each group in sorted path order; a later file with
        the same id replaces the earlier entry.
    """
    paper_heading = re.compile(r"^#\s+Paper\s+(\d{3}):\s*(.+)$", re.MULTILINE)
    supersedes_line = re.compile(r"^\*\*Supersedes:\*\*\s*Paper\s+(\d{3})", re.MULTILINE)
    top_heading = re.compile(r"^#\s+(.+)$", re.MULTILINE)

    loaded: dict[str, Document] = {}

    for paper_path in sorted(ROOT.glob("00*.md")):
        body = clean_text(paper_path.read_text(encoding="utf-8"))
        heading = paper_heading.search(body)
        if heading is None:
            # Not a numbered paper (no recognizable title line).
            continue
        number = heading.group(1)
        superseded = supersedes_line.search(body)
        loaded[number] = Document(
            doc_id=number,
            title=heading.group(2).strip(),
            path=paper_path,
            text=body,
            supersedes=None if superseded is None else superseded.group(1),
        )

    for allegory_path in sorted((ROOT / "allegorical").glob("*.md")):
        body = clean_text(allegory_path.read_text(encoding="utf-8"))
        stem = allegory_path.stem
        heading = top_heading.search(body)
        loaded[stem] = Document(
            doc_id=stem,
            title=stem if heading is None else heading.group(1).strip(),
            path=allegory_path,
            text=body,
            supersedes=None,
        )

    return loaded
def paragraphs(text: str) -> list[str]:
    """Split *text* at blank lines and return the stripped, non-empty chunks."""
    chunks = re.split(r"\n\s*\n", text)
    # filter(None, ...) drops chunks that are empty once stripped.
    return list(filter(None, map(str.strip, chunks)))
def sentences(text: str) -> list[str]:
    """Split *text* into sentences.

    Runs of whitespace (including newlines) are first collapsed to one space;
    the text is then cut at whitespace that follows ``.``, ``!`` or ``?``.
    Empty pieces are dropped.
    """
    flattened = re.sub(r"\s+", " ", text.strip())
    pieces = re.split(r"(?<=[.!?])\s+", flattened)
    return [piece for piece in map(str.strip, pieces) if piece]
def alias_present(text: str, alias: str) -> bool:
    """Return True when *alias* occurs in *text* as a whole phrase, ignoring case.

    The words of the alias may be separated in *text* by any run of
    whitespace, so a phrase that is wrapped across a line break in the
    source Markdown is still found.  The match must not be directly preceded
    or followed by a word character, which keeps "skill" from matching
    inside "metaskills" while still allowing aliases that begin or end with
    punctuation (a plain ``\\b`` anchor cannot match next to punctuation).

    Args:
        text: Text to search.
        alias: Phrase to look for; surrounding whitespace is ignored.

    Returns:
        True if the phrase is found, False otherwise.  An empty or
        whitespace-only alias never matches.
    """
    words = alias.lower().split()
    if not words:
        return False
    phrase = r"\s+".join(re.escape(word) for word in words)
    pattern = rf"(?<!\w){phrase}(?!\w)"
    return re.search(pattern, text.lower()) is not None
def extract_section(text: str, heading: str) -> str:
    """Return the body that follows a Markdown heading titled *heading*.

    The heading must be level two or deeper (``##`` or more) and is matched
    case-insensitively.  The body runs up to the next heading of level two or
    deeper, or to the end of *text*, and is returned stripped.  An empty
    string is returned when no such heading exists.
    """
    heading_line = re.compile(
        rf"^##+\s+{re.escape(heading)}\s*$",
        re.MULTILINE | re.IGNORECASE,
    )
    found = heading_line.search(text)
    if found is None:
        return ""

    remainder = text[found.end():]
    following = re.search(r"^##+\s+", remainder, re.MULTILINE)
    body = remainder if following is None else remainder[: following.start()]
    return body.strip()
def first_matching_sentence(doc: Document, aliases: list[str]) -> str:
    """Pick a sentence from *doc* to serve as the definition of a concept.

    Two passes are made, and only sentences of at least 40 characters are
    ever accepted:

    1. For each alias in order, look for a section whose heading is that
       alias and return the first long-enough sentence in it.
    2. Otherwise scan the whole document, skipping heading paragraphs and
       the ``**Authors:**`` / ``**Date:**`` lines, and return the first
       long-enough sentence that mentions any alias.

    Returns:
        The chosen sentence, or a fixed placeholder string when none is found.
    """

    def long_sentences(body: str, skipped_prefixes: tuple[str, ...]):
        # Yield, in document order, every sentence of 40+ characters from the
        # paragraphs of *body* that do not start with a skipped prefix.
        for block in paragraphs(body):
            if block.startswith(skipped_prefixes):
                continue
            for candidate in sentences(block):
                if len(candidate) >= 40:
                    yield candidate

    # Pass 1: a section headed by the alias itself.
    for heading in aliases:
        section_body = extract_section(doc.text, heading)
        hit = next(long_sentences(section_body, ("#",)), None)
        if hit is not None:
            return hit

    # Pass 2: any qualifying sentence that mentions the concept.
    for candidate in long_sentences(doc.text, ("#", "**Authors:**", "**Date:**")):
        if any(alias_present(candidate, alias) for alias in aliases):
            return candidate

    return "Definition sentence not found in source text."
def find_mentions(docs: dict[str, Document], aliases: list[str]) -> list[str]:
    """Return the ids of all documents whose text contains any of *aliases*.

    Ids are returned in the iteration order of *docs*.
    """
    return [
        doc_id
        for doc_id, doc in docs.items()
        if any(alias_present(doc.text, alias) for alias in aliases)
    ]
def find_revisions(docs: dict[str, Document], concept: dict, mentions: list[str]) -> list[str]:
    """Return, sorted, the mentioning documents that supersede the concept's origin.

    A document counts as a revision when its ``supersedes`` value equals the
    concept's ``introduced_in`` id.
    """
    origin = concept["introduced_in"]
    return sorted(doc_id for doc_id in mentions if docs[doc_id].supersedes == origin)
def find_challenges(docs: dict[str, Document], concept: dict, mentions: list[str]) -> list[str]:
    """Return, sorted, the mentioning documents that appear to challenge *concept*.

    For every mentioning document other than the one that introduced the
    concept, a search text is assembled from its "Relationship to Prior
    Papers" section, its "Open Questions" sections (plain, "for Paper 007"
    and "for Paper 009") and the first 3000 characters of the document.  The
    document is flagged when that text both mentions the concept and contains
    one of the critique keywords.

    NOTE(review): the keyword test is a case-insensitive substring search
    over the whole assembled text; it is not tied to where the concept is
    mentioned and has no word boundaries (e.g. "weak" also hits "tweak").
    """
    names = [concept["name"]] + concept["aliases"]
    critique_words = re.compile(
        r"challenge|critic|rebuttal|unfalsif|weak|bounded|downgrade|unknown",
        re.IGNORECASE,
    )
    section_headings = (
        "Relationship to Prior Papers",
        "Open Questions",
        "Open Questions for Paper 007",
        "Open Questions for Paper 009",
    )

    flagged: set[str] = set()
    for doc_id in mentions:
        if doc_id == concept["introduced_in"]:
            continue
        full_text = docs[doc_id].text
        parts = [extract_section(full_text, title) for title in section_headings]
        parts.append(full_text[:3000])
        haystack = "\n".join(parts)
        if not any(alias_present(haystack, name) for name in names):
            continue
        if critique_words.search(haystack):
            flagged.add(doc_id)
    return sorted(flagged)
def mermaid_id(name: str) -> str:
    """Turn a concept name into a Mermaid node id.

    The name is lowercased, each run of characters outside ``a-z0-9`` becomes
    one underscore, leading and trailing underscores are removed, and the
    result is prefixed with ``c_``.
    """
    slug = re.sub(r"[^a-z0-9]+", "_", name.lower())
    return f"c_{slug.strip('_')}"
def build_index() -> dict[str, list[dict]]:
    """Build the concept index from ``CONCEPTS`` and the loaded documents.

    For each concept the documents mentioning it are found and split into
    revisions (documents that supersede the introducing paper), challenges,
    and plain references (mentions that are neither the introducing document
    nor a revision).  A definition sentence is taken from the introducing
    document.

    Returns:
        ``{"concepts": [...]}`` with one dict per concept, in ``CONCEPTS`` order.

    Raises:
        KeyError: If a concept's ``introduced_in`` document was not loaded.
            The message names the concept and the missing document id.
    """
    docs = load_documents()
    items: list[dict] = []
    for concept in CONCEPTS:
        origin = concept["introduced_in"]
        try:
            intro_doc = docs[origin]
        except KeyError:
            # load_documents() silently skips files it cannot identify, so say
            # which concept and which document are affected instead of
            # surfacing a bare id.
            raise KeyError(
                f"Concept {concept['name']!r} is introduced in document {origin!r}, "
                "which was not loaded; check that the file exists under ROOT and "
                "that its heading is recognized by load_documents()."
            ) from None

        aliases = [concept["name"]] + concept["aliases"]
        mentions = find_mentions(docs, aliases)
        revised_in = find_revisions(docs, concept, mentions)
        challenged_in = find_challenges(docs, concept, mentions)
        referenced_in = [
            doc_id
            for doc_id in mentions
            if doc_id != origin and doc_id not in revised_in
        ]
        definition = first_matching_sentence(intro_doc, aliases)
        items.append(
            {
                "name": concept["name"],
                "aliases": sorted(set(concept["aliases"])),
                "introduced_in": origin,
                "definition": definition,
                "revised_in": revised_in,
                "challenged_in": challenged_in,
                "referenced_in": referenced_in,
                "status": concept["status"],
                "related_concepts": concept["related_concepts"],
            }
        )
    return {"concepts": items}
def write_glossary(index: dict[str, list[dict]]) -> None:
    """Write ``glossary.md`` into ``OUT_DIR``.

    Concepts are listed alphabetically by name (case-insensitive).  Each gets
    a level-two heading followed by its origin, status, aliases, definition
    and cross-reference lists; an empty list is rendered as ``None``.
    """

    def listing(values: list[str]) -> str:
        # Comma-separated values, or the word "None" for an empty list.
        return ", ".join(values) if values else "None"

    out = ["# VIBECODE-THEORY Glossary", ""]
    ordered = sorted(index["concepts"], key=lambda entry: entry["name"].lower())
    for entry in ordered:
        out.append(f"## {entry['name']}")
        out.append(f"Origin: {entry['introduced_in']}")
        out.append(f"Status: {entry['status']}")
        out.append(f"Aliases: {listing(entry['aliases'])}")
        out.append(entry["definition"])
        out.append(f"Revised in: {listing(entry['revised_in'])}")
        out.append(f"Challenged in: {listing(entry['challenged_in'])}")
        out.append(f"Referenced in: {listing(entry['referenced_in'])}")
        out.append(f"Related concepts: {listing(entry['related_concepts'])}")
        out.append("")

    glossary_path = OUT_DIR / "glossary.md"
    glossary_path.write_text("\n".join(out), encoding="utf-8")
def write_mermaid(index: dict[str, list[dict]]) -> None:
    """Write ``concept_map.mermaid`` into ``OUT_DIR``.

    The graph declares one node per concept, then one "relates to" edge per
    unordered pair of related concepts; a pair listed from both sides is
    drawn only once, in the direction first encountered.
    """
    concepts = index["concepts"]
    out = ["graph TD"]

    for entry in concepts:
        label = entry["name"]
        node = mermaid_id(label)
        out.append(f' {node}["{label}"]')

    drawn: set[tuple[str, str]] = set()
    for entry in concepts:
        source = entry["name"]
        for target in entry["related_concepts"]:
            # Sorting makes (A, B) and (B, A) the same key.
            pair = tuple(sorted((source, target)))
            if pair not in drawn:
                drawn.add(pair)
                out.append(f" {mermaid_id(source)} -->|relates to| {mermaid_id(target)}")

    map_path = OUT_DIR / "concept_map.mermaid"
    map_path.write_text("\n".join(out) + "\n", encoding="utf-8")
def main() -> None:
    """Build the index, write all three output files, and report what was written."""
    index = build_index()

    json_path = OUT_DIR / "index.json"
    json_path.write_text(json.dumps(index, indent=2) + "\n", encoding="utf-8")
    write_glossary(index)
    write_mermaid(index)

    print(f"Indexed {len(index['concepts'])} concepts.")
    for filename in ("index.json", "glossary.md", "concept_map.mermaid"):
        print(f"Wrote {OUT_DIR / filename}")
# Run the build only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()