From 77efac02835c1962a850d6ca10830ee981859b74 Mon Sep 17 00:00:00 2001 From: Seth Freiberg Date: Wed, 18 Mar 2026 02:01:12 -0400 Subject: [PATCH] Add knowledge corpus: 14 command references, server context, and TF-IDF search index (Phase 1.3) - knowledge/mc-commands/commands.json: 14 MC commands with JE syntax, args, examples, common errors, 1.21 version notes - knowledge/server-context/servers.json: all 4 servers (mc1, shrink, paper-ai, paper-dev) with full config - knowledge/build_index.py: TF-IDF indexer + search function (19 docs, 725 terms) - All command syntax validated live on dev server via RCON (12/13 passed) - PLAN.md: mark Phase 1.3 complete --- PLAN.md | 19 +- knowledge/build_index.py | 219 +++ knowledge/index.json | 2159 +++++++++++++++++++++++++ knowledge/mc-commands/commands.json | 356 ++++ knowledge/server-context/servers.json | 82 + 5 files changed, 2825 insertions(+), 10 deletions(-) create mode 100644 knowledge/build_index.py create mode 100644 knowledge/index.json create mode 100644 knowledge/mc-commands/commands.json create mode 100644 knowledge/server-context/servers.json diff --git a/PLAN.md b/PLAN.md index 9e42c82..a2b0fff 100644 --- a/PLAN.md +++ b/PLAN.md @@ -119,16 +119,15 @@ These projects informed the plan but solve different problems: - [x] Seed 31 examples from repair code, prayer logs, sudo logs, and session history (`data/processed/seed_dataset.jsonl`) #### 1.3 Knowledge Corpus -- [ ] Scrape Minecraft Wiki command reference pages for 1.21.x syntax - - Target: `/give`, `/effect`, `/tp`, `/execute`, `/worldborder`, `/weather`, `/gamemode`, `/enchant`, `/fill`, `/setblock`, `/clone`, `/scoreboard`, `/data`, `/function` - - Store as structured JSON (command, syntax, parameters, examples, version notes) -- [ ] Extract and chunk local server context: - - `server.properties` from mc1 and shrink-world - - Datapack definitions (shrinkborder, morespawns) - - Player list and UUID mappings - - RCON connection parameters (sanitized) -- [ ] Index 
knowledge corpus for RAG retrieval (simple TF-IDF or embedding-based) -- [ ] Validate: query the index with sample questions, spot-check relevance +- [x] Scrape Minecraft Wiki command reference pages for 1.21.x syntax (14 commands in `knowledge/mc-commands/commands.json`) + - Includes JE syntax, arguments, examples, version notes, and common errors per command + - Commands validated live on dev server (Paper 1.21.11) -- 12/13 passed, 1 false negative (already in target state) +- [x] Extract and chunk local server context (`knowledge/server-context/servers.json`) + - All 4 servers (mc1, shrink-world, paper-ai, paper-dev) with ports, RCON, settings, plugins + - Player list with UUIDs, infrastructure details, version-specific notes +- [x] Index knowledge corpus for RAG retrieval (`knowledge/build_index.py` -- TF-IDF with title boosting) + - 19 documents indexed, 725 unique terms +- [x] Validated with 6 test queries -- all return relevant top results #### 1.4 Baseline Assistant (No Fine-Tuning) - [ ] Build prompt-only assistant using `qwen3-coder` (via Ollama at 192.168.0.179) diff --git a/knowledge/build_index.py b/knowledge/build_index.py new file mode 100644 index 0000000..84488d1 --- /dev/null +++ b/knowledge/build_index.py @@ -0,0 +1,219 @@ +#!/usr/bin/env python3 +""" +Build a simple TF-IDF-based search index over the knowledge corpus. 
# Indexes:
#   - knowledge/mc-commands/commands.json (command reference)
#   - knowledge/server-context/servers.json (server configs)
#   - knowledge/wiki-chunks/*.json (wiki content, if present)
#
# Outputs: knowledge/index.json
#
# Usage: python3 knowledge/build_index.py

import json
import math
import os
import re
import sys
import time
from collections import Counter
from pathlib import Path
from typing import Optional

ROOT = Path(__file__).resolve().parent

# Raw token pattern: runs of at least two lowercase word characters, also
# allowing ':', '/' and '.' so namespaced ids ("minecraft:stone") and version
# numbers ("1.20.5") stay in one piece.
_TOKEN_RE = re.compile(r'[a-z0-9_:/.]{2,}')
# Punctuation stripped from both ends of a raw token ("players." -> "players").
_EDGE_PUNCT = ':/.'
_MIN_TOKEN_LEN = 2
# Lower bound for stored IDF weights; keeps very common terms slightly positive.
_MIN_IDF = 0.1
# Weight used by search() for a matching token that has no stored IDF entry.
_DEFAULT_IDF = 0.5
# Score added per query token that also appears in the document title.
_TITLE_BOOST = 2.0
_SNIPPET_LEN = 300

_SAMPLE_QUERIES = (
    "how to give enchanted sword",
    "effect speed player",
    "weather thunder storm",
    "execute as vs at position",
    "paper server port rcon",
    "1.21 enchantment syntax",
)


def _read_json(path: Path):
    """Parse the UTF-8 JSON file at *path* and return the decoded value."""
    return json.loads(path.read_text(encoding='utf-8'))


def tokenize(text: str) -> list:
    """Split *text* into lowercase search tokens.

    Raw tokens are runs of two or more characters from ``[a-z0-9_:/.]``.
    Leading and trailing ':', '/' and '.' are then stripped so that
    "players." matches "players" and the title "/give" matches the query
    word "give". Punctuation inside a token is kept. Tokens shorter than two
    characters after stripping are dropped. ``None`` or an empty string
    yields an empty list.

    An index built with an older tokenizer should be rebuilt so stored
    tokens and query tokens agree.
    """
    tokens = []
    for raw in _TOKEN_RE.findall((text or '').lower()):
        token = raw.strip(_EDGE_PUNCT)
        if len(token) >= _MIN_TOKEN_LEN:
            tokens.append(token)
    return tokens


def build_command_docs(commands_path: Path) -> list:
    """Build one searchable document per entry in commands.json.

    Returns an empty list when *commands_path* does not exist. Each document
    is a dict with the keys ``id``, ``type``, ``title``, ``text``,
    ``snippet`` and ``source``.
    """
    if not commands_path.exists():
        return []

    docs = []
    for cmd in _read_json(commands_path):
        name = cmd.get('command', '')
        description = cmd.get('description', '')
        syntax = cmd.get('je_syntax') or []

        # Text blob assembled from every field that is worth searching.
        parts = [f"/{name} command", description, ' '.join(syntax)]
        for arg_name, arg_info in (cmd.get('arguments') or {}).items():
            if isinstance(arg_info, dict):
                parts.append(f"{arg_name}: {arg_info.get('description', '')}")
            else:
                parts.append(f"{arg_name}: {arg_info}")
        for ex_name, ex_val in (cmd.get('examples') or {}).items():
            parts.append(f"example {ex_name}: {ex_val}")
        for err in cmd.get('common_errors') or []:
            parts.append(f"common error: {err}")
        parts.append(cmd.get('version_notes', ''))

        snippet = f"/{name}: {description}. Syntax: {'; '.join(syntax[:2])}"
        docs.append({
            'id': f'cmd_{name}',
            'type': 'command',
            'title': f'/{name}',
            'text': '\n'.join(p for p in parts if p),
            'snippet': snippet[:_SNIPPET_LEN],
            'source': 'mc-commands/commands.json',
        })
    return docs


def build_server_docs(servers_path: Path) -> list:
    """Build searchable documents from servers.json.

    Emits one document per entry under ``servers`` (its text is the entry
    dumped as JSON) plus, when ``version_notes`` is non-empty, one extra
    reference document. Returns an empty list when *servers_path* does not
    exist.
    """
    if not servers_path.exists():
        return []

    data = _read_json(servers_path)
    docs = []
    for srv in data.get('servers', []):
        name = srv.get('name', '')
        snippet = (
            f"Server '{name}': {srv.get('type', '')} {srv.get('version', '')} "
            f"on port {srv.get('game_port', '')}. {srv.get('notes', '')}"
        )
        docs.append({
            'id': f'srv_{name}',
            'type': 'server',
            'title': f'Server: {name}',
            'text': json.dumps(srv, indent=2),
            'snippet': snippet[:_SNIPPET_LEN],
            'source': 'server-context/servers.json',
        })

    # Version notes become a separate document.
    version_notes = data.get('version_notes', {})
    if version_notes:
        text = '\n'.join(f"{k}: {v}" for k, v in version_notes.items())
        docs.append({
            'id': 'version_notes',
            'type': 'reference',
            'title': 'Minecraft 1.21 Version Notes',
            'text': text,
            'snippet': text[:_SNIPPET_LEN],
            'source': 'server-context/servers.json',
        })
    return docs


def build_wiki_docs(wiki_dir: Path) -> list:
    """Build searchable documents from the ``*.json`` files in *wiki_dir*.

    Each file is expected to hold a JSON list of chunks; a chunk is either a
    dict (``text`` and optional ``title``) or any other value, which is
    stringified. Files are read in sorted order so document ids are stable
    between runs. A file that cannot be read or parsed is reported on stderr
    and skipped; a file whose top-level value is not a list is ignored.
    Returns an empty list when *wiki_dir* does not exist.
    """
    docs = []
    if not wiki_dir.exists():
        return docs

    for path in sorted(wiki_dir.glob('*.json')):
        try:
            chunks = _read_json(path)
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"warning: skipping wiki chunk file {path}: {exc}",
                  file=sys.stderr)
            continue
        if not isinstance(chunks, list):
            continue
        for i, chunk in enumerate(chunks):
            if isinstance(chunk, dict):
                text = str(chunk.get('text') or '')
                title = str(chunk.get('title') or path.stem)
            else:
                text = str(chunk)
                title = path.stem
            docs.append({
                'id': f'wiki_{path.stem}_{i}',
                'type': 'wiki',
                'title': title,
                'text': text,
                'snippet': text[:_SNIPPET_LEN],
                'source': f'wiki-chunks/{path.name}',
            })
    return docs


def _compute_idf(doc_token_sets: list) -> dict:
    """Return ``{token: weight}`` for a list of per-document token sets.

    The weight is ``log(N / (1 + df))`` clamped to at least ``_MIN_IDF``.
    Every token receives an entry, so a more common token never ends up with
    a higher weight than a rarer one.
    """
    n_docs = len(doc_token_sets)
    doc_freq = Counter()
    for tokens in doc_token_sets:
        doc_freq.update(tokens)  # sets: each token counted once per document
    return {
        token: max(math.log(n_docs / (1 + df)), _MIN_IDF)
        for token, df in doc_freq.items()
    }


def build_index():
    """Build the search index, write it to ``ROOT / 'index.json'``, return it.

    The index holds ``generated_at`` (epoch seconds), ``doc_count``, ``docs``
    (metadata plus the sorted unique tokens of each document) and ``idf``
    (token -> weight rounded to 4 decimals, for every indexed token).
    """
    docs = []
    docs.extend(build_command_docs(ROOT / 'mc-commands' / 'commands.json'))
    docs.extend(build_server_docs(ROOT / 'server-context' / 'servers.json'))
    docs.extend(build_wiki_docs(ROOT / 'wiki-chunks'))

    token_sets = [set(tokenize(doc['text'])) for doc in docs]
    idf = _compute_idf(token_sets)
    doc_count = len(docs)

    index = {
        'generated_at': time.time(),
        'doc_count': doc_count,
        'docs': [{
            'id': doc['id'],
            'type': doc['type'],
            'title': doc['title'],
            'snippet': doc['snippet'],
            'source': doc['source'],
            # Sorted so rebuilding an unchanged corpus gives identical output.
            'tokens': sorted(tokens),
        } for doc, tokens in zip(docs, token_sets)],
        'idf': {t: round(v, 4) for t, v in sorted(idf.items())},
    }

    out_path = ROOT / 'index.json'
    out_path.write_text(json.dumps(index, ensure_ascii=True, indent=2),
                        encoding='utf-8')
    print(f"Index built: {doc_count} documents, {len(idf)} unique terms -> {out_path}")
    return index


def search(query: str, index: Optional[dict] = None, limit: int = 5) -> list:
    """Return the top *limit* documents matching *query*, best first.

    When *index* is ``None`` the index is loaded from ``ROOT / 'index.json'``.
    A document scores the sum of the IDF weights of the query tokens found in
    its token list (``0.5`` for a token without a stored weight), plus ``2.0``
    for each query token that also appears in its title. Documents sharing no
    token with the query are omitted. Each result is the stored document dict
    with an added ``score`` rounded to 2 decimals.
    """
    if index is None:
        index = _read_json(ROOT / 'index.json')

    q_tokens = set(tokenize(query))
    if not q_tokens:
        return []

    idf = index.get('idf', {})
    scored = []
    for doc in index.get('docs', []):
        overlap = q_tokens.intersection(doc.get('tokens', []))
        if not overlap:
            continue
        # Sorted iteration keeps the float sum identical between runs.
        score = sum(idf.get(t, _DEFAULT_IDF) for t in sorted(overlap))
        title_overlap = q_tokens.intersection(tokenize(doc.get('title', '')))
        score += len(title_overlap) * _TITLE_BOOST
        scored.append((score, doc))

    # list.sort is stable, so equally scored documents keep index order.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [{'score': round(s, 2), **d} for s, d in scored[:limit]]


def main(argv=None) -> None:
    """Command-line entry point.

    ``search <words...>`` queries the saved index and prints the matches.
    Any other invocation rebuilds the index and then prints the top three
    results for a fixed list of sample queries.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if args and args[0] == 'search':
        query = ' '.join(args[1:])
        for r in search(query):
            print(f"[{r['score']:.1f}] {r['title']}: {r['snippet'][:100]}")
        return

    build_index()
    # Run test queries
    print()
    for q in _SAMPLE_QUERIES:
        results = search(q)
        print(f"Query: '{q}'")
        for r in results[:3]:
            print(f"  [{r['score']:.1f}] {r['title']}")
        print()


if __name__ == '__main__':
    main()
Syntax: give []", + "source": "mc-commands/commands.json", + "tokens": [ + "lvl:5", + "minecraft:wolf_spawn_egg", + "tags", + "common", + "uuid.", + "diamond_sword", + "sharpness:5", + "error:", + "components.", + "minecraft:night_vision", + "players.", + "players", + "with", + "count", + "/give", + "invalid", + "of", + "1.21", + "with_enchantments:", + "entity_data", + "replaced", + "64", + "must", + "max_stack_size", + "format:", + "since", + "targets", + "enchantments", + "is", + "or", + "minecraft:totem_of_undying", + "1.20.5", + "optional", + "id", + "item", + "prefix", + "item:", + "value", + "using", + "nbt", + "id:sharpness", + "item_id", + "enchantment", + "potion_contents", + "enchantments:", + "targets:", + "target", + "minecraft:diamond_sword", + "gives", + "food", + "unbreaking:3", + "player", + "components:", + "oak_log", + "command", + "basic:", + "namespace", + "old", + "component", + "name", + "minecraft:potion", + "resolve", + "in", + "to", + "2147483647", + "id:", + "1.", + "specific", + "minecraft:", + "minecraft:diamond", + "missing", + "nbt.", + "custom_max_stack:", + "wood", + "with_lore:", + "diamond", + "number", + "uses", + "data", + "defaults", + "example", + "spawn_egg_override:", + "count:", + "with_potion:", + "give", + "potion:", + "selector", + "minecraft:cat", + "components", + "new", + "2304", + "items", + "...", + "instead", + "arguments:", + "lore", + "transposed" + ] + }, + { + "id": "cmd_effect", + "type": "command", + "title": "/effect", + "snippet": "/effect: Adds or removes status effects from entities. 
Syntax: effect give [] [] []; effect give infinite [] []", + "source": "mc-commands/commands.json", + "tokens": [ + "ticks.", + "clear_specific:", + "common", + "particles", + "uuid.", + "exist.", + "error:", + "255", + "infinite_duration:", + "minecraft:night_vision", + "invulnerability", + "not", + "syntax", + "amplifier", + "with", + "effect:", + "hideparticles", + "hud", + "for", + "split", + "absorption", + "instant", + "must", + "minus", + "without", + "since", + "1000000", + "targets", + "false.", + "is", + "or", + "1.13", + "ii", + "30.", + "seconds:", + "minecraft:speed", + "true", + "into", + "invalid.", + "effects", + "clear_all:", + "id", + "effect", + "resistance_hidden:", + "prefix", + "whether", + "e.g.", + "regeneration", + "speed", + "does", + "targets:", + "target", + "status", + "be", + "player", + "effects:", + "command", + "namespace", + "/effect", + "infinite", + "name", + "clear", + "in", + "basic_speed:", + "on", + "to", + "etc.", + "0.", + "minecraft:", + "hideparticles:", + "missing", + "adds", + "resistance", + "bare", + "60", + "seconds", + "use", + "minecraft:haste", + "game", + "and", + "amplifier:", + "minecraft:resistance", + "removes", + "defaults", + "example", + "indicator.", + "level", + "entities", + "from", + "give", + "subcommand:", + "selector", + "duration", + "hide", + "inventing", + "instead" + ] + }, + { + "id": "cmd_tp", + "type": "command", + "title": "/tp", + "snippet": "/tp: Teleports entities to positions or other entities. 
Syntax: tp ; tp ", + "source": "mc-commands/commands.json", + "tokens": [ + "alias", + "if", + "entity.", + "rotation:", + "common", + "rotation", + "cross_dimension:", + "coordinates.", + "yaw:", + "notation.", + "error:", + "responses", + "benevolent", + "not", + "tp", + "with", + "are", + "single", + "yaw", + "positions", + "forward_one_block:", + "as", + "west.", + "when", + "position", + "teleport", + "of", + "/teleport", + "other", + "identical", + "facinganchor", + "didn", + "for", + "move", + "omitted", + "64", + "pitch", + "must", + "100", + "without", + "since", + "targets", + "200", + "teleporting", + "destination:", + "is", + "or", + "local/caret", + "teleporting.", + "1.13", + "entity", + "facing", + "after", + "north", + "south", + "using", + "both.", + "relative:", + "eyes", + "targets:", + "to_coords:", + "minecraft:the_nether", + "90", + "player", + "command", + "to_player:", + "but", + "at", + "location:", + "execute", + "causes", + "resolve", + "in", + "to", + "an", + "/tp", + "to.", + "feet.", + "ask", + "coordinates", + "teleport.", + "degrees.", + "facinglocation", + "facingentity:", + "facinglocation:", + "relative", + "facinganchor:", + "alice", + "up", + "east", + "facingentity", + "location", + "and", + "destination", + "defaults", + "example", + "context", + "teleports", + "face", + "entities", + "syntax.", + "supports", + "changes", + "entity/entities", + "all_to_self:", + "180", + "executor", + "instead", + "executor.", + "slow_falling", + "damage", + "fall", + "run" + ] + }, + { + "id": "cmd_execute", + "type": "command", + "title": "/execute", + "snippet": "/execute: Executes commands with modified context (executor, position, conditions). 
Syntax: execute align -> execute; execute anchored -> execute", + "source": "mc-commands/commands.json", + "tokens": [ + "summon", + "conditional:", + "if", + "take", + "common", + "relation", + "cross_dimension:", + "nearby", + "unless", + "gamemode", + "coords", + "restructured", + "error:", + "say", + "locate", + "not", + "block", + "tp", + "zombies", + "with", + "fill", + "as", + "position", + "set", + "/execute", + "axes", + "for", + "64", + "distance", + "simplified", + "align", + "server/console", + "dimension", + "heightmap", + "since", + "targets", + "can", + "commands", + "is", + "or", + "1.13", + "entity", + "facing", + "positioned", + "completely", + "over", + "invalid.", + "origin", + "result", + "store", + "does", + "minecraft:the_nether", + "nesting:", + "be", + "player", + "directly", + "format", + "sheep", + "command", + "structure", + "old", + "at", + "execute", + "grass", + "chains.", + "modified", + "resolve", + "pos", + "in", + "on", + "to", + "found", + "..10", + "minecraft:stone", + "the", + "position.", + "success", + "at_player:", + "zombie", + "unnecessary", + "slingshooter08", + "rotated", + "relative", + "player.", + "use", + "that", + "subcommand", + "anchor", + "example", + "context", + "type", + "rot", + "executes", + "minecraft:fortress", + "...", + "executor", + "argument", + "block_check:", + "anchored", + "as_all_entities:", + "minecraft:grass_block", + "conditions", + "creative", + "run" + ] + }, + { + "id": "cmd_weather", + "type": "command", + "title": "/weather", + "snippet": "/weather: Sets the weather. 
Syntax: weather (clear|rain|thunder) []", + "source": "mc-commands/commands.json", + "tokens": [ + "the", + "one", + "values", + "invalid", + "ticks", + "random", + "other", + "thunderstorm", + "common", + "of:", + "type:", + "duration:", + "18000", + "thunder", + "900", + "300", + "error:", + "seconds", + "use", + "/weather", + "rain", + "rainstorm", + "thunder.", + "timed:", + "defaults", + "1000000", + "example", + "significant", + "rain:", + "command", + "is", + "storm", + "changes", + "1.21.", + "duration", + "sets", + "weather", + "are", + "clear", + "thunder:", + "600", + "no", + "in", + "clear:", + "to", + "6000" + ] + }, + { + "id": "cmd_gamemode", + "type": "command", + "title": "/gamemode", + "snippet": "/gamemode: Sets a player's game mode. Syntax: gamemode []", + "source": "mc-commands/commands.json", + "tokens": [ + "one", + "survival", + "if", + "spectator_all:", + "common", + "camelcase", + "abbreviations", + "of:", + "slingshooter08", + "gamemode", + "gamemode:", + "je", + "target", + "modes", + "valid", + "spectator", + "spectator.", + "target:", + "error:", + "omitted.", + "use", + "sp", + "game", + "player", + "and", + "defaults", + "example", + "significant", + "only", + "not", + "full", + "ids", + "adventure", + "change.", + "command", + "is", + "changes", + "1.21.", + "sets", + "are", + "executor", + "numeric", + "no", + "abbreviations.", + "creative_self:", + "/gamemode", + "in", + "words", + "to", + "survival_player:", + "je.", + "mode", + "lowercase", + "creative" + ] + }, + { + "id": "cmd_fill", + "type": "command", + "title": "/fill", + "snippet": "/fill: Fills a region with a specified block. 
Syntax: fill [destroy|hollow|keep|outline|replace []]", + "source": "mc-commands/commands.json", + "tokens": [ + "one", + "fire", + "including", + "common", + "minecraft:fire", + "blocks", + "unchanged", + "replaces", + "minecraft:air", + "error:", + "numbers:", + "68", + "70", + "only", + "filter:", + "block", + "25", + "notation", + "fire_replace_air:", + "with", + "74", + "fill", + "minecraft:oak_stairs", + "when", + "drop", + "80", + "invalid", + "of", + "matching", + "64", + "hollow", + "minecraft:glass", + "air", + "is", + "or", + "1.21.", + "facing", + "metadata", + "north", + "hollow_box:", + "invalid.", + "optional", + "id", + "prefix", + "/fill", + "using", + "from:", + "basic_fill:", + "opposite", + "fills", + "1.21:", + "filter.", + "volume", + "20", + "block:", + "region", + "default", + "command", + "filter", + "this", + "per", + "max", + "clear_area:", + "to:", + "no", + "in", + "on", + "to", + "region.", + "minecraft:stone", + "the", + "minecraft:", + "mode:", + "missing", + "destroy", + "numbers.", + "drops", + "60", + "replace", + "use", + "inside", + "replace_air:", + "10", + "example", + "specified", + "keep", + "from", + "states:", + "32768", + "all", + "corner", + "integers", + "items", + "...", + "outline", + "edges", + "interior" + ] + }, + { + "id": "cmd_setblock", + "type": "command", + "title": "/setblock", + "snippet": "/setblock: Places a block at a position. 
Syntax: setblock [destroy|keep|replace]", + "source": "mc-commands/commands.json", + "tokens": [ + "the", + "if", + "pos:", + "lower", + "minecraft:", + "common", + "mode:", + "missing", + "destroy", + "places", + "place", + "for", + "setblock", + "64", + "half", + "drops", + "minecraft:diamond_block", + "keep:", + "error:", + "replace", + "minecraft:oak_door", + "air", + "example", + "block:", + "only", + "significant", + "default", + "block", + "/setblock", + "command", + "keep", + "basic:", + "is", + "old", + "at", + "changes", + "states:", + "1.21.", + "facing", + "block.", + "with_state:", + "with", + "north", + "upper", + "minecraft:torch", + "no", + "pos", + "as", + "current", + "optional", + "to", + "on", + "position", + "in", + "id", + "item", + "prefix" + ] + }, + { + "id": "cmd_clone", + "type": "command", + "title": "/clone", + "snippet": "/clone: Copies blocks from one region to another. Syntax: clone [from ] [to ] [replace|masked] [force|move|normal]; clone [from ] [to ] filtered [force|move|normal]", + "source": "mc-commands/commands.json", + "tokens": [ + "one", + "allows", + "of", + "lower", + "equals", + "common", + "maskmode:", + "another", + "normal", + "matching", + "opposite", + "blocks", + "fills", + "northwest", + "move", + "64", + "sourcedimension", + "100", + "clone", + "error:", + "replace", + "dimension", + "masked", + "without", + "allowed", + "10", + "and", + "arguments.", + "copies", + "op", + "destination", + "air", + "1.20.2", + "/clone", + "example", + "only", + "region", + "force", + "added", + "overlap", + "default", + "command", + "targetdimension", + "from", + "region.", + "destination:", + "basic:", + "filter", + "clonemode:", + "cross", + "begin", + "source", + "corner", + "end:", + "all", + "with", + "74", + "move:", + "cloning", + "no", + "to", + "end", + "masked:", + "skip", + "begin:", + "overlapping", + "filtered", + "mode" + ] + }, + { + "id": "cmd_summon", + "type": "command", + "title": "/summon", + "snippet": 
"/summon: Summons an entity. Syntax: summon [] []", + "source": "mc-commands/commands.json", + "tokens": [ + "summon", + "the", + "invalid", + "count.", + "etc.", + "pos:", + "position.", + "tags", + "of", + "one", + "minecraft:", + "nbt", + "common", + "entity.", + "tnt", + "cannot", + "minecraft:tnt", + "with_nbt:", + "missing", + "for", + "65", + "isbaby:1", + "must", + "minecraft:zombie", + "at_coords:", + "customname:", + "error:", + "use", + "20", + "uses", + "nbt:", + "data", + "entities.", + "defaults", + "example", + "type", + "named:", + "entity:", + "not", + "still", + "command", + "bob", + "basic:", + "commands", + "is", + "multiple", + "namespace", + "minecraft:villager", + "append", + "entity", + "components", + "1.21.", + "creates", + "separate", + "/summon", + "executor", + "summons", + "each", + "call", + "pos", + "count", + "as", + "on", + "to", + "an", + "position", + "at.", + "id:", + "exactly", + "prefix" + ] + }, + { + "id": "cmd_worldborder", + "type": "command", + "title": "/worldborder", + "snippet": "/worldborder: Manages the world border. Syntax: worldborder add [