1200+ distilled gold examples, journal system, redstone mastery, safety awareness

Distilled Training Data (1,203 examples): - 341 initial gold (plugins, enchantments, builds, effects, god, errors) - 165 buildings + pipeline (100 structures built on dev, 65 request→query→act) - 24 safety-aware (worldborder, safe tp, intentional harm, gamemode checks) - 17 advanced logic (decanonized items, redstone gates, iterative builds) - 12 redstone mastery (NOT/OR/AND/XOR/RS-latch/T-flip-flop/comparator/clock) - 7 circuit verification and diagnosis - 1 compact comparator gates - 10 redstone methodology (build→test→save→recall→learn from mistakes) - 8 player journal usage - 29 creative+uncommon+pipeline+god with full tool chains Player Journal System: - agent/tools/player_journal.py — per-player text files (1-10 lines) - journal.read + journal.write tool schemas added - Cross-contaminated: God and Sudo share same journal per player - Includes sentiment, relationship, builds, preferences, skill level Redstone Engineering: - agent/prompts/redstone_rules.md — baked-in wall torch, dedicated lead, repeater rules - Learned from 4 iterations of 8-switch circuit: wall_torch on back face, not top - T-junction bypass prevention: dedicated lead wire between merge and NOT block - RCON limitation: can build circuits but cannot test them (lever toggle doesn't propagate) Training Data Cleaning: - 466 @s→@p fixes, 10 template commands removed - 12 outdated refusals replaced with correct plugin commands - Data de-duped across all sources Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-21 20:50:52 -04:00
parent d9acb653fe
commit 9c2c9a2310
86 changed files with 34873 additions and 1676 deletions
@@ -0,0 +1,172 @@
+#!/usr/bin/env python3
+"""
+Clean training data — fix known bad patterns before 0.6.0 training.
+
+Fixes:
+- @s selector → @p (RCON has no executor entity)
+- Leading slash on commands
+- Template commands (remove entire example)
+- Old NBT enchant syntax
+- fill with trailing count
+- Generic bed/log → specific variants
+- steak → cooked_beef
+
+Usage:
+    python3 training/scripts/clean_training_data.py
+"""
+
+import json
+import re
+import sys
+from pathlib import Path
+
+# Repository root: three directory levels above this script (the Usage line
+# in the module docstring places the script in training/scripts/).
+PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
+
+# JSONL datasets that main() hands to process_file(), which rewrites each
+# one in place.
+FILES = [
+    PROJECT_ROOT / "data" / "processed" / "seed_dataset.jsonl",
+    PROJECT_ROOT / "data" / "processed" / "tool_training.jsonl",
+    PROJECT_ROOT / "data" / "processed" / "tool_training_v05.jsonl",
+    PROJECT_ROOT / "data" / "processed" / "filtered_exploration.jsonl",
+]
+
+# Run-wide counters: mutated by fix_command() and process_file(), printed
+# by main().
+# NOTE(review): nothing in this script increments "fixed_old_nbt", and the
+# "@s" replacements done directly in fix_commands_in_obj() are not counted
+# in "fixed_at_s".
+stats = {
+    "files_processed": 0,
+    "examples_in": 0,
+    "examples_out": 0,
+    "removed_template": 0,
+    "fixed_at_s": 0,
+    "fixed_leading_slash": 0,
+    "fixed_old_nbt": 0,
+    "fixed_fill_count": 0,
+    "fixed_generic_items": 0,
+}
+
+
+def fix_command(cmd: str, player: str = "slingshooter08") -> str:
+    """Normalize one command string and record each fix in ``stats``.
+
+    Applied in order: strip one leading slash, rewrite the ``@s`` selector to
+    ``@p``, replace the generic item ids ``bed``/``log``/``steak`` with
+    specific ones, and drop a trailing numeric count from ``fill`` commands.
+
+    Args:
+        cmd: Command to clean. Non-string values are returned untouched.
+        player: Unused; kept so existing callers keep working.
+
+    Returns:
+        The cleaned command.
+    """
+    if not isinstance(cmd, str):
+        return cmd
+
+    # Leading slash
+    if cmd.startswith("/"):
+        cmd = cmd[1:]
+        stats["fixed_leading_slash"] += 1
+
+    # @s → @p (RCON has no executor). The lookahead leaves longer tokens that
+    # merely start with "@s" (e.g. "@steve" inside chat text) untouched.
+    cmd, hits = re.subn(r"@s(?!\w)", "@p", cmd)
+    if hits:
+        stats["fixed_at_s"] += 1
+
+    # Generic items. Only the exact id is rewritten: the lookahead keeps
+    # longer ids such as minecraft:bedrock intact, and an id at the very end
+    # of the command (or followed by "[", "{", a quote, ...) is caught too.
+    generic_items = (
+        ("bed", "white_bed"),
+        ("log", "oak_log"),
+        ("steak", "cooked_beef"),
+    )
+    for generic, specific in generic_items:
+        cmd, hits = re.subn(
+            rf"minecraft:{generic}(?!\w)", f"minecraft:{specific}", cmd
+        )
+        if hits:
+            stats["fixed_generic_items"] += 1
+
+    # Fill with trailing count (e.g. "fill ... minecraft:stone 1")
+    m = re.match(r'^(fill .+ minecraft:\w+(?:\[.*?\])?)\s+\d+$', cmd)
+    if m:
+        cmd = m.group(1)
+        stats["fixed_fill_count"] += 1
+
+    return cmd
+
+
+def fix_commands_in_obj(obj):
+    """Walk a nested dict/list structure and return a cleaned copy.
+
+    Command-bearing keys go through ``fix_command``; ``content`` strings get
+    ``@s`` replaced only when they mention "rcon"; every other string has
+    ``@s`` replaced unconditionally. Non-container, non-string values are
+    returned as they are.
+    """
+    if isinstance(obj, dict):
+        cleaned = {}
+        for key, value in obj.items():
+            if key in ("commands", "commands_generated", "commands_executed"):
+                # Only lists are rewritten; any other value is kept as found.
+                if isinstance(value, list):
+                    value = [fix_command(entry) for entry in value]
+                cleaned[key] = value
+            elif isinstance(value, str) and key == "command":
+                cleaned[key] = fix_command(value)
+            elif isinstance(value, str) and key == "content":
+                # Message content (tool calls, system prompts): only touch
+                # text that talks about rcon.
+                mentions_rcon = "@s" in value and "rcon" in value.lower()
+                cleaned[key] = value.replace("@s", "@p") if mentions_rcon else value
+            else:
+                cleaned[key] = fix_commands_in_obj(value)
+        return cleaned
+
+    if isinstance(obj, list):
+        return [fix_commands_in_obj(entry) for entry in obj]
+
+    if isinstance(obj, str):
+        # Any remaining string (including tool call JSON) gets @s → @p.
+        return obj.replace("@s", "@p")
+
+    return obj
+
+
+def has_template_commands(obj) -> bool:
+    """Return True when the JSON dump of *obj* mentions a template command.
+
+    The comparison is case-insensitive and looks for "template search",
+    "template pick" or "template build" anywhere in the serialized example.
+    """
+    haystack = json.dumps(obj).lower()
+    for marker in ("template search", "template pick", "template build"):
+        if marker in haystack:
+            return True
+    return False
+
+
+def process_file(path: Path):
+    """Clean one JSONL file in place.
+
+    Reads every non-blank line as JSON, drops examples that contain template
+    commands, runs the rest through ``fix_commands_in_obj`` and rewrites the
+    file with the result. Updates the module-level ``stats`` counters.
+
+    Lines that are not valid JSON cannot be cleaned and are left out of the
+    rewritten file; they are counted and reported on stderr instead of
+    disappearing silently.
+
+    Args:
+        path: JSONL file to clean. A missing file is reported and skipped.
+    """
+    if not path.exists():
+        print(f"  SKIP: {path.name} (not found)")
+        return
+
+    examples = []
+    unparseable = []  # 1-based numbers of lines that failed to decode
+    # JSONL is UTF-8; do not depend on the platform's default encoding.
+    with open(path, encoding="utf-8") as f:
+        for lineno, line in enumerate(f, start=1):
+            if not line.strip():
+                continue
+            try:
+                examples.append(json.loads(line))
+            except json.JSONDecodeError:
+                unparseable.append(lineno)
+
+    if unparseable:
+        preview = ", ".join(str(n) for n in unparseable[:10])
+        more = ", ..." if len(unparseable) > 10 else ""
+        print(
+            f"  WARNING: {path.name}: dropping {len(unparseable)} unparseable "
+            f"line(s) (line {preview}{more})",
+            file=sys.stderr,
+        )
+
+    stats["examples_in"] += len(examples)
+    stats["files_processed"] += 1
+
+    cleaned = []
+    for ex in examples:
+        # Remove template command examples entirely
+        if has_template_commands(ex):
+            stats["removed_template"] += 1
+            continue
+
+        # Fix all commands recursively
+        cleaned.append(fix_commands_in_obj(ex))
+
+    stats["examples_out"] += len(cleaned)
+
+    # Write back. ensure_ascii=False emits raw non-ASCII characters, so the
+    # encoding has to be pinned here as well.
+    with open(path, "w", encoding="utf-8") as f:
+        for ex in cleaned:
+            f.write(json.dumps(ex, ensure_ascii=False) + "\n")
+
+    removed = len(examples) - len(cleaned)
+    print(f"  {path.name}: {len(examples)} → {len(cleaned)} ({removed} removed)")
+
+
+def main():
+    """Clean every file in ``FILES`` and print a summary of the run."""
+    print("Cleaning training data...\n")
+
+    for dataset in FILES:
+        process_file(dataset)
+
+    total_in = stats["examples_in"]
+    total_out = stats["examples_out"]
+
+    print("\n" + "=" * 50)
+    print(f"Files processed: {stats['files_processed']}")
+    print(f"Examples: {total_in} → {total_out} ({total_in - total_out} removed)")
+    print("\nFixes applied:")
+    print(f"  @s → @p:           {stats['fixed_at_s']}")
+    print(f"  Leading slash:      {stats['fixed_leading_slash']}")
+    print(f"  Template removed:   {stats['removed_template']}")
+    print(f"  Fill trailing count: {stats['fixed_fill_count']}")
+    print(f"  Generic items:      {stats['fixed_generic_items']}")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,337 @@
+#!/usr/bin/env python3
+"""
+Convert IGLU dataset to Mortdecai build training examples.
+
+IGLU provides natural language instructions paired with block placement
+coordinates. We convert these to:
+1. Direct setblock/fill commands (for simple builds)
+2. script.write + script.execute flows (for complex builds)
+
+Source: microsoft/iglu-datasets singleturn dataset
+Output: data/raw/iglu_build_training.jsonl
+
+Usage:
+    python3 training/scripts/convert_iglu_to_training.py
+"""
+
+import csv
+import json
+import os
+import random
+import sys
+from collections import defaultdict
+from pathlib import Path
+
+# Repository root: three directory levels above this script.
+PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
+# Put the repository root on sys.path so the `agent` imports below resolve
+# when the script is run directly.
+sys.path.insert(0, str(PROJECT_ROOT))
+
+from agent.tools.tool_schemas import qwen3_tools_block
+from agent.prompts.system_prompts import SYNTAX_RULES, RISK_GRADIENT
+
+# Location of the IGLU single-turn data and of the JSONL this script writes.
+IGLU_DIR = PROJECT_ROOT / "data" / "external" / "iglu-repo" / "datasets" / "singleturn"
+OUTPUT_PATH = PROJECT_ROOT / "data" / "raw" / "iglu_build_training.jsonl"
+
+# Tool description text appended to the system prompt.
+TOOLS_BLOCK = qwen3_tools_block()
+# System prompt placed at the start of every generated example.
+# NOTE(review): the prompt calls 1-3 blocks "simple" and 4+ "complex", while
+# convert_pair_to_example() takes the direct path for total_changes <= 4.
+# Confirm which threshold is intended.
+SYSTEM = (
+    "You are a Minecraft 1.21 command translator with script writing abilities.\n"
+    "For complex builds (4+ blocks), write a mcfunction script. Validate first.\n"
+    "For simple builds (1-3 blocks), use rcon.execute directly.\n"
+    "PERMISSION LEVEL: 4 (generous).\n\n"
+    "Return JSON: {\"risk_level\": <0-5>, \"commands\": [...], \"reasoning\": \"...\"}\n\n"
+    + SYNTAX_RULES + RISK_GRADIENT + "\n" + TOOLS_BLOCK
+)
+
+# IGLU uses color IDs for blocks. Map to Minecraft wool colors.
+# NOTE(review): this table is not referenced anywhere else in this script;
+# BLOCK_VARIANTS["wool"] below holds the same mapping.
+IGLU_BLOCK_MAP = {
+    57: "minecraft:blue_wool",
+    58: "minecraft:light_blue_wool",
+    59: "minecraft:green_wool",
+    60: "minecraft:red_wool",
+    61: "minecraft:orange_wool",
+    62: "minecraft:purple_wool",
+    63: "minecraft:yellow_wool",
+}
+
+# For variety, also map to concrete and terracotta.
+# Keyed by variant name, then by IGLU color id. main() cycles through the
+# variant names and convert_pair_to_example() falls back to "wool" for an
+# unknown name.
+BLOCK_VARIANTS = {
+    "wool": {
+        57: "minecraft:blue_wool", 58: "minecraft:light_blue_wool",
+        59: "minecraft:green_wool", 60: "minecraft:red_wool",
+        61: "minecraft:orange_wool", 62: "minecraft:purple_wool",
+        63: "minecraft:yellow_wool",
+    },
+    "concrete": {
+        57: "minecraft:blue_concrete", 58: "minecraft:light_blue_concrete",
+        59: "minecraft:green_concrete", 60: "minecraft:red_concrete",
+        61: "minecraft:orange_concrete", 62: "minecraft:purple_concrete",
+        63: "minecraft:yellow_concrete",
+    },
+    "terracotta": {
+        57: "minecraft:blue_terracotta", 58: "minecraft:light_blue_terracotta",
+        59: "minecraft:green_terracotta", 60: "minecraft:red_terracotta",
+        61: "minecraft:orange_terracotta", 62: "minecraft:purple_terracotta",
+        63: "minecraft:yellow_terracotta",
+    },
+}
+
+# Player names; convert_pair_to_example() picks one at random per example.
+PLAYERS = ["slingshooter08", "Ace13245", "TheBigBoss", "xXDragonSlayerXx", "CreeperKing99"]
+
+
+def sys_msg():
+    """Build the system-role chat message carrying the module's SYSTEM prompt."""
+    return dict(role="system", content=SYSTEM)
+
+def user_msg(text):
+    """Build a user-role chat message carrying *text*."""
+    return dict(role="user", content=text)
+
+def tool_call(name, args):
+    """Build an assistant message whose content is a <tool_call> block.
+
+    The block wraps the JSON object {"name": name, "arguments": args}.
+    """
+    payload = json.dumps({"name": name, "arguments": args})
+    return {
+        "role": "assistant",
+        "content": "<tool_call>\n" + payload + "\n</tool_call>",
+    }
+
+def tool_result(data):
+    """Build a tool-role chat message whose content is *data* serialized as JSON."""
+    serialized = json.dumps(data)
+    return {"role": "tool", "content": serialized}
+
+def final_response(resp):
+    """Build the closing assistant message: *resp* serialized as JSON."""
+    serialized = json.dumps(resp)
+    return {"role": "assistant", "content": serialized}
+
+
+def blocks_to_commands(blocks_to_place, blocks_to_remove, block_map, use_relative=True, offset=(0, 64, 0)):
+    """Convert block coordinate lists to setblock/fill commands.
+
+    Placed blocks are grouped by resolved block id. A group of four or more
+    blocks that exactly fills its bounding box becomes a single ``fill``;
+    every other block becomes its own ``setblock``. Removed blocks become
+    ``setblock ... minecraft:air`` and are emitted last.
+
+    Args:
+        blocks_to_place: Iterable of ``(x, y, z, color_id)`` tuples.
+        blocks_to_remove: Iterable of ``(x, y, z, color_id)`` tuples; the
+            color id is ignored.
+        block_map: Mapping from color id to block id; unknown ids fall back
+            to ``minecraft:white_wool``.
+        use_relative: Emit ``~`` coordinates relative to ``offset`` when
+            true, raw coordinates otherwise.
+        offset: ``(x, y, z)`` origin subtracted from each coordinate in
+            relative mode. All three components are applied; with the
+            default ``(0, 64, 0)`` only y is shifted.
+
+    Returns:
+        List of command strings.
+    """
+    off_x, off_y, off_z = offset
+
+    def pos(x, y, z):
+        # Render one position in whichever coordinate style was requested.
+        if use_relative:
+            return f"~{x - off_x} ~{y - off_y} ~{z - off_z}"
+        return f"{x} {y} {z}"
+
+    # Group placed blocks by resolved block id for potential fill optimization
+    by_color = defaultdict(list)
+    for x, y, z, color_id in blocks_to_place:
+        block = block_map.get(color_id, "minecraft:white_wool")
+        by_color[block].append((x, y, z))
+
+    commands = []
+    for block, coords in by_color.items():
+        if len(coords) > 3:
+            lo = tuple(min(axis) for axis in zip(*coords))
+            hi = tuple(max(axis) for axis in zip(*coords))
+            box_volume = (
+                (hi[0] - lo[0] + 1) * (hi[1] - lo[1] + 1) * (hi[2] - lo[2] + 1)
+            )
+            # A solid box: every position inside the bounding box is present.
+            if box_volume == len(coords):
+                commands.append(f"fill {pos(*lo)} {pos(*hi)} {block}")
+                continue
+        # Small group or not a clean box — individual setblocks
+        commands.extend(f"setblock {pos(x, y, z)} {block}" for x, y, z in coords)
+
+    # Remove blocks
+    commands.extend(
+        f"setblock {pos(x, y, z)} minecraft:air" for x, y, z, _ in blocks_to_remove
+    )
+
+    return commands
+
+
+def load_iglu_pairs():
+    """Load instruction-to-build pairs from IGLU dataset.
+
+    Reads ``clarifying_questions_train.csv`` under ``IGLU_DIR`` and keeps the
+    rows whose instruction is non-empty and marked clear, whose game has at
+    least one target step file, and whose initial world file exists.
+
+    Returns:
+        List of dicts with ``instruction``, ``init_path``, ``target_path``
+        and ``game_id``; an empty list when the CSV is missing.
+    """
+    csv_path = IGLU_DIR / "clarifying_questions_train.csv"
+    if not csv_path.exists():
+        print(f"CSV not found: {csv_path}")
+        return []
+
+    # Build target state index: game directory name -> its step files.
+    # Path.iterdir() yields entries in arbitrary order, so sort to make the
+    # "first step file" chosen below the same on every run and platform.
+    target_dir = IGLU_DIR / "target_world_states" / "builder-data"
+    targets = {}
+    if target_dir.exists():
+        for game_dir in sorted(target_dir.iterdir()):
+            if not game_dir.is_dir():
+                continue
+            step_files = sorted(p for p in game_dir.iterdir() if p.is_file())
+            if step_files:
+                targets[game_dir.name] = step_files
+
+    pairs = []
+    # newline="" is what the csv module asks for; the explicit encoding keeps
+    # decoding independent of the platform locale.
+    with open(csv_path, newline="", encoding="utf-8") as f:
+        for row in csv.DictReader(f):
+            game_id = row['GameId'].lower().replace(' ', '')
+            instruction = row['InputInstruction'].strip()
+            if not instruction or row.get('IsInstructionClear') != 'Yes':
+                continue
+
+            init_path = IGLU_DIR / row['InitializedWorldPath']
+            if game_id in targets and init_path.exists():
+                pairs.append({
+                    'instruction': instruction,
+                    'init_path': str(init_path),
+                    # NOTE(review): every instruction of a game is paired
+                    # with that game's first step file — confirm this is the
+                    # intended target state for the row.
+                    'target_path': str(targets[game_id][0]),
+                    'game_id': game_id,
+                })
+
+    return pairs
+
+
+def convert_pair_to_example(pair, idx, block_variant="wool"):
+    """Convert one IGLU pair to a training example.
+
+    Diffs the block sets of the initial and target world states, turns the
+    difference into commands with ``blocks_to_commands`` and wraps them in a
+    chat transcript: one ``rcon.execute`` call per command when at most four
+    blocks change, otherwise a script.validate → script.write →
+    script.execute sequence. The tool results in the transcript are
+    synthesized here; nothing is executed.
+
+    Args:
+        pair: Dict with ``instruction``, ``init_path`` and ``target_path``.
+        idx: Index used in the example id and in the script name.
+        block_variant: Key into ``BLOCK_VARIANTS``; unknown keys fall back
+            to ``"wool"``.
+
+    Returns:
+        The example dict, or ``None`` when the two states do not differ or
+        no commands result.
+    """
+    block_map = BLOCK_VARIANTS.get(block_variant, BLOCK_VARIANTS["wool"])
+    player = random.choice(PLAYERS)
+
+    def load_blocks(path):
+        # World-state files are JSON; read them as UTF-8 regardless of the
+        # platform's default encoding.
+        with open(path, encoding="utf-8") as f:
+            state = json.load(f)
+        return {tuple(b) for b in state.get('worldEndingState', {}).get('blocks', [])}
+
+    init_blocks = load_blocks(pair['init_path'])
+    target_blocks = load_blocks(pair['target_path'])
+
+    to_place = sorted(target_blocks - init_blocks)
+    to_remove = sorted(init_blocks - target_blocks)
+
+    if not to_place and not to_remove:
+        return None
+
+    total_changes = len(to_place) + len(to_remove)
+    commands = blocks_to_commands(to_place, to_remove, block_map)
+
+    if not commands:
+        return None
+
+    instruction = pair['instruction']
+    # Make it sound like a Minecraft player request
+    prefixes = [
+        f"sudo {instruction}",
+        f"sudo can you {instruction.lower()}",
+        f"sudo please {instruction.lower()}",
+        f"sudo I need you to {instruction.lower()}",
+    ]
+    prompt = random.choice(prefixes)
+
+    msgs = [sys_msg(), user_msg(f"Player {player}: {prompt}")]
+
+    # NOTE(review): SYSTEM describes 1-3 blocks as "simple" and 4+ as
+    # "complex", but the direct path here covers up to 4 block changes.
+    # main() prints labels that match this threshold, so it is left as is.
+    is_direct = total_changes <= 4
+
+    if is_direct:
+        # Direct rcon.execute for small builds
+        for cmd in commands:
+            msgs.append(tool_call("rcon.execute", {"command": cmd}))
+            msgs.append(tool_result({"success": True, "result": "Changed the block"}))
+        reasoning = f"Direct block placement: {len(to_place)} placed, {len(to_remove)} removed."
+        resp = {"risk_level": 3, "commands": commands, "reasoning": reasoning}
+    else:
+        # Script workflow for larger builds
+        script_name = f"build_{idx:04d}"
+        desc = instruction[:80]
+
+        # Validate
+        msgs.append(tool_call("script.validate", {"commands": commands}))
+        msgs.append(tool_result({
+            "valid": True, "total": len(commands),
+            "passed": len(commands), "errors": [],
+        }))
+
+        # Write
+        msgs.append(tool_call("script.write", {
+            "name": script_name,
+            "commands": commands,
+            "description": desc,
+        }))
+        msgs.append(tool_result({
+            "ok": True, "path": f"mortdecai:{script_name}",
+            "lines": len(commands),
+        }))
+
+        # Execute at player
+        msgs.append(tool_call("script.execute", {
+            "name": script_name, "as_player": player,
+        }))
+        msgs.append(tool_result({
+            "ok": True,
+            "result": f"Executed {len(commands)} commands from function mortdecai:{script_name}",
+        }))
+
+        reasoning = (f"Complex build ({total_changes} block changes). "
+                     f"Wrote script '{script_name}' with {len(commands)} commands. "
+                     f"Placed {len(to_place)}, removed {len(to_remove)}.")
+        resp = {
+            "risk_level": 3,
+            "commands": [f"function mortdecai:{script_name}"],
+            "reasoning": reasoning,
+        }
+
+    msgs.append(final_response(resp))
+
+    return {
+        "id": f"iglu-build-{idx:05d}",
+        "source": "iglu_dataset",
+        "type": "build_direct" if is_direct else "build_script",
+        "block_changes": total_changes,
+        "messages": msgs,
+    }
+
+
+def main():
+    """Convert every IGLU pair to a training example and write the JSONL.
+
+    Block variants are assigned round-robin by pair index. Pairs that yield
+    no example are counted as skipped. Prints progress and summary
+    statistics, then writes the examples to ``OUTPUT_PATH``.
+    """
+    print("Loading IGLU dataset...")
+    pairs = load_iglu_pairs()
+    print(f"Found {len(pairs)} instruction-build pairs")
+
+    if not pairs:
+        print("No data found. Make sure iglu-repo is cloned in data/external/")
+        return
+
+    examples = []
+    skipped = 0
+
+    # Process with variety — use different block variants
+    variants = list(BLOCK_VARIANTS.keys())
+
+    for idx, pair in enumerate(pairs):
+        variant = variants[idx % len(variants)]
+        ex = convert_pair_to_example(pair, idx, variant)
+        if ex:
+            examples.append(ex)
+        else:
+            skipped += 1
+
+        if (idx + 1) % 500 == 0:
+            print(f"  Processed {idx+1}/{len(pairs)}, generated {len(examples)}")
+
+    # Stats
+    direct = sum(1 for e in examples if e['type'] == 'build_direct')
+    script = sum(1 for e in examples if e['type'] == 'build_script')
+    # max(..., 1) keeps the average defined when nothing was generated.
+    avg_blocks = sum(e['block_changes'] for e in examples) / max(len(examples), 1)
+
+    print(f"\nGenerated {len(examples)} examples (skipped {skipped} empty)")
+    print(f"  Direct (1-4 blocks): {direct}")
+    print(f"  Script (5+ blocks):  {script}")
+    print(f"  Avg block changes:   {avg_blocks:.1f}")
+
+    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
+    # ensure_ascii=False emits raw non-ASCII characters, so pin the encoding
+    # instead of relying on the platform default.
+    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
+        for ex in examples:
+            f.write(json.dumps(ex, ensure_ascii=False) + "\n")
+
+    print(f"\nWritten to {OUTPUT_PATH}")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,160 @@
+#!/usr/bin/env python3
+"""
+Filter audit log data for training quality.
+
+Keeps the full validator loop (generated → executed → RCON result) as the
+training signal. Quarantines empty outputs, system prompt leaks, and broken
+JSON/tellraw commands into a separate file for review.
+
+Usage:
+    python3 filter_audit_log.py [--input FILE] [--output FILE] [--stats]
+    python3 filter_audit_log.py --stats  # dry run, print breakdown only
+"""
+
+import argparse
+import json
+import sys
+from pathlib import Path
+
+# Repository root: three directory levels above this script.
+PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
+# Defaults for --input, --output and --quarantine (see main()).
+DEFAULT_INPUT = PROJECT_ROOT / "data" / "raw" / "training_audit_dev_latest.jsonl"
+DEFAULT_OUTPUT = PROJECT_ROOT / "data" / "processed" / "filtered_audit.jsonl"
+DEFAULT_QUARANTINE = PROJECT_ROOT / "data" / "quarantine" / "audit_quarantine.jsonl"
+
+# Strings that indicate system prompt leakage in model output.
+# Matching is case-insensitive, so the casing used here does not matter.
+PROMPT_LEAK_PATTERNS = [
+    "you are a minecraft",
+    "command translator",
+    "player request vector",
+    "you are an ai",
+    "system prompt",
+    "Context: Player is",
+]
+
+
+def is_prompt_leak(message: str) -> bool:
+    """Return True when *message* contains any prompt-leak pattern.
+
+    The comparison ignores case on both sides.
+    """
+    msg_lower = message.lower()
+    # Lower-case the pattern as well: the message is lower-cased, so a
+    # mixed-case pattern such as "Context: Player is" could otherwise never
+    # match.
+    return any(pat.lower() in msg_lower for pat in PROMPT_LEAK_PATTERNS)
+
+
+def has_broken_json(commands: list[str]) -> bool:
+    """Report whether any command looks like broken tellraw/JSON output.
+
+    A command is flagged when, after stripping whitespace, it starts like a
+    fragment of split JSON, or when it mentions ``tellraw`` and its opening
+    and closing brace/bracket counts differ by more than one.
+    """
+    fragment_prefixes = (":\\", ",\\", ":{", ",")
+    for raw in commands:
+        text = raw.strip()
+        # Fragments from split JSON: starts with colon, comma, or brace-colon
+        if text.startswith(fragment_prefixes):
+            return True
+        if "tellraw" not in text:
+            continue
+        # Truncated tellraw with unmatched braces
+        opened = sum(text.count(ch) for ch in "{[")
+        closed = sum(text.count(ch) for ch in "}]")
+        if abs(opened - closed) > 1:
+            return True
+    return False
+
+
+def classify(record: dict) -> tuple[str, str]:
+    """Classify a record as (keep|quarantine, reason).
+
+    Quarantine reasons, checked first and in this order: ``prompt_leak``,
+    ``empty_output``, ``broken_json``. Keep reasons: ``validator_corrected``
+    (generated and executed commands differ), ``validator_blocked``
+    (generated but nothing executed), ``executed``, ``god_rp`` (god mode
+    with a message and no commands) and ``other``.
+
+    Missing or ``null`` values for ``output``, ``message`` and the command
+    lists are treated as empty.
+    """
+    mode = record.get("mode", "")
+    # `or` also covers an explicit JSON null, which .get()'s default does not.
+    output = record.get("output") or {}
+    message = (output.get("message") or "").strip()
+    cmds_gen = output.get("commands_generated") or []
+    cmds_exe = output.get("commands_executed") or []
+
+    # ── Quarantine ─────────────────────────────────────────────────
+    # System prompt leak
+    if is_prompt_leak(message):
+        return "quarantine", "prompt_leak"
+
+    # Completely empty output — teaches the model to produce nothing
+    if not message and not cmds_gen:
+        return "quarantine", "empty_output"
+
+    # Broken JSON/tellraw commands — teaches bad syntax
+    if cmds_gen and has_broken_json(cmds_gen):
+        return "quarantine", "broken_json"
+
+    # ── Keep ───────────────────────────────────────────────────────
+    if cmds_gen:
+        # Generated but validator blocked — teaches what gets rejected
+        if not cmds_exe:
+            return "keep", "validator_blocked"
+        # Validator correction: generated != executed (highest value)
+        if cmds_gen != cmds_exe:
+            return "keep", "validator_corrected"
+        # Commands executed (sudo or god mode with action)
+        return "keep", "executed"
+
+    # God mode with RP message, no commands — valid roleplay response
+    if mode == "god" and message:
+        return "keep", "god_rp"
+
+    # Catch-all: keep with flag
+    return "keep", "other"
+
+
+def main():
+    """Classify each record of the input JSONL, print a breakdown, write results.
+
+    Kept records go to ``--output`` with the ``_filter_*`` tags stripped;
+    quarantined records go to ``--quarantine`` with the tags retained. With
+    ``--stats`` only the breakdown is printed and nothing is written.
+    """
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("--input", type=Path, default=DEFAULT_INPUT)
+    parser.add_argument("--output", type=Path, default=DEFAULT_OUTPUT)
+    parser.add_argument("--quarantine", type=Path, default=DEFAULT_QUARANTINE,
+                        help="Write quarantined records here for review")
+    parser.add_argument("--stats", action="store_true", help="Print stats only, don't write")
+    args = parser.parse_args()
+
+    stats = {"keep": {}, "quarantine": {}}
+    kept = []
+    quarantined = []
+
+    # JSONL is UTF-8; do not depend on the platform's default encoding.
+    with open(args.input, encoding="utf-8") as f:
+        for line in f:
+            line = line.strip()
+            if not line:
+                continue
+            record = json.loads(line)
+            action, reason = classify(record)
+            stats[action][reason] = stats[action].get(reason, 0) + 1
+            record["_filter_action"] = action
+            record["_filter_reason"] = reason
+            if action == "keep":
+                kept.append(record)
+            else:
+                quarantined.append(record)
+
+    total = len(kept) + len(quarantined)
+
+    def pct(count):
+        # An empty input has no meaningful ratio; report 0.0% instead of
+        # dividing by zero.
+        return 100 * count / total if total else 0.0
+
+    print(f"Total records:    {total}")
+    print(f"Kept:             {len(kept)} ({pct(len(kept)):.1f}%)")
+    print(f"Quarantined:      {len(quarantined)} ({pct(len(quarantined)):.1f}%)")
+    print()
+
+    print("KEPT breakdown:")
+    for reason, count in sorted(stats["keep"].items(), key=lambda x: -x[1]):
+        print(f"  {reason:<25} {count:>5}")
+    print()
+    print("QUARANTINED breakdown:")
+    for reason, count in sorted(stats["quarantine"].items(), key=lambda x: -x[1]):
+        print(f"  {reason:<25} {count:>5}")
+
+    if args.stats:
+        return
+
+    # Write kept records (strip internal filter tags)
+    args.output.parent.mkdir(parents=True, exist_ok=True)
+    with open(args.output, "w") as f:
+        for record in kept:
+            out = {k: v for k, v in record.items() if not k.startswith("_filter_")}
+            f.write(json.dumps(out) + "\n")
+    print(f"\nWrote {len(kept)} records to {args.output}")
+
+    # Write quarantined records (keep filter tags for review)
+    args.quarantine.parent.mkdir(parents=True, exist_ok=True)
+    with open(args.quarantine, "w") as f:
+        for record in quarantined:
+            f.write(json.dumps(record) + "\n")
+    print(f"Wrote {len(quarantined)} quarantined records to {args.quarantine}")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,449 @@
+#!/usr/bin/env python3
+"""
+Dataset merge pipeline for Mortdecai training.
+
+Normalizes all dataset formats into the two schemas the trainer expects:
+  1. `conversations` — [{role, content}, ...] for simple command gen
+  2. `messages` + `qwen3_text` — multi-turn tool-calling with pre-formatted text
+
+Handles deduplication, mix ratios, and outputs a single training-ready JSONL.
+
+Usage:
+    # Default merge with recommended ratios
+    python3 merge_datasets.py
+
+    # Custom ratios (multipliers per source)
+    python3 merge_datasets.py --ratios seed=2.0,tool=1.0,iglu=0.5
+
+    # Dry run — show stats without writing
+    python3 merge_datasets.py --dry-run
+
+    # Include chat app exports
+    python3 merge_datasets.py --include-chat-logs
+"""
+
+import argparse
+import json
+import hashlib
+import random
+import sys
+from pathlib import Path
+from collections import Counter
+
+PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
+sys.path.insert(0, str(PROJECT_ROOT))
+
+from agent.prompts.system_prompts import get_prompt, SYNTAX_RULES, RISK_GRADIENT
+
+# ── Dataset sources ───────────────────────────────────────────────────────────
+
+# Registry of dataset sources, keyed by the short name that the --ratios
+# option in the module docstring uses (e.g. seed=2.0,tool=1.0,iglu=0.5).
+# Each entry carries:
+#   path          — JSONL location (written relative to the repository root)
+#   format        — tag naming the record layout
+#                   (presumably selects one of the converters below — confirm)
+#   default_ratio — multiplier applied to this source when none is given
+#   description   — human-readable label
+#   optional      — present only on "chat_logs"
+#                   (presumably tied to --include-chat-logs — confirm)
+SOURCES = {
+    "seed": {
+        "path": "data/processed/seed_dataset.jsonl",
+        "format": "seed",
+        "default_ratio": 2.0,  # Oversample — keep seed dominant
+        "description": "Core command gen with pos/neg pairs",
+    },
+    "tool": {
+        "path": "data/processed/tool_training.jsonl",
+        "format": "tool_messages",
+        "default_ratio": 1.0,
+        "description": "Multi-turn tool-calling examples",
+    },
+    "tool_v05": {
+        "path": "data/processed/tool_training_v05.jsonl",
+        "format": "tool_messages",
+        "default_ratio": 1.5,  # High quality, oversample
+        "description": "0.5.0 quality tool examples",
+    },
+    "iglu": {
+        "path": "data/raw/iglu_build_training.jsonl",
+        "format": "tool_messages",
+        "default_ratio": 0.8,
+        "description": "IGLU building dataset",
+    },
+    "plugin": {
+        "path": "data/raw/plugin_training.jsonl",
+        "format": "tool_messages",
+        "default_ratio": 1.5,
+        "description": "Plugin command examples",
+    },
+    "exploration": {
+        "path": "data/processed/filtered_exploration.jsonl",
+        "format": "exploration",
+        "default_ratio": 1.0,
+        "description": "Wiki-grounded exploration",
+    },
+    "self_play": {
+        "path": "data/processed/self_play.jsonl",
+        "format": "self_play",
+        "default_ratio": 0.6,  # Large set, don't let it dominate
+        "description": "Self-play generations",
+    },
+    "audit": {
+        "path": "data/processed/filtered_audit.jsonl",
+        "format": "audit",
+        "default_ratio": 0.5,  # Large set, needs dilution
+        "description": "Filtered audit log data",
+    },
+    "distilled": {
+        "path": "data/processed/distilled.jsonl",
+        "format": "seed",
+        "default_ratio": 1.5,  # Gold standard from Claude
+        "description": "Claude-distilled examples",
+    },
+    "chat_logs": {
+        "path": "data/chat_logs/training_export.jsonl",
+        "format": "audit",
+        "default_ratio": 2.0,  # Hand-curated via chat app
+        "description": "Chat app training exports",
+        "optional": True,
+    },
+}
+
+# Also include all raw training files.
+# Paths are written relative to the repository root, one JSONL file per
+# topic, listed alphabetically.
+# NOTE(review): how these are loaded and which ratio they get is not visible
+# in this part of the file.
+RAW_TRAINING_FILES = [
+    "data/raw/advanced_commands_training.jsonl",
+    "data/raw/biome_dimension_training.jsonl",
+    "data/raw/chaos_event_training.jsonl",
+    "data/raw/chaos_gaps_training.jsonl",
+    "data/raw/command_reference_training.jsonl",
+    "data/raw/cosmetic_xp_training.jsonl",
+    "data/raw/dangerous_effects_training.jsonl",
+    "data/raw/death_environment_training.jsonl",
+    "data/raw/distance_projectile_training.jsonl",
+    "data/raw/distance_scale_training.jsonl",
+    "data/raw/enchant_order_errors.jsonl",
+    "data/raw/enchantment_training.jsonl",
+    "data/raw/entity_mob_training.jsonl",
+    "data/raw/entity_targeting_training.jsonl",
+    "data/raw/error_correction_training.jsonl",
+    "data/raw/event_trigger_training.jsonl",
+    "data/raw/execute_chain_training.jsonl",
+    "data/raw/fall_safety_training.jsonl",
+    "data/raw/gamerule_training.jsonl",
+    "data/raw/kill_radius_training.jsonl",
+    "data/raw/memory_training.jsonl",
+    "data/raw/multiplayer_training.jsonl",
+    "data/raw/multistep_training.jsonl",
+    "data/raw/paper_training.jsonl",
+    "data/raw/prod_pattern_fixes.jsonl",
+    "data/raw/quantity_training.jsonl",
+    "data/raw/recipe_training.jsonl",
+    "data/raw/redstone_training.jsonl",
+    "data/raw/revert_and_drops_training.jsonl",
+    "data/raw/revert_format_training.jsonl",
+    "data/raw/risk_hierarchy_training.jsonl",
+    "data/raw/script_tool_training.jsonl",
+    "data/raw/suffocation_training.jsonl",
+    "data/raw/worldedit_training.jsonl",
+]
+
+# ── Format converters ─────────────────────────────────────────────────────────
+
+# System prompts for the two modes, fetched once at import time. The
+# converters below prepend "/no_think\n" to whichever one they use.
+SUDO_SYSTEM = get_prompt("sudo")
+GOD_SYSTEM = get_prompt("god")
+
+
+def _seed_to_conversations(record: dict) -> dict:
+    """Turn a seed-format record into a three-message ``conversations`` entry.
+
+    A user message starting with "pray " (any case) selects the god prompt
+    and adds a ``message`` field to the reply; everything else uses the sudo
+    prompt. The assistant turn is the reply serialized as JSON.
+    """
+    source_in = record.get("input", {})
+    source_out = record.get("output", {})
+    prompt = source_in.get("user_message", "")
+
+    reply = {
+        "commands": source_out.get("commands", []),
+        "reasoning": source_out.get("reasoning", ""),
+    }
+
+    # Detect mode from prefix
+    is_god = prompt.lower().startswith("pray ")
+    if is_god:
+        reply["message"] = source_out.get("message", "")
+    system_text = GOD_SYSTEM if is_god else SUDO_SYSTEM
+
+    turns = [
+        {"role": "system", "content": "/no_think\n" + system_text},
+        {"role": "user", "content": prompt},
+        {"role": "assistant", "content": json.dumps(reply)},
+    ]
+    return {"conversations": turns}
+
+
+def _audit_to_conversations(record: dict) -> dict:
+    """Turn an audit-log record into a three-message ``conversations`` entry.
+
+    The record's ``mode`` (default "sudo") picks the system prompt. Commands
+    come from ``commands_generated``, falling back to ``commands`` when that
+    is missing or empty. ``message`` is included only when non-empty.
+    """
+    source_in = record.get("input", {})
+    source_out = record.get("output", {})
+
+    generated = source_out.get("commands_generated", [])
+    reply = {"commands": generated or source_out.get("commands", [])}
+
+    text = source_out.get("message", "")
+    if text:
+        reply["message"] = text
+
+    if record.get("mode", "sudo") == "god":
+        system_text = GOD_SYSTEM
+    else:
+        system_text = SUDO_SYSTEM
+
+    turns = [
+        {"role": "system", "content": "/no_think\n" + system_text},
+        {"role": "user", "content": source_in.get("user_message", "")},
+        {"role": "assistant", "content": json.dumps(reply)},
+    ]
+    return {"conversations": turns}
+
+
+def _self_play_to_conversations(record: dict) -> dict:
+    """Turn a self_play record into a three-message conversation.
+
+    The mode is inferred from a leading "pray " in the user message. The
+    optional "message" is looked up in the output first and then at the top
+    level of the record; it is only emitted when non-empty.
+    """
+    source = record.get("input", {})
+    result = record.get("output", {})
+    prompt = source.get("user_message", "")
+
+    payload = {
+        "commands": result.get("commands", []),
+        "reasoning": result.get("reasoning", ""),
+    }
+    top_level_message = record.get("message", "")
+    reply = result.get("message", top_level_message)
+
+    system_prompt = GOD_SYSTEM if prompt.lower().startswith("pray ") else SUDO_SYSTEM
+
+    if reply:
+        payload["message"] = reply
+
+    turns = [
+        {"role": "system", "content": "/no_think\n" + system_prompt},
+        {"role": "user", "content": prompt},
+        {"role": "assistant", "content": json.dumps(payload)},
+    ]
+    return {"conversations": turns}
+
+
+def _exploration_to_conversations(record: dict) -> dict:
+    """Turn an exploration record into a sudo-mode conversation.
+
+    The "input" field may be a dict holding "user_message" or any other
+    value, which is then stringified and used as the user message directly.
+    Exploration records always use the sudo system prompt.
+    """
+    source = record.get("input", {})
+    result = record.get("output", {})
+
+    if isinstance(source, dict):
+        prompt = source.get("user_message", "")
+    else:
+        prompt = str(source)
+
+    payload = {
+        "commands": result.get("commands", []),
+        "reasoning": result.get("reasoning", ""),
+    }
+
+    turns = [
+        {"role": "system", "content": "/no_think\n" + SUDO_SYSTEM},
+        {"role": "user", "content": prompt},
+        {"role": "assistant", "content": json.dumps(payload)},
+    ]
+    return {"conversations": turns}
+
+
+def _tool_messages_passthrough(record: dict) -> dict:
+    """Re-key an already-formatted tool-training record.
+
+    "qwen3_text" wins over "messages" when both are present; it is emitted
+    under "text", while "messages" is emitted under "conversations".
+    Returns None when the record has neither field.
+    """
+    # Checked in priority order: (field in the record, field in the output).
+    field_map = (("qwen3_text", "text"), ("messages", "conversations"))
+    for source_field, output_field in field_map:
+        if source_field in record:
+            return {output_field: record[source_field]}
+    return None
+
+
+def _raw_training_to_conversations(record: dict) -> dict:
+    """Convert a raw training record; delegates to the seed converter.
+
+    Kept as a separate name so raw files have their own entry in the
+    converter table even though the handling is currently identical.
+    """
+    return _seed_to_conversations(record)
+
+
+# Converter lookup table. load_and_convert() selects an entry with the
+# "format" value of a source's metadata; main() uses "raw_training" directly
+# for the files listed in RAW_TRAINING_FILES.
+CONVERTERS = {
+    "seed": _seed_to_conversations,
+    "tool_messages": _tool_messages_passthrough,
+    "audit": _audit_to_conversations,
+    "self_play": _self_play_to_conversations,
+    "exploration": _exploration_to_conversations,
+    "raw_training": _raw_training_to_conversations,
+}
+
+
+# ── Pipeline ──────────────────────────────────────────────────────────────────
+
+def _content_text(value) -> str:
+    """Coerce a message/text field to a string suitable for hashing."""
+    if value is None:
+        return ""
+    if isinstance(value, str):
+        return value
+    # Structured content (lists/dicts) is serialized deterministically.
+    return json.dumps(value, sort_keys=True)
+
+
+def dedup_key(record: dict) -> str:
+    """Return an MD5 hex digest identifying the training content of *record*.
+
+    - "text" records are keyed on the complete text.
+    - "conversations" records are keyed on the last user message plus the
+      first non-empty assistant message; system messages are ignored, so the
+      same example under a different system prompt still counts as a duplicate.
+    - Anything else is keyed on its JSON serialization.
+
+    The full content is hashed rather than a fixed-length prefix. Hashing a
+    prefix makes every record that merely shares a long opening (for example
+    pre-formatted text that starts with the same system prompt) look like a
+    duplicate, which silently drops distinct training examples.
+    """
+    if "text" in record:
+        content = _content_text(record["text"])
+    elif "conversations" in record:
+        user = ""
+        asst = ""
+        for msg in record["conversations"]:
+            role = msg.get("role")
+            # content may be None/absent, e.g. on tool-call-only assistant turns
+            text = _content_text(msg.get("content"))
+            if role == "user":
+                user = text
+            elif role == "assistant" and not asst:
+                asst = text
+        content = user + "|" + asst
+    else:
+        content = json.dumps(record, sort_keys=True)
+    return hashlib.md5(content.encode()).hexdigest()
+
+
+def load_and_convert(source_name: str, meta: dict, ratio: float) -> list:
+    """Load one JSONL source, convert each record, then apply the sampling ratio.
+
+    Args:
+        source_name: Name used in warning messages.
+        meta: Source metadata; reads "path" (relative to PROJECT_ROOT),
+            "format" (key into CONVERTERS) and the optional "optional" flag.
+        ratio: > 1.0 oversamples (whole copies plus a random partial sample),
+            < 1.0 downsamples to at least one record, 1.0 keeps records as-is.
+
+    Returns:
+        The converted records after sampling. Empty when the file is missing
+        or nothing could be converted.
+    """
+    path = PROJECT_ROOT / meta["path"]
+    if not path.exists():
+        # Optional sources are allowed to be absent without any noise.
+        if not meta.get("optional"):
+            print(f"  WARNING: {path} not found, skipping {source_name}")
+        return []
+
+    converter = CONVERTERS[meta["format"]]
+    records = []
+    skipped = 0
+
+    with open(path, encoding="utf-8") as f:
+        for line in f:
+            line = line.strip()
+            if not line:
+                continue
+            try:
+                converted = converter(json.loads(line))
+            except (json.JSONDecodeError, KeyError, TypeError, AttributeError):
+                # Best-effort: one malformed line must not abort the merge.
+                # AttributeError covers records whose fields have the wrong
+                # type (e.g. a non-dict "input" or a null user message).
+                skipped += 1
+                continue
+            if converted:
+                records.append(converted)
+
+    if skipped:
+        print(f"  WARNING: {source_name}: skipped {skipped} malformed line(s)")
+
+    if not records:
+        # Nothing to sample from; random.sample() would raise on an empty
+        # population when downsampling.
+        return records
+
+    # Apply ratio (oversample or downsample)
+    if ratio > 1.0:
+        # Oversample: whole copies first, then a random fraction on top.
+        full_copies = int(ratio)
+        partial = ratio - full_copies
+        oversampled = records * full_copies
+        if partial > 0:
+            extra = random.sample(records, int(len(records) * partial))
+            oversampled.extend(extra)
+        records = oversampled
+    elif ratio < 1.0:
+        # Downsample, but never below a single record.
+        k = max(1, int(len(records) * ratio))
+        records = random.sample(records, k)
+
+    return records
+
+
+def main():
+    """Merge all training sources into one shuffled, de-duplicated JSONL file.
+
+    Steps: parse CLI options, load every active named source through
+    load_and_convert() (which applies the per-source ratio), optionally load
+    the RAW_TRAINING_FILES, de-duplicate, shuffle, and write the result unless
+    --dry-run is given.
+
+    De-duplication is done per source so that it does not undo oversampling:
+    a record is dropped when an earlier source already contributed it, or
+    when the current source has already kept ceil(ratio) copies of it.
+    """
+    import math  # local: only needed for the per-source duplicate cap
+
+    parser = argparse.ArgumentParser(description="Merge datasets for Mortdecai training")
+    parser.add_argument("--output", type=Path,
+                        default=PROJECT_ROOT / "data" / "processed" / "merged_training_v06.jsonl")
+    parser.add_argument("--ratios", default="",
+                        help="Override ratios: seed=2.0,tool=1.0,iglu=0.5")
+    parser.add_argument("--dry-run", action="store_true",
+                        help="Print stats without writing output")
+    parser.add_argument("--include-chat-logs", action="store_true",
+                        help="Include chat app training exports")
+    parser.add_argument("--include-raw", action="store_true", default=True,
+                        help="Include raw training files (default: true)")
+    # --include-raw defaults to True and is store_true, so on its own it can
+    # never be switched off; this companion flag provides the off switch.
+    parser.add_argument("--no-include-raw", dest="include_raw", action="store_false",
+                        help="Skip raw training files")
+    parser.add_argument("--seed", type=int, default=42,
+                        help="Random seed for reproducibility")
+    args = parser.parse_args()
+
+    random.seed(args.seed)
+
+    # Parse ratio overrides; tolerate stray commas, reject malformed pairs
+    # with a usage error instead of an unpacking traceback.
+    ratio_overrides = {}
+    for pair in args.ratios.split(","):
+        pair = pair.strip()
+        if not pair:
+            continue
+        name, sep, val = pair.partition("=")
+        name = name.strip()
+        try:
+            if not sep or not name:
+                raise ValueError(pair)
+            ratio_overrides[name] = float(val)
+        except ValueError:
+            parser.error(f"invalid --ratios entry {pair!r}; expected name=number")
+
+    # Filter sources
+    active_sources = dict(SOURCES)
+    if not args.include_chat_logs:
+        active_sources.pop("chat_logs", None)
+
+    print("Mortdecai Dataset Merge Pipeline")
+    print("=" * 60)
+    print()
+
+    # One entry per source: (records, max copies of one record to keep).
+    batches = []
+
+    # Load named sources
+    for name, meta in active_sources.items():
+        ratio = ratio_overrides.get(name, meta["default_ratio"])
+        records = load_and_convert(name, meta, ratio)
+        raw_count = 0
+        path = PROJECT_ROOT / meta["path"]
+        if path.exists():
+            with open(path, encoding="utf-8") as f:
+                raw_count = sum(1 for _ in f)
+
+        batches.append((records, max(1, math.ceil(ratio))))
+        print(f"  {name:<20s} {raw_count:>6} raw x{ratio:.1f} = {len(records):>7}  ({meta['description']})")
+
+    # Load raw training files
+    if args.include_raw:
+        converter = CONVERTERS["raw_training"]
+        raw_records = []
+        for filepath in RAW_TRAINING_FILES:
+            path = PROJECT_ROOT / filepath
+            if not path.exists():
+                continue
+            with open(path, encoding="utf-8") as f:
+                for line in f:
+                    line = line.strip()
+                    if not line:
+                        continue
+                    try:
+                        converted = converter(json.loads(line))
+                    except (ValueError, KeyError, TypeError, AttributeError):
+                        # Best-effort: skip malformed lines, but do not
+                        # swallow KeyboardInterrupt/SystemExit.
+                        continue
+                    if converted:
+                        raw_records.append(converted)
+        raw_total = len(raw_records)
+        batches.append((raw_records, 1))
+        print(f"  {'raw_files':<20s} {raw_total:>6} raw x1.0 = {raw_total:>7}  ({len(RAW_TRAINING_FILES)} files)")
+
+    total_before = sum(len(records) for records, _ in batches)
+    print()
+    print(f"  Total before dedup: {total_before}")
+
+    # Deduplicate across sources while keeping intentional oversampled copies.
+    seen = set()      # keys contributed by earlier sources
+    deduped = []
+    for records, copies_allowed in batches:
+        kept = {}     # key -> copies kept from the current source
+        for r in records:
+            key = dedup_key(r)
+            if key in seen or kept.get(key, 0) >= copies_allowed:
+                continue
+            kept[key] = kept.get(key, 0) + 1
+            deduped.append(r)
+        seen.update(kept)
+
+    dupes_removed = total_before - len(deduped)
+    print(f"  Duplicates removed: {dupes_removed}")
+    print(f"  Total after dedup:  {len(deduped)}")
+
+    # Count format split
+    text_count = sum(1 for r in deduped if "text" in r)
+    conv_count = sum(1 for r in deduped if "conversations" in r)
+    print(f"  Format: {conv_count} conversations, {text_count} pre-formatted text")
+
+    # Shuffle
+    random.shuffle(deduped)
+
+    if args.dry_run:
+        print("\n  [DRY RUN] No output written.")
+        return
+
+    # Write; explicit UTF-8 because ensure_ascii=False emits raw non-ASCII.
+    args.output.parent.mkdir(parents=True, exist_ok=True)
+    with open(args.output, "w", encoding="utf-8") as f:
+        for r in deduped:
+            f.write(json.dumps(r, ensure_ascii=False) + "\n")
+
+    print(f"\n  Wrote {len(deduped)} examples to {args.output}")
+    print(f"  File size: {args.output.stat().st_size / 1e6:.1f} MB")
+
+
+if __name__ == "__main__":
+    main()
@@ -1,11 +1,11 @@
 #!/usr/bin/env python3
 """
-Tool-focused self-play — exercises all 14 tools on a live dev server.
+Tool-focused self-play — exercises all 17 tools on a live dev server.

 Unlike regular self-play (which tests command generation), this script
 specifically generates prompts that require tool use: script writing,
-memory operations, entity scanning, wiki lookups, and chained multi-tool
-flows. Runs on the dev server via RCON.
+memory operations, entity scanning, wiki/plugin/changelog/paper lookups,
+and chained multi-tool flows. Runs on the dev server via RCON.

 The model responds, its tool calls get executed for real, and the full
 interaction (prompt + tool calls + results + final response) gets logged
@@ -15,7 +15,11 @@ Usage:
    python3 tool_self_play.py --ollama-url http://192.168.0.179:11434 \\
        --rcon-host 192.168.0.112 --rcon-port 25578 --rounds 30

-    # Or via the scheduler preset
+    # Load extra prompts from prayer bank
+    python3 tool_self_play.py --prompt-bank data/raw/prayer_prompt_bank.jsonl
+
+    # Focus on weak categories only
+    python3 tool_self_play.py --categories worldguard,coreprotect,luckperms
 """

 import argparse
@@ -34,10 +38,91 @@ import requests
 from agent.tools.persistent_rcon import get_rcon

 OUTPUT_DIR = PROJECT_ROOT / "data" / "raw" / "tool_selfplay"
+PROMPTS_DIR = PROJECT_ROOT / "training" / "prompts"

-# ── Prompt categories that exercise specific tools ─────────────────────────
+# ── Template variables for prompt expansion ────────────────────────────────

-PROMPTS = {
+# Placeholder name -> candidate values. expand_template() replaces each
+# "{name}" token in a prompt with a random element of the matching list.
+# The "player" list is also reused as PLAYERS further down.
+TEMPLATE_VARS = {
+    "player": ["slingshooter08", "Ace13245", "TheBigBoss", "xXDragonSlayerXx"],
+    "target": ["Ace13245", "TheBigBoss", "xXDragonSlayerXx", "slingshooter08"],
+    "region": ["my-base", "spawn-zone", "pvp-arena", "vip-lounge", "farm-area"],
+    "warp": ["arena", "spawn", "shop", "nether", "farm", "end"],
+    "group": ["vip", "builder", "moderator", "default"],
+    "world": ["world", "world_nether", "world_the_end"],
+}
+
+
+def expand_template(prompt: str) -> str:
+    """Fill every {placeholder} in *prompt* with a random TEMPLATE_VARS value.
+
+    Each occurrence is drawn independently, so a prompt that mentions the
+    same placeholder twice may receive two different values.
+    """
+    for name, choices in TEMPLATE_VARS.items():
+        placeholder = f"{{{name}}}"
+        position = prompt.find(placeholder)
+        while position != -1:
+            # Substitute only the first remaining occurrence, then rescan.
+            prompt = (
+                prompt[:position]
+                + random.choice(choices)
+                + prompt[position + len(placeholder):]
+            )
+            position = prompt.find(placeholder)
+    return prompt
+
+
+def load_prompts(prompts_dir: Path = PROMPTS_DIR,
+                 mode_filter: str = None,
+                 call_type_filter: str = None) -> dict[str, list[str]]:
+    """Load prompt templates from per-category JSONL files.
+
+    Args:
+        prompts_dir: Directory containing manifest.json and prompt JSONL files.
+        mode_filter: If set, only load categories whose manifest "mode" equals
+            this value or is "mixed".
+        call_type_filter: If set, only load categories whose manifest
+            "call_type" equals this value.
+
+    Returns:
+        Dict mapping category name -> list of prompt template strings.
+        When manifest.json is missing, a copy of the inline PROMPTS_FALLBACK
+        is returned instead (the filters are not applied to it). Categories
+        whose JSONL file is missing are skipped with a warning.
+    """
+    manifest_path = prompts_dir / "manifest.json"
+    if not manifest_path.exists():
+        print(f"  Warning: {manifest_path} not found, using inline fallback")
+        # Return a copy: callers add categories to the result (main() inserts
+        # "prompt_bank"), which must not leak into the module-level constant.
+        return dict(PROMPTS_FALLBACK)
+
+    with open(manifest_path, encoding="utf-8") as f:
+        manifest = json.load(f)
+
+    prompts = {}
+    for category, meta in manifest.items():
+        # Apply filters
+        if mode_filter and meta.get("mode") not in (mode_filter, "mixed"):
+            continue
+        if call_type_filter and meta.get("call_type") != call_type_filter:
+            continue
+
+        filepath = prompts_dir / meta["file"]
+        if not filepath.exists():
+            print(f"  Warning: {filepath} not found, skipping {category}")
+            continue
+        cat_prompts = []
+        with open(filepath, encoding="utf-8") as f:
+            for line in f:
+                line = line.strip()
+                if not line:
+                    # Blank lines are not valid JSON; skip instead of crashing.
+                    continue
+                entry = json.loads(line)
+                cat_prompts.append(entry["prompt"])
+        prompts[category] = cat_prompts
+
+    print(f"  Loaded {sum(len(v) for v in prompts.values())} prompts "
+          f"from {len(prompts)} categories")
+    return prompts
+
+
+def load_manifest(prompts_dir: Path = PROMPTS_DIR) -> dict:
+    """Return the parsed manifest.json from *prompts_dir*, or {} if absent.
+
+    Used by the chat app for template selection UI; the manifest is handed
+    back unmodified, including per-category mode, call_type and count.
+    """
+    manifest_file = prompts_dir / "manifest.json"
+    if manifest_file.exists():
+        with open(manifest_file) as handle:
+            return json.load(handle)
+    return {}
+
+
+# ── Inline fallback (subset, used if prompt files missing) ─────────────────
+
+PROMPTS_FALLBACK = {
    "script_build": [
        "sudo build me a small cobblestone house with a door and windows",
        "sudo create a fighting arena with red and blue corners",
@@ -107,6 +192,37 @@ PROMPTS = {
        "sudo how does fortune work on ores?",
        "sudo what are all the copper variants?",
        "sudo how do trial spawners work?",
+        "sudo what does the breeze drop?",
+        "sudo how do you tame an armadillo?",
+        "sudo what biomes have cherry blossoms?",
+    ],
+    "plugin_docs": [
+        "sudo how do I create a WorldGuard region?",
+        "sudo what flags can I set on a region?",
+        "sudo how does CoreProtect rollback work?",
+        "sudo what's the command for LuckPerms group inheritance?",
+        "sudo how do I set up EssentialsX warps?",
+        "sudo what are the WorldEdit brush commands?",
+        "sudo how do I configure CoreProtect auto-purge?",
+        "sudo what permissions does the builder group need for WorldEdit?",
+        "sudo how do I set a WorldGuard greeting message?",
+        "sudo what's the difference between /rg flag and /rg addmember?",
+    ],
+    "changelog_lookup": [
+        "sudo what changed in 1.21?",
+        "sudo what was added in the tricky trials update?",
+        "sudo when were trial chambers added?",
+        "sudo what's new with the mace weapon?",
+        "sudo what version added the breeze mob?",
+        "sudo what got nerfed in the latest update?",
+    ],
+    "paper_docs": [
+        "sudo how do I set the view distance on Paper?",
+        "sudo what Paper config controls mob spawning rates?",
+        "sudo how do I enable async chunk loading?",
+        "sudo what's the Paper command to reload config?",
+        "sudo how do I optimize TPS on Paper?",
+        "sudo what Paper settings affect redstone performance?",
    ],
    "player_info": [
        "sudo build a wall around me",
@@ -215,9 +331,71 @@ PROMPTS = {
        "pray smite TheBigBoss for griefing",
        "pray make me a temple worthy of your glory",
    ],
+    # ── Direct command passthrough — teach faithful execution ──
+    "direct_passthrough": [
+        # WorldGuard — exact commands
+        'sudo run this exactly: rg define test-region',
+        'sudo run this exactly: rg flag test-region pvp deny',
+        'sudo run this exactly: rg flag test-region mob-spawning deny',
+        'sudo run this exactly: rg addmember test-region Ace13245',
+        'sudo run this exactly: rg removemember test-region Ace13245',
+        'sudo run this exactly: rg flag test-region greeting Welcome to the zone!',
+        'sudo run this exactly: rg flag test-region entry -g nonmembers deny',
+        'sudo run this exactly: rg list',
+        'sudo run this exactly: rg info test-region',
+        'sudo run this exactly: rg remove test-region',
+        # CoreProtect — exact commands
+        'sudo run this exactly: co status',
+        'sudo run this exactly: co lookup u:Ace13245 t:1h',
+        'sudo run this exactly: co lookup u:Ace13245 t:1h a:block',
+        'sudo run this exactly: co rollback u:Ace13245 t:1h r:20',
+        'sudo run this exactly: co restore u:Ace13245 t:1h r:20',
+        'sudo run this exactly: co inspect',
+        'sudo run this exactly: co lookup t:30m r:10 a:container',
+        # LuckPerms — exact commands
+        'sudo run this exactly: lp creategroup vip',
+        'sudo run this exactly: lp group vip permission set essentials.fly true',
+        'sudo run this exactly: lp group vip permission set essentials.heal true',
+        'sudo run this exactly: lp user Ace13245 parent add vip',
+        'sudo run this exactly: lp user Ace13245 parent remove vip',
+        'sudo run this exactly: lp user Ace13245 info',
+        'sudo run this exactly: lp group vip info',
+        'sudo run this exactly: lp listgroups',
+        'sudo run this exactly: lp group vip meta setprefix "&6[VIP] "',
+        'sudo run this exactly: lp deletegroup vip',
+        # EssentialsX — exact commands
+        'sudo run this exactly: heal Ace13245',
+        'sudo run this exactly: feed Ace13245',
+        'sudo run this exactly: eco give Ace13245 1000',
+        'sudo run this exactly: eco take Ace13245 500',
+        'sudo run this exactly: bal Ace13245',
+        'sudo run this exactly: broadcast Welcome to the server!',
+        'sudo run this exactly: setwarp arena',
+        'sudo run this exactly: warp arena',
+        'sudo run this exactly: delwarp arena',
+        'sudo run this exactly: nick Ace13245 DragonLord',
+        # FAWE — exact commands
+        'sudo run this exactly: /worldedit version',
+    ],
+    # ── Correction examples — model should fix wrong syntax ──
+    "direct_correction": [
+        'sudo gamemode slingshooter08 creative',  # wrong arg order
+        'sudo give slingshooter08 minecraft:bed 1',  # should be white_bed
+        'sudo effect slingshooter08 night_vision',  # missing give and duration
+        'sudo weather thunderstorm',  # should be thunder
+        'sudo give slingshooter08 minecraft:diamond_pickaxe[sharpness:5] 1',  # wrong enchant syntax
+        'sudo tp 100 64 100',  # missing player
+        'sudo kill zombie 50',  # wrong kill syntax
+        'sudo enchant slingshooter08 sharpness 10',  # max is 5
+        'sudo effect give slingshooter08 minecraft:haste 99999',  # duration too long
+        'sudo fill 0 0 0 100 100 100 diamond_block',  # too large, missing namespace
+        'sudo rg define',  # missing region name
+        'sudo co rollback Ace13245 1h',  # missing u: and t: prefixes
+        'sudo lp addgroup vip Ace13245',  # wrong syntax (should be lp user X parent add Y)
+    ],
 }

-PLAYERS = ["slingshooter08", "Ace13245", "TheBigBoss", "xXDragonSlayerXx"]
+PLAYERS = TEMPLATE_VARS["player"]


 def query_model(prompt, player, ollama_url, model, rcon):
@@ -225,7 +403,8 @@ def query_model(prompt, player, ollama_url, model, rcon):
    system = (
        "You are a Minecraft 1.21 command translator for a Paper server.\n"
        "Plugins: FastAsyncWorldEdit, WorldGuard, CoreProtect, EssentialsX, Vault, LuckPerms.\n"
-        "Tools: rcon.execute, minecraft.wiki_lookup, world.player_info, "
+        "Tools: rcon.execute, minecraft.wiki_lookup, plugin.docs_lookup, "
+        "minecraft.changelog_lookup, paper.docs_lookup, world.player_info, "
        "world.server_state, world.nearby_entities, memory.read, memory.write, "
        "script.write, script.validate, script.execute, script.read, script.list, "
        "script.delete, script.schedule.\n\n"
@@ -246,7 +425,7 @@ def query_model(prompt, player, ollama_url, model, rcon):
                {"role": "user", "content": f"Player {player}: {prompt}"},
            ],
            "stream": False, "format": "json",
-            "options": {"temperature": 0.4, "num_predict": 800},
+            "options": {"temperature": 0.85, "num_predict": 800},
        }, timeout=120)

        content = r.json()["message"]["content"]
@@ -272,9 +451,10 @@ def validate_commands(commands, rcon):
    return results


-def run_round(category, ollama_url, model, rcon, player):
+def run_round(category, ollama_url, model, rcon, player, prompts):
    """Run one self-play round for a specific tool category."""
-    prompt = random.choice(PROMPTS[category])
+    raw_prompt = random.choice(prompts[category])
+    prompt = expand_template(raw_prompt)

    print(f"  [{category:18s}] {prompt[:60]}")
    start = time.time()
@@ -336,12 +516,13 @@ def run_round(category, ollama_url, model, rcon, player):
 def main():
    parser = argparse.ArgumentParser(description="Tool-focused self-play")
    parser.add_argument("--ollama-url", default="http://192.168.0.179:11434")
-    parser.add_argument("--model", default="mortdecai:0.4.0")
-    parser.add_argument("--rcon-host", default="192.168.0.112")
+    parser.add_argument("--model", default="mortdecai:0.5.0")
+    parser.add_argument("--rcon-host", default="192.168.0.244")
    parser.add_argument("--rcon-port", type=int, default=25578)
    parser.add_argument("--rcon-pass", default="REDACTED_RCON")
    parser.add_argument("--rounds", type=int, default=30, help="Rounds per category")
    parser.add_argument("--categories", default="all", help="Comma-separated categories or 'all'")
+    parser.add_argument("--prompt-bank", default="", help="JSONL file with extra prompts to mix in")
    parser.add_argument("--output", default="")
    args = parser.parse_args()

@@ -350,8 +531,23 @@ def main():

    rcon = get_rcon(args.rcon_host, args.rcon_port, args.rcon_pass)

+    # Load prompts from template files (falls back to inline)
+    prompts = load_prompts(PROMPTS_DIR)
+
+    # Load extra prompts from prompt bank (quarantine salvage, etc.)
+    if args.prompt_bank:
+        bank_path = Path(args.prompt_bank)
+        if bank_path.exists():
+            bank_prompts = []
+            with open(bank_path) as f:
+                for line in f:
+                    entry = json.loads(line)
+                    bank_prompts.append(entry["prompt"])
+            prompts["prompt_bank"] = bank_prompts
+            print(f"  Loaded {len(bank_prompts)} prompts from {bank_path}")
+
    if args.categories == "all":
-        categories = list(PROMPTS.keys())
+        categories = list(prompts.keys())
    else:
        categories = [c.strip() for c in args.categories.split(",")]

@@ -372,7 +568,7 @@ def main():

        for cat in categories:
            player = random.choice(PLAYERS)
-            example = run_round(cat, args.ollama_url, args.model, rcon, player)
+            example = run_round(cat, args.ollama_url, args.model, rcon, player, prompts)

            stats["total"] += 1
            if example is None: