0.6.0 training session: Oracle Bot, RL combat, Mind's Eye, multilingual pipeline

Major changes from this session: Training: - 0.6.0 training running: 9B on steel141 3090 Ti, 27B on rented H100 NVL - 7,256 merged training examples (up from 3,183) - New training data: failure modes (85), midloop messaging (27), prompt injection defense (29), personality (32), gold from quarantine bank (232), new tool examples (30), claude's own experience (10) - All training data RCON-validated at 100% pass rate - Bake-off: gemma3:27b 66%, qwen3.5:27b 61%, translategemma:27b 56% Oracle Bot (Mind's Eye): - Invisible spectator bot (mineflayer) streams world state via WebSocket - HTML5 Canvas frontend at mind.mortdec.ai - Real-time tool trace visualization with expandable entries - Streaming model tokens during inference - Gateway integration: fire-and-forget POST /trace on every tool call Reinforcement Learning: - Gymnasium environment wrapping mineflayer bot (minecraft_env.py) - PPO training via Stable Baselines3 (10K param policy network) - Behavioral cloning pretraining (97.5% accuracy on expert policy) - Infinite training loop with auto-restart and checkpoint resume - Bot learns combat, survival, navigation from raw experience Bot Army: - 8-soldier marching formation with autonomous combat - Combat bots using mineflayer-pvp, pathfinder, armor-manager - Multilingual prayer bots via translategemma:27b (18 languages) - Frame-based AI architecture: LLM planner + reactive micro-scripts Infrastructure: - Fixed mattpc.sethpc.xyz billing gateway (API key + player list parser) - Billing gateway now tracks all LAN traffic (LAN auto-auth) - Gateway fallback for empty god-mode responses - Updated mortdec.ai landing page Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-22 20:22:50 -04:00
parent baab24f8b1
commit 5b28002001
44 changed files with 20873 additions and 4352 deletions
@@ -0,0 +1,262 @@
+#!/usr/bin/env python3
+"""
+rcon_test_training.py — Test training data commands against live dev RCON.
+
+Extracts all commands from specified training files, replaces placeholder
+player names with actual online players, and tests each via RCON.
+
+Usage:
+    python3 training/scripts/rcon_test_training.py
+    python3 training/scripts/rcon_test_training.py --files data/raw/failure_mode_training.jsonl
+    python3 training/scripts/rcon_test_training.py --fix  # Accepted, but in-place fixing is not implemented yet
+"""
+
+import argparse
+import json
+import re
+import sys
+from pathlib import Path
+from mcrcon import MCRcon
+
+# Three directory levels above this script's resolved path; the relative
+# paths in DEFAULT_FILES (and any --files arguments) are joined onto it.
+ROOT = Path(__file__).resolve().parent.parent.parent
+
+# Training files tested when --files is not given (paths relative to ROOT)
+DEFAULT_FILES = [
+    "data/raw/failure_mode_training.jsonl",
+    "data/raw/midloop_messaging_training.jsonl",
+    "data/raw/prompt_injection_defense_training.jsonl",
+    "data/raw/personality_training.jsonl",
+    "data/raw/gold_from_bank_training.jsonl",
+    "data/raw/new_tool_training.jsonl",
+    "data/processed/filtered_audit.jsonl",
+]
+
+# Connection settings passed to MCRcon in main()
+RCON_HOST = "192.168.0.244"
+RCON_PORT = 25578
+RCON_PASS = "REDACTED_RCON"
+
+# Player names used in training data that need substitution
+# (replaced with a real online player before a command is sent)
+TRAINING_PLAYERS = {
+    "slingshooter08", "SwiftWolf", "DarkWolf", "BraveWolf", "WildWolf",
+    "StoneWolf", "CraftMaster99", "EndermanSlayer", "DiamondKing",
+    "RedstoneWiz", "NetherWalker", "FrostByte", "PrayBot_0", "PrayBot_1",
+    "PrayBot_2", "xX_HackerZ_Xx", "TotallyAdmin",
+}
+
+# Allow-list: is_safe() only accepts commands that START with one of these
+# prefixes; everything else is skipped rather than sent to the server.
+SAFE_PREFIXES = [
+    "give ", "effect ", "time set", "weather ", "gamemode ",
+    "gamerule ", "difficulty ", "tp ",
+]
+
+# Deny-list: is_safe() rejects any command that CONTAINS one of these
+# substrings anywhere, even if it also matches an allowed prefix.
+NEVER_RUN = [
+    "kill @a", "kill @e[type=minecraft:player",
+    "ban ", "deop ", "op ", "stop", "kick ",
+    "fill ", "setblock ",  # Might alter world
+    "worldborder ",
+]
+
+
+def get_online_players(mcr):
+    """Return the names of the players currently online on the dev server.
+
+    Sends ``list`` over RCON and scrapes name-like tokens out of the reply.
+    The reply is free-form text (optionally with ``§x`` colour codes), so this
+    is a heuristic: every word token longer than two characters is treated as
+    a player name unless it is a known status-sentence word, a known group
+    label, or purely numeric (a player/max count).
+
+    Args:
+        mcr: Connected RCON client exposing ``command(str) -> str``.
+
+    Returns:
+        List of unique names in the order they first appear in the reply, so
+        callers that pick ``[0]`` get the same player on every run.
+    """
+    resp = mcr.command("list")
+    # Words from the status sentence ("There are N of a max of M players
+    # online" / "... out of maximum ...") and group labels; never player names.
+    ignored = {
+        "out", "of", "max", "maximum", "players", "online", "There", "are",
+        "builder", "default",
+    }
+    # Parse "§6default§r: Player1, Player2..."
+    players = []
+    for part in resp.split(":"):
+        # The optional prefix swallows colour codes so "§6Name" yields "Name".
+        for name in re.findall(r'(?:§[0-9a-fk-or])*(\w+)', part):
+            # isdigit() drops counts such as "100" that pass the length check.
+            if len(name) <= 2 or name.isdigit() or name in ignored:
+                continue
+            players.append(name)
+    # dict.fromkeys de-duplicates while preserving first-seen order.
+    return list(dict.fromkeys(players))
+
+
+def extract_commands_from_record(rec):
+    """Extract all commands from a training record.
+
+    Only messages whose ``role`` is ``"assistant"`` and whose ``content`` is a
+    string are inspected. Two sources are read from each such message:
+
+    * every ``"command": "..."`` pair found anywhere in the text (JSON string
+      escapes are decoded, so embedded ``\\"`` does not truncate the command);
+      commands starting with ``tellraw`` are dropped;
+    * the string items of a top-level ``"commands"`` list when the whole
+      content parses as a JSON object.
+
+    Args:
+        rec: One decoded JSONL record; anything that is not a dict with a
+            ``messages`` list yields no commands.
+
+    Returns:
+        List of command strings in the order they were found (duplicates kept).
+    """
+    commands = []
+    if not isinstance(rec, dict) or "messages" not in rec:
+        return commands
+
+    messages = rec["messages"]
+    if not isinstance(messages, (list, tuple)):
+        return commands
+
+    for msg in messages:
+        if not isinstance(msg, dict) or msg.get("role") != "assistant":
+            continue
+        # .get() returns None when the key exists with a null value (common
+        # for tool-call-only messages), so check the type explicitly.
+        content = msg.get("content")
+        if not isinstance(content, str):
+            continue
+
+        # From tool_call blocks with rcon.execute. The value pattern accepts
+        # backslash escapes so a command containing \" is captured whole.
+        for m in re.finditer(r'"command"\s*:\s*"((?:[^"\\]|\\.)+)"', content):
+            raw = m.group(1)
+            try:
+                # Decode JSON escapes (\" \\ \n ...) to get the real command.
+                cmd = json.loads(f'"{raw}"', strict=False)
+            except ValueError:
+                cmd = raw  # not valid JSON string content; keep as captured
+            if not cmd.startswith("tellraw"):  # tellraw has nested JSON
+                commands.append(cmd)
+
+        # From JSON response commands arrays
+        try:
+            parsed = json.loads(content)
+        except ValueError:
+            continue
+        if isinstance(parsed, dict):
+            listed = parsed.get("commands")
+            # Require a real list: iterating a bare string would add
+            # single characters as "commands".
+            if isinstance(listed, list):
+                commands.extend(c for c in listed if isinstance(c, str))
+
+    return commands
+
+
+def substitute_player(cmd, online_players):
+    """Replace training player names with an actual online player.
+
+    Every ``@p`` and every whole-word occurrence of a name from
+    ``TRAINING_PLAYERS`` is replaced with the first entry of
+    ``online_players``. All names in the command are replaced (not just the
+    first one found), and a name is only matched when it is not part of a
+    longer identifier.
+
+    Args:
+        cmd: Command string taken from the training data.
+        online_players: Names of players currently online; may be empty.
+
+    Returns:
+        ``(command, was_subbed)`` where ``was_subbed`` is True when at least
+        one training player name was replaced. Replacing ``@p`` alone does not
+        set the flag. With no online players the command is returned
+        unchanged together with False.
+    """
+    if not online_players:
+        return cmd, False
+
+    target = online_players[0]  # Use first online player
+
+    # Replace @p with actual player (more reliable for RCON testing)
+    cmd = cmd.replace("@p", target)
+
+    if not TRAINING_PLAYERS:
+        return cmd, False
+
+    # One alternation over all known names, longest first, with lookarounds so
+    # a name embedded in a longer word is left alone.
+    names = sorted(TRAINING_PLAYERS, key=len, reverse=True)
+    pattern = r'(?<!\w)(?:' + '|'.join(map(re.escape, names)) + r')(?!\w)'
+    # A callable replacement keeps backslashes in the target literal.
+    cmd, replaced = re.subn(pattern, lambda _m: target, cmd)
+    return cmd, replaced > 0
+
+
+def is_safe(cmd):
+    """Return True when *cmd* may be sent to the dev server.
+
+    A command is rejected if any NEVER_RUN entry occurs anywhere inside it;
+    otherwise it is accepted only if it begins with one of SAFE_PREFIXES.
+    """
+    if any(blocked in cmd for blocked in NEVER_RUN):
+        return False
+    # str.startswith accepts a tuple of alternatives.
+    return cmd.startswith(tuple(SAFE_PREFIXES))
+
+
+def test_command(mcr, cmd):
+    """Run one command over RCON and classify the reply.
+
+    Returns a ``(success, response)`` pair. The command counts as failed when
+    the lower-cased reply contains one of the known error phrases, or when
+    anything raises while sending/inspecting it (the exception text is then
+    returned as the response).
+    """
+    error_markers = (
+        "unknown command", "incorrect argument", "expected",
+        "invalid", "no entity was found", "unknown or incomplete",
+    )
+    try:
+        reply = mcr.command(cmd)
+        lowered = reply.lower()
+        looks_like_error = False
+        for marker in error_markers:
+            if marker in lowered:
+                looks_like_error = True
+                break
+    except Exception as exc:
+        return False, str(exc)
+    return (not looks_like_error), reply
+
+
+def _load_unique_commands(path):
+    """Read one JSONL training file and return its de-duplicated commands.
+
+    Blank lines and lines that are not valid JSON are skipped. Two commands
+    count as duplicates when they are equal after every training player name
+    is normalised to ``@p``; the first occurrence is kept.
+
+    Returns:
+        List of ``(line_number, command)`` tuples in file order, where
+        ``line_number`` is the 1-based line of the record in the file.
+    """
+    # Built once per file instead of once per command.
+    name_pattern = re.compile(
+        r'(?:' + '|'.join(map(re.escape, TRAINING_PLAYERS)) + r')'
+    )
+    seen = set()
+    unique_cmds = []
+    # Explicit UTF-8: the data contains non-ASCII text and must not depend on
+    # the platform's default encoding.
+    with open(path, encoding="utf-8") as f:
+        for line_num, line in enumerate(f, 1):
+            if not line.strip():
+                continue
+            try:
+                rec = json.loads(line)
+            except json.JSONDecodeError:
+                continue
+
+            for cmd in extract_commands_from_record(rec):
+                # Normalize for dedup
+                norm = name_pattern.sub('@p', cmd)
+                if norm not in seen:
+                    seen.add(norm)
+                    unique_cmds.append((line_num, cmd))
+    return unique_cmds
+
+
+def main():
+    """Command-line entry point: RCON-test the commands found in training files.
+
+    For each requested file (``--files`` or DEFAULT_FILES, relative to ROOT)
+    the unique commands are collected, capped at ``--max-per-file``, rewritten
+    to target an online player, filtered through is_safe(), and sent to the
+    server. Per-file and overall pass/fail/skip counts are printed, followed
+    by a listing of every failure.
+    """
+    parser = argparse.ArgumentParser(description="RCON-test training data commands")
+    parser.add_argument("--files", nargs="*", help="Specific files to test")
+    parser.add_argument("--fix", action="store_true", help="Fix bad commands in-place")
+    parser.add_argument("--max-per-file", type=int, default=50, help="Max commands to test per file")
+    parser.add_argument("--verbose", "-v", action="store_true")
+    args = parser.parse_args()
+
+    files = args.files or DEFAULT_FILES
+
+    if args.fix:
+        # The flag is parsed but nothing below rewrites files; say so rather
+        # than letting the user assume their data was repaired.
+        print("NOTE: --fix is not implemented; no files will be modified.")
+
+    print("Connecting to dev RCON...")
+    with MCRcon(RCON_HOST, RCON_PASS, port=RCON_PORT) as mcr:
+        online = get_online_players(mcr)
+        print(f"Online players: {online}")
+
+        if not online:
+            print("WARNING: No players online. Player-targeted commands will fail.")
+
+        total_tested = 0
+        total_passed = 0
+        total_failed = 0
+        total_skipped = 0
+        failures_by_file = {}
+
+        for filepath in files:
+            path = ROOT / filepath
+            if not path.exists():
+                print(f"\n  SKIP (not found): {filepath}")
+                continue
+
+            # Deduplicate and limit
+            unique_cmds = _load_unique_commands(path)
+            test_cmds = unique_cmds[:args.max_per_file]
+
+            file_pass = 0
+            file_fail = 0
+            file_skip = 0
+            file_failures = []
+
+            for line_num, original_cmd in test_cmds:
+                cmd, _was_subbed = substitute_player(original_cmd, online)
+
+                if not is_safe(cmd):
+                    file_skip += 1
+                    total_skipped += 1
+                    if args.verbose:
+                        print(f"    SKIP (unsafe): {cmd[:80]}")
+                    continue
+
+                ok, resp = test_command(mcr, cmd)
+                total_tested += 1
+
+                if ok:
+                    file_pass += 1
+                    total_passed += 1
+                    if args.verbose:
+                        print(f"    PASS: {cmd[:60]} → {resp[:40]}")
+                else:
+                    file_fail += 1
+                    total_failed += 1
+                    # Keep both the original and the substituted command so
+                    # the report can show the text as it appears in the file.
+                    file_failures.append((line_num, original_cmd, cmd, resp))
+                    if args.verbose:
+                        print(f"    FAIL: {cmd[:60]} → {resp[:60]}")
+
+            failures_by_file[filepath] = file_failures
+
+            status = "✓" if file_fail == 0 else "✗"
+            print(f"\n  {status} {Path(filepath).name}: {file_pass} pass, {file_fail} fail, {file_skip} skip (of {len(unique_cmds)} unique commands)")
+
+            # In verbose mode each failure was already printed as it happened.
+            if file_failures and not args.verbose:
+                for ln, orig, tested, resp in file_failures[:5]:
+                    print(f"    L{ln}: {orig[:60]}")
+                    print(f"         → {resp[:80]}")
+                if len(file_failures) > 5:
+                    print(f"    ... and {len(file_failures) - 5} more failures")
+
+        print(f"\n{'='*60}")
+        print(f"TOTAL: {total_tested} tested, {total_passed} passed, {total_failed} failed, {total_skipped} skipped")
+        if total_tested > 0:
+            print(f"Pass rate: {total_passed/total_tested*100:.1f}%")
+
+        # Summary of all failures
+        if total_failed > 0:
+            print("\nAll failures by file:")
+            for filepath, failures in failures_by_file.items():
+                if failures:
+                    print(f"\n  {Path(filepath).name} ({len(failures)} failures):")
+                    for ln, orig, tested, resp in failures:
+                        print(f"    L{ln}: {orig[:70]}")
+                        print(f"      RCON: {resp[:80]}")
+
+
+if __name__ == "__main__":
+    main()