0.6.0 training session: Oracle Bot, RL combat, Mind's Eye, multilingual pipeline

Major changes from this session:

Training:
- 0.6.0 training running: 9B on steel141 3090 Ti, 27B on rented H100 NVL
- 7,256 merged training examples (up from 3,183)
- New training data: failure modes (85), midloop messaging (27),
  prompt injection defense (29), personality (32), gold from quarantine
  bank (232), new tool examples (30), Claude's own experience (10)
- All training data RCON-validated at 100% pass rate
- Bake-off: gemma3:27b 66%, qwen3.5:27b 61%, translategemma:27b 56%

Oracle Bot (Mind's Eye):
- Invisible spectator bot (mineflayer) streams world state via WebSocket
- HTML5 Canvas frontend at mind.mortdec.ai
- Real-time tool trace visualization with expandable entries
- Streaming model tokens during inference
- Gateway integration: fire-and-forget POST /trace on every tool call

Reinforcement Learning:
- Gymnasium environment wrapping mineflayer bot (minecraft_env.py)
- PPO training via Stable Baselines3 (10K param policy network)
- Behavioral cloning pretraining (97.5% accuracy on expert policy)
- Infinite training loop with auto-restart and checkpoint resume
- Bot learns combat, survival, navigation from raw experience

Bot Army:
- 8-soldier marching formation with autonomous combat
- Combat bots using mineflayer-pvp, pathfinder, armor-manager
- Multilingual prayer bots via translategemma:27b (18 languages)
- Frame-based AI architecture: LLM planner + reactive micro-scripts

Infrastructure:
- Fixed mattpc.sethpc.xyz billing gateway (API key + player list parser)
- Billing gateway now tracks all LAN traffic (LAN auto-auth)
- Gateway fallback for empty god-mode responses
- Updated mortdec.ai landing page

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Seth
2026-03-22 20:22:50 -04:00
parent baab24f8b1
commit 5b28002001
44 changed files with 20873 additions and 4352 deletions
+262
View File
@@ -0,0 +1,262 @@
#!/usr/bin/env python3
"""
rcon_test_training.py — Test training data commands against live dev RCON.
Extracts all commands from specified training files, replaces placeholder
player names with actual online players, and tests each via RCON.
Usage:
python3 training/scripts/rcon_test_training.py
python3 training/scripts/rcon_test_training.py --files data/raw/failure_mode_training.jsonl
python3 training/scripts/rcon_test_training.py --fix # Flag is accepted, but in-place fixing is not implemented yet
"""
import argparse
import json
import re
import sys
from pathlib import Path
from mcrcon import MCRcon
# Directory three levels above this file; every entry in DEFAULT_FILES (and
# every --files argument) is joined onto it in main().
ROOT = Path(__file__).resolve().parent.parent.parent
# Training files tested when --files is not given (paths relative to ROOT)
DEFAULT_FILES = [
    "data/raw/failure_mode_training.jsonl",
    "data/raw/midloop_messaging_training.jsonl",
    "data/raw/prompt_injection_defense_training.jsonl",
    "data/raw/personality_training.jsonl",
    "data/raw/gold_from_bank_training.jsonl",
    "data/raw/new_tool_training.jsonl",
    "data/processed/filtered_audit.jsonl",
]
# Connection settings for the dev server's RCON endpoint.
# NOTE(review): host and password are hard-coded; consider reading them from
# the environment so the credential does not live in version control.
RCON_HOST = "192.168.0.244"
RCON_PORT = 25578
RCON_PASS = "REDACTED_RCON"
# Placeholder player names used in training data; substitute_player() swaps
# them for a player who is actually online before a command is sent.
TRAINING_PLAYERS = {
    "slingshooter08", "SwiftWolf", "DarkWolf", "BraveWolf", "WildWolf",
    "StoneWolf", "CraftMaster99", "EndermanSlayer", "DiamondKing",
    "RedstoneWiz", "NetherWalker", "FrostByte", "PrayBot_0", "PrayBot_1",
    "PrayBot_2", "xX_HackerZ_Xx", "TotallyAdmin",
}
# Allow-list: is_safe() only accepts commands that START with one of these
# prefixes (commands considered harmless to run on dev).
SAFE_PREFIXES = [
    "give ", "effect ", "time set", "weather ", "gamemode ",
    "gamerule ", "difficulty ", "tp ",
]
# Deny-list: is_safe() rejects a command if any of these fragments occurs
# ANYWHERE in it (plain substring match), even on dev. Because the match is a
# substring test, short entries such as "op " and "stop" also hit longer words
# that contain them; that errs on the side of skipping.
NEVER_RUN = [
    "kill @a", "kill @e[type=minecraft:player",
    "ban ", "deop ", "op ", "stop", "kick ",
    "fill ", "setblock ", # Might alter world
    "worldborder ",
]
def get_online_players(mcr):
    """Return the names of the players currently online on the dev server.

    Sends ``list`` over RCON and scrapes word tokens out of the reply. The
    reply format differs between servers (vanilla prints
    "There are N of a max of M players online: A, B"; permission plugins may
    print "§6default§r: A, B" style group lines), so rather than parsing one
    fixed layout this drops everything that cannot be a player name:

    * tokens of one or two characters (stray colour-code letters, small counts),
    * purely numeric tokens (player counts such as a max of 100),
    * the known header / group-label words.

    Args:
        mcr: Connected RCON client exposing ``command(str) -> str``.

    Returns:
        List of unique names in the order the server reported them, so that
        callers picking the first entry get a stable result between runs.
    """
    # Header words and group labels that appear in `list` output but are not
    # player names. "max" covers the vanilla "of a max of" wording.
    noise_words = {
        "out", "of", "max", "maximum", "players", "online",
        "There", "are", "builder", "default",
    }
    resp = mcr.command("list")
    players = []
    # The optional prefix swallows "§x" formatting codes glued to a word.
    for name in re.findall(r'(?:§[0-9a-fk-or])*(\w+)', resp):
        if len(name) <= 2 or name.isdigit() or name in noise_words:
            continue
        players.append(name)
    # dict.fromkeys de-duplicates while preserving first-seen order.
    return list(dict.fromkeys(players))
def extract_commands_from_record(rec):
    """Extract every command string from one training record.

    Only ``assistant`` messages are inspected. Two sources are read from each
    message's ``content``:

    1. ``"command": "..."`` pairs found by regex (tool-call blocks). Commands
       starting with ``tellraw`` are skipped because their nested JSON cannot
       be captured by the simple quoted-string pattern.
    2. A top-level ``"commands"`` list, when the whole content parses as a
       JSON object; non-string entries are ignored.

    Args:
        rec: One decoded JSONL record; anything that is not a dict with a
            list of messages yields an empty result.

    Returns:
        List of command strings in the order they were found.
    """
    commands = []
    if not isinstance(rec, dict):
        return commands
    messages = rec.get("messages")
    if not isinstance(messages, (list, tuple)):
        return commands
    for msg in messages:
        if not isinstance(msg, dict) or msg.get("role") != "assistant":
            continue
        content = msg.get("content")
        # Assistant turns may carry null or structured (non-string) content;
        # neither can be regex-scanned or JSON-decoded as text.
        if not isinstance(content, str):
            continue
        # From tool_call blocks with rcon.execute
        for m in re.finditer(r'"command"\s*:\s*"([^"]+)"', content):
            cmd = m.group(1)
            if not cmd.startswith("tellraw"):  # tellraw has nested JSON
                commands.append(cmd)
        # From JSON response commands arrays
        try:
            parsed = json.loads(content)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, dict):
            listed = parsed.get("commands")
            # Require a real list: iterating a bare string would otherwise
            # add every single character as a "command".
            if isinstance(listed, list):
                commands.extend(c for c in listed if isinstance(c, str))
    return commands
def substitute_player(cmd, online_players, training_players=None):
    """Point a training command at a player who is actually online.

    Every ``@p`` selector and every placeholder player name in ``cmd`` is
    replaced with the first entry of ``online_players``.

    Args:
        cmd: Command text taken from the training data.
        online_players: Names currently online; the first one is the target.
            When empty, the command is returned untouched.
        training_players: Optional iterable of placeholder names to replace.
            Defaults to the module-level ``TRAINING_PLAYERS``.

    Returns:
        ``(command, substituted)`` where ``substituted`` is True when at
        least one ``@p`` or placeholder name was replaced.
    """
    if not online_players:
        return cmd, False
    target = online_players[0]  # Use first online player
    names = TRAINING_PLAYERS if training_players is None else training_players

    # Replace @p with actual player (more reliable for RCON testing)
    substituted = "@p" in cmd
    cmd = cmd.replace("@p", target)

    if names:
        # One regex pass replaces ALL placeholder names at once. A single pass
        # never rescans the inserted target text (which may itself contain a
        # placeholder name), and the \w guards keep a name from matching
        # inside a longer identifier. Longest-first ordering is deterministic
        # and prefers the most specific name.
        ordered = sorted(names, key=lambda n: (-len(n), n))
        pattern = r'(?<!\w)(?:' + '|'.join(re.escape(n) for n in ordered) + r')(?!\w)'
        # Callable replacement: the target is inserted literally, never
        # interpreted as a regex replacement template.
        cmd, hits = re.subn(pattern, lambda _m: target, cmd)
        substituted = substituted or hits > 0
    return cmd, substituted
def is_safe(cmd):
    """Decide whether a command may be sent to the dev server.

    A command is accepted only when it contains none of the NEVER_RUN
    fragments anywhere in its text and it begins with one of the
    SAFE_PREFIXES. The deny-list is consulted first.
    """
    if any(blocked in cmd for blocked in NEVER_RUN):
        return False
    for prefix in SAFE_PREFIXES:
        if cmd.startswith(prefix):
            return True
    return False
def test_command(mcr, cmd):
    """Send one command over RCON and classify the server's reply.

    Returns a ``(success, response)`` pair. The reply counts as a failure
    when, compared case-insensitively, it contains any known error phrase.
    Any exception raised while talking to the server is reported as a
    failure whose response is the exception text.
    """
    error_markers = (
        "unknown command", "incorrect argument", "expected",
        "invalid", "no entity was found", "unknown or incomplete",
    )
    try:
        resp = mcr.command(cmd)
        lowered = resp.lower()
        for marker in error_markers:
            if marker in lowered:
                return False, resp
    except Exception as e:
        return False, str(e)
    return True, resp
def _collect_unique_commands(path):
    """Read a JSONL training file and return its unique (line_number, command) pairs.

    Line numbers are 1-based so they match what an editor shows. Two commands
    count as duplicates when they differ only in which placeholder player
    name they mention. Blank lines and lines that are not valid JSON are
    skipped on purpose (best-effort scan).
    """
    # Built once per file; names are escaped so they are always matched literally.
    name_pattern = re.compile(
        r'(?:' + '|'.join(re.escape(name) for name in TRAINING_PLAYERS) + r')'
    )
    seen = set()
    unique_cmds = []
    # Training data is multilingual, so do not rely on the platform default encoding.
    with open(path, encoding="utf-8") as f:
        for line_num, line in enumerate(f, 1):
            if not line.strip():
                continue
            try:
                rec = json.loads(line)
            except json.JSONDecodeError:
                continue
            for cmd in extract_commands_from_record(rec):
                # Normalize for dedup
                norm = name_pattern.sub('@p', cmd)
                if norm not in seen:
                    seen.add(norm)
                    unique_cmds.append((line_num, cmd))
    return unique_cmds


def _test_file_commands(mcr, commands, online, verbose):
    """Run each (line_number, command) pair; return (passed, skipped, failures).

    ``failures`` holds ``(line_number, original_cmd, tested_cmd, response)``
    tuples. Commands rejected by is_safe() are counted as skipped and never sent.
    """
    passed = 0
    skipped = 0
    failures = []
    for line_num, original_cmd in commands:
        cmd, _was_subbed = substitute_player(original_cmd, online)
        if not is_safe(cmd):
            skipped += 1
            if verbose:
                print(f" SKIP (unsafe): {cmd[:80]}")
            continue
        ok, resp = test_command(mcr, cmd)
        if ok:
            passed += 1
            if verbose:
                print(f" PASS: {cmd[:60]} -> {resp[:40]}")
        else:
            failures.append((line_num, original_cmd, cmd, resp))
            if verbose:
                print(f" FAIL: {cmd[:60]} -> {resp[:60]}")
    return passed, skipped, failures


def _print_summary(total_passed, total_failed, total_skipped, failures_by_file):
    """Print the overall totals, the pass rate and every failure grouped by file."""
    total_tested = total_passed + total_failed
    print(f"\n{'='*60}")
    print(f"TOTAL: {total_tested} tested, {total_passed} passed, {total_failed} failed, {total_skipped} skipped")
    if total_tested > 0:
        print(f"Pass rate: {total_passed/total_tested*100:.1f}%")
    # Summary of all failures
    if total_failed > 0:
        print("\nAll failures by file:")
        for filepath, failures in failures_by_file.items():
            if failures:
                print(f"\n {Path(filepath).name} ({len(failures)} failures):")
                for ln, orig, _tested, resp in failures:
                    print(f" L{ln}: {orig[:70]}")
                    print(f" RCON: {resp[:80]}")


def main():
    """Command-line entry point: RCON-test the commands found in training files.

    Connects to the dev server, then for each file extracts the unique
    commands, tests up to --max-per-file of them (after pointing them at an
    online player and filtering through is_safe()) and prints per-file and
    overall results.
    """
    parser = argparse.ArgumentParser(description="RCON-test training data commands")
    parser.add_argument("--files", nargs="*", help="Specific files to test")
    parser.add_argument("--fix", action="store_true",
                        help="Fix bad commands in-place (not implemented yet; currently ignored)")
    parser.add_argument("--max-per-file", type=int, default=50, help="Max commands to test per file")
    parser.add_argument("--verbose", "-v", action="store_true")
    args = parser.parse_args()
    files = args.files or DEFAULT_FILES

    if args.fix:
        # The flag is kept for command-line compatibility, but nothing in
        # this script rewrites files; say so instead of silently ignoring it.
        print("NOTE: --fix is not implemented; running in report-only mode.")

    total_passed = 0
    total_failed = 0
    total_skipped = 0
    failures_by_file = {}

    print("Connecting to dev RCON...")
    with MCRcon(RCON_HOST, RCON_PASS, port=RCON_PORT) as mcr:
        online = get_online_players(mcr)
        print(f"Online players: {online}")
        if not online:
            print("WARNING: No players online. Player-targeted commands will fail.")

        for filepath in files:
            path = ROOT / filepath
            if not path.exists():
                print(f"\n SKIP (not found): {filepath}")
                continue

            # Deduplicate and limit
            unique_cmds = _collect_unique_commands(path)
            test_cmds = unique_cmds[:args.max_per_file]

            file_pass, file_skip, file_failures = _test_file_commands(
                mcr, test_cmds, online, args.verbose
            )
            file_fail = len(file_failures)
            total_passed += file_pass
            total_failed += file_fail
            total_skipped += file_skip
            failures_by_file[filepath] = file_failures

            status = "OK" if file_fail == 0 else "FAIL"
            print(f"\n {status} {Path(filepath).name}: {file_pass} pass, {file_fail} fail, {file_skip} skip (of {len(unique_cmds)} unique commands)")
            if file_failures and not args.verbose:
                # Verbose mode already printed every failure as it happened.
                for ln, orig, _tested, resp in file_failures[:5]:
                    print(f" L{ln}: {orig[:60]}")
                    print(f" RCON: {resp[:80]}")
                if len(file_failures) > 5:
                    print(f" ... and {len(file_failures) - 5} more failures")

    _print_summary(total_passed, total_failed, total_skipped, failures_by_file)


if __name__ == "__main__":
    main()