#!/usr/bin/env python3 """ rcon_test_training.py — Test training data commands against live dev RCON. Extracts all commands from specified training files, replaces placeholder player names with actual online players, and tests each via RCON. Usage: python3 training/scripts/rcon_test_training.py python3 training/scripts/rcon_test_training.py --files data/raw/failure_mode_training.jsonl python3 training/scripts/rcon_test_training.py --fix # Fix bad commands in-place """ import argparse import json import re import sys from pathlib import Path from mcrcon import MCRcon ROOT = Path(__file__).resolve().parent.parent.parent # New training files to test DEFAULT_FILES = [ "data/raw/failure_mode_training.jsonl", "data/raw/midloop_messaging_training.jsonl", "data/raw/prompt_injection_defense_training.jsonl", "data/raw/personality_training.jsonl", "data/raw/gold_from_bank_training.jsonl", "data/raw/new_tool_training.jsonl", "data/processed/filtered_audit.jsonl", ] RCON_HOST = "192.168.0.244" RCON_PORT = 25578 RCON_PASS = "REDACTED_RCON" # Player names used in training data that need substitution TRAINING_PLAYERS = { "slingshooter08", "SwiftWolf", "DarkWolf", "BraveWolf", "WildWolf", "StoneWolf", "CraftMaster99", "EndermanSlayer", "DiamondKing", "RedstoneWiz", "NetherWalker", "FrostByte", "PrayBot_0", "PrayBot_1", "PrayBot_2", "xX_HackerZ_Xx", "TotallyAdmin", } # Commands that are safe to test (won't cause damage) SAFE_PREFIXES = [ "give ", "effect ", "time set", "weather ", "gamemode ", "gamerule ", "difficulty ", "tp ", ] # Commands to NEVER run even on dev NEVER_RUN = [ "kill @a", "kill @e[type=minecraft:player", "ban ", "deop ", "op ", "stop", "kick ", "fill ", "setblock ", # Might alter world "worldborder ", ] def get_online_players(mcr): """Get list of online players from dev server.""" resp = mcr.command("list") # Parse "§6default§r: Player1, Player2..." players = [] for part in resp.split(":"): for name in re.findall(r'(?:§[0-9a-fk-or])*(\w+)', part): if name and len(name) > 2 and name not in ("out", "of", "maximum", "players", "online", "There", "are", "builder", "default"): players.append(name) return list(set(players)) def extract_commands_from_record(rec): """Extract all commands from a training record.""" commands = [] if not isinstance(rec, dict) or "messages" not in rec: return commands for msg in rec["messages"]: if not isinstance(msg, dict): continue if msg.get("role") != "assistant": continue content = msg.get("content", "") # From tool_call blocks with rcon.execute for m in re.finditer(r'"command"\s*:\s*"([^"]+)"', content): cmd = m.group(1) if not cmd.startswith("tellraw"): # tellraw has nested JSON commands.append(cmd) # From JSON response commands arrays try: parsed = json.loads(content) if isinstance(parsed, dict) and "commands" in parsed: for cmd in parsed["commands"]: if isinstance(cmd, str): commands.append(cmd) except (json.JSONDecodeError, TypeError): pass return commands def substitute_player(cmd, online_players): """Replace training player names with actual online player.""" if not online_players: return cmd, False target = online_players[0] # Use first online player # Replace @p with actual player (more reliable for RCON testing) cmd = cmd.replace("@p", target) # Replace known training player names for training_name in TRAINING_PLAYERS: if training_name in cmd: cmd = cmd.replace(training_name, target) return cmd, True return cmd, False def is_safe(cmd): """Check if command is safe to run on dev.""" for never in NEVER_RUN: if never in cmd: return False return any(cmd.startswith(p) for p in SAFE_PREFIXES) def test_command(mcr, cmd): """Test a single command via RCON. Returns (success, response).""" try: resp = mcr.command(cmd) # Check for error indicators if any(err in resp.lower() for err in [ "unknown command", "incorrect argument", "expected", "invalid", "no entity was found", "unknown or incomplete", ]): return False, resp return True, resp except Exception as e: return False, str(e) def main(): parser = argparse.ArgumentParser(description="RCON-test training data commands") parser.add_argument("--files", nargs="*", help="Specific files to test") parser.add_argument("--fix", action="store_true", help="Fix bad commands in-place") parser.add_argument("--max-per-file", type=int, default=50, help="Max commands to test per file") parser.add_argument("--verbose", "-v", action="store_true") args = parser.parse_args() files = args.files or DEFAULT_FILES print("Connecting to dev RCON...") with MCRcon(RCON_HOST, RCON_PASS, port=RCON_PORT) as mcr: online = get_online_players(mcr) print(f"Online players: {online}") if not online: print("WARNING: No players online. Player-targeted commands will fail.") total_tested = 0 total_passed = 0 total_failed = 0 total_skipped = 0 failures_by_file = {} for filepath in files: path = ROOT / filepath if not path.exists(): print(f"\n SKIP (not found): {filepath}") continue file_commands = [] with open(path) as f: for line_num, line in enumerate(f): if not line.strip(): continue try: rec = json.loads(line) except json.JSONDecodeError: continue cmds = extract_commands_from_record(rec) for cmd in cmds: file_commands.append((line_num, cmd)) # Deduplicate and limit seen = set() unique_cmds = [] for line_num, cmd in file_commands: # Normalize for dedup norm = re.sub(r'(?:' + '|'.join(TRAINING_PLAYERS) + r')', '@p', cmd) if norm not in seen: seen.add(norm) unique_cmds.append((line_num, cmd)) test_cmds = unique_cmds[:args.max_per_file] file_pass = 0 file_fail = 0 file_skip = 0 file_failures = [] for line_num, original_cmd in test_cmds: cmd, was_subbed = substitute_player(original_cmd, online) if not is_safe(cmd): file_skip += 1 total_skipped += 1 if args.verbose: print(f" SKIP (unsafe): {cmd[:80]}") continue ok, resp = test_command(mcr, cmd) total_tested += 1 if ok: file_pass += 1 total_passed += 1 if args.verbose: print(f" PASS: {cmd[:60]} → {resp[:40]}") else: file_fail += 1 total_failed += 1 file_failures.append((line_num, original_cmd, cmd, resp)) if args.verbose: print(f" FAIL: {cmd[:60]} → {resp[:60]}") failures_by_file[filepath] = file_failures status = "✓" if file_fail == 0 else "✗" print(f"\n {status} {Path(filepath).name}: {file_pass} pass, {file_fail} fail, {file_skip} skip (of {len(unique_cmds)} unique commands)") if file_failures and not args.verbose: for ln, orig, tested, resp in file_failures[:5]: print(f" L{ln}: {orig[:60]}") print(f" → {resp[:80]}") if len(file_failures) > 5: print(f" ... and {len(file_failures) - 5} more failures") print(f"\n{'='*60}") print(f"TOTAL: {total_tested} tested, {total_passed} passed, {total_failed} failed, {total_skipped} skipped") if total_tested > 0: print(f"Pass rate: {total_passed/total_tested*100:.1f}%") # Summary of all failures if total_failed > 0: print(f"\nAll failures by file:") for filepath, failures in failures_by_file.items(): if failures: print(f"\n {Path(filepath).name} ({len(failures)} failures):") for ln, orig, tested, resp in failures: print(f" L{ln}: {orig[:70]}") print(f" RCON: {resp[:80]}") if __name__ == "__main__": main()