Files
Mortdecai 9c2c9a2310 1200+ distilled gold examples, journal system, redstone mastery, safety awareness
Distilled Training Data (1,203 examples):
- 341 initial gold (plugins, enchantments, builds, effects, god, errors)
- 165 buildings + pipeline (100 structures built on dev, 65 request→query→act)
- 24 safety-aware (worldborder, safe tp, intentional harm, gamemode checks)
- 17 advanced logic (decanonized items, redstone gates, iterative builds)
- 12 redstone mastery (NOT/OR/AND/XOR/RS-latch/T-flip-flop/comparator/clock)
- 7 circuit verification and diagnosis
- 1 compact comparator gates
- 10 redstone methodology (build→test→save→recall→learn from mistakes)
- 8 player journal usage
- 29 creative+uncommon+pipeline+god with full tool chains

Player Journal System:
- agent/tools/player_journal.py — per-player text files (1-10 lines)
- journal.read + journal.write tool schemas added
- Cross-contaminated: God and Sudo share the same journal for each player
- Includes sentiment, relationship, builds, preferences, skill level

Redstone Engineering:
- agent/prompts/redstone_rules.md — baked-in wall torch, dedicated lead, repeater rules
- Learned from 4 iterations of 8-switch circuit: wall_torch on back face, not top
- T-junction bypass prevention: dedicated lead wire between merge and NOT block
- RCON limitation: can build circuits but cannot test them (lever toggle doesn't propagate)

Training Data Cleaning:
- 466 @s→@p fixes, 10 template commands removed
- 12 outdated refusals replaced with correct plugin commands
- Data de-duped across all sources

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-21 20:50:52 -04:00

611 lines
25 KiB
Python

#!/usr/bin/env python3
"""
Tool-focused self-play — exercises all 17 tools on a live dev server.
Unlike regular self-play (which tests command generation), this script
specifically generates prompts that require tool use: script writing,
memory operations, entity scanning, wiki/plugin/changelog/paper lookups,
and chained multi-tool flows. Runs on the dev server via RCON.
The model responds, its tool calls get executed for real, and the full
interaction (prompt + tool calls + results + final response) gets logged
as training data.
Usage:
python3 tool_self_play.py --ollama-url http://192.168.0.179:11434 \\
--rcon-host 192.168.0.112 --rcon-port 25578 --rounds 30
# Load extra prompts from prayer bank
python3 tool_self_play.py --prompt-bank data/raw/prayer_prompt_bank.jsonl
# Focus on weak categories only
python3 tool_self_play.py --categories worldguard,coreprotect,luckperms
"""
import argparse
import json
import os
import random
import re
import sys
import time
from pathlib import Path
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
sys.path.insert(0, str(PROJECT_ROOT))
import requests
from agent.tools.persistent_rcon import get_rcon
# Default directory for generated JSONL examples; main() writes here when
# --output is not given.
OUTPUT_DIR = PROJECT_ROOT / "data" / "raw" / "tool_selfplay"
# Directory expected to hold manifest.json plus the per-category prompt files
# read by load_prompts() and load_manifest().
PROMPTS_DIR = PROJECT_ROOT / "training" / "prompts"
# ── Template variables for prompt expansion ────────────────────────────────
# Each key is a placeholder name: expand_template() replaces every "{key}"
# token in a prompt with a randomly chosen entry from the matching list.
TEMPLATE_VARS = {
    "player": ["slingshooter08", "Ace13245", "TheBigBoss", "xXDragonSlayerXx"],
    "target": ["Ace13245", "TheBigBoss", "xXDragonSlayerXx", "slingshooter08"],
    "region": ["my-base", "spawn-zone", "pvp-arena", "vip-lounge", "farm-area"],
    "warp": ["arena", "spawn", "shop", "nether", "farm", "end"],
    "group": ["vip", "builder", "moderator", "default"],
    "world": ["world", "world_nether", "world_the_end"],
}
def expand_template(prompt: str) -> str:
    """Fill in every ``{name}`` placeholder known to TEMPLATE_VARS.

    Placeholders are processed one variable at a time, in TEMPLATE_VARS
    order. Each occurrence is substituted separately, so a placeholder that
    appears twice may receive two different random values. Tokens whose name
    is not a TEMPLATE_VARS key are left untouched.

    Args:
        prompt: Prompt text, possibly containing ``{name}`` tokens.

    Returns:
        The prompt with all known placeholders substituted.
    """
    expanded = prompt
    for name, candidates in TEMPLATE_VARS.items():
        marker = "{%s}" % name
        while True:
            # partition() splits around the first occurrence only, which is
            # what lets each occurrence draw its own random value.
            head, found, tail = expanded.partition(marker)
            if not found:
                break
            expanded = head + random.choice(candidates) + tail
    return expanded
def load_prompts(prompts_dir: Path = PROMPTS_DIR,
                 mode_filter: str = None,
                 call_type_filter: str = None) -> dict[str, list[str]]:
    """Load prompt templates from per-category JSONL files.

    The manifest is a JSON object mapping category name -> metadata. This
    function reads the ``file`` key (required) and, when filtering, the
    ``mode`` and ``call_type`` keys of each metadata entry. Every non-blank
    line of a prompt file must be a JSON object with a ``prompt`` key.

    Args:
        prompts_dir: Directory containing manifest.json and prompt JSONL files.
        mode_filter: If set, only load categories whose ``mode`` equals this
            value or is ``"mixed"``.
        call_type_filter: If set, only load categories whose ``call_type``
            equals this value.

    Returns:
        Dict mapping category name -> list of prompt template strings.
        When manifest.json does not exist, a copy of the inline
        PROMPTS_FALLBACK is returned instead (the filters are not applied
        to the fallback).

    Raises:
        json.JSONDecodeError: If the manifest or a non-blank prompt line is
            not valid JSON.
        KeyError: If a manifest entry lacks ``file`` or a prompt record
            lacks ``prompt``.
    """
    manifest_path = prompts_dir / "manifest.json"
    if not manifest_path.exists():
        print(f" Warning: {manifest_path} not found, using inline fallback")
        # Return a copy: callers add their own categories to the result, and
        # that must not leak into the module-level fallback table.
        return {category: list(items)
                for category, items in PROMPTS_FALLBACK.items()}

    # Prompts contain non-ASCII punctuation, so decode explicitly as UTF-8
    # instead of relying on the platform's locale encoding.
    with open(manifest_path, encoding="utf-8") as f:
        manifest = json.load(f)

    prompts = {}
    for category, meta in manifest.items():
        # Apply filters
        if mode_filter and meta.get("mode") not in (mode_filter, "mixed"):
            continue
        if call_type_filter and meta.get("call_type") != call_type_filter:
            continue
        filepath = prompts_dir / meta["file"]
        if not filepath.exists():
            print(f" Warning: {filepath} not found, skipping {category}")
            continue
        with open(filepath, encoding="utf-8") as f:
            # Blank lines (typically a trailing newline) are not records and
            # would otherwise make json.loads raise.
            prompts[category] = [
                json.loads(line)["prompt"] for line in f if line.strip()
            ]

    print(f" Loaded {sum(len(v) for v in prompts.values())} prompts "
          f"from {len(prompts)} categories")
    return prompts
def load_manifest(prompts_dir: Path = PROMPTS_DIR) -> dict:
    """Load the prompt manifest with full metadata.

    Per the original note, this is used by the chat app for its template
    selection UI. The manifest is returned exactly as parsed, without any
    filtering or validation.

    Args:
        prompts_dir: Directory expected to contain manifest.json.

    Returns:
        The parsed manifest, or an empty dict when manifest.json is missing.

    Raises:
        json.JSONDecodeError: If the manifest exists but is not valid JSON.
    """
    manifest_path = prompts_dir / "manifest.json"
    try:
        # Open directly rather than checking exists() first: this avoids the
        # check-then-open race and decodes as UTF-8 regardless of locale.
        with open(manifest_path, encoding="utf-8") as f:
            return json.load(f)
    except (FileNotFoundError, NotADirectoryError):
        return {}
# ── Inline fallback (subset, used if prompt files missing) ─────────────────
# Maps category name -> list of raw prompt strings. load_prompts() returns
# this table when manifest.json cannot be found. Entries are passed through
# expand_template() before being sent to the model.
PROMPTS_FALLBACK = {
    "script_build": [
        "sudo build me a small cobblestone house with a door and windows",
        "sudo create a fighting arena with red and blue corners",
        "sudo make a nether portal room with soul lanterns",
        "sudo build a watchtower 15 blocks tall with a ladder",
        "sudo create a 9x9 wheat farm with water in the center",
        "sudo build an enchanting setup with bookshelves",
        "sudo make a mob grinder platform with water channels",
        "sudo create a trophy room with item frames",
        "sudo build a bridge 30 blocks long over this ravine",
        "sudo make a lighthouse with a glowstone top",
        "sudo create a hedge maze using oak leaves",
        "sudo build a dock with oak wood extending into the water",
        "sudo make an underground bunker with iron doors",
        "sudo create a garden with flowers and paths",
        "sudo build a market stall with a counter and signs",
    ],
    "script_schedule": [
        "sudo make cherry leaf particles fall around spawn forever",
        "sudo set up a scoreboard that tracks deaths on server load",
        "sudo make ambient campfire smoke particles at spawn every tick",
        "sudo create a function that heals everyone every 5 minutes",
        "sudo make it always rain XP orbs at spawn",
    ],
    "script_manage": [
        "sudo show me all scripts",
        "sudo what scripts are running on tick?",
        "sudo delete the test script",
        "sudo read me the arena script",
        "sudo what did I build last?",
    ],
    "memory_write": [
        "sudo remember this as my home",
        "sudo save this location as my base",
        "sudo remember my nether portal is here",
        "sudo my favorite item is a diamond pickaxe, remember that",
        "sudo save this as my farm",
        "sudo remember that Ace13245 is my friend",
        "sudo mark this spot as the village center",
        "sudo save this as my fishing spot",
    ],
    "memory_read": [
        "sudo tp me home",
        "sudo take me to my base",
        "sudo where's my nether portal?",
        "sudo what do you know about me?",
        "sudo tp me to my farm",
        "sudo where was I building?",
        "sudo do I have any saved locations?",
    ],
    "nearby_entities": [
        "sudo what mobs are near me?",
        "sudo kill all the zombies around me",
        "sudo how many animals are nearby?",
        "sudo clear hostile mobs in a 50 block radius",
        "sudo are there any creepers close to me?",
        "sudo kill the nearest skeleton",
        "sudo count everything within 30 blocks",
        "sudo protect me from nearby hostiles",
    ],
    "wiki_lookup": [
        "sudo what enchantments can go on a mace?",
        "sudo how do I craft a lodestone?",
        "sudo what food gives the best saturation?",
        "sudo what are the new 1.21 armor trim patterns?",
        "sudo what's the difference between smite and sharpness?",
        "sudo how does fortune work on ores?",
        "sudo what are all the copper variants?",
        "sudo how do trial spawners work?",
        "sudo what does the breeze drop?",
        "sudo how do you tame an armadillo?",
        "sudo what biomes have cherry blossoms?",
    ],
    "plugin_docs": [
        "sudo how do I create a WorldGuard region?",
        "sudo what flags can I set on a region?",
        "sudo how does CoreProtect rollback work?",
        "sudo what's the command for LuckPerms group inheritance?",
        "sudo how do I set up EssentialsX warps?",
        "sudo what are the WorldEdit brush commands?",
        "sudo how do I configure CoreProtect auto-purge?",
        "sudo what permissions does the builder group need for WorldEdit?",
        "sudo how do I set a WorldGuard greeting message?",
        "sudo what's the difference between /rg flag and /rg addmember?",
    ],
    "changelog_lookup": [
        "sudo what changed in 1.21?",
        "sudo what was added in the tricky trials update?",
        "sudo when were trial chambers added?",
        "sudo what's new with the mace weapon?",
        "sudo what version added the breeze mob?",
        "sudo what got nerfed in the latest update?",
    ],
    "paper_docs": [
        "sudo how do I set the view distance on Paper?",
        "sudo what Paper config controls mob spawning rates?",
        "sudo how do I enable async chunk loading?",
        "sudo what's the Paper command to reload config?",
        "sudo how do I optimize TPS on Paper?",
        "sudo what Paper settings affect redstone performance?",
    ],
    "player_info": [
        "sudo build a wall around me",
        "sudo teleport me 50 blocks up",
        "sudo place torches around me",
        "sudo create a beacon at my location",
        "sudo surround me with glass",
        "sudo set my spawn here",
        "sudo light up this cave around me",
    ],
    "server_state": [
        "sudo if it's night, make it day",
        "sudo give everyone online a golden apple",
        "sudo how many people are playing right now?",
        "sudo announce the current time and weather",
        "sudo clear the weather if it's raining",
    ],
    "chained": [
        "sudo save this location as home, then build a marker here",
        "sudo check what's near me and kill all hostiles, then give me resistance",
        "sudo look up the best sword enchantments and give me one",
        "sudo tp me home and heal me",
        "sudo build an arena and save it as a script I can rerun",
        "sudo check my health, if low heal me and give me food",
        "sudo what scripts do I have? run the arena one at my position",
        "sudo remember this spot, scan for mobs, kill hostiles, build a fort",
    ],
    # ── Plugin categories ──
    "worldguard": [
        "sudo protect this area as my base",
        "sudo make a no-pvp zone around spawn",
        "sudo prevent mob spawning in the village",
        "sudo add Ace13245 as a member of my region",
        "sudo block entry for non-members in the vault",
        "sudo allow TNT in the arena region",
        "sudo set a greeting message for my base region",
        "sudo list all protected regions",
        "sudo prevent creeper explosions globally",
        "sudo create a healing zone at spawn",
        "sudo remove the old-test region",
        "sudo make a safe zone with no fire spread",
    ],
    "coreprotect": [
        "sudo check who broke blocks near me",
        "sudo rollback griefing from the last hour",
        "sudo rollback what TheBigBoss did recently",
        "sudo who placed blocks around here today?",
        "sudo undo TNT damage from the last 2 hours",
        "sudo rollback all container theft recently",
        "sudo restore what was rolled back",
        "sudo check CoreProtect status",
        "sudo rollback fire damage near spawn",
        "sudo lookup what Ace13245 did in the last day",
    ],
    "essentialsx": [
        "sudo set my home here",
        "sudo tp me to my home",
        "sudo create a warp called arena",
        "sudo tp me to the arena warp",
        "sudo give Ace 1000 coins",
        "sudo check my balance",
        "sudo heal me",
        "sudo feed me",
        "sudo repair what I'm holding",
        "sudo set my nickname to DragonLord",
        "sudo give me god mode",
        "sudo toggle fly for me",
        "sudo broadcast a server message",
        "sudo set spawn point here",
        "sudo check when Ace was last online",
    ],
    "luckperms": [
        "sudo give me permission to fly",
        "sudo create a VIP group",
        "sudo add Ace to the VIP group",
        "sudo give VIP access to fly and heal",
        "sudo give me temporary VIP for 1 day",
        "sudo set VIP chat prefix to gold",
        "sudo create a builder group with WorldEdit",
        "sudo list all permission groups",
        "sudo check what permissions I have",
        "sudo remove TheBigBoss from VIP",
    ],
    "fawe": [
        "sudo make a glass sphere 10 blocks wide",
        "sudo hollow sphere of stone",
        "sudo cylinder of quartz 5 wide 10 tall",
        "sudo replace all stone with deepslate in my selection",
        "sudo smooth the terrain around here",
        "sudo drain all water within 20 blocks",
        "sudo build a sandstone pyramid 10 tall",
        "sudo hollow out the selected area",
        "sudo make walls around my selection with stone bricks",
        "sudo fill with a checkerboard pattern",
        "sudo stack my selection 5 times north",
        "sudo undo my last WorldEdit action",
    ],
    # Multi-plugin requests; the last four use the "pray" prefix instead of "sudo".
    "plugin_combined": [
        "sudo create a protected pvp arena with WorldEdit and WorldGuard",
        "sudo rollback Ace's griefing and revoke his builder perms",
        "sudo set up a VIP lounge — build it, protect it, make a warp",
        "sudo give TheBigBoss a reward: money, items, and temp VIP",
        "sudo prepare the server for an event: announce, set arena flags, heal everyone",
        "pray someone destroyed my house, please restore it",
        "pray protect my village from monsters",
        "pray smite TheBigBoss for griefing",
        "pray make me a temple worthy of your glory",
    ],
    # ── Direct command passthrough — teach faithful execution ──
    "direct_passthrough": [
        # WorldGuard — exact commands
        'sudo run this exactly: rg define test-region',
        'sudo run this exactly: rg flag test-region pvp deny',
        'sudo run this exactly: rg flag test-region mob-spawning deny',
        'sudo run this exactly: rg addmember test-region Ace13245',
        'sudo run this exactly: rg removemember test-region Ace13245',
        'sudo run this exactly: rg flag test-region greeting Welcome to the zone!',
        'sudo run this exactly: rg flag test-region entry -g nonmembers deny',
        'sudo run this exactly: rg list',
        'sudo run this exactly: rg info test-region',
        'sudo run this exactly: rg remove test-region',
        # CoreProtect — exact commands
        'sudo run this exactly: co status',
        'sudo run this exactly: co lookup u:Ace13245 t:1h',
        'sudo run this exactly: co lookup u:Ace13245 t:1h a:block',
        'sudo run this exactly: co rollback u:Ace13245 t:1h r:20',
        'sudo run this exactly: co restore u:Ace13245 t:1h r:20',
        'sudo run this exactly: co inspect',
        'sudo run this exactly: co lookup t:30m r:10 a:container',
        # LuckPerms — exact commands
        'sudo run this exactly: lp creategroup vip',
        'sudo run this exactly: lp group vip permission set essentials.fly true',
        'sudo run this exactly: lp group vip permission set essentials.heal true',
        'sudo run this exactly: lp user Ace13245 parent add vip',
        'sudo run this exactly: lp user Ace13245 parent remove vip',
        'sudo run this exactly: lp user Ace13245 info',
        'sudo run this exactly: lp group vip info',
        'sudo run this exactly: lp listgroups',
        'sudo run this exactly: lp group vip meta setprefix "&6[VIP] "',
        'sudo run this exactly: lp deletegroup vip',
        # EssentialsX — exact commands
        'sudo run this exactly: heal Ace13245',
        'sudo run this exactly: feed Ace13245',
        'sudo run this exactly: eco give Ace13245 1000',
        'sudo run this exactly: eco take Ace13245 500',
        'sudo run this exactly: bal Ace13245',
        'sudo run this exactly: broadcast Welcome to the server!',
        'sudo run this exactly: setwarp arena',
        'sudo run this exactly: warp arena',
        'sudo run this exactly: delwarp arena',
        'sudo run this exactly: nick Ace13245 DragonLord',
        # FAWE — exact commands
        'sudo run this exactly: /worldedit version',
    ],
    # ── Correction examples — model should fix wrong syntax ──
    # Each prompt is deliberately malformed; the trailing comment names the flaw.
    "direct_correction": [
        'sudo gamemode slingshooter08 creative',  # wrong arg order
        'sudo give slingshooter08 minecraft:bed 1',  # should be white_bed
        'sudo effect slingshooter08 night_vision',  # missing give and duration
        'sudo weather thunderstorm',  # should be thunder
        'sudo give slingshooter08 minecraft:diamond_pickaxe[sharpness:5] 1',  # wrong enchant syntax
        'sudo tp 100 64 100',  # missing player
        'sudo kill zombie 50',  # wrong kill syntax
        'sudo enchant slingshooter08 sharpness 10',  # max is 5
        'sudo effect give slingshooter08 minecraft:haste 99999',  # duration too long
        'sudo fill 0 0 0 100 100 100 diamond_block',  # too large, missing namespace
        'sudo rg define',  # missing region name
        'sudo co rollback Ace13245 1h',  # missing u: and t: prefixes
        'sudo lp addgroup vip Ace13245',  # wrong syntax (should be lp user X parent add Y)
    ],
}
# Player names main() samples from when choosing who "speaks" in a round.
PLAYERS = TEMPLATE_VARS["player"]
def query_model(prompt, player, ollama_url, model, rcon):
    """Send one prompt to the model via Ollama's chat endpoint.

    Args:
        prompt: The (already expanded) player request.
        player: Player name; the user message is ``"Player {player}: {prompt}"``.
        ollama_url: Base URL of the Ollama server (no trailing ``/api/...``).
        model: Model tag to query.
        rcon: Unused here; kept so existing callers keep working.

    Returns:
        The model's reply parsed as a dict on success. On any failure
        (network/HTTP error, unexpected response shape, invalid JSON, or a
        JSON reply that is not an object) a dict with an ``"error"`` message
        and the ``"raw"`` reply text (empty if none was obtained).
    """
    system = (
        "You are a Minecraft 1.21 command translator for a Paper server.\n"
        "Plugins: FastAsyncWorldEdit, WorldGuard, CoreProtect, EssentialsX, Vault, LuckPerms.\n"
        "Tools: rcon.execute, minecraft.wiki_lookup, plugin.docs_lookup, "
        "minecraft.changelog_lookup, paper.docs_lookup, world.player_info, "
        "world.server_state, world.nearby_entities, memory.read, memory.write, "
        "script.write, script.validate, script.execute, script.read, script.list, "
        "script.delete, script.schedule.\n\n"
        "Plugin commands: //set, //sphere, //cyl (FAWE), /rg define/flag (WorldGuard), "
        "/co rollback/inspect (CoreProtect), /home, /warp, /eco (EssentialsX), "
        "/lp user/group (LuckPerms).\n\n"
        "For complex builds (4+ commands), write a mcfunction script. "
        "For simple tasks, use rcon.execute directly.\n\n"
        "Return JSON: {\"commands\": [...], \"reasoning\": \"...\", \"message\": \"...\"}\n"
        "Use /no_think mode."
    )
    # Initialised up front so the error path can always report what was
    # received so far, without introspecting local names.
    content = ""
    try:
        r = requests.post(f"{ollama_url}/api/chat", json={
            "model": model,
            "messages": [
                {"role": "system", "content": "/no_think\n" + system},
                {"role": "user", "content": f"Player {player}: {prompt}"},
            ],
            "stream": False, "format": "json",
            "options": {"temperature": 0.85, "num_predict": 800},
        }, timeout=120)
        # Surface HTTP failures as such instead of a KeyError on "message".
        r.raise_for_status()
        reply = r.json()["message"]["content"]
        # Drop any <think>...</think> section emitted despite /no_think.
        content = re.sub(r'<think>[\s\S]*?</think>\s*', '', reply)
        parsed = json.loads(content)
    except Exception as e:
        # Deliberately broad: one bad round must not abort a long run.
        return {"error": str(e), "raw": content}
    if not isinstance(parsed, dict):
        # Callers treat the result as a mapping; a bare list/string/number
        # is a malformed reply, not a usable answer.
        return {
            "error": f"model returned {type(parsed).__name__}, expected a JSON object",
            "raw": content,
        }
    return parsed
def validate_commands(commands, rcon):
    """Execute commands over RCON and record the outcome of each.

    Only the first 12 entries of *commands* are considered; entries that are
    not strings, or are blank, are skipped without being recorded.

    Args:
        commands: Sequence of command strings.
        rcon: Object exposing ``command(str)`` that returns the server reply.

    Returns:
        A list of dicts with keys ``cmd`` (the command), ``result`` (the
        reply cut to 200 characters, or the exception text) and ``ok``
        (False if the reply contains a known error marker or the call
        raised).
    """
    error_markers = ("<--[HERE]", "Unknown", "Incorrect", "Expected")
    outcomes = []
    for command in commands[:12]:
        if not (isinstance(command, str) and command.strip()):
            continue
        try:
            reply = rcon.command(command)
            failed = False
            for marker in error_markers:
                if marker in reply:
                    failed = True
                    break
            record = {"cmd": command, "result": reply[:200], "ok": not failed}
        except Exception as exc:
            record = {"cmd": command, "result": str(exc), "ok": False}
        outcomes.append(record)
    return outcomes
def run_round(category, ollama_url, model, rcon, player, prompts):
    """Run one self-play round for a specific tool category.

    Picks a random prompt from the category, expands its placeholders,
    queries the model, runs the returned commands over RCON and packages
    everything as one training example.

    Args:
        category: Key into *prompts*.
        ollama_url: Base URL of the Ollama server.
        model: Model tag to query.
        rcon: RCON client passed through to command validation.
        player: Name of the player issuing the request.
        prompts: Mapping of category name -> list of prompt templates.

    Returns:
        The training-example dict, or None when the category has no prompts
        or the model call failed or returned something unusable.
    """
    candidates = prompts.get(category)
    if not candidates:
        # Unknown or empty category: report and skip rather than abort the run.
        print(f" [{category:18s}] no prompts available, skipping")
        return None
    raw_prompt = random.choice(candidates)
    prompt = expand_template(raw_prompt)
    print(f" [{category:18s}] {prompt[:60]}")
    start = time.time()
    response = query_model(prompt, player, ollama_url, model, rcon)
    elapsed = time.time() - start
    if not isinstance(response, dict):
        print(f" ERROR: model returned {type(response).__name__}, expected a JSON object")
        return None
    if "error" in response:
        # str() because the model may emit a non-string "error" value.
        print(f" ERROR: {str(response['error'])[:80]}")
        return None
    commands = response.get("commands", [])
    message = response.get("message", "")
    reasoning = response.get("reasoning", "")
    # Validate commands via RCON. Require a real list: a bare string would
    # otherwise pass the per-item check and be executed one character at a time.
    runnable = (
        isinstance(commands, list)
        and bool(commands)
        and all(isinstance(c, str) for c in commands)
    )
    rcon_results = validate_commands(commands, rcon) if runnable else []
    # A round only counts as a success if something actually ran;
    # all([]) would otherwise report success for zero executed commands.
    success = bool(rcon_results) and all(r["ok"] for r in rcon_results)
    ok_count = sum(1 for r in rcon_results if r["ok"])
    fail_count = sum(1 for r in rcon_results if not r["ok"])
    status = "OK" if success else f"PARTIAL ({ok_count}/{ok_count+fail_count})" if ok_count > 0 else "FAIL"
    # "commands" may be null or another non-list value in a malformed reply.
    command_count = len(commands) if isinstance(commands, list) else 0
    print(f"{command_count} cmds, {status}, {elapsed:.1f}s")
    # Build training example
    example = {
        "id": f"tool-selfplay-{int(time.time())}-{random.randint(0,9999):04d}",
        "source": "tool_self_play",
        "category": category,
        "input": {
            "user_message": prompt,
            "server_context": {
                "server_type": "paper",
                "version": "1.21.x",
                "online_players": [player],
            },
        },
        "output": {
            "commands": commands,
            "message": message,
            "reasoning": reasoning,
        },
        "metadata": {
            "rcon_results": rcon_results,
            "all_success": success,
            "elapsed_seconds": round(elapsed, 2),
            "model": model,
            "tool_category": category,
        },
    }
    return example
def main():
    """Command-line entry point: run self-play rounds and append results to a JSONL file.

    For each of ``--rounds`` rounds, every selected category is run once in
    shuffled order. Each resulting example is appended to the output file
    immediately, so an interrupted run keeps what it has produced so far.
    """
    parser = argparse.ArgumentParser(description="Tool-focused self-play")
    parser.add_argument("--ollama-url", default="http://192.168.0.179:11434")
    parser.add_argument("--model", default="mortdecai:0.5.0")
    parser.add_argument("--rcon-host", default="192.168.0.244")
    parser.add_argument("--rcon-port", type=int, default=25578)
    # NOTE(review): the password default is hard-coded in the script; consider
    # sourcing it from the environment instead.
    parser.add_argument("--rcon-pass", default="REDACTED_RCON")
    parser.add_argument("--rounds", type=int, default=30, help="Rounds per category")
    parser.add_argument("--categories", default="all", help="Comma-separated categories or 'all'")
    parser.add_argument("--prompt-bank", default="", help="JSONL file with extra prompts to mix in")
    parser.add_argument("--output", default="")
    args = parser.parse_args()

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    output_path = args.output or str(OUTPUT_DIR / f"tool_selfplay_{int(time.time())}.jsonl")
    # A custom --output may point into a directory that does not exist yet.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    rcon = get_rcon(args.rcon_host, args.rcon_port, args.rcon_pass)

    # Load prompts from template files (falls back to inline). Work on a
    # copy so adding "prompt_bank" below never mutates a shared table.
    prompts = dict(load_prompts(PROMPTS_DIR))

    # Load extra prompts from prompt bank (quarantine salvage, etc.)
    if args.prompt_bank:
        bank_path = Path(args.prompt_bank)
        if bank_path.exists():
            with open(bank_path, encoding="utf-8") as f:
                # Skip blank lines; they are not JSON records.
                bank_prompts = [json.loads(line)["prompt"] for line in f if line.strip()]
            if bank_prompts:
                prompts["prompt_bank"] = bank_prompts
            print(f" Loaded {len(bank_prompts)} prompts from {bank_path}")
        else:
            print(f" Warning: prompt bank {bank_path} not found, ignoring")

    if args.categories == "all":
        requested = list(prompts)
    else:
        requested = [c.strip() for c in args.categories.split(",") if c.strip()]
    # Only categories that have at least one prompt can be sampled from.
    categories = [c for c in requested if prompts.get(c)]
    skipped = [c for c in requested if not prompts.get(c)]
    if skipped:
        print(f" Warning: skipping categories with no prompts: {', '.join(skipped)}")
    if not categories:
        sys.exit("No runnable categories; nothing to do.")

    print("Tool Self-Play")
    print(f" Model: {args.model} on {args.ollama_url}")
    print(f" RCON: {args.rcon_host}:{args.rcon_port}")
    print(f" Categories: {', '.join(categories)}")
    print(f" Rounds per category: {args.rounds}")
    print(f" Output: {output_path}")
    print()

    stats = {"total": 0, "success": 0, "partial": 0, "fail": 0, "error": 0}
    written = 0
    for round_num in range(args.rounds):
        print(f"\n── Round {round_num + 1}/{args.rounds} ──")
        random.shuffle(categories)
        for cat in categories:
            player = random.choice(PLAYERS)
            example = run_round(cat, args.ollama_url, args.model, rcon, player, prompts)
            stats["total"] += 1
            if example is None:
                stats["error"] += 1
                continue
            if example["metadata"]["all_success"]:
                stats["success"] += 1
            elif any(r["ok"] for r in example["metadata"].get("rcon_results", [])):
                stats["partial"] += 1
            else:
                stats["fail"] += 1
            # Write incrementally. UTF-8 is explicit because ensure_ascii=False
            # emits non-ASCII characters verbatim.
            with open(output_path, "a", encoding="utf-8") as f:
                f.write(json.dumps(example, ensure_ascii=False) + "\n")
            written += 1
            time.sleep(0.3)
        # Progress report
        if (round_num + 1) % 5 == 0:
            rate = stats["success"] / max(stats["total"], 1) * 100
            print(f"\n Progress: {stats['total']} total, {rate:.0f}% success, "
                  f"{stats['partial']} partial, {stats['fail']} fail, {stats['error']} error")

    print(f"\n{'='*60}")
    print("Tool Self-Play Complete")
    print(f" Total: {stats['total']}")
    print(f" Success: {stats['success']} ({stats['success']/max(stats['total'],1)*100:.0f}%)")
    print(f" Partial: {stats['partial']}")
    print(f" Fail: {stats['fail']}")
    print(f" Error: {stats['error']}")
    print(f" Output: {output_path} ({written} examples)")


if __name__ == "__main__":
    main()