Files
Mortdecai/training/scripts/regenerate_tool_data_v05.py
T
Mortdecai f5118505b1 0.5.0 bake-off results, knowledge lookup tools, training progress chart
Bake-off (0.5.0 vs 0.4.0):
- Overall: 46.8% vs 45.2% (+1.6%), 0 errors vs 2
- Enchantments: +47% (20% → 67%)
- EssentialsX: +60% (0% → 60%)
- Effects: +25% (0% → 25%)
- Regressions: fill_build -67%, world -20%

Knowledge Lookup Tools (4 new):
- plugin.docs_lookup: WorldGuard, WorldEdit, CoreProtect, EssentialsX, LuckPerms docs
- minecraft.changelog_lookup: version history from Minecraft Wiki
- paper.docs_lookup: Paper server-specific documentation
- Wired into gateway model-driven tool loop and exploration self-play

Exploration Self-Play:
- General (vanilla MC) and plugins focus modes
- Wiki-grounded: model researches before acting, validates through RCON
- 2,243 exploration examples generated, 150 kept after quality filtering

Training Progress Chart:
- SVG chart showing training examples and inverse loss across versions
- Added to MODEL_CARD.md for Gitea display

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-21 15:28:09 -04:00

371 lines
15 KiB
Python

#!/usr/bin/env python3
"""
Regenerate tool-calling training data using mortdecai:0.5.0.
Uses the model-driven tool loop: sends prompts to 0.5.0, lets it decide
which tools to call, executes via RCON, and captures the full multi-turn
conversation as training data. Keeps only examples where at least one RCON
command ran and at least 70% of the RCON commands succeeded.
This produces "distilled" data — the model's best outputs, validated by RCON.
"""
import json
import random
import re
import sys
import time
from pathlib import Path
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
sys.path.insert(0, str(PROJECT_ROOT))
import requests
from agent.tools.persistent_rcon import get_rcon
from agent.tools.tool_schemas import qwen3_tools_block
from agent.prompts.system_prompts import SYNTAX_RULES, RISK_GRADIENT
# Destination JSONL file for the kept examples (one JSON object per line).
OUTPUT_PATH = PROJECT_ROOT.joinpath("data", "processed", "tool_training_v05.jsonl")

# Tool description text, built once at import time and appended to SYSTEM_GOD.
TOOLS_BLOCK = qwen3_tools_block()
# System prompt used for every non-"god" request (see query_model_with_tools).
# It describes the <tool_call> wire format, enumerates the tool names the model
# may call, and fixes the shape of the final JSON answer; SYNTAX_RULES and
# RISK_GRADIENT are appended verbatim at the end.
# NOTE(review): the "Available" list does not mention paper.docs_lookup even
# though query_model_with_tools handles that tool name — confirm whether the
# omission is intentional.
SYSTEM = (
    "/no_think\n"
    "You are a Minecraft 1.21 command translator for a Paper server.\n"
    "Plugins: FastAsyncWorldEdit, WorldGuard, CoreProtect, EssentialsX, Vault, LuckPerms.\n\n"
    "You have tools. To call one:\n"
    "<tool_call>\n{\"name\": \"tool_name\", \"arguments\": {...}}\n</tool_call>\n\n"
    "Available: rcon.execute, minecraft.wiki_lookup, plugin.docs_lookup, "
    "minecraft.changelog_lookup, world.player_info, world.server_state, "
    "world.nearby_entities, memory.read, memory.write, "
    "script.write, script.validate, script.execute, script.read, script.list, "
    "script.delete, script.schedule.\n\n"
    "After tool calls, respond with JSON:\n"
    "{\"risk_level\": <0-5>, \"commands\": [...], \"reasoning\": \"...\"}\n\n"
    "PERMISSION LEVEL: 4 (generous).\n" + SYNTAX_RULES + RISK_GRADIENT
)
# System prompt used when query_model_with_tools is called with mode == "god"
# (main() selects that mode for prompts starting with "pray "). Unlike SYSTEM,
# the expected JSON answer carries an extra "message" field, and the tool
# descriptions come from TOOLS_BLOCK instead of a hand-written list.
SYSTEM_GOD = (
    "/no_think\n"
    "You are God in a Minecraft server with full tool access.\n"
    "Return JSON: {\"risk_level\": <0-5>, \"message\": \"...\", \"commands\": [...], \"reasoning\": \"...\"}\n\n"
    + SYNTAX_RULES + "\n" + TOOLS_BLOCK
)
# Player names; main() picks one at random to attribute each prompt to.
PLAYERS = [
    "slingshooter08",
    "Ace13245",
    "TheBigBoss",
    "xXDragonSlayerXx",
]
# Comprehensive prompt set — every category we need good data for
# Maps a category name to the list of player prompts sent to the model.
# main() iterates the categories in insertion order, records the category in
# each example's "type"/"metadata", and runs any prompt that starts with
# "pray " in god mode (all others in sudo mode).
PROMPTS = {
    # Simple single-command requests: items, time, weather, kills, gamemode.
    "basic_commands": [
        "sudo give me a diamond sword",
        "sudo give me 64 golden apples",
        "sudo give me a stack of oak planks",
        "sudo give me an elytra",
        "sudo give me a spyglass",
        "sudo give me a recovery compass",
        "sudo give me a bundle",
        "sudo set time to noon",
        "sudo set time to midnight",
        "sudo clear weather for a week",
        "sudo make it thunder",
        "sudo kill all hostile mobs",
        "sudo kill all items on the ground",
        "sudo gamemode creative",
        "sudo gamemode survival",
        "sudo gamemode spectator",
    ],
    # Items that must carry specific enchantments.
    "enchanted_gear": [
        "sudo give me a diamond sword with sharpness 5, unbreaking 3, mending, and looting 3",
        "sudo give me a netherite pickaxe with efficiency 5, fortune 3, unbreaking 3, mending",
        "sudo give me a bow with power 5, infinity, flame, punch 2",
        "sudo full netherite armor with protection 4, unbreaking 3, mending on every piece",
        "sudo give me boots with feather falling 4, depth strider 3, soul speed 3",
        "sudo give me a trident with loyalty 3 and channeling",
        "sudo give me a trident with riptide 3",
        "sudo give me a crossbow with multishot and quick charge 3",
        "sudo give me a mace with density 5 and wind burst 3",
        "sudo best fishing rod possible",
        "sudo give me a shield with unbreaking 3 and mending",
    ],
    # Status effects with levels and durations.
    "effects": [
        "sudo give me speed 3 for 10 minutes",
        "sudo night vision permanently",
        "sudo make me invisible for 5 minutes",
        "sudo give me fire resistance for an hour",
        "sudo give everyone online regeneration 2",
        "sudo give me haste 2 for 10 minutes",
        "sudo slow falling for 60 seconds",
        "sudo give me water breathing forever",
        "sudo give me strength 2 and resistance 2 for 5 minutes",
        "sudo clear all my effects",
    ],
    # Teleports, relative movement and spawn points.
    "teleport_position": [
        "sudo tp me to 0 100 0",
        "sudo tp me to the nether",
        "sudo tp everyone to spawn",
        "sudo teleport me 100 blocks north",
        "sudo tp me up 50 blocks",
        "sudo set my spawn point here",
    ],
    # Block placement and fills relative to the player.
    "building": [
        "sudo fill a 10x10 platform of stone under me",
        "sudo place a beacon at my location",
        "sudo build a small cobblestone room around me",
        "sudo fill the area below me with water",
        "sudo make a glass dome over me",
        "sudo place 4 lanterns around me",
        "sudo clear a 20 block area above me",
    ],
    # Summoning and killing entities.
    "entities": [
        "sudo summon a horse with a saddle",
        "sudo summon 5 cows near me",
        "sudo summon a villager",
        "sudo spawn an iron golem",
        "sudo summon a warden 20 blocks away",
        "sudo summon a wither",
        "sudo kill all zombies within 50 blocks",
        "sudo kill all creepers near me",
    ],
    # WorldGuard region and flag requests.
    "worldguard": [
        "sudo create a region called my-base and set pvp deny",
        "sudo prevent mob spawning in the spawn region",
        "sudo set a greeting message for spawn: Welcome to the server!",
        "sudo deny entry to non-members in the vault region",
        "sudo list all regions",
        "sudo allow TNT in the arena",
        "sudo prevent fire spread globally",
        "sudo make a healing zone at spawn",
    ],
    # CoreProtect inspection, lookup and rollback requests.
    "coreprotect": [
        "sudo enable block inspector",
        "sudo rollback the last hour of changes",
        "sudo rollback what TheBigBoss did in the last 30 minutes",
        "sudo lookup who placed blocks near me today",
        "sudo rollback TNT damage from the last 2 hours",
        "sudo check coreprotect status",
        "sudo restore what was rolled back",
    ],
    # EssentialsX homes, warps, economy and player utilities.
    "essentialsx": [
        "sudo set my home here",
        "sudo create a warp called arena",
        "sudo give Ace13245 1000 coins",
        "sudo check my balance",
        "sudo heal me",
        "sudo feed me",
        "sudo repair my held item",
        "sudo set my nickname to DragonLord",
        "sudo broadcast Welcome to the server!",
        "sudo god mode on",
        "sudo fly mode on",
    ],
    # LuckPerms group and permission management.
    "luckperms": [
        "sudo create a VIP group",
        "sudo add Ace13245 to VIP",
        "sudo give VIP permission to fly",
        "sudo give me temporary VIP for 24 hours",
        "sudo set VIP prefix to gold [VIP]",
        "sudo list all permission groups",
        "sudo create a builder group with worldedit access",
    ],
    # FastAsyncWorldEdit shape, replace and terrain operations.
    "fawe": [
        "sudo make a glass sphere radius 8",
        "sudo hollow stone sphere radius 10",
        "sudo cylinder of quartz 5 wide 12 tall",
        "sudo replace all stone with deepslate in selection",
        "sudo smooth the terrain 5 iterations",
        "sudo drain water within 20 blocks",
        "sudo sandstone pyramid 8 tall",
        "sudo undo my last worldedit operation",
    ],
    # Prayers: the "pray " prefix makes main() run these in god mode.
    "god_prayers": [
        "pray oh great one, bless me with diamonds",
        "pray lord, protect me from the monsters of the night",
        "pray I offer this sacrifice of 64 wheat, grant me your favor",
        "pray god please make it stop raining",
        "pray smite the wicked TheBigBoss for griefing my base",
        "pray heal me, I am near death",
        "pray give me the strength to slay the ender dragon",
        "pray I am lost in a cave, guide me to the surface",
    ],
    # Ambiguous or under-specified requests.
    "error_prone": [
        "sudo give me a bed",
        "sudo give me steak",
        "sudo give me cooked beef",
        "sudo effect give me speed",
        "sudo give me a log",
        "sudo fill with stone 10",
        "sudo tp me to spawn",
        "sudo give @s diamond 1",
    ],
    # Requests that need several commands to satisfy.
    "complex_multi": [
        "sudo gear me up for the nether: armor, weapons, food, fire resistance",
        "sudo prepare me for the end fight: bow, arrows, blocks, pearls, slow falling",
        "sudo set up a new player kit: stone tools, food, bed, torches",
        "sudo create a mob farm: platform, water channels, collection hopper",
    ],
}
def _execute_tool(tool_name, tool_args, rcon, rcon_log):
    """Run a single tool call and return the result dict fed back to the model.

    Args:
        tool_name: Name taken from the model's tool call.
        tool_args: The call's ``arguments`` value as parsed from JSON.
        rcon: Object exposing ``command(cmd)``; used for ``rcon.execute``.
        rcon_log: List that receives one ``{"cmd", "ok", "result"}`` record
            per ``rcon.execute`` call (mutated in place).

    Returns:
        A dict describing the outcome. Tools other than ``rcon.execute`` and
        the knowledge lookups are not executed; they get a canned
        ``"simulated"`` result.
    """
    if tool_name == "rcon.execute":
        # The model may emit "arguments": null (or a string); without this
        # guard the .get() below would raise AttributeError and abort the run.
        if not isinstance(tool_args, dict):
            rcon_log.append({"cmd": "", "ok": False, "result": "invalid arguments"})
            return {"success": False, "result": "invalid arguments"}
        cmd = tool_args.get("command", "")
        try:
            result_text = rcon.command(cmd)
            # Replies containing any of these substrings are treated as errors.
            is_err = any(
                marker in result_text
                for marker in ("<--[HERE]", "Unknown", "Incorrect", "Expected")
            )
            result = {"success": not is_err, "result": result_text[:300]}
            rcon_log.append({"cmd": cmd, "ok": not is_err, "result": result_text[:200]})
        except Exception as exc:
            # Best effort: any RCON failure is recorded as a failed command
            # instead of stopping the whole generation run.
            result = {"success": False, "result": str(exc)}
            rcon_log.append({"cmd": cmd, "ok": False, "result": str(exc)})
        return result

    if tool_name in (
        "minecraft.wiki_lookup",
        "plugin.docs_lookup",
        "minecraft.changelog_lookup",
        "paper.docs_lookup",
    ):
        try:
            from agent.tools.knowledge_lookup import handle_knowledge_tool
            return handle_knowledge_tool(tool_name, tool_args)
        except Exception:
            # Lookup failures (including a failed import) degrade to a
            # placeholder so the conversation can continue.
            label = "Wiki" if tool_name == "minecraft.wiki_lookup" else "Docs"
            return {"content": f"{label} unavailable", "url": "", "ok": False}

    return {"ok": True, "result": "simulated"}


def query_model_with_tools(prompt, player, ollama_url, model, rcon, mode="sudo", max_steps=6):
    """Send prompt to model, let it call tools, execute them, capture full chain.

    Each step POSTs the conversation so far to ``{ollama_url}/api/chat``. If
    the reply contains ``<tool_call>`` blocks, each parseable call is executed
    and appended to the conversation as an assistant/tool message pair, then
    the model is queried again. A reply without tool calls is treated as the
    final answer and must be a JSON object.

    Args:
        prompt: The player's request text.
        player: Player name included in the user message.
        ollama_url: Base URL of the chat API.
        model: Model name sent in the request.
        rcon: Object exposing ``command(cmd)`` for ``rcon.execute`` calls.
        mode: ``"god"`` selects ``SYSTEM_GOD``; anything else uses ``SYSTEM``.
        max_steps: Maximum number of model round trips.

    Returns:
        A dict with keys ``messages``, ``commands``, ``message``,
        ``reasoning``, ``tool_trace`` and ``rcon_results``, or ``None`` when
        the request fails, the final answer is not a JSON object, or the step
        budget is exhausted.
    """
    system = SYSTEM_GOD if mode == "god" else SYSTEM
    messages = [
        {"role": "system", "content": system},
        {"role": "user", "content": f"Player {player}: {prompt}"},
    ]
    tool_trace = []
    all_rcon_results = []

    for step in range(max_steps):
        try:
            r = requests.post(
                f"{ollama_url}/api/chat",
                json={
                    "model": model,
                    "messages": messages,
                    "stream": False,
                    "options": {"temperature": 0.2, "num_predict": 800},
                },
                timeout=90,
            )
            raw = r.json()["message"]["content"]
        except (requests.RequestException, OSError, ValueError, KeyError, TypeError):
            # Network/HTTP failure, non-JSON body, or an unexpected payload shape.
            return None
        if not isinstance(raw, str):
            return None

        # Drop any <think>...</think> section before looking at the reply.
        raw = re.sub(r'<think>[\s\S]*?</think>\s*', '', raw)
        # Check for tool calls
        tool_matches = re.findall(r'<tool_call>\s*(\{.*?\})\s*</tool_call>', raw, re.DOTALL)

        if not tool_matches:
            # Final response
            try:
                parsed = json.loads(raw)
            except json.JSONDecodeError:
                return None
            # Valid JSON that is not an object (e.g. a bare list) has no
            # .get(); reject it like any other unusable answer.
            if not isinstance(parsed, dict):
                return None
            return {
                "messages": messages + [{"role": "assistant", "content": raw}],
                "commands": parsed.get("commands", []),
                "message": parsed.get("message", ""),
                "reasoning": parsed.get("reasoning", ""),
                "tool_trace": tool_trace,
                "rcon_results": all_rcon_results,
            }

        for tc_json in tool_matches:
            try:
                tc = json.loads(tc_json)
            except json.JSONDecodeError:
                # Malformed call: skip it. If every call in a reply is
                # malformed, the next step re-queries with unchanged messages.
                continue
            tool_name = tc.get("name", "")
            tool_args = tc.get("arguments", {})

            # Execute tool
            result = _execute_tool(tool_name, tool_args, rcon, all_rcon_results)

            tool_trace.append({"tool": tool_name, "args": str(tool_args)[:100], "step": step})
            messages.append({"role": "assistant", "content": f"<tool_call>\n{json.dumps(tc)}\n</tool_call>"})
            # The tool output is capped at 2000 characters of serialized JSON.
            messages.append({"role": "tool", "content": json.dumps(result)[:2000]})

    return None  # Ran out of steps
def main():
    """Run every prompt in PROMPTS through the model and write kept examples.

    Parses the command-line options (chat API URL, model name, RCON host,
    port and password), opens the RCON connection, and calls
    ``query_model_with_tools`` once per prompt with a randomly chosen player.
    An example is kept when at least one RCON command ran and at least 70% of
    the RCON commands succeeded. Kept examples are written to ``OUTPUT_PATH``
    as UTF-8 JSONL after all prompts have been processed, and a summary is
    printed.
    """
    import argparse

    # Minimum fraction of successful RCON commands for an example to be kept.
    min_success_ratio = 0.7

    parser = argparse.ArgumentParser()
    parser.add_argument("--ollama-url", default="http://localhost:11434")
    parser.add_argument("--model", default="mortdecai:0.5.0")
    parser.add_argument("--rcon-host", default="192.168.0.244")
    parser.add_argument("--rcon-port", type=int, default=25578)
    # NOTE(review): the password default is baked into the script; prefer
    # passing --rcon-pass explicitly.
    parser.add_argument("--rcon-pass", default="REDACTED_RCON")
    args = parser.parse_args()

    rcon = get_rcon(args.rcon_host, args.rcon_port, args.rcon_pass)
    print(f"Regenerating tool data with {args.model}")
    print(f"RCON: {args.rcon_host}:{args.rcon_port}")

    all_examples = []
    stats = {"total": 0, "kept": 0, "failed": 0, "no_response": 0}

    for category, prompts in PROMPTS.items():
        print(f"\n── {category} ({len(prompts)} prompts) ──")
        for prompt in prompts:
            player = random.choice(PLAYERS)
            # Prayers are answered with the god-mode system prompt.
            mode = "god" if prompt.startswith("pray ") else "sudo"
            result = query_model_with_tools(prompt, player, args.ollama_url, args.model, rcon, mode)
            stats["total"] += 1

            if not result:
                stats["no_response"] += 1
                print(f"  SKIP: {prompt[:50]} (no response)")
                continue

            rcon_ok = sum(1 for r in result["rcon_results"] if r["ok"])
            rcon_total = len(result["rcon_results"])
            tools_used = len(result["tool_trace"])

            # An answer produced without any tool call is not tool-use data.
            if rcon_total == 0 and tools_used == 0:
                stats["no_response"] += 1
                print(f"  SKIP: {prompt[:50]} (empty)")
                continue

            all_success = rcon_total > 0 and all(r["ok"] for r in result["rcon_results"])
            if all_success or (rcon_ok > 0 and rcon_ok >= rcon_total * min_success_ratio):
                stats["kept"] += 1
                example = {
                    # IDs use the running prompt counter, so they are unique
                    # but not contiguous across kept examples.
                    "id": f"v05-regen-{stats['total']:04d}",
                    "source": "model_distillation_v05",
                    "type": f"tool_{category}",
                    "messages": result["messages"],
                    "metadata": {
                        "model": args.model,
                        "category": category,
                        "tools_used": tools_used,
                        "rcon_total": rcon_total,
                        "rcon_success": rcon_ok,
                        "all_success": all_success,
                    },
                }
                all_examples.append(example)
                print(f"  KEPT: {prompt[:50]} ({rcon_ok}/{rcon_total} cmds, {tools_used} tools)")
            else:
                stats["failed"] += 1
                print(f"  FAIL: {prompt[:50]} ({rcon_ok}/{rcon_total} cmds)")

            # Brief pause between prompts.
            time.sleep(0.2)

    print(f"\n{'='*60}")
    print(f"Total: {stats['total']}, Kept: {stats['kept']}, Failed: {stats['failed']}, Empty: {stats['no_response']}")
    print(f"Quality: {100*stats['kept']//max(stats['total'],1)}%")

    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
    # ensure_ascii=False emits raw non-ASCII characters, so the file encoding
    # must be pinned to UTF-8 rather than left to the platform default.
    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        for ex in all_examples:
            f.write(json.dumps(ex, ensure_ascii=False) + "\n")
    print(f"Written to {OUTPUT_PATH}")
# Run the regeneration only when executed as a script, not when imported.
if __name__ == "__main__":
    main()