0.5.0 bake-off results, knowledge lookup tools, training progress chart
Bake-off (0.5.0 vs 0.4.0):
- Overall: 46.8% vs 45.2% (+1.6%), 0 errors vs 2
- Enchantments: +47% (20% → 67%)
- EssentialsX: +60% (0% → 60%)
- Effects: +25% (0% → 25%)
- Regressions: fill_build -67%, world -20%

Knowledge Lookup Tools (4 new):
- plugin.docs_lookup: WorldGuard, WorldEdit, CoreProtect, EssentialsX, LuckPerms docs
- minecraft.changelog_lookup: version history from Minecraft Wiki
- paper.docs_lookup: Paper server-specific documentation
- Wired into gateway model-driven tool loop and exploration self-play

Exploration Self-Play:
- General (vanilla MC) and plugins focus modes
- Wiki-grounded: model researches before acting, validates through RCON
- 2,243 exploration examples generated, 150 kept after quality filtering

Training Progress Chart:
- SVG chart showing training examples and inverse loss across versions
- Added to MODEL_CARD.md for Gitea display

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,370 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
Regenerate tool-calling training data using mortdecai:0.5.0.

Uses the model-driven tool loop: sends prompts to 0.5.0, lets it decide
which tools to call, executes rcon.execute calls via RCON, and captures the
full multi-turn conversation as training data. An example is kept when at
least one RCON command succeeds and at least 70% of its RCON commands
succeed; whether every command succeeded is recorded in the example metadata.

This produces "distilled" data — the model's best outputs, validated by RCON.
"""
|
||||
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
|
||||
sys.path.insert(0, str(PROJECT_ROOT))
|
||||
|
||||
import requests
|
||||
from agent.tools.persistent_rcon import get_rcon
|
||||
from agent.tools.tool_schemas import qwen3_tools_block
|
||||
from agent.prompts.system_prompts import SYNTAX_RULES, RISK_GRADIENT
|
||||
|
||||
# Destination JSONL file for the kept examples (one JSON object per line).
OUTPUT_PATH = PROJECT_ROOT / "data" / "processed" / "tool_training_v05.jsonl"

# Tool description block produced by the project's schema helper; only used
# in the "god" system prompt below.
TOOLS_BLOCK = qwen3_tools_block()

# System prompt for "sudo" requests: the model translates a player request
# into commands and may call tools before answering with a JSON object.
# NOTE(review): the tool loop in this file also dispatches
# "paper.docs_lookup", but it is not advertised in the "Available:" list
# here — confirm whether that omission is intentional before changing it.
SYSTEM = (
    "/no_think\n"
    "You are a Minecraft 1.21 command translator for a Paper server.\n"
    "Plugins: FastAsyncWorldEdit, WorldGuard, CoreProtect, EssentialsX, Vault, LuckPerms.\n\n"
    "You have tools. To call one:\n"
    "<tool_call>\n{\"name\": \"tool_name\", \"arguments\": {...}}\n</tool_call>\n\n"
    "Available: rcon.execute, minecraft.wiki_lookup, plugin.docs_lookup, "
    "minecraft.changelog_lookup, world.player_info, world.server_state, "
    "world.nearby_entities, memory.read, memory.write, "
    "script.write, script.validate, script.execute, script.read, script.list, "
    "script.delete, script.schedule.\n\n"
    "After tool calls, respond with JSON:\n"
    "{\"risk_level\": <0-5>, \"commands\": [...], \"reasoning\": \"...\"}\n\n"
    "PERMISSION LEVEL: 4 (generous).\n"
    + SYNTAX_RULES
    + RISK_GRADIENT
)

# System prompt for "god" requests (prompts that start with "pray "): the
# reply JSON additionally carries a "message" field.
SYSTEM_GOD = (
    "/no_think\n"
    "You are God in a Minecraft server with full tool access.\n"
    "Return JSON: {\"risk_level\": <0-5>, \"message\": \"...\", \"commands\": [...], \"reasoning\": \"...\"}\n\n"
    + SYNTAX_RULES
    + "\n"
    + TOOLS_BLOCK
)

# Player names substituted into the user turn ("Player <name>: <prompt>").
PLAYERS = ["slingshooter08", "Ace13245", "TheBigBoss", "xXDragonSlayerXx"]
|
||||
|
||||
# Comprehensive prompt set — every category we need good data for.
# Maps a category name (used in the example "type" and metadata) to the list
# of player requests sent to the model. Prompts that start with "pray " are
# routed to the "god" system prompt by main(); all others use "sudo" mode.
PROMPTS = {
    # Plain vanilla commands: items, time, weather, kill, gamemode.
    "basic_commands": [
        "sudo give me a diamond sword",
        "sudo give me 64 golden apples",
        "sudo give me a stack of oak planks",
        "sudo give me an elytra",
        "sudo give me a spyglass",
        "sudo give me a recovery compass",
        "sudo give me a bundle",
        "sudo set time to noon",
        "sudo set time to midnight",
        "sudo clear weather for a week",
        "sudo make it thunder",
        "sudo kill all hostile mobs",
        "sudo kill all items on the ground",
        "sudo gamemode creative",
        "sudo gamemode survival",
        "sudo gamemode spectator",
    ],
    # Items carrying one or more enchantments.
    "enchanted_gear": [
        "sudo give me a diamond sword with sharpness 5, unbreaking 3, mending, and looting 3",
        "sudo give me a netherite pickaxe with efficiency 5, fortune 3, unbreaking 3, mending",
        "sudo give me a bow with power 5, infinity, flame, punch 2",
        "sudo full netherite armor with protection 4, unbreaking 3, mending on every piece",
        "sudo give me boots with feather falling 4, depth strider 3, soul speed 3",
        "sudo give me a trident with loyalty 3 and channeling",
        "sudo give me a trident with riptide 3",
        "sudo give me a crossbow with multishot and quick charge 3",
        "sudo give me a mace with density 5 and wind burst 3",
        "sudo best fishing rod possible",
        "sudo give me a shield with unbreaking 3 and mending",
    ],
    # Status effects with durations and amplifiers.
    "effects": [
        "sudo give me speed 3 for 10 minutes",
        "sudo night vision permanently",
        "sudo make me invisible for 5 minutes",
        "sudo give me fire resistance for an hour",
        "sudo give everyone online regeneration 2",
        "sudo give me haste 2 for 10 minutes",
        "sudo slow falling for 60 seconds",
        "sudo give me water breathing forever",
        "sudo give me strength 2 and resistance 2 for 5 minutes",
        "sudo clear all my effects",
    ],
    # Teleports and spawn-point changes.
    "teleport_position": [
        "sudo tp me to 0 100 0",
        "sudo tp me to the nether",
        "sudo tp everyone to spawn",
        "sudo teleport me 100 blocks north",
        "sudo tp me up 50 blocks",
        "sudo set my spawn point here",
    ],
    # Block placement relative to the player.
    "building": [
        "sudo fill a 10x10 platform of stone under me",
        "sudo place a beacon at my location",
        "sudo build a small cobblestone room around me",
        "sudo fill the area below me with water",
        "sudo make a glass dome over me",
        "sudo place 4 lanterns around me",
        "sudo clear a 20 block area above me",
    ],
    # Summoning and killing entities.
    "entities": [
        "sudo summon a horse with a saddle",
        "sudo summon 5 cows near me",
        "sudo summon a villager",
        "sudo spawn an iron golem",
        "sudo summon a warden 20 blocks away",
        "sudo summon a wither",
        "sudo kill all zombies within 50 blocks",
        "sudo kill all creepers near me",
    ],
    # WorldGuard region management.
    "worldguard": [
        "sudo create a region called my-base and set pvp deny",
        "sudo prevent mob spawning in the spawn region",
        "sudo set a greeting message for spawn: Welcome to the server!",
        "sudo deny entry to non-members in the vault region",
        "sudo list all regions",
        "sudo allow TNT in the arena",
        "sudo prevent fire spread globally",
        "sudo make a healing zone at spawn",
    ],
    # CoreProtect inspection, lookup and rollback.
    "coreprotect": [
        "sudo enable block inspector",
        "sudo rollback the last hour of changes",
        "sudo rollback what TheBigBoss did in the last 30 minutes",
        "sudo lookup who placed blocks near me today",
        "sudo rollback TNT damage from the last 2 hours",
        "sudo check coreprotect status",
        "sudo restore what was rolled back",
    ],
    # EssentialsX convenience commands.
    "essentialsx": [
        "sudo set my home here",
        "sudo create a warp called arena",
        "sudo give Ace13245 1000 coins",
        "sudo check my balance",
        "sudo heal me",
        "sudo feed me",
        "sudo repair my held item",
        "sudo set my nickname to DragonLord",
        "sudo broadcast Welcome to the server!",
        "sudo god mode on",
        "sudo fly mode on",
    ],
    # LuckPerms groups and permissions.
    "luckperms": [
        "sudo create a VIP group",
        "sudo add Ace13245 to VIP",
        "sudo give VIP permission to fly",
        "sudo give me temporary VIP for 24 hours",
        "sudo set VIP prefix to gold [VIP]",
        "sudo list all permission groups",
        "sudo create a builder group with worldedit access",
    ],
    # FastAsyncWorldEdit shapes and terrain operations.
    "fawe": [
        "sudo make a glass sphere radius 8",
        "sudo hollow stone sphere radius 10",
        "sudo cylinder of quartz 5 wide 12 tall",
        "sudo replace all stone with deepslate in selection",
        "sudo smooth the terrain 5 iterations",
        "sudo drain water within 20 blocks",
        "sudo sandstone pyramid 8 tall",
        "sudo undo my last worldedit operation",
    ],
    # "pray ..." prompts; these select the god system prompt.
    "god_prayers": [
        "pray oh great one, bless me with diamonds",
        "pray lord, protect me from the monsters of the night",
        "pray I offer this sacrifice of 64 wheat, grant me your favor",
        "pray god please make it stop raining",
        "pray smite the wicked TheBigBoss for griefing my base",
        "pray heal me, I am near death",
        "pray give me the strength to slay the ender dragon",
        "pray I am lost in a cave, guide me to the surface",
    ],
    # Requests with ambiguous or informal wording.
    "error_prone": [
        "sudo give me a bed",
        "sudo give me steak",
        "sudo give me cooked beef",
        "sudo effect give me speed",
        "sudo give me a log",
        "sudo fill with stone 10",
        "sudo tp me to spawn",
        "sudo give @s diamond 1",
    ],
    # Requests that need several commands to satisfy.
    "complex_multi": [
        "sudo gear me up for the nether: armor, weapons, food, fire resistance",
        "sudo prepare me for the end fight: bow, arrows, blocks, pearls, slow falling",
        "sudo set up a new player kit: stone tools, food, bed, torches",
        "sudo create a mob farm: platform, water channels, collection hopper",
    ],
}
|
||||
|
||||
|
||||
# Substrings in an RCON reply that this script treats as a failed command.
_RCON_ERROR_MARKERS = ("<--[HERE]", "Unknown", "Incorrect", "Expected")

# Knowledge-lookup tools mapped to the placeholder content returned when the
# lookup helper cannot be imported or raises.
_KNOWLEDGE_TOOL_FALLBACKS = {
    "minecraft.wiki_lookup": "Wiki unavailable",
    "plugin.docs_lookup": "Docs unavailable",
    "minecraft.changelog_lookup": "Docs unavailable",
    "paper.docs_lookup": "Docs unavailable",
}

# Compiled once instead of on every loop step.
_THINK_BLOCK_RE = re.compile(r'<think>[\s\S]*?</think>\s*')
_TOOL_CALL_RE = re.compile(r'<tool_call>\s*(\{.*?\})\s*</tool_call>', re.DOTALL)


def _request_completion(ollama_url, model, messages):
    """POST one chat turn to ``{ollama_url}/api/chat``; return the reply text or None."""
    try:
        response = requests.post(
            f"{ollama_url}/api/chat",
            json={
                "model": model,
                "messages": messages,
                "stream": False,
                "options": {"temperature": 0.2, "num_predict": 800},
            },
            timeout=90,
        )
        # Surface HTTP errors (e.g. unknown model) instead of failing later
        # on a missing "message" key.
        response.raise_for_status()
        content = response.json()["message"]["content"]
    except (requests.RequestException, KeyError, TypeError, ValueError) as exc:
        print(f"  model request failed: {exc}", file=sys.stderr)
        return None
    # A null/non-text content cannot be parsed further; treat as no reply.
    return content if isinstance(content, str) else None


def _execute_rcon(rcon, tool_args, rcon_log):
    """Run an ``rcon.execute`` call, append its outcome to *rcon_log*, return the tool result."""
    cmd = tool_args.get("command", "") if isinstance(tool_args, dict) else ""
    if not isinstance(cmd, str) or not cmd.strip():
        # Malformed arguments: record a failure rather than crashing or
        # sending an empty command to the server.
        error = "missing or invalid 'command' argument"
        rcon_log.append({"cmd": str(cmd), "ok": False, "result": error})
        return {"success": False, "result": error}

    try:
        result_text = rcon.command(cmd)
        is_err = any(marker in result_text for marker in _RCON_ERROR_MARKERS)
        outcome = {"success": not is_err, "result": result_text[:300]}
        log_entry = {"cmd": cmd, "ok": not is_err, "result": result_text[:200]}
    except Exception as exc:
        # Deliberately broad: the RCON client's exception types are not
        # visible from this file, and any failure counts as a failed command.
        outcome = {"success": False, "result": str(exc)}
        log_entry = {"cmd": cmd, "ok": False, "result": str(exc)}

    rcon_log.append(log_entry)
    return outcome


def _execute_tool(tool_name, tool_args, rcon, rcon_log):
    """Dispatch one tool call and return its result dict."""
    if tool_name == "rcon.execute":
        return _execute_rcon(rcon, tool_args, rcon_log)

    fallback = (
        _KNOWLEDGE_TOOL_FALLBACKS.get(tool_name)
        if isinstance(tool_name, str)
        else None
    )
    if fallback is not None:
        try:
            from agent.tools.knowledge_lookup import handle_knowledge_tool

            return handle_knowledge_tool(tool_name, tool_args)
        except Exception:
            # Best effort: a missing module or failed lookup yields a
            # placeholder so the conversation can continue.
            return {"content": fallback, "url": "", "ok": False}

    # Every other tool is not executed; the model receives a stub result.
    return {"ok": True, "result": "simulated"}


def query_model_with_tools(prompt, player, ollama_url, model, rcon, mode="sudo", max_steps=6):
    """Send prompt to model, let it call tools, execute them, capture full chain.

    Args:
        prompt: Player request text.
        player: Player name placed in the user turn as ``Player <name>: <prompt>``.
        ollama_url: Base URL of the Ollama server; ``/api/chat`` is appended.
        model: Model name passed to the chat endpoint.
        rcon: Object exposing ``command(cmd)``; used for ``rcon.execute`` calls.
        mode: ``"god"`` selects ``SYSTEM_GOD``; anything else selects ``SYSTEM``.
        max_steps: Maximum number of model turns before giving up.

    Returns:
        A dict with keys ``messages`` (full conversation including the final
        assistant reply), ``commands``, ``message``, ``reasoning`` (taken from
        the final JSON reply), ``tool_trace`` and ``rcon_results``; or ``None``
        when the request fails, the final reply is not a JSON object, or
        ``max_steps`` turns pass without a final reply.
    """
    system = SYSTEM_GOD if mode == "god" else SYSTEM

    messages = [
        {"role": "system", "content": system},
        {"role": "user", "content": f"Player {player}: {prompt}"},
    ]

    tool_trace = []
    all_rcon_results = []

    for step in range(max_steps):
        raw = _request_completion(ollama_url, model, messages)
        if raw is None:
            return None

        # Drop any reasoning block the model emitted despite /no_think.
        raw = _THINK_BLOCK_RE.sub('', raw)

        tool_matches = _TOOL_CALL_RE.findall(raw)

        if not tool_matches:
            # No tool call: this turn must be the final JSON answer.
            try:
                parsed = json.loads(raw)
            except json.JSONDecodeError:
                return None
            if not isinstance(parsed, dict):
                # Valid JSON but not an object (e.g. a list) is unusable.
                return None
            return {
                "messages": messages + [{"role": "assistant", "content": raw}],
                "commands": parsed.get("commands", []),
                "message": parsed.get("message", ""),
                "reasoning": parsed.get("reasoning", ""),
                "tool_trace": tool_trace,
                "rcon_results": all_rcon_results,
            }

        for tc_json in tool_matches:
            try:
                tc = json.loads(tc_json)
            except json.JSONDecodeError:
                # Skip malformed calls; other calls in the same turn still run.
                continue
            if not isinstance(tc, dict):
                continue

            tool_name = tc.get("name", "")
            tool_args = tc.get("arguments", {})

            result = _execute_tool(tool_name, tool_args, rcon, all_rcon_results)

            tool_trace.append({"tool": tool_name, "args": str(tool_args)[:100], "step": step})
            # Each call is recorded as its own assistant turn followed by the
            # tool's (length-capped) JSON result.
            messages.append({"role": "assistant", "content": f"<tool_call>\n{json.dumps(tc)}\n</tool_call>"})
            messages.append({"role": "tool", "content": json.dumps(result)[:2000]})

    return None  # Ran out of steps
|
||||
|
||||
|
||||
def main():
    """Run every prompt in PROMPTS through the model and write the kept examples.

    Parses command-line options, opens the RCON connection, queries the model
    once per prompt, and keeps an example when all of its RCON commands
    succeeded or when at least one succeeded and the success ratio reaches
    ``--min-success``. Kept examples are written to ``OUTPUT_PATH`` as JSONL,
    replacing any existing file.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--ollama-url", default="http://localhost:11434")
    parser.add_argument("--model", default="mortdecai:0.5.0")
    parser.add_argument("--rcon-host", default="192.168.0.244")
    parser.add_argument("--rcon-port", type=int, default=25578)
    parser.add_argument("--rcon-pass", default="REDACTED_RCON")
    parser.add_argument(
        "--min-success",
        type=float,
        default=0.7,
        help="minimum fraction of RCON commands that must succeed to keep an example",
    )
    args = parser.parse_args()

    rcon = get_rcon(args.rcon_host, args.rcon_port, args.rcon_pass)
    print(f"Regenerating tool data with {args.model}")
    print(f"RCON: {args.rcon_host}:{args.rcon_port}")

    all_examples = []
    stats = {"total": 0, "kept": 0, "failed": 0, "no_response": 0}

    for category, prompts in PROMPTS.items():
        print(f"\n── {category} ({len(prompts)} prompts) ──")
        for prompt in prompts:
            player = random.choice(PLAYERS)
            # Prayers are answered in the "god" persona; everything else is sudo.
            mode = "god" if prompt.startswith("pray ") else "sudo"

            result = query_model_with_tools(prompt, player, args.ollama_url, args.model, rcon, mode)
            stats["total"] += 1

            if not result:
                stats["no_response"] += 1
                print(f"  SKIP: {prompt[:50]} (no response)")
                continue

            rcon_results = result["rcon_results"]
            rcon_ok = sum(1 for r in rcon_results if r["ok"])
            rcon_total = len(rcon_results)
            tools_used = len(result["tool_trace"])

            if rcon_total == 0 and tools_used == 0:
                # The model answered without calling any tool: nothing was
                # validated, so the example is not usable.
                stats["no_response"] += 1
                print(f"  SKIP: {prompt[:50]} (empty)")
                continue

            all_success = rcon_total > 0 and rcon_ok == rcon_total
            mostly_success = rcon_ok > 0 and rcon_ok >= rcon_total * args.min_success

            if all_success or mostly_success:
                stats["kept"] += 1
                all_examples.append({
                    "id": f"v05-regen-{stats['total']:04d}",
                    "source": "model_distillation_v05",
                    "type": f"tool_{category}",
                    "messages": result["messages"],
                    "metadata": {
                        "model": args.model,
                        "category": category,
                        "tools_used": tools_used,
                        "rcon_total": rcon_total,
                        "rcon_success": rcon_ok,
                        "all_success": all_success,
                    },
                })
                print(f"  KEPT: {prompt[:50]} ({rcon_ok}/{rcon_total} cmds, {tools_used} tools)")
            else:
                stats["failed"] += 1
                print(f"  FAIL: {prompt[:50]} ({rcon_ok}/{rcon_total} cmds)")

            time.sleep(0.2)

    print(f"\n{'='*60}")
    print(f"Total: {stats['total']}, Kept: {stats['kept']}, Failed: {stats['failed']}, Empty: {stats['no_response']}")
    print(f"Quality: {100*stats['kept']//max(stats['total'],1)}%")

    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
    # Explicit UTF-8: records are dumped with ensure_ascii=False, so the
    # platform default encoding may not be able to represent them.
    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        for ex in all_examples:
            f.write(json.dumps(ex, ensure_ascii=False) + "\n")
    print(f"Written to {OUTPUT_PATH}")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user