Tool-calling training: 1,159 multi-turn examples with error correction

Tool schemas (agent/tools/tool_schemas.py):
- rcon.execute: execute commands, get success/error results
- minecraft.wiki_lookup: look up syntax and item info
- world.player_info: player health, position, inventory
- world.server_state: time, weather, online players
- 10 RCON error patterns with corrections
- 12 common error scenarios for training

Training data generator (training/scripts/generate_tool_training.py):
- Converts seed dataset to multi-turn tool conversations
- Error correction: model tries wrong command → gets error → self-corrects
- Wiki/player/server lookups for uncertainty scenarios
- Qwen3 native tool-calling format with <tool_call> tags

1,159 examples: 1,043 success, 79 error correction, 24 error scenarios, 13 tool lookups. Ready for v4 training.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-19 18:49:08 -04:00
parent 4e83da39fd
commit ee764cd22a
3 changed files with 2417 additions and 0 deletions
@@ -0,0 +1,850 @@
+#!/usr/bin/env python3
+"""
+Generate multi-turn tool-calling training data for the Minecraft AI God model.
+
+Reads data/processed/seed_dataset.jsonl and produces data/processed/tool_training.jsonl
+with Qwen3-format multi-turn conversations that teach the model to:
+  1. Call rcon.execute and handle success/error results
+  2. Self-correct on RCON errors (retry with fixed command)
+  3. Use minecraft.wiki_lookup when unsure about syntax
+  4. Use world.player_info / world.server_state for context-dependent actions
+
+Usage:
+    python training/scripts/generate_tool_training.py
+"""
+
+import json
+import random
+import re
+import sys
+import zlib
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+
+# Ensure project root is importable
+PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
+sys.path.insert(0, str(PROJECT_ROOT))
+
+from agent.tools.tool_schemas import (
+    QWEN3_TOOLS,
+    RCON_ERROR_PATTERNS,
+    ERROR_SCENARIOS,
+    qwen3_tools_block,
+)
+from agent.prompts.system_prompts import (
+    SUDO_SYSTEM_PROMPT,
+    GOD_SYSTEM_PROMPT,
+    SYNTAX_RULES,
+    RISK_GRADIENT,
+)
+
+# ---------------------------------------------------------------------------
+# Paths
+# ---------------------------------------------------------------------------
+# Input: JSONL seed dataset; load_seed_data() parses one JSON object per non-blank line.
+SEED_PATH = PROJECT_ROOT / "data" / "processed" / "seed_dataset.jsonl"
+# Output: JSONL file written by main(), one generated conversation per line.
+OUTPUT_PATH = PROJECT_ROOT / "data" / "processed" / "tool_training.jsonl"
+
+# ---------------------------------------------------------------------------
+# System prompt with tools block for Qwen3 format
+# ---------------------------------------------------------------------------
+# Text appended to the end of both system prompts below.
+# NOTE(review): presumably the rendered tool definitions from
+# agent.tools.tool_schemas -- confirm against qwen3_tools_block().
+TOOLS_BLOCK = qwen3_tools_block()
+
+# System prompt for "sudo" (admin command translation) conversations.
+# Layout: role description, permission level, tool-usage instructions, the
+# expected final JSON shape, then SYNTAX_RULES + RISK_GRADIENT + TOOLS_BLOCK.
+SUDO_TOOL_SYSTEM = (
+    "You are a Minecraft 1.21 command translator for a server admin. "
+    "You receive natural language requests and return valid RCON commands.\n\n"
+    "PERMISSION LEVEL: 4 (generous). Only refuse level 0-1 actions.\n\n"
+    "You have access to tools. Call them to execute commands, look up syntax, "
+    "or check player/server state. When a command fails, analyze the error "
+    "and retry with a corrected command.\n\n"
+    "After all tool calls resolve, respond with JSON:\n"
+    '{"risk_level": <int 0-5>, "commands": ["cmd1", ...], "reasoning": "why"}\n\n'
+    + SYNTAX_RULES + RISK_GRADIENT + "\n" + TOOLS_BLOCK
+)
+
+# System prompt for "god" (prayer response) conversations. Unlike the sudo
+# prompt, the final JSON additionally carries a "message" field and the
+# prompt does not include RISK_GRADIENT.
+GOD_TOOL_SYSTEM = (
+    "You are God in a Minecraft server. Players pray to you and you respond "
+    "with divine judgment.\n\n"
+    "You have access to tools. Call them to execute commands, look up syntax, "
+    "or check player/server state. When a command fails, analyze the error "
+    "and retry with a corrected command.\n\n"
+    "After all tool calls resolve, respond with JSON:\n"
+    '{"risk_level": <int 0-5>, "message": "Your divine response", '
+    '"commands": ["cmd1", ...], "reasoning": "why"}\n\n'
+    + SYNTAX_RULES + "\n" + TOOLS_BLOCK
+)
+
+# ---------------------------------------------------------------------------
+# Player names used across the dataset
+# ---------------------------------------------------------------------------
+# Pool of player names: sampled for simulated online-player lists and picked
+# at random for error-scenario and wiki-lookup examples.
+DEFAULT_PLAYERS = ["slingshooter08", "Ace13245", "TheBigBoss", "xXDragonSlayerXx"]
+
+# ---------------------------------------------------------------------------
+# Simulated success responses for common commands
+# ---------------------------------------------------------------------------
+
+def simulate_rcon_success(command: str, player: str = "slingshooter08") -> str:
+    """Generate a plausible RCON success message for a command."""
+    cmd = command.strip()
+    if cmd.startswith("give "):
+        # Parse item from give command
+        parts = cmd.split()
+        item = parts[2] if len(parts) > 2 else "item"
+        count = parts[3] if len(parts) > 3 else "1"
+        item_display = item.replace("minecraft:", "").replace("_", " ").title()
+        if "[" in item_display:
+            item_display = item_display.split("[")[0].strip()
+        return f"Gave {count} [{item_display}] to {player}"
+    elif cmd.startswith("effect give "):
+        parts = cmd.split()
+        effect = parts[2] if len(parts) > 2 else ""
+        eff_name = parts[3].replace("minecraft:", "") if len(parts) > 3 else "effect"
+        return f"Applied effect {eff_name} to {effect}"
+    elif cmd.startswith("effect clear "):
+        target = cmd.split()[2] if len(cmd.split()) > 2 else player
+        return f"Removed every effect from {target}"
+    elif cmd.startswith("tp ") or cmd.startswith("teleport "):
+        return f"Teleported {player}"
+    elif cmd.startswith("weather "):
+        weather_type = cmd.split()[1] if len(cmd.split()) > 1 else "clear"
+        return f"Changing to {weather_type}"
+    elif cmd.startswith("time set "):
+        val = cmd.split("time set ")[1] if "time set " in cmd else "day"
+        return f"Set the time to {val}"
+    elif cmd.startswith("gamemode "):
+        mode = cmd.split()[1] if len(cmd.split()) > 1 else "creative"
+        return f"Set own game mode to {mode.title()}"
+    elif cmd.startswith("kill "):
+        return "Killed entity"
+    elif cmd.startswith("summon "):
+        return "Summoned new entity"
+    elif cmd.startswith("setblock "):
+        return "Changed the block at ..."
+    elif cmd.startswith("fill "):
+        return "Successfully filled ... blocks"
+    elif cmd.startswith("clear "):
+        return f"Removed items from {player}"
+    elif cmd.startswith("xp "):
+        return f"Gave experience to {player}"
+    elif cmd.startswith("execute "):
+        return "Executed command"
+    elif cmd.startswith("playsound "):
+        return f"Played sound to {player}"
+    elif cmd.startswith("title "):
+        return "Title displayed"
+    elif cmd.startswith("particle "):
+        return "Showing particle"
+    elif cmd.startswith("enchant "):
+        return f"Enchanted item for {player}"
+    elif cmd.startswith("spreadplayers "):
+        return "Spread players"
+    else:
+        return "Command executed successfully"
+
+
+def simulate_player_info(player: str) -> Dict[str, Any]:
+    """Generate plausible player info."""
+    return {
+        "health": round(random.uniform(10.0, 20.0), 1),
+        "position": {
+            "x": random.randint(-500, 500),
+            "y": random.randint(60, 120),
+            "z": random.randint(-500, 500),
+        },
+        "inventory_summary": random.choice([
+            "Diamond sword, iron armor, 32 steak, 14 torches",
+            "Netherite pickaxe, diamond armor, 64 cobblestone, bow with 28 arrows",
+            "Stone tools, leather armor, 12 bread, 3 oak logs",
+            "Full diamond gear, 8 golden apples, ender pearls x16",
+            "Iron sword, chainmail chest, 24 cooked porkchop, shield",
+        ])
+    }
+
+
+def simulate_server_state() -> Dict[str, Any]:
+    """Generate plausible server state."""
+    ticks = random.randint(0, 24000)
+    if ticks < 6000:
+        tod = "morning"
+    elif ticks < 12000:
+        tod = "noon"
+    elif ticks < 18000:
+        tod = "evening"
+    else:
+        tod = "night"
+    return {
+        "time_of_day": tod,
+        "weather": random.choice(["clear", "rain", "thunder"]),
+        "online_players": random.sample(DEFAULT_PLAYERS, k=random.randint(1, 3)),
+        "world_border": 60000000.0,
+    }
+
+
+# ---------------------------------------------------------------------------
+# Conversation builders
+# ---------------------------------------------------------------------------
+
+def build_system_message(mode: str) -> Dict[str, str]:
+    """Return the system message for the given mode."""
+    if mode == "god":
+        return {"role": "system", "content": GOD_TOOL_SYSTEM}
+    return {"role": "system", "content": SUDO_TOOL_SYSTEM}
+
+
+def build_user_message(user_text: str, context: Optional[Dict] = None) -> Dict[str, str]:
+    """Build the user turn."""
+    content = user_text
+    if context and context.get("online_players"):
+        content += f"\n\n[Server context: players online: {', '.join(context['online_players'])}]"
+    return {"role": "user", "content": content}
+
+
+def build_tool_call(tool_name: str, arguments: Dict[str, Any]) -> Dict[str, str]:
+    """Build an assistant turn that makes a tool call."""
+    tc = json.dumps({"name": tool_name, "arguments": arguments})
+    return {"role": "assistant", "content": f"<tool_call>\n{tc}\n</tool_call>"}
+
+
+def build_tool_result(result: Dict[str, Any]) -> Dict[str, str]:
+    """Build a tool result turn."""
+    return {"role": "tool", "content": json.dumps(result)}
+
+
+def build_assistant_final(response: Dict[str, Any]) -> Dict[str, str]:
+    """Build the final assistant JSON response."""
+    return {"role": "assistant", "content": json.dumps(response)}
+
+
+# ---------------------------------------------------------------------------
+# Example generators
+# ---------------------------------------------------------------------------
+
+def gen_command_success(example: Dict[str, Any], mode: str = "sudo") -> Dict[str, Any]:
+    """
+    Generate a multi-turn conversation for a command_gen example where
+    commands succeed on the first try.
+    """
+    inp = example["input"]
+    out = example["output"]
+    player = (inp.get("server_context", {}).get("online_players") or ["slingshooter08"])[0]
+    commands = out.get("commands", [])
+    risk_level = example.get("metadata", {}).get("risk_level", 3)
+
+    if not commands:
+        return None
+
+    messages = [build_system_message(mode)]
+    messages.append(build_user_message(inp["user_message"], inp.get("server_context")))
+
+    # Execute each command via rcon.execute
+    for cmd in commands:
+        messages.append(build_tool_call("rcon.execute", {"command": cmd}))
+        success_msg = simulate_rcon_success(cmd, player)
+        messages.append(build_tool_result({"success": True, "result": success_msg}))
+
+    # Final response
+    final = {"risk_level": risk_level, "commands": commands, "reasoning": out.get("reasoning", "")}
+    if mode == "god" or out.get("message"):
+        final["message"] = out.get("message", "It is done.")
+    messages.append(build_assistant_final(final))
+
+    return {
+        "id": f"tool-{example['id']}-success",
+        "source": "tool_training",
+        "type": "command_success",
+        "messages": messages,
+    }
+
+
+def gen_error_correction_from_negative(example: Dict[str, Any], mode: str = "sudo") -> Dict[str, Any]:
+    """
+    Generate a multi-turn conversation from an example that has negative_output:
+    model tries wrong command -> error -> retries with correct command -> success.
+    """
+    inp = example["input"]
+    out = example["output"]
+    neg = example.get("negative_output")
+    if not neg:
+        return None
+
+    player = (inp.get("server_context", {}).get("online_players") or ["slingshooter08"])[0]
+    wrong_commands = neg.get("commands", [])
+    correct_commands = out.get("commands", [])
+    error_text = neg.get("error", "Unknown or incomplete command")
+    risk_level = example.get("metadata", {}).get("risk_level", 3)
+
+    if not wrong_commands or not correct_commands:
+        return None
+
+    messages = [build_system_message(mode)]
+    messages.append(build_user_message(inp["user_message"], inp.get("server_context")))
+
+    # First attempt: wrong command fails
+    wrong_cmd = wrong_commands[0]
+    messages.append(build_tool_call("rcon.execute", {"command": wrong_cmd}))
+    messages.append(build_tool_result({"success": False, "result": error_text}))
+
+    # Retry with correct command(s)
+    for cmd in correct_commands:
+        messages.append(build_tool_call("rcon.execute", {"command": cmd}))
+        success_msg = simulate_rcon_success(cmd, player)
+        messages.append(build_tool_result({"success": True, "result": success_msg}))
+
+    # Final response with reasoning about the correction
+    reasoning = out.get("reasoning", "")
+    if neg.get("error"):
+        reasoning = f"First attempt failed: {neg['error']}. {reasoning}"
+
+    final = {"risk_level": risk_level, "commands": correct_commands, "reasoning": reasoning}
+    if mode == "god" or out.get("message"):
+        final["message"] = out.get("message", "It is done.")
+    messages.append(build_assistant_final(final))
+
+    return {
+        "id": f"tool-{example['id']}-errorcorrect",
+        "source": "tool_training",
+        "type": "error_correction",
+        "messages": messages,
+    }
+
+
+def gen_error_scenario(scenario: Dict[str, Any], user_request: str,
+                       player: str = "slingshooter08", mode: str = "sudo") -> Dict[str, Any]:
+    """
+    Generate a multi-turn error correction example from a predefined error scenario.
+    """
+    wrong_cmd = scenario["wrong_command"].format(player=player)
+    correct_cmd = scenario["correct_command"].format(player=player)
+    risk_level = 3
+
+    messages = [build_system_message(mode)]
+    messages.append(build_user_message(user_request))
+
+    # Wrong attempt
+    messages.append(build_tool_call("rcon.execute", {"command": wrong_cmd}))
+    messages.append(build_tool_result({"success": False, "result": scenario["error_message"]}))
+
+    # Corrected attempt
+    messages.append(build_tool_call("rcon.execute", {"command": correct_cmd}))
+    success_msg = simulate_rcon_success(correct_cmd, player)
+    messages.append(build_tool_result({"success": True, "result": success_msg}))
+
+    final = {
+        "risk_level": risk_level,
+        "commands": [correct_cmd],
+        "reasoning": scenario["reasoning"],
+    }
+    if mode == "god":
+        final["message"] = "It is done."
+    messages.append(build_assistant_final(final))
+
+    return {
+        "id": f"tool-scenario-{scenario['id']}",
+        "source": "tool_training",
+        "type": "error_scenario",
+        "messages": messages,
+    }
+
+
+def gen_wiki_lookup(user_request: str, query: str, wiki_content: str,
+                    wiki_url: str, resulting_commands: List[str],
+                    reasoning: str, player: str = "slingshooter08",
+                    mode: str = "sudo") -> Dict[str, Any]:
+    """
+    Generate a conversation where the model looks up wiki info before executing.
+    """
+    messages = [build_system_message(mode)]
+    messages.append(build_user_message(user_request))
+
+    # Wiki lookup
+    messages.append(build_tool_call("minecraft.wiki_lookup", {"query": query}))
+    messages.append(build_tool_result({"content": wiki_content, "url": wiki_url}))
+
+    # Execute commands
+    for cmd in resulting_commands:
+        messages.append(build_tool_call("rcon.execute", {"command": cmd}))
+        success_msg = simulate_rcon_success(cmd, player)
+        messages.append(build_tool_result({"success": True, "result": success_msg}))
+
+    final = {"risk_level": 3, "commands": resulting_commands, "reasoning": reasoning}
+    messages.append(build_assistant_final(final))
+
+    return {
+        "id": f"tool-wiki-{hash(user_request) % 10000:04d}",
+        "source": "tool_training",
+        "type": "wiki_lookup",
+        "messages": messages,
+    }
+
+
+def gen_player_info_lookup(user_request: str, player: str,
+                           resulting_commands: List[str], reasoning: str,
+                           mode: str = "sudo") -> Dict[str, Any]:
+    """
+    Generate a conversation where the model checks player info before acting.
+    """
+    messages = [build_system_message(mode)]
+    messages.append(build_user_message(user_request))
+
+    # Get player info
+    messages.append(build_tool_call("world.player_info", {"player": player}))
+    pinfo = simulate_player_info(player)
+    messages.append(build_tool_result(pinfo))
+
+    # Use the position in commands (substitute coordinates)
+    pos = pinfo["position"]
+    resolved_cmds = []
+    for cmd in resulting_commands:
+        resolved = cmd.format(x=pos["x"], y=pos["y"], z=pos["z"], player=player)
+        resolved_cmds.append(resolved)
+
+    for cmd in resolved_cmds:
+        messages.append(build_tool_call("rcon.execute", {"command": cmd}))
+        success_msg = simulate_rcon_success(cmd, player)
+        messages.append(build_tool_result({"success": True, "result": success_msg}))
+
+    final = {"risk_level": 3, "commands": resolved_cmds, "reasoning": reasoning}
+    messages.append(build_assistant_final(final))
+
+    return {
+        "id": f"tool-playerinfo-{hash(user_request) % 10000:04d}",
+        "source": "tool_training",
+        "type": "player_info_lookup",
+        "messages": messages,
+    }
+
+
+def gen_server_state_check(user_request: str, resulting_commands: List[str],
+                           reasoning: str, player: str = "slingshooter08",
+                           mode: str = "sudo") -> Dict[str, Any]:
+    """
+    Generate a conversation where the model checks server state before acting.
+    """
+    messages = [build_system_message(mode)]
+    messages.append(build_user_message(user_request))
+
+    # Check server state
+    messages.append(build_tool_call("world.server_state", {}))
+    state = simulate_server_state()
+    messages.append(build_tool_result(state))
+
+    for cmd in resulting_commands:
+        messages.append(build_tool_call("rcon.execute", {"command": cmd}))
+        success_msg = simulate_rcon_success(cmd, player)
+        messages.append(build_tool_result({"success": True, "result": success_msg}))
+
+    final = {"risk_level": 3, "commands": resulting_commands, "reasoning": reasoning}
+    messages.append(build_assistant_final(final))
+
+    return {
+        "id": f"tool-serverstate-{hash(user_request) % 10000:04d}",
+        "source": "tool_training",
+        "type": "server_state_check",
+        "messages": messages,
+    }
+
+
+# ---------------------------------------------------------------------------
+# Handcrafted wiki/player/server lookup examples
+# ---------------------------------------------------------------------------
+
+# Handcrafted scenarios for the wiki-lookup conversation type. Each entry has:
+#   user_request - text of the user turn
+#   query        - argument passed to minecraft.wiki_lookup
+#   wiki_content - simulated lookup result text (used verbatim, not formatted)
+#   wiki_url     - simulated source URL
+#   commands     - str.format templates; generate_all() fills {player}, so
+#                  literal braces in component/NBT syntax are doubled ({{ }})
+#   reasoning    - reasoning text for the final response
+WIKI_LOOKUP_EXAMPLES = [
+    {
+        "user_request": "sudo enchant my trident with the best enchants",
+        "query": "trident enchantments 1.21",
+        "wiki_content": (
+            "Trident enchantments in Minecraft 1.21:\n"
+            "- Loyalty (1-3): Trident returns after being thrown\n"
+            "- Channeling (1): Summons lightning during thunderstorms\n"
+            "- Riptide (1-3): Launches player when thrown in water/rain\n"
+            "- Impaling (1-5): Extra damage to aquatic mobs\n"
+            "- Unbreaking (1-3): Increases durability\n"
+            "- Mending (1): Repairs with XP\n"
+            "Note: Loyalty and Riptide are mutually exclusive. Channeling and Riptide are mutually exclusive."
+        ),
+        "wiki_url": "https://minecraft.wiki/w/Trident#Enchantments",
+        "commands": [
+            "give {player} minecraft:trident[enchantments={{loyalty:3,channeling:1,impaling:5,unbreaking:3,mending:1}}] 1"
+        ],
+        "reasoning": "Looked up trident enchantments. Used loyalty+channeling (not riptide, since loyalty and riptide are mutually exclusive). Max levels on all.",
+    },
+    {
+        "user_request": "sudo what's the command to set a custom player head?",
+        "query": "player head command give 1.21",
+        "wiki_content": (
+            "To give a player head with a custom skin in 1.21:\n"
+            "give <player> minecraft:player_head[profile={name:\"<player_name>\"}] 1\n"
+            "This creates a head with the skin of the specified player."
+        ),
+        "wiki_url": "https://minecraft.wiki/w/Player_Head",
+        "commands": [
+            'give {player} minecraft:player_head[profile={{name:"{player}"}}] 1'
+        ],
+        "reasoning": "Looked up player head syntax for 1.21 component format.",
+    },
+    {
+        "user_request": "sudo how do I make a custom spawner?",
+        "query": "spawner command setblock 1.21",
+        "wiki_content": (
+            "To place a mob spawner in 1.21:\n"
+            "setblock <x> <y> <z> minecraft:spawner{SpawnData:{entity:{id:\"minecraft:<mob>\"}},Delay:20}\n"
+            "Or use /give for a spawner item."
+        ),
+        "wiki_url": "https://minecraft.wiki/w/Monster_Spawner",
+        "commands": [
+            'setblock ~ ~1 ~ minecraft:spawner{{SpawnData:{{entity:{{id:"minecraft:zombie"}}}},Delay:20}}'
+        ],
+        "reasoning": "Looked up spawner syntax. Using setblock with SpawnData NBT for zombie spawner.",
+    },
+    {
+        "user_request": "sudo what potion effects can I use for a speed build challenge?",
+        "query": "speed and haste effects minecraft 1.21",
+        "wiki_content": (
+            "Relevant effects for speed building:\n"
+            "- minecraft:speed (1-255): Increases movement speed\n"
+            "- minecraft:haste (1-255): Increases mining/attack speed\n"
+            "- minecraft:jump_boost (1-255): Increases jump height\n"
+            "Duration is in seconds. Amplifier 0 = level 1."
+        ),
+        "wiki_url": "https://minecraft.wiki/w/Effect",
+        "commands": [
+            "effect give {player} minecraft:speed 600 2",
+            "effect give {player} minecraft:haste 600 2",
+            "effect give {player} minecraft:jump_boost 600 1",
+        ],
+        "reasoning": "Looked up speed-related effects. Gave speed 3, haste 3, and jump boost 2 for 10 minutes.",
+    },
+    {
+        "user_request": "sudo give me a crossbow with multishot and quick charge",
+        "query": "crossbow enchantments multishot quick charge 1.21",
+        "wiki_content": (
+            "Crossbow enchantments in 1.21:\n"
+            "- Quick Charge (1-3): Reduces reload time\n"
+            "- Multishot (1): Fires 3 arrows at once\n"
+            "- Piercing (1-4): Arrows pass through entities\n"
+            "- Unbreaking (1-3): Durability\n"
+            "- Mending (1): XP repair\n"
+            "Note: Multishot and Piercing are mutually exclusive."
+        ),
+        "wiki_url": "https://minecraft.wiki/w/Crossbow#Enchantments",
+        "commands": [
+            "give {player} minecraft:crossbow[enchantments={{multishot:1,quick_charge:3,unbreaking:3,mending:1}}] 1"
+        ],
+        "reasoning": "Looked up crossbow enchantments. Multishot and piercing are exclusive; chose multishot as requested. Max quick charge.",
+    },
+]
+
+# Handcrafted scenarios for the player-info conversation type. The command
+# templates are filled by generate_all() from a simulated position:
+#   {player}, {x}, {y}, {z}          - player name and base coordinates
+#   {y_1} .. {y_4}                   - y plus 1..4
+#   {x_p3}/{x_m3}, {z_p3}/{z_m3}     - x or z plus/minus 3
+#   {x_p5}/{x_m5}, {z_p5}/{z_m5}     - x or z plus/minus 5
+#   {y_p5}, {y_p10}                  - y plus 5 / plus 10
+#   {z_minus_100}                    - z minus 100
+PLAYER_INFO_EXAMPLES = [
+    {
+        "user_request": "sudo build a diamond block tower where I'm standing",
+        "player": "slingshooter08",
+        "commands": [
+            "setblock {x} {y} {z} minecraft:diamond_block",
+            "setblock {x} {y_1} {z} minecraft:diamond_block",
+            "setblock {x} {y_2} {z} minecraft:diamond_block",
+            "setblock {x} {y_3} {z} minecraft:diamond_block",
+            "setblock {x} {y_4} {z} minecraft:diamond_block",
+        ],
+        "reasoning": "Got player position, then placed 5 diamond blocks in a tower at their location.",
+    },
+    {
+        "user_request": "sudo teleport me 100 blocks north",
+        "player": "slingshooter08",
+        "commands": ["tp {player} {x} {y} {z_minus_100}"],
+        "reasoning": "Got player position, then teleported 100 blocks north (negative Z direction).",
+    },
+    {
+        "user_request": "sudo put a glass dome over my head",
+        "player": "slingshooter08",
+        "commands": [
+            "fill {x_m5} {y} {z_m5} {x_p5} {y_p10} {z_p5} minecraft:glass hollow",
+        ],
+        "reasoning": "Got player position to calculate dome coordinates. Used fill hollow with glass.",
+    },
+    {
+        "user_request": "sudo light up the area around me with glowstone",
+        "player": "slingshooter08",
+        "commands": [
+            "setblock {x_p3} {y_p5} {z} minecraft:glowstone",
+            "setblock {x_m3} {y_p5} {z} minecraft:glowstone",
+            "setblock {x} {y_p5} {z_p3} minecraft:glowstone",
+            "setblock {x} {y_p5} {z_m3} minecraft:glowstone",
+        ],
+        "reasoning": "Got player position, placed glowstone lights at 4 cardinal points above the player.",
+    },
+]
+
+# Handcrafted scenarios for the server-state conversation type. A command
+# containing {p} is expanded by generate_all() into one command per online
+# player of the simulated state; commands without {p} are used verbatim.
+# NOTE(review): the first two entries' reasoning asserts a specific state
+# ("it was nighttime", "weather was rain"), but the simulated state returned
+# in the same conversation is random and may contradict it -- confirm whether
+# the state should be pinned for these entries.
+SERVER_STATE_EXAMPLES = [
+    {
+        "user_request": "sudo make it daytime if it's nighttime",
+        "commands": ["time set day"],
+        "reasoning": "Checked server state: it was nighttime, so set time to day.",
+    },
+    {
+        "user_request": "sudo clear weather if it's raining",
+        "commands": ["weather clear"],
+        "reasoning": "Checked server state: weather was rain, so cleared it.",
+    },
+    {
+        "user_request": "sudo give everyone online a diamond",
+        "commands": [
+            "give {p} minecraft:diamond 1"
+        ],
+        "reasoning": "Checked server state to get online player list, then gave each player a diamond.",
+    },
+    {
+        "user_request": "sudo heal everyone on the server",
+        "commands": [
+            "effect give {p} minecraft:instant_health 1 5"
+        ],
+        "reasoning": "Checked server state for online players, then healed each one.",
+    },
+]
+
+# Requests that map to error scenarios for natural phrasing
+# Maps an ERROR_SCENARIOS entry's "id" to the user request shown in its
+# conversation. generate_all() falls back to "sudo do something" for ids
+# that are missing here.
+ERROR_SCENARIO_REQUESTS = {
+    "missing_prefix": "sudo give me a diamond sword",
+    "old_nbt_enchantments": "sudo give me a max enchanted diamond sword",
+    "invalid_effect_name": "sudo give me haste effect",
+    "wrong_item_bed": "sudo give me a bed",
+    "wrong_item_log": "sudo give me a stack of logs",
+    "count_wrong_position": "sudo give me 64 diamonds",
+    "effect_missing_give": "sudo give me speed",
+    "weather_storm": "sudo make it storm",
+    "gamemode_abbreviation": "sudo put me in creative",
+    "wrong_item_grass": "sudo give me some grass",
+    "summon_no_prefix": "sudo spawn a zombie near me",
+    "old_zombie_pigman": "sudo summon a zombie pigman",
+}
+
+
+# ---------------------------------------------------------------------------
+# Format to Qwen3 chat template string
+# ---------------------------------------------------------------------------
+
+def format_qwen3(messages: List[Dict[str, str]]) -> str:
+    """Convert messages list to Qwen3 chat template format."""
+    parts = []
+    for msg in messages:
+        role = msg["role"]
+        content = msg["content"]
+        parts.append(f"<|im_start|>{role}\n{content}\n<|im_end|>")
+    return "\n".join(parts)
+
+
+# ---------------------------------------------------------------------------
+# Main generation pipeline
+# ---------------------------------------------------------------------------
+
+def load_seed_data() -> List[Dict[str, Any]]:
+    """Load the seed dataset."""
+    examples = []
+    with open(SEED_PATH) as f:
+        for line in f:
+            line = line.strip()
+            if line:
+                examples.append(json.loads(line))
+    return examples
+
+
+def generate_all() -> List[Dict[str, Any]]:
+    """Generate all tool-calling training examples."""
+    random.seed(42)
+    seed_data = load_seed_data()
+    all_examples = []
+    counts = {
+        "command_success": 0,
+        "error_correction": 0,
+        "error_scenario": 0,
+        "wiki_lookup": 0,
+        "player_info_lookup": 0,
+        "server_state_check": 0,
+    }
+
+    # --- 1. Command success examples from seed data ---
+    for ex in seed_data:
+        cat = ex.get("category", "")
+        if cat in ("command_gen", "safety", "troubleshoot"):
+            commands = ex.get("output", {}).get("commands", [])
+            if not commands:
+                continue
+            # Decide mode: if it has a "message" field, treat as god
+            mode = "god" if ex.get("output", {}).get("message") else "sudo"
+            result = gen_command_success(ex, mode=mode)
+            if result:
+                all_examples.append(result)
+                counts["command_success"] += 1
+
+    # --- 2. Error correction from negative_output examples ---
+    for ex in seed_data:
+        neg = ex.get("negative_output")
+        if neg and neg.get("commands"):
+            mode = "god" if ex.get("output", {}).get("message") else "sudo"
+            result = gen_error_correction_from_negative(ex, mode=mode)
+            if result:
+                all_examples.append(result)
+                counts["error_correction"] += 1
+
+    # --- 3. Error scenario examples ---
+    for scenario in ERROR_SCENARIOS:
+        user_req = ERROR_SCENARIO_REQUESTS.get(scenario["id"], "sudo do something")
+        player = random.choice(DEFAULT_PLAYERS)
+        # Generate both sudo and god mode variants for each scenario
+        for mode in ["sudo", "god"]:
+            result = gen_error_scenario(scenario, user_req, player=player, mode=mode)
+            if result:
+                result["id"] += f"-{mode}"
+                all_examples.append(result)
+                counts["error_scenario"] += 1
+
+    # --- 4. Wiki lookup examples ---
+    for wex in WIKI_LOOKUP_EXAMPLES:
+        player = random.choice(DEFAULT_PLAYERS)
+        resolved_cmds = [c.format(player=player) for c in wex["commands"]]
+        result = gen_wiki_lookup(
+            user_request=wex["user_request"],
+            query=wex["query"],
+            wiki_content=wex["wiki_content"],
+            wiki_url=wex["wiki_url"],
+            resulting_commands=resolved_cmds,
+            reasoning=wex["reasoning"],
+            player=player,
+        )
+        if result:
+            all_examples.append(result)
+            counts["wiki_lookup"] += 1
+
+    # --- 5. Player info lookup examples ---
+    for pex in PLAYER_INFO_EXAMPLES:
+        player = pex.get("player", "slingshooter08")
+
+        # We need to generate plausible coordinates for the commands
+        pos = simulate_player_info(player)["position"]
+        x, y, z = pos["x"], pos["y"], pos["z"]
+
+        # Build command templates with coordinates
+        resolved_cmds = []
+        for cmd in pex["commands"]:
+            resolved = cmd.format(
+                player=player, x=x, y=y, z=z,
+                y_1=y+1, y_2=y+2, y_3=y+3, y_4=y+4,
+                z_minus_100=z-100,
+                x_m5=x-5, x_p5=x+5, z_m5=z-5, z_p5=z+5,
+                y_p5=y+5, y_p10=y+10,
+                x_p3=x+3, x_m3=x-3, z_p3=z+3, z_m3=z-3,
+            )
+            resolved_cmds.append(resolved)
+
+        messages = [build_system_message("sudo")]
+        messages.append(build_user_message(pex["user_request"]))
+        messages.append(build_tool_call("world.player_info", {"player": player}))
+        messages.append(build_tool_result({
+            "health": 20.0,
+            "position": pos,
+            "inventory_summary": "Diamond gear, various items",
+        }))
+
+        for cmd in resolved_cmds:
+            messages.append(build_tool_call("rcon.execute", {"command": cmd}))
+            messages.append(build_tool_result({
+                "success": True,
+                "result": simulate_rcon_success(cmd, player),
+            }))
+
+        final = {"risk_level": 3, "commands": resolved_cmds, "reasoning": pex["reasoning"]}
+        messages.append(build_assistant_final(final))
+
+        result = {
+            "id": f"tool-playerinfo-{hash(pex['user_request']) % 10000:04d}",
+            "source": "tool_training",
+            "type": "player_info_lookup",
+            "messages": messages,
+        }
+        all_examples.append(result)
+        counts["player_info_lookup"] += 1
+
+    # --- 6. Server state check examples ---
+    for sex in SERVER_STATE_EXAMPLES:
+        state = simulate_server_state()
+        players = state["online_players"]
+
+        messages = [build_system_message("sudo")]
+        messages.append(build_user_message(sex["user_request"]))
+        messages.append(build_tool_call("world.server_state", {}))
+        messages.append(build_tool_result(state))
+
+        # Resolve commands that reference {p} for each player
+        resolved_cmds = []
+        for cmd in sex["commands"]:
+            if "{p}" in cmd:
+                for p in players:
+                    resolved_cmds.append(cmd.format(p=p))
+            else:
+                resolved_cmds.append(cmd)
+
+        for cmd in resolved_cmds:
+            messages.append(build_tool_call("rcon.execute", {"command": cmd}))
+            messages.append(build_tool_result({
+                "success": True,
+                "result": simulate_rcon_success(cmd, players[0] if players else "player"),
+            }))
+
+        final = {"risk_level": 3, "commands": resolved_cmds, "reasoning": sex["reasoning"]}
+        messages.append(build_assistant_final(final))
+
+        result = {
+            "id": f"tool-serverstate-{hash(sex['user_request']) % 10000:04d}",
+            "source": "tool_training",
+            "type": "server_state_check",
+            "messages": messages,
+        }
+        all_examples.append(result)
+        counts["server_state_check"] += 1
+
+    return all_examples, counts
+
+
+def main():
+    print("=" * 60)
+    print("Tool-Calling Training Data Generator")
+    print("=" * 60)
+    print(f"\nSeed dataset: {SEED_PATH}")
+    print(f"Output:       {OUTPUT_PATH}")
+
+    if not SEED_PATH.exists():
+        print(f"\nERROR: Seed dataset not found at {SEED_PATH}")
+        sys.exit(1)
+
+    all_examples, counts = generate_all()
+
+    # Write output
+    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
+    with open(OUTPUT_PATH, "w") as f:
+        for ex in all_examples:
+            # Store both the structured messages and the formatted Qwen3 text
+            ex["qwen3_text"] = format_qwen3(ex["messages"])
+            f.write(json.dumps(ex) + "\n")
+
+    # Summary
+    total = len(all_examples)
+    print(f"\nGenerated {total} tool-calling training examples:\n")
+    print(f"  {'Type':<25} {'Count':>6}")
+    print(f"  {'-'*25} {'-'*6}")
+    for typ, count in sorted(counts.items()):
+        print(f"  {typ:<25} {count:>6}")
+    print(f"  {'-'*25} {'-'*6}")
+    print(f"  {'TOTAL':<25} {total:>6}")
+
+    # Validate a sample
+    print(f"\n--- Sample validation ---")
+    sample = random.choice(all_examples)
+    print(f"  ID:   {sample['id']}")
+    print(f"  Type: {sample['type']}")
+    print(f"  Turns: {len(sample['messages'])}")
+    roles = [m['role'] for m in sample['messages']]
+    print(f"  Roles: {' -> '.join(roles)}")
+    print(f"\n  Qwen3 text preview (first 500 chars):")
+    print(f"  {sample['qwen3_text'][:500]}")
+
+    print(f"\nOutput written to: {OUTPUT_PATH}")
+
+
+if __name__ == "__main__":
+    main()