Mortdecai/training/scripts/generate_tool_training.py

#!/usr/bin/env python3
"""
Generate multi-turn tool-calling training data for the Minecraft AI God model.

Reads data/processed/seed_dataset.jsonl and produces data/processed/tool_training.jsonl
with Qwen3-format multi-turn conversations that teach the model to:
  1. Call rcon.execute and handle success/error results
  2. Self-correct on RCON errors (retry with fixed command)
  3. Use minecraft.wiki_lookup when unsure about syntax
  4. Use world.player_info / world.server_state for context-dependent actions

Usage:
    python training/scripts/generate_tool_training.py
"""

import json
import random
import sys
import zlib
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

# Ensure project root is importable.
# Three .parent hops from this file's resolved path are taken as the project
# root (the usage line in the module docstring places this script under
# training/scripts/).
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
# Inserted at index 0 so the `agent.*` imports that follow resolve against
# this checkout first.
sys.path.insert(0, str(PROJECT_ROOT))

from agent.tools.tool_schemas import (
    QWEN3_TOOLS,
    RCON_ERROR_PATTERNS,
    ERROR_SCENARIOS,
    qwen3_tools_block,
)
from agent.prompts.system_prompts import (
    SUDO_SYSTEM_PROMPT,
    GOD_SYSTEM_PROMPT,
    SYNTAX_RULES,
    RISK_GRADIENT,
)

# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------
# Input: seed examples, one JSON object per line (read by load_seed_data).
SEED_PATH = PROJECT_ROOT / "data" / "processed" / "seed_dataset.jsonl"
# Output: generated conversations, one JSON object per line (written by main).
OUTPUT_PATH = PROJECT_ROOT / "data" / "processed" / "tool_training.jsonl"

# ---------------------------------------------------------------------------
# System prompt with tools block for Qwen3 format
# ---------------------------------------------------------------------------
# Text returned by qwen3_tools_block(); computed once at import time and
# appended to the end of both tool-enabled system prompts.
TOOLS_BLOCK = qwen3_tools_block()

# System prompt for "sudo" (admin command-translator) conversations.
# Layout: role description, permission note, tool-usage guidance, the required
# final JSON shape (risk_level / commands / reasoning), then SYNTAX_RULES,
# RISK_GRADIENT and the tools block.
SUDO_TOOL_SYSTEM = (
    "You are a Minecraft 1.21 command translator for a server admin. "
    "You receive natural language requests and return valid RCON commands.\n\n"
    "PERMISSION LEVEL: 4 (generous). Only refuse level 0-1 actions.\n\n"
    "You have access to tools. Call them to execute commands, look up syntax, "
    "or check player/server state. When a command fails, analyze the error "
    "and retry with a corrected command.\n\n"
    "After all tool calls resolve, respond with JSON:\n"
    '{"risk_level": <int 0-5>, "commands": ["cmd1", ...], "reasoning": "why"}\n\n'
    + SYNTAX_RULES + RISK_GRADIENT + "\n" + TOOLS_BLOCK
)

# System prompt for "god" (prayer-answering persona) conversations.
# Unlike the sudo prompt, the required final JSON also carries a "message"
# field, and RISK_GRADIENT is not appended (only SYNTAX_RULES and the tools
# block are).
GOD_TOOL_SYSTEM = (
    "You are God in a Minecraft server. Players pray to you and you respond "
    "with divine judgment.\n\n"
    "You have access to tools. Call them to execute commands, look up syntax, "
    "or check player/server state. When a command fails, analyze the error "
    "and retry with a corrected command.\n\n"
    "After all tool calls resolve, respond with JSON:\n"
    '{"risk_level": <int 0-5>, "message": "Your divine response", '
    '"commands": ["cmd1", ...], "reasoning": "why"}\n\n'
    + SYNTAX_RULES + "\n" + TOOLS_BLOCK
)

# ---------------------------------------------------------------------------
# Player names used across the dataset
# ---------------------------------------------------------------------------
# Pool of player names: sampled for the simulated "online_players" list and
# picked at random as the acting player for scenario and wiki examples.
DEFAULT_PLAYERS = ["slingshooter08", "Ace13245", "TheBigBoss", "xXDragonSlayerXx"]

# ---------------------------------------------------------------------------
# Simulated success responses for common commands
# ---------------------------------------------------------------------------

def simulate_rcon_success(command: str, player: str = "slingshooter08") -> str:
    """Generate a plausible RCON success message for a command.

    The message is chosen from the command's leading keyword(s). It is a
    simulation for training data, not a reproduction of real server output.

    Args:
        command: The RCON command text; surrounding whitespace is ignored.
        player: Name reported in the message when the command does not name
            a concrete player itself (e.g. it uses a selector such as ``@s``).

    Returns:
        A human-readable success string. Commands with an unrecognised
        prefix get ``"Command executed successfully"``.
    """
    cmd = command.strip()
    parts = cmd.split()

    def named_target(index: int) -> str:
        """Return the token at *index* if it is a literal name, else *player*."""
        # Selectors (@s, @a, @p, ...) are not names; a simulated reply should
        # mention an actual player, so fall back to the caller-supplied one.
        if len(parts) > index and not parts[index].startswith("@"):
            return parts[index]
        return player

    if cmd.startswith("give "):
        # give <target> <item>[components] [count]
        item = parts[2] if len(parts) > 2 else "item"
        count = parts[3] if len(parts) > 3 else "1"
        # Drop any [component] suffix and the namespace before prettifying.
        base_item = item.split("[")[0].replace("minecraft:", "")
        item_display = base_item.replace("_", " ").title().strip()
        return f"Gave {count} [{item_display}] to {named_target(1)}"
    if cmd.startswith("effect give "):
        # effect give <target> <effect> ...
        eff_name = parts[3].replace("minecraft:", "") if len(parts) > 3 else "effect"
        return f"Applied effect {eff_name} to {named_target(2)}"
    if cmd.startswith("effect clear "):
        return f"Removed every effect from {named_target(2)}"
    if cmd.startswith(("tp ", "teleport ")):
        return f"Teleported {player}"
    if cmd.startswith("weather "):
        weather_type = parts[1] if len(parts) > 1 else "clear"
        return f"Changing to {weather_type}"
    if cmd.startswith("time set "):
        return f"Set the time to {cmd.split('time set ', 1)[1]}"
    if cmd.startswith("gamemode "):
        mode = parts[1] if len(parts) > 1 else "creative"
        return f"Set own game mode to {mode.title()}"

    # Remaining commands map a prefix straight to a canned reply.
    canned_replies = (
        ("kill ", "Killed entity"),
        ("summon ", "Summoned new entity"),
        ("setblock ", "Changed the block at ..."),
        ("fill ", "Successfully filled ... blocks"),
        ("clear ", f"Removed items from {player}"),
        ("xp ", f"Gave experience to {player}"),
        ("execute ", "Executed command"),
        ("playsound ", f"Played sound to {player}"),
        ("title ", "Title displayed"),
        ("particle ", "Showing particle"),
        ("enchant ", f"Enchanted item for {player}"),
        ("spreadplayers ", "Spread players"),
    )
    for prefix, reply in canned_replies:
        if cmd.startswith(prefix):
            return reply
    return "Command executed successfully"


def simulate_player_info(player: str) -> Dict[str, Any]:
    """Produce a randomised stand-in for a ``world.player_info`` result.

    The ``player`` argument is accepted for call-site symmetry but does not
    influence the generated values. Random draws happen in a fixed order
    (health, x, y, z, inventory) so output is reproducible under a seed.
    """
    inventories = [
        "Diamond sword, iron armor, 32 steak, 14 torches",
        "Netherite pickaxe, diamond armor, 64 cobblestone, bow with 28 arrows",
        "Stone tools, leather armor, 12 bread, 3 oak logs",
        "Full diamond gear, 8 golden apples, ender pearls x16",
        "Iron sword, chainmail chest, 24 cooked porkchop, shield",
    ]
    health = round(random.uniform(10.0, 20.0), 1)
    pos_x = random.randint(-500, 500)
    pos_y = random.randint(60, 120)
    pos_z = random.randint(-500, 500)
    inventory = random.choice(inventories)
    return {
        "health": health,
        "position": {"x": pos_x, "y": pos_y, "z": pos_z},
        "inventory_summary": inventory,
    }


def simulate_server_state() -> Dict[str, Any]:
    """Produce a randomised stand-in for a ``world.server_state`` result.

    A tick count in [0, 24000] is drawn and bucketed into a coarse
    time-of-day label; weather and a 1-3 player online roster (sampled from
    DEFAULT_PLAYERS) are drawn afterwards. The world border is a constant.
    """
    ticks = random.randint(0, 24000)
    # First bucket whose upper bound exceeds the tick count wins.
    for upper_bound, label in ((6000, "morning"), (12000, "noon"), (18000, "evening")):
        if ticks < upper_bound:
            tod = label
            break
    else:
        tod = "night"

    weather = random.choice(["clear", "rain", "thunder"])
    roster_size = random.randint(1, 3)
    roster = random.sample(DEFAULT_PLAYERS, k=roster_size)
    return {
        "time_of_day": tod,
        "weather": weather,
        "online_players": roster,
        "world_border": 60000000.0,
    }


# ---------------------------------------------------------------------------
# Conversation builders
# ---------------------------------------------------------------------------

def build_system_message(mode: str) -> Dict[str, str]:
    """Build the system turn: the god prompt for ``"god"``, else the sudo prompt."""
    prompt = GOD_TOOL_SYSTEM if mode == "god" else SUDO_TOOL_SYSTEM
    return {"role": "system", "content": prompt}


def build_user_message(user_text: str, context: Optional[Dict] = None) -> Dict[str, str]:
    """Build the user turn, optionally suffixed with the online-player roster.

    The roster line is added only when ``context`` is non-empty and carries a
    non-empty ``"online_players"`` list.
    """
    roster = context.get("online_players") if context else None
    if not roster:
        return {"role": "user", "content": user_text}
    names = ", ".join(roster)
    return {
        "role": "user",
        "content": f"{user_text}\n\n[Server context: players online: {names}]",
    }


def build_tool_call(tool_name: str, arguments: Dict[str, Any]) -> Dict[str, str]:
    """Build an assistant turn whose content is one ``<tool_call>`` block.

    The block wraps a JSON object with the keys ``name`` and ``arguments``.
    """
    payload = {"name": tool_name, "arguments": arguments}
    body = "\n".join(("<tool_call>", json.dumps(payload), "</tool_call>"))
    return {"role": "assistant", "content": body}


def build_tool_result(result: Dict[str, Any]) -> Dict[str, str]:
    """Build a ``tool`` turn carrying the JSON-serialised result."""
    serialized = json.dumps(result)
    return dict(role="tool", content=serialized)


def build_assistant_final(response: Dict[str, Any]) -> Dict[str, str]:
    """Build the closing assistant turn: the response dict serialised as JSON."""
    serialized = json.dumps(response)
    return dict(role="assistant", content=serialized)


# ---------------------------------------------------------------------------
# Example generators
# ---------------------------------------------------------------------------

def gen_command_success(example: Dict[str, Any], mode: str = "sudo") -> Optional[Dict[str, Any]]:
    """
    Generate a multi-turn conversation for a seed example where every command
    succeeds on the first try.

    Conversation shape: system, user, then one (rcon.execute call, success
    result) pair per command, then the final JSON answer.

    Args:
        example: Seed record; reads ``input.user_message``,
            ``input.server_context``, ``output.commands``, ``output.reasoning``,
            ``output.message``, ``metadata.risk_level`` and ``id``.
        mode: ``"god"`` selects the god system prompt and forces a ``message``
            field in the final answer; anything else uses the sudo prompt.

    Returns:
        The training record, or ``None`` when the example has no commands.
    """
    inp = example["input"]
    out = example["output"]
    commands = out.get("commands") or []
    if not commands:
        return None

    # `or {}` guards against keys that are present but null in the seed JSON.
    server_context = inp.get("server_context") or {}
    player = (server_context.get("online_players") or ["slingshooter08"])[0]
    risk_level = (example.get("metadata") or {}).get("risk_level", 3)

    messages = [
        build_system_message(mode),
        build_user_message(inp["user_message"], server_context),
    ]

    # Execute each command via rcon.execute
    for cmd in commands:
        messages.append(build_tool_call("rcon.execute", {"command": cmd}))
        success_msg = simulate_rcon_success(cmd, player)
        messages.append(build_tool_result({"success": True, "result": success_msg}))

    # Final response
    final = {"risk_level": risk_level, "commands": commands, "reasoning": out.get("reasoning", "")}
    if mode == "god" or out.get("message"):
        # God mode must always carry a message; substitute the stock line
        # when the seed's message is missing or empty.
        final["message"] = out.get("message") or "It is done."
    messages.append(build_assistant_final(final))

    return {
        "id": f"tool-{example['id']}-success",
        "source": "tool_training",
        "type": "command_success",
        "messages": messages,
    }


def gen_error_correction_from_negative(example: Dict[str, Any], mode: str = "sudo") -> Optional[Dict[str, Any]]:
    """
    Generate a multi-turn conversation from an example that has negative_output:
    model tries wrong command -> error -> retries with correct command -> success.

    Only the first command of ``negative_output.commands`` is replayed as the
    failed attempt; every command in ``output.commands`` is then executed
    successfully.

    Args:
        example: Seed record; in addition to the fields used for success
            examples it reads ``negative_output.commands`` and
            ``negative_output.error``.
        mode: ``"god"`` selects the god system prompt and forces a ``message``
            field in the final answer; anything else uses the sudo prompt.

    Returns:
        The training record, or ``None`` when there is no negative output or
        either command list is empty.
    """
    inp = example["input"]
    out = example["output"]
    neg = example.get("negative_output")
    if not neg:
        return None

    wrong_commands = neg.get("commands") or []
    correct_commands = out.get("commands") or []
    if not wrong_commands or not correct_commands:
        return None

    # `or {}` guards against keys that are present but null in the seed JSON.
    server_context = inp.get("server_context") or {}
    player = (server_context.get("online_players") or ["slingshooter08"])[0]
    error_text = neg.get("error", "Unknown or incomplete command")
    risk_level = (example.get("metadata") or {}).get("risk_level", 3)

    messages = [
        build_system_message(mode),
        build_user_message(inp["user_message"], server_context),
    ]

    # First attempt: wrong command fails
    wrong_cmd = wrong_commands[0]
    messages.append(build_tool_call("rcon.execute", {"command": wrong_cmd}))
    messages.append(build_tool_result({"success": False, "result": error_text}))

    # Retry with correct command(s)
    for cmd in correct_commands:
        messages.append(build_tool_call("rcon.execute", {"command": cmd}))
        success_msg = simulate_rcon_success(cmd, player)
        messages.append(build_tool_result({"success": True, "result": success_msg}))

    # Final response with reasoning about the correction. The failure prefix
    # is added only when the seed supplied a real error string, not when the
    # generic default above was used.
    reasoning = out.get("reasoning", "")
    if neg.get("error"):
        reasoning = f"First attempt failed: {neg['error']}. {reasoning}"

    final = {"risk_level": risk_level, "commands": correct_commands, "reasoning": reasoning}
    if mode == "god" or out.get("message"):
        # God mode must always carry a message; substitute the stock line
        # when the seed's message is missing or empty.
        final["message"] = out.get("message") or "It is done."
    messages.append(build_assistant_final(final))

    return {
        "id": f"tool-{example['id']}-errorcorrect",
        "source": "tool_training",
        "type": "error_correction",
        "messages": messages,
    }


def gen_error_scenario(scenario: Dict[str, Any], user_request: str,
                       player: str = "slingshooter08", mode: str = "sudo") -> Dict[str, Any]:
    """
    Build a fail-then-fix conversation from one predefined error scenario.

    The scenario's ``wrong_command`` and ``correct_command`` templates are
    filled with ``player``; the wrong one is answered with the scenario's
    ``error_message`` and the corrected one with a simulated success. The
    final answer always reports risk level 3 and, in god mode, a stock message.
    """
    failing_cmd = scenario["wrong_command"].format(player=player)
    fixed_cmd = scenario["correct_command"].format(player=player)

    answer = {
        "risk_level": 3,
        "commands": [fixed_cmd],
        "reasoning": scenario["reasoning"],
    }
    if mode == "god":
        answer["message"] = "It is done."

    conversation = [
        build_system_message(mode),
        build_user_message(user_request),
        # Wrong attempt
        build_tool_call("rcon.execute", {"command": failing_cmd}),
        build_tool_result({"success": False, "result": scenario["error_message"]}),
        # Corrected attempt
        build_tool_call("rcon.execute", {"command": fixed_cmd}),
        build_tool_result({"success": True, "result": simulate_rcon_success(fixed_cmd, player)}),
        build_assistant_final(answer),
    ]

    return {
        "id": f"tool-scenario-{scenario['id']}",
        "source": "tool_training",
        "type": "error_scenario",
        "messages": conversation,
    }


def gen_wiki_lookup(user_request: str, query: str, wiki_content: str,
                    wiki_url: str, resulting_commands: List[str],
                    reasoning: str, player: str = "slingshooter08",
                    mode: str = "sudo") -> Dict[str, Any]:
    """
    Generate a conversation where the model looks up wiki info before executing.

    Conversation shape: system, user, a ``minecraft.wiki_lookup`` call with its
    canned result, one (rcon.execute call, success result) pair per command,
    then the final JSON answer (risk level 3).

    Args:
        user_request: The user's message text.
        query: Query string passed to the wiki tool call.
        wiki_content: Text returned as the tool result's ``content``.
        wiki_url: URL returned as the tool result's ``url``.
        resulting_commands: Fully resolved commands to execute.
        reasoning: Reasoning string for the final answer.
        player: Name used in simulated success messages.
        mode: ``"god"`` selects the god prompt and adds a stock ``message``.

    Returns:
        The training record. Its id is derived from a CRC32 of the request so
        it is identical across runs; the built-in ``hash()`` of a ``str`` is
        salted per process and is therefore not used.
    """
    messages = [build_system_message(mode)]
    messages.append(build_user_message(user_request))

    # Wiki lookup
    messages.append(build_tool_call("minecraft.wiki_lookup", {"query": query}))
    messages.append(build_tool_result({"content": wiki_content, "url": wiki_url}))

    # Execute commands
    for cmd in resulting_commands:
        messages.append(build_tool_call("rcon.execute", {"command": cmd}))
        success_msg = simulate_rcon_success(cmd, player)
        messages.append(build_tool_result({"success": True, "result": success_msg}))

    final = {"risk_level": 3, "commands": resulting_commands, "reasoning": reasoning}
    if mode == "god":
        # The god system prompt requires a "message" field in the final JSON.
        final["message"] = "It is done."
    messages.append(build_assistant_final(final))

    request_digest = zlib.crc32(user_request.encode("utf-8")) % 10000
    return {
        "id": f"tool-wiki-{request_digest:04d}",
        "source": "tool_training",
        "type": "wiki_lookup",
        "messages": messages,
    }


def gen_player_info_lookup(user_request: str, player: str,
                           resulting_commands: List[str], reasoning: str,
                           mode: str = "sudo") -> Dict[str, Any]:
    """
    Generate a conversation where the model checks player info before acting.

    Conversation shape: system, user, a ``world.player_info`` call with a
    simulated result, one (rcon.execute call, success result) pair per
    resolved command, then the final JSON answer (risk level 3).

    Args:
        user_request: The user's message text.
        player: Player whose info is looked up and who is named in results.
        resulting_commands: ``str.format`` templates. Available placeholders:
            ``player``, ``x``, ``y``, ``z`` plus the derived offsets used by
            PLAYER_INFO_EXAMPLES (``y_1``..``y_4``, ``y_p5``, ``y_p10``,
            ``z_minus_100``, ``x_m5``/``x_p5``/``x_m3``/``x_p3``,
            ``z_m5``/``z_p5``/``z_m3``/``z_p3``).
        reasoning: Reasoning string for the final answer.
        mode: ``"god"`` selects the god prompt and adds a stock ``message``.

    Returns:
        The training record. Its id is derived from a CRC32 of the request so
        it is identical across runs (``hash()`` of a ``str`` is salted per
        process).
    """
    messages = [build_system_message(mode)]
    messages.append(build_user_message(user_request))

    # Get player info
    messages.append(build_tool_call("world.player_info", {"player": player}))
    pinfo = simulate_player_info(player)
    messages.append(build_tool_result(pinfo))

    # Use the position in commands (substitute coordinates). Every offset
    # placeholder used by the handcrafted templates is supplied so formatting
    # them does not raise KeyError.
    pos = pinfo["position"]
    x, y, z = pos["x"], pos["y"], pos["z"]
    placeholders = {
        "player": player, "x": x, "y": y, "z": z,
        "y_1": y + 1, "y_2": y + 2, "y_3": y + 3, "y_4": y + 4,
        "z_minus_100": z - 100,
        "x_m5": x - 5, "x_p5": x + 5, "z_m5": z - 5, "z_p5": z + 5,
        "y_p5": y + 5, "y_p10": y + 10,
        "x_p3": x + 3, "x_m3": x - 3, "z_p3": z + 3, "z_m3": z - 3,
    }
    resolved_cmds = [cmd.format(**placeholders) for cmd in resulting_commands]

    for cmd in resolved_cmds:
        messages.append(build_tool_call("rcon.execute", {"command": cmd}))
        success_msg = simulate_rcon_success(cmd, player)
        messages.append(build_tool_result({"success": True, "result": success_msg}))

    final = {"risk_level": 3, "commands": resolved_cmds, "reasoning": reasoning}
    if mode == "god":
        # The god system prompt requires a "message" field in the final JSON.
        final["message"] = "It is done."
    messages.append(build_assistant_final(final))

    request_digest = zlib.crc32(user_request.encode("utf-8")) % 10000
    return {
        "id": f"tool-playerinfo-{request_digest:04d}",
        "source": "tool_training",
        "type": "player_info_lookup",
        "messages": messages,
    }


def gen_server_state_check(user_request: str, resulting_commands: List[str],
                           reasoning: str, player: str = "slingshooter08",
                           mode: str = "sudo") -> Dict[str, Any]:
    """
    Generate a conversation where the model checks server state before acting.

    Conversation shape: system, user, a ``world.server_state`` call with a
    simulated result, one (rcon.execute call, success result) pair per
    resolved command, then the final JSON answer (risk level 3).

    Args:
        user_request: The user's message text.
        resulting_commands: Commands to execute. A command containing the
            ``{p}`` placeholder is expanded once per simulated online player;
            other commands are used verbatim.
        reasoning: Reasoning string for the final answer.
        player: Name used in success messages for commands without ``{p}``.
        mode: ``"god"`` selects the god prompt and adds a stock ``message``.

    Returns:
        The training record. Its id is derived from a CRC32 of the request so
        it is identical across runs (``hash()`` of a ``str`` is salted per
        process).
    """
    messages = [build_system_message(mode)]
    messages.append(build_user_message(user_request))

    # Check server state
    messages.append(build_tool_call("world.server_state", {}))
    state = simulate_server_state()
    messages.append(build_tool_result(state))

    # Pair each concrete command with the player its success message should
    # name, so per-player expansions report the right recipient.
    resolved: List[Any] = []
    for template in resulting_commands:
        if "{p}" in template:
            resolved.extend((template.format(p=p), p) for p in state["online_players"])
        else:
            resolved.append((template, player))

    for cmd, target in resolved:
        messages.append(build_tool_call("rcon.execute", {"command": cmd}))
        success_msg = simulate_rcon_success(cmd, target)
        messages.append(build_tool_result({"success": True, "result": success_msg}))

    resolved_cmds = [cmd for cmd, _ in resolved]
    final = {"risk_level": 3, "commands": resolved_cmds, "reasoning": reasoning}
    if mode == "god":
        # The god system prompt requires a "message" field in the final JSON.
        final["message"] = "It is done."
    messages.append(build_assistant_final(final))

    request_digest = zlib.crc32(user_request.encode("utf-8")) % 10000
    return {
        "id": f"tool-serverstate-{request_digest:04d}",
        "source": "tool_training",
        "type": "server_state_check",
        "messages": messages,
    }


# ---------------------------------------------------------------------------
# Handcrafted wiki/player/server lookup examples
# ---------------------------------------------------------------------------

# Handcrafted wiki-lookup conversations. Each entry provides:
#   user_request - the user's message text
#   query        - argument for the minecraft.wiki_lookup tool call
#   wiki_content - canned text returned as the tool result
#   wiki_url     - canned URL returned with the tool result
#   commands     - str.format templates; only {player} is substituted, so
#                  literal braces in item components / NBT are doubled ({{ }})
#   reasoning    - reasoning string for the final answer
WIKI_LOOKUP_EXAMPLES = [
    {
        "user_request": "sudo enchant my trident with the best enchants",
        "query": "trident enchantments 1.21",
        "wiki_content": (
            "Trident enchantments in Minecraft 1.21:\n"
            "- Loyalty (1-3): Trident returns after being thrown\n"
            "- Channeling (1): Summons lightning during thunderstorms\n"
            "- Riptide (1-3): Launches player when thrown in water/rain\n"
            "- Impaling (1-5): Extra damage to aquatic mobs\n"
            "- Unbreaking (1-3): Increases durability\n"
            "- Mending (1): Repairs with XP\n"
            "Note: Loyalty and Riptide are mutually exclusive. Channeling and Riptide are mutually exclusive."
        ),
        "wiki_url": "https://minecraft.wiki/w/Trident#Enchantments",
        "commands": [
            "give {player} minecraft:trident[enchantments={{loyalty:3,channeling:1,impaling:5,unbreaking:3,mending:1}}] 1"
        ],
        "reasoning": "Looked up trident enchantments. Used loyalty+channeling (not riptide, since loyalty and riptide are mutually exclusive). Max levels on all.",
    },
    {
        "user_request": "sudo what's the command to set a custom player head?",
        "query": "player head command give 1.21",
        "wiki_content": (
            "To give a player head with a custom skin in 1.21:\n"
            "give <player> minecraft:player_head[profile={name:\"<player_name>\"}] 1\n"
            "This creates a head with the skin of the specified player."
        ),
        "wiki_url": "https://minecraft.wiki/w/Player_Head",
        "commands": [
            'give {player} minecraft:player_head[profile={{name:"{player}"}}] 1'
        ],
        "reasoning": "Looked up player head syntax for 1.21 component format.",
    },
    {
        "user_request": "sudo how do I make a custom spawner?",
        "query": "spawner command setblock 1.21",
        "wiki_content": (
            "To place a mob spawner in 1.21:\n"
            "setblock <x> <y> <z> minecraft:spawner{SpawnData:{entity:{id:\"minecraft:<mob>\"}},Delay:20}\n"
            "Or use /give for a spawner item."
        ),
        "wiki_url": "https://minecraft.wiki/w/Monster_Spawner",
        "commands": [
            'setblock ~ ~1 ~ minecraft:spawner{{SpawnData:{{entity:{{id:"minecraft:zombie"}}}},Delay:20}}'
        ],
        "reasoning": "Looked up spawner syntax. Using setblock with SpawnData NBT for zombie spawner.",
    },
    {
        "user_request": "sudo what potion effects can I use for a speed build challenge?",
        "query": "speed and haste effects minecraft 1.21",
        "wiki_content": (
            "Relevant effects for speed building:\n"
            "- minecraft:speed (1-255): Increases movement speed\n"
            "- minecraft:haste (1-255): Increases mining/attack speed\n"
            "- minecraft:jump_boost (1-255): Increases jump height\n"
            "Duration is in seconds. Amplifier 0 = level 1."
        ),
        "wiki_url": "https://minecraft.wiki/w/Effect",
        "commands": [
            "effect give {player} minecraft:speed 600 2",
            "effect give {player} minecraft:haste 600 2",
            "effect give {player} minecraft:jump_boost 600 1",
        ],
        "reasoning": "Looked up speed-related effects. Gave speed 3, haste 3, and jump boost 2 for 10 minutes.",
    },
    {
        "user_request": "sudo give me a crossbow with multishot and quick charge",
        "query": "crossbow enchantments multishot quick charge 1.21",
        "wiki_content": (
            "Crossbow enchantments in 1.21:\n"
            "- Quick Charge (1-3): Reduces reload time\n"
            "- Multishot (1): Fires 3 arrows at once\n"
            "- Piercing (1-4): Arrows pass through entities\n"
            "- Unbreaking (1-3): Durability\n"
            "- Mending (1): XP repair\n"
            "Note: Multishot and Piercing are mutually exclusive."
        ),
        "wiki_url": "https://minecraft.wiki/w/Crossbow#Enchantments",
        "commands": [
            "give {player} minecraft:crossbow[enchantments={{multishot:1,quick_charge:3,unbreaking:3,mending:1}}] 1"
        ],
        "reasoning": "Looked up crossbow enchantments. Multishot and piercing are exclusive; chose multishot as requested. Max quick charge.",
    },
]

# Handcrafted conversations that need the player's position first.
# "commands" are str.format templates. generate_all fills them with the
# simulated position and these derived placeholders:
#   {x} {y} {z}                      - the position itself
#   {y_1}..{y_4}, {y_p5}, {y_p10}    - y plus the given offset
#   {z_minus_100}                    - z - 100
#   {x_m5}/{x_p5}, {z_m5}/{z_p5}     - x or z minus / plus 5
#   {x_m3}/{x_p3}, {z_m3}/{z_p3}     - x or z minus / plus 3
#   {player}                         - the entry's "player"
PLAYER_INFO_EXAMPLES = [
    {
        "user_request": "sudo build a diamond block tower where I'm standing",
        "player": "slingshooter08",
        "commands": [
            "setblock {x} {y} {z} minecraft:diamond_block",
            "setblock {x} {y_1} {z} minecraft:diamond_block",
            "setblock {x} {y_2} {z} minecraft:diamond_block",
            "setblock {x} {y_3} {z} minecraft:diamond_block",
            "setblock {x} {y_4} {z} minecraft:diamond_block",
        ],
        "reasoning": "Got player position, then placed 5 diamond blocks in a tower at their location.",
    },
    {
        "user_request": "sudo teleport me 100 blocks north",
        "player": "slingshooter08",
        "commands": ["tp {player} {x} {y} {z_minus_100}"],
        "reasoning": "Got player position, then teleported 100 blocks north (negative Z direction).",
    },
    {
        "user_request": "sudo put a glass dome over my head",
        "player": "slingshooter08",
        "commands": [
            "fill {x_m5} {y} {z_m5} {x_p5} {y_p10} {z_p5} minecraft:glass hollow",
        ],
        "reasoning": "Got player position to calculate dome coordinates. Used fill hollow with glass.",
    },
    {
        "user_request": "sudo light up the area around me with glowstone",
        "player": "slingshooter08",
        "commands": [
            "setblock {x_p3} {y_p5} {z} minecraft:glowstone",
            "setblock {x_m3} {y_p5} {z} minecraft:glowstone",
            "setblock {x} {y_p5} {z_p3} minecraft:glowstone",
            "setblock {x} {y_p5} {z_m3} minecraft:glowstone",
        ],
        "reasoning": "Got player position, placed glowstone lights at 4 cardinal points above the player.",
    },
]

# Handcrafted conversations that check server state first.
# A command containing {p} is expanded by generate_all into one command per
# simulated online player; commands without {p} are used verbatim.
# NOTE(review): the "reasoning" strings assert a specific state ("it was
# nighttime", "weather was rain"), but the state shown in the conversation is
# drawn at random, so the two can disagree - confirm whether that is intended.
SERVER_STATE_EXAMPLES = [
    {
        "user_request": "sudo make it daytime if it's nighttime",
        "commands": ["time set day"],
        "reasoning": "Checked server state: it was nighttime, so set time to day.",
    },
    {
        "user_request": "sudo clear weather if it's raining",
        "commands": ["weather clear"],
        "reasoning": "Checked server state: weather was rain, so cleared it.",
    },
    {
        "user_request": "sudo give everyone online a diamond",
        "commands": [
            "give {p} minecraft:diamond 1"
        ],
        "reasoning": "Checked server state to get online player list, then gave each player a diamond.",
    },
    {
        "user_request": "sudo heal everyone on the server",
        "commands": [
            "effect give {p} minecraft:instant_health 1 5"
        ],
        "reasoning": "Checked server state for online players, then healed each one.",
    },
]

# Natural-language user request for each predefined error scenario, keyed by
# the scenario's "id". generate_all falls back to "sudo do something" for any
# scenario id that has no entry here.
ERROR_SCENARIO_REQUESTS = {
    "missing_prefix": "sudo give me a diamond sword",
    "old_nbt_enchantments": "sudo give me a max enchanted diamond sword",
    "invalid_effect_name": "sudo give me haste effect",
    "wrong_item_bed": "sudo give me a bed",
    "wrong_item_log": "sudo give me a stack of logs",
    "count_wrong_position": "sudo give me 64 diamonds",
    "effect_missing_give": "sudo give me speed",
    "weather_storm": "sudo make it storm",
    "gamemode_abbreviation": "sudo put me in creative",
    "wrong_item_grass": "sudo give me some grass",
    "summon_no_prefix": "sudo spawn a zombie near me",
    "old_zombie_pigman": "sudo summon a zombie pigman",
}


# ---------------------------------------------------------------------------
# Format to Qwen3 chat template string
# ---------------------------------------------------------------------------

def format_qwen3(messages: List[Dict[str, str]]) -> str:
    """Render a message list as ``<|im_start|>``/``<|im_end|>`` delimited text.

    Each message becomes ``<|im_start|>{role}\\n{content}\\n<|im_end|>`` and
    the rendered messages are joined with a single newline. An empty list
    yields an empty string.

    NOTE(review): roles are emitted verbatim (including ``tool``) and a
    newline precedes ``<|im_end|>``; confirm this matches the chat template
    of the tokenizer used for training.
    """
    return "\n".join(
        "<|im_start|>{}\n{}\n<|im_end|>".format(turn["role"], turn["content"])
        for turn in messages
    )


# ---------------------------------------------------------------------------
# Main generation pipeline
# ---------------------------------------------------------------------------

def load_seed_data() -> List[Dict[str, Any]]:
    """Load the seed dataset from SEED_PATH.

    The file is read as UTF-8 JSON Lines; blank lines are skipped.

    Returns:
        One parsed JSON object per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Explicit encoding: the platform default is not UTF-8 everywhere, and
    # seed text may contain non-ASCII characters.
    with open(SEED_PATH, encoding="utf-8") as f:
        stripped_lines = (line.strip() for line in f)
        return [json.loads(line) for line in stripped_lines if line]


def generate_all() -> Tuple[List[Dict[str, Any]], Dict[str, int]]:
    """Generate all tool-calling training examples.

    Seeds the ``random`` module with 42, loads the seed dataset and builds six
    families of examples in a fixed order: command success, error correction
    (from ``negative_output``), predefined error scenarios (sudo and god
    variants), wiki lookups, player-info lookups and server-state checks.

    Returns:
        ``(examples, counts)`` where ``examples`` is the list of training
        records and ``counts`` maps each record type to how many were built.
    """
    random.seed(42)
    seed_data = load_seed_data()
    all_examples: List[Dict[str, Any]] = []
    counts = {
        "command_success": 0,
        "error_correction": 0,
        "error_scenario": 0,
        "wiki_lookup": 0,
        "player_info_lookup": 0,
        "server_state_check": 0,
    }

    def stable_suffix(text: str) -> str:
        """Return a 4-digit id suffix that is identical across runs."""
        # hash() of a str is salted per process, so it cannot provide
        # reproducible ids; CRC32 can.
        return f"{zlib.crc32(text.encode('utf-8')) % 10000:04d}"

    # --- 1. Command success examples from seed data ---
    for ex in seed_data:
        cat = ex.get("category", "")
        if cat in ("command_gen", "safety", "troubleshoot"):
            output = ex.get("output") or {}
            if not output.get("commands"):
                continue
            # Decide mode: if it has a "message" field, treat as god
            mode = "god" if output.get("message") else "sudo"
            result = gen_command_success(ex, mode=mode)
            if result:
                all_examples.append(result)
                counts["command_success"] += 1

    # --- 2. Error correction from negative_output examples ---
    for ex in seed_data:
        neg = ex.get("negative_output")
        if neg and neg.get("commands"):
            mode = "god" if (ex.get("output") or {}).get("message") else "sudo"
            result = gen_error_correction_from_negative(ex, mode=mode)
            if result:
                all_examples.append(result)
                counts["error_correction"] += 1

    # --- 3. Error scenario examples ---
    for scenario in ERROR_SCENARIOS:
        user_req = ERROR_SCENARIO_REQUESTS.get(scenario["id"], "sudo do something")
        player = random.choice(DEFAULT_PLAYERS)
        # Generate both sudo and god mode variants for each scenario
        for mode in ["sudo", "god"]:
            result = gen_error_scenario(scenario, user_req, player=player, mode=mode)
            if result:
                result["id"] += f"-{mode}"
                all_examples.append(result)
                counts["error_scenario"] += 1

    # --- 4. Wiki lookup examples ---
    for wex in WIKI_LOOKUP_EXAMPLES:
        player = random.choice(DEFAULT_PLAYERS)
        resolved_cmds = [c.format(player=player) for c in wex["commands"]]
        result = gen_wiki_lookup(
            user_request=wex["user_request"],
            query=wex["query"],
            wiki_content=wex["wiki_content"],
            wiki_url=wex["wiki_url"],
            resulting_commands=resolved_cmds,
            reasoning=wex["reasoning"],
            player=player,
        )
        if result:
            # Assign the id here as well so it is reproducible regardless of
            # how the helper derives it.
            result["id"] = f"tool-wiki-{stable_suffix(wex['user_request'])}"
            all_examples.append(result)
            counts["wiki_lookup"] += 1

    # --- 5. Player info lookup examples ---
    for pex in PLAYER_INFO_EXAMPLES:
        player = pex.get("player", "slingshooter08")

        # We need to generate plausible coordinates for the commands
        pos = simulate_player_info(player)["position"]
        x, y, z = pos["x"], pos["y"], pos["z"]

        # Every offset placeholder used by the handcrafted templates.
        placeholders = {
            "player": player, "x": x, "y": y, "z": z,
            "y_1": y + 1, "y_2": y + 2, "y_3": y + 3, "y_4": y + 4,
            "z_minus_100": z - 100,
            "x_m5": x - 5, "x_p5": x + 5, "z_m5": z - 5, "z_p5": z + 5,
            "y_p5": y + 5, "y_p10": y + 10,
            "x_p3": x + 3, "x_m3": x - 3, "z_p3": z + 3, "z_m3": z - 3,
        }
        resolved_cmds = [cmd.format(**placeholders) for cmd in pex["commands"]]

        messages = [build_system_message("sudo")]
        messages.append(build_user_message(pex["user_request"]))
        messages.append(build_tool_call("world.player_info", {"player": player}))
        messages.append(build_tool_result({
            "health": 20.0,
            "position": pos,
            "inventory_summary": "Diamond gear, various items",
        }))

        for cmd in resolved_cmds:
            messages.append(build_tool_call("rcon.execute", {"command": cmd}))
            messages.append(build_tool_result({
                "success": True,
                "result": simulate_rcon_success(cmd, player),
            }))

        final = {"risk_level": 3, "commands": resolved_cmds, "reasoning": pex["reasoning"]}
        messages.append(build_assistant_final(final))

        all_examples.append({
            "id": f"tool-playerinfo-{stable_suffix(pex['user_request'])}",
            "source": "tool_training",
            "type": "player_info_lookup",
            "messages": messages,
        })
        counts["player_info_lookup"] += 1

    # --- 6. Server state check examples ---
    # NOTE(review): the handcrafted reasoning strings assert a specific state
    # (e.g. "it was nighttime") while the state below is random, so the two
    # can contradict each other in the emitted conversation.
    for sex in SERVER_STATE_EXAMPLES:
        state = simulate_server_state()
        players = state["online_players"]
        fallback_player = players[0] if players else "player"

        messages = [build_system_message("sudo")]
        messages.append(build_user_message(sex["user_request"]))
        messages.append(build_tool_call("world.server_state", {}))
        messages.append(build_tool_result(state))

        # Resolve commands that reference {p} for each player, remembering
        # which player each concrete command targets so the simulated success
        # message names the right recipient.
        resolved: List[Tuple[str, str]] = []
        for cmd in sex["commands"]:
            if "{p}" in cmd:
                resolved.extend((cmd.format(p=p), p) for p in players)
            else:
                resolved.append((cmd, fallback_player))

        for cmd, target in resolved:
            messages.append(build_tool_call("rcon.execute", {"command": cmd}))
            messages.append(build_tool_result({
                "success": True,
                "result": simulate_rcon_success(cmd, target),
            }))

        resolved_cmds = [cmd for cmd, _ in resolved]
        final = {"risk_level": 3, "commands": resolved_cmds, "reasoning": sex["reasoning"]}
        messages.append(build_assistant_final(final))

        all_examples.append({
            "id": f"tool-serverstate-{stable_suffix(sex['user_request'])}",
            "source": "tool_training",
            "type": "server_state_check",
            "messages": messages,
        })
        counts["server_state_check"] += 1

    return all_examples, counts


def main():
    """Generate the dataset, write it to OUTPUT_PATH and print a summary.

    Exits with status 1 if the seed dataset is missing. Each written record
    carries both the structured ``messages`` list and a ``qwen3_text``
    rendering of it. A randomly chosen record is previewed at the end when at
    least one was generated.
    """
    print("=" * 60)
    print("Tool-Calling Training Data Generator")
    print("=" * 60)
    print(f"\nSeed dataset: {SEED_PATH}")
    print(f"Output:       {OUTPUT_PATH}")

    if not SEED_PATH.exists():
        print(f"\nERROR: Seed dataset not found at {SEED_PATH}")
        sys.exit(1)

    all_examples, counts = generate_all()

    # Write output
    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
    # Explicit encoding keeps the output independent of the platform default.
    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        for ex in all_examples:
            # Store both the structured messages and the formatted Qwen3 text
            ex["qwen3_text"] = format_qwen3(ex["messages"])
            f.write(json.dumps(ex) + "\n")

    # Summary
    total = len(all_examples)
    print(f"\nGenerated {total} tool-calling training examples:\n")
    print(f"  {'Type':<25} {'Count':>6}")
    print(f"  {'-'*25} {'-'*6}")
    for typ, count in sorted(counts.items()):
        print(f"  {typ:<25} {count:>6}")
    print(f"  {'-'*25} {'-'*6}")
    print(f"  {'TOTAL':<25} {total:>6}")

    # Validate a sample (random.choice raises IndexError on an empty list).
    print("\n--- Sample validation ---")
    if all_examples:
        sample = random.choice(all_examples)
        print(f"  ID:   {sample['id']}")
        print(f"  Type: {sample['type']}")
        print(f"  Turns: {len(sample['messages'])}")
        roles = [m['role'] for m in sample['messages']]
        print(f"  Roles: {' -> '.join(roles)}")
        print("\n  Qwen3 text preview (first 500 chars):")
        print(f"  {sample['qwen3_text'][:500]}")
    else:
        print("  No examples were generated; nothing to sample.")

    print(f"\nOutput written to: {OUTPUT_PATH}")


if __name__ == "__main__":
    main()