Tool-calling training: 1,159 multi-turn examples with error correction

Tool schemas (agent/tools/tool_schemas.py):
- rcon.execute: execute commands, get success/error results
- minecraft.wiki_lookup: look up syntax and item info
- world.player_info: player health, position, inventory
- world.server_state: time, weather, online players
- 10 RCON error patterns with corrections
- 12 common error scenarios for training

Training data generator (training/scripts/generate_tool_training.py):
- Converts seed dataset to multi-turn tool conversations
- Error correction: model tries wrong command → gets error → self-corrects
- Wiki/player/server lookups for uncertainty scenarios
- Qwen3 native tool-calling format with <tool_call> tags

1,159 examples: 1043 success, 79 error correction, 24 error scenarios,
13 tool lookups. Ready for v4 training.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-19 18:49:08 -04:00
parent 4e83da39fd
commit ee764cd22a
3 changed files with 2417 additions and 0 deletions
+850
View File
@@ -0,0 +1,850 @@
#!/usr/bin/env python3
"""
Generate multi-turn tool-calling training data for the Minecraft AI God model.
Reads data/processed/seed_dataset.jsonl and produces data/processed/tool_training.jsonl
with Qwen3-format multi-turn conversations that teach the model to:
1. Call rcon.execute and handle success/error results
2. Self-correct on RCON errors (retry with fixed command)
3. Use minecraft.wiki_lookup when unsure about syntax
4. Use world.player_info / world.server_state for context-dependent actions
Usage:
python training/scripts/generate_tool_training.py
"""
import json
import random
import sys
import zlib
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
# Ensure project root is importable
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
sys.path.insert(0, str(PROJECT_ROOT))
from agent.tools.tool_schemas import (
QWEN3_TOOLS,
RCON_ERROR_PATTERNS,
ERROR_SCENARIOS,
qwen3_tools_block,
)
from agent.prompts.system_prompts import (
SUDO_SYSTEM_PROMPT,
GOD_SYSTEM_PROMPT,
SYNTAX_RULES,
RISK_GRADIENT,
)
# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------
# Input: seed examples, one JSON object per line (parsed by load_seed_data).
SEED_PATH = PROJECT_ROOT / "data" / "processed" / "seed_dataset.jsonl"
# Output: generated conversations, written by main() as one JSON object per line.
OUTPUT_PATH = PROJECT_ROOT / "data" / "processed" / "tool_training.jsonl"
# ---------------------------------------------------------------------------
# System prompt with tools block for Qwen3 format
# ---------------------------------------------------------------------------
# Tool description text appended to both system prompts below; computed once at import.
TOOLS_BLOCK = qwen3_tools_block()
# System prompt used for every mode other than "god" (see build_system_message).
# Layout: role description, permission level, tool-usage instructions, the JSON
# shape of the final answer, then SYNTAX_RULES + RISK_GRADIENT + TOOLS_BLOCK.
SUDO_TOOL_SYSTEM = (
"You are a Minecraft 1.21 command translator for a server admin. "
"You receive natural language requests and return valid RCON commands.\n\n"
"PERMISSION LEVEL: 4 (generous). Only refuse level 0-1 actions.\n\n"
"You have access to tools. Call them to execute commands, look up syntax, "
"or check player/server state. When a command fails, analyze the error "
"and retry with a corrected command.\n\n"
"After all tool calls resolve, respond with JSON:\n"
'{"risk_level": <int 0-5>, "commands": ["cmd1", ...], "reasoning": "why"}\n\n'
+ SYNTAX_RULES + RISK_GRADIENT + "\n" + TOOLS_BLOCK
)
# System prompt used when mode == "god". The final JSON additionally carries a
# "message" field.
# NOTE(review): unlike SUDO_TOOL_SYSTEM this prompt does not append RISK_GRADIENT
# even though it asks for a risk_level — confirm that is intentional.
GOD_TOOL_SYSTEM = (
"You are God in a Minecraft server. Players pray to you and you respond "
"with divine judgment.\n\n"
"You have access to tools. Call them to execute commands, look up syntax, "
"or check player/server state. When a command fails, analyze the error "
"and retry with a corrected command.\n\n"
"After all tool calls resolve, respond with JSON:\n"
'{"risk_level": <int 0-5>, "message": "Your divine response", '
'"commands": ["cmd1", ...], "reasoning": "why"}\n\n'
+ SYNTAX_RULES + "\n" + TOOLS_BLOCK
)
# ---------------------------------------------------------------------------
# Player names used across the dataset
# ---------------------------------------------------------------------------
# Roster drawn from when a player name is needed: simulate_server_state samples
# its "online_players" from it, and generate_all picks from it for the error
# scenario and wiki lookup examples.
DEFAULT_PLAYERS = ["slingshooter08", "Ace13245", "TheBigBoss", "xXDragonSlayerXx"]
# ---------------------------------------------------------------------------
# Simulated success responses for common commands
# ---------------------------------------------------------------------------
def simulate_rcon_success(command: str, player: str = "slingshooter08") -> str:
    """Fabricate the text a server could plausibly return for a successful command.

    Only the leading keyword(s) of the command are inspected; nothing is executed.

    Args:
        command: Command string; surrounding whitespace is ignored.
        player: Name inserted into replies that mention a player.

    Returns:
        A canned success message, or "Command executed successfully" when the
        command matches none of the known prefixes.
    """
    text = command.strip()
    tokens = text.split()

    if text.startswith("give "):
        # tokens: give <target> <item> [count]; the reply names `player`, not <target>.
        raw_item = tokens[2] if len(tokens) > 2 else "item"
        amount = tokens[3] if len(tokens) > 3 else "1"
        label = raw_item.replace("minecraft:", "").replace("_", " ").title()
        if "[" in label:
            # Drop a trailing component block such as [enchantments={...}].
            label = label.split("[")[0].strip()
        return f"Gave {amount} [{label}] to {player}"

    if text.startswith("effect give "):
        # tokens: effect give <target> <effect> ...
        target = tokens[2] if len(tokens) > 2 else ""
        effect_id = tokens[3].replace("minecraft:", "") if len(tokens) > 3 else "effect"
        return f"Applied effect {effect_id} to {target}"

    if text.startswith("effect clear "):
        target = tokens[2] if len(tokens) > 2 else player
        return f"Removed every effect from {target}"

    if text.startswith(("tp ", "teleport ")):
        return f"Teleported {player}"

    if text.startswith("weather "):
        kind = tokens[1] if len(tokens) > 1 else "clear"
        return f"Changing to {kind}"

    if text.startswith("time set "):
        value = text.split("time set ")[1] if "time set " in text else "day"
        return f"Set the time to {value}"

    if text.startswith("gamemode "):
        mode = tokens[1] if len(tokens) > 1 else "creative"
        return f"Set own game mode to {mode.title()}"

    # Remaining commands answer with a fixed sentence. No prefix here is a
    # prefix of another, so lookup order does not matter.
    fixed_replies = {
        "kill ": "Killed entity",
        "summon ": "Summoned new entity",
        "setblock ": "Changed the block at ...",
        "fill ": "Successfully filled ... blocks",
        "clear ": f"Removed items from {player}",
        "xp ": f"Gave experience to {player}",
        "execute ": "Executed command",
        "playsound ": f"Played sound to {player}",
        "title ": "Title displayed",
        "particle ": "Showing particle",
        "enchant ": f"Enchanted item for {player}",
        "spreadplayers ": "Spread players",
    }
    for prefix, reply in fixed_replies.items():
        if text.startswith(prefix):
            return reply
    return "Command executed successfully"
def simulate_player_info(player: str) -> Dict[str, Any]:
    """Produce a random but believable ``world.player_info`` payload.

    Args:
        player: Accepted for call-site symmetry; the generated values do not
            depend on it.

    Returns:
        Dict with ``health`` (float, one decimal, 10.0-20.0), ``position``
        (integer ``x``/``y``/``z``) and ``inventory_summary`` (one of five
        canned descriptions).
    """
    inventories = [
        "Diamond sword, iron armor, 32 steak, 14 torches",
        "Netherite pickaxe, diamond armor, 64 cobblestone, bow with 28 arrows",
        "Stone tools, leather armor, 12 bread, 3 oak logs",
        "Full diamond gear, 8 golden apples, ender pearls x16",
        "Iron sword, chainmail chest, 24 cooked porkchop, shield",
    ]
    axis_ranges = (("x", -500, 500), ("y", 60, 120), ("z", -500, 500))

    # Draw order (health, x, y, z, inventory) is kept fixed so seeded runs
    # reproduce the same values.
    health = round(random.uniform(10.0, 20.0), 1)
    coords = {axis: random.randint(low, high) for axis, low, high in axis_ranges}
    summary = random.choice(inventories)
    return {"health": health, "position": coords, "inventory_summary": summary}
def simulate_server_state() -> Dict[str, Any]:
    """Produce a random but believable ``world.server_state`` payload.

    Returns:
        Dict with ``time_of_day`` (label derived from a random tick count),
        ``weather``, ``online_players`` (1-3 names sampled from
        DEFAULT_PLAYERS) and a constant ``world_border``.
    """
    tick = random.randint(0, 24000)
    # Each 6000-tick quarter of the day maps to one label; anything at or
    # above 18000 is night.
    for upper_bound, label in ((6000, "morning"), (12000, "noon"), (18000, "evening")):
        if tick < upper_bound:
            phase = label
            break
    else:
        phase = "night"

    # Draw order (tick, weather, head count, roster) is kept fixed so seeded
    # runs reproduce the same values.
    weather = random.choice(["clear", "rain", "thunder"])
    head_count = random.randint(1, 3)
    roster = random.sample(DEFAULT_PLAYERS, k=head_count)
    return {
        "time_of_day": phase,
        "weather": weather,
        "online_players": roster,
        "world_border": 60000000.0,
    }
# ---------------------------------------------------------------------------
# Conversation builders
# ---------------------------------------------------------------------------
def build_system_message(mode: str) -> Dict[str, str]:
    """Build the system turn: the God prompt for ``"god"``, the sudo prompt otherwise."""
    prompt = GOD_TOOL_SYSTEM if mode == "god" else SUDO_TOOL_SYSTEM
    return {"role": "system", "content": prompt}
def build_user_message(user_text: str, context: Optional[Dict] = None) -> Dict[str, str]:
    """Build the user turn, appending the online-player list when one is known.

    Args:
        user_text: The raw request text.
        context: Optional server context; only a non-empty ``online_players``
            entry changes the output.

    Returns:
        A ``{"role": "user", "content": ...}`` message.
    """
    online = context.get("online_players") if context else None
    if not online:
        return {"role": "user", "content": user_text}
    roster = ", ".join(online)
    return {
        "role": "user",
        "content": f"{user_text}\n\n[Server context: players online: {roster}]",
    }
def build_tool_call(tool_name: str, arguments: Dict[str, Any]) -> Dict[str, str]:
    """Build an assistant turn whose content is a single ``<tool_call>`` block.

    The call is serialised as JSON with ``name`` and ``arguments`` keys, on its
    own line between the opening and closing tags.
    """
    payload = json.dumps({"name": tool_name, "arguments": arguments})
    body = "\n".join(("<tool_call>", payload, "</tool_call>"))
    return {"role": "assistant", "content": body}
def build_tool_result(result: Dict[str, Any]) -> Dict[str, str]:
    """Wrap a tool's result dict, JSON-encoded, in a ``tool``-role message."""
    encoded = json.dumps(result)
    return dict(role="tool", content=encoded)
def build_assistant_final(response: Dict[str, Any]) -> Dict[str, str]:
    """Wrap the closing JSON answer, JSON-encoded, in an ``assistant``-role message."""
    encoded = json.dumps(response)
    return dict(role="assistant", content=encoded)
# ---------------------------------------------------------------------------
# Example generators
# ---------------------------------------------------------------------------
def gen_command_success(example: Dict[str, Any], mode: str = "sudo") -> Optional[Dict[str, Any]]:
    """Build a conversation in which every command succeeds on the first try.

    Each command in ``example["output"]["commands"]`` becomes one
    ``rcon.execute`` call followed by a simulated success result; the
    conversation ends with the final JSON answer.

    Args:
        example: Seed record with ``input`` (``user_message``, optional
            ``server_context``), ``output`` (``commands``, optional
            ``reasoning`` / ``message``), ``id`` and optional ``metadata``.
        mode: ``"god"`` selects the God system prompt and forces a ``message``
            field in the final answer; anything else uses the sudo prompt.

    Returns:
        The training record, or ``None`` when the example has no commands.
    """
    inp = example["input"]
    out = example["output"]
    commands = out.get("commands") or []
    if not commands:
        return None

    # ``dict.get(key, {})`` only covers a missing key; a JSON ``null`` comes
    # back as None, so normalise before chaining further lookups.
    server_context = inp.get("server_context") or {}
    player = (server_context.get("online_players") or ["slingshooter08"])[0]
    risk_level = (example.get("metadata") or {}).get("risk_level", 3)

    messages = [
        build_system_message(mode),
        build_user_message(inp["user_message"], server_context),
    ]
    # Execute each command via rcon.execute.
    for cmd in commands:
        messages.append(build_tool_call("rcon.execute", {"command": cmd}))
        success_msg = simulate_rcon_success(cmd, player)
        messages.append(build_tool_result({"success": True, "result": success_msg}))

    final = {"risk_level": risk_level, "commands": commands, "reasoning": out.get("reasoning", "")}
    if mode == "god" or out.get("message"):
        final["message"] = out.get("message", "It is done.")
    messages.append(build_assistant_final(final))

    return {
        "id": f"tool-{example['id']}-success",
        "source": "tool_training",
        "type": "command_success",
        "messages": messages,
    }
def gen_error_correction_from_negative(example: Dict[str, Any], mode: str = "sudo") -> Optional[Dict[str, Any]]:
    """Build a fail-then-fix conversation from an example with ``negative_output``.

    The first wrong command is attempted and answered with the recorded error;
    the correct commands are then executed with simulated success results.

    Args:
        example: Seed record; must carry ``negative_output`` with ``commands``
            (and optionally ``error``) plus the usual ``input`` / ``output``.
        mode: ``"god"`` selects the God system prompt and forces a ``message``
            field in the final answer; anything else uses the sudo prompt.

    Returns:
        The training record, or ``None`` when there is no negative output or
        either command list is empty.
    """
    inp = example["input"]
    out = example["output"]
    neg = example.get("negative_output")
    if not neg:
        return None

    wrong_commands = neg.get("commands") or []
    correct_commands = out.get("commands") or []
    if not wrong_commands or not correct_commands:
        return None

    # A JSON ``null`` comes back from ``.get`` as None rather than the default,
    # so normalise before chaining further lookups.
    server_context = inp.get("server_context") or {}
    player = (server_context.get("online_players") or ["slingshooter08"])[0]
    risk_level = (example.get("metadata") or {}).get("risk_level", 3)
    recorded_error = neg.get("error")
    error_text = recorded_error or "Unknown or incomplete command"

    messages = [
        build_system_message(mode),
        build_user_message(inp["user_message"], server_context),
    ]
    # First attempt: only the first wrong command is replayed, and it fails.
    messages.append(build_tool_call("rcon.execute", {"command": wrong_commands[0]}))
    messages.append(build_tool_result({"success": False, "result": error_text}))
    # Retry with the correct command(s).
    for cmd in correct_commands:
        messages.append(build_tool_call("rcon.execute", {"command": cmd}))
        success_msg = simulate_rcon_success(cmd, player)
        messages.append(build_tool_result({"success": True, "result": success_msg}))

    # The final reasoning mentions the failure only when the seed recorded one.
    reasoning = out.get("reasoning", "")
    if recorded_error:
        reasoning = f"First attempt failed: {recorded_error}. {reasoning}"
    final = {"risk_level": risk_level, "commands": correct_commands, "reasoning": reasoning}
    if mode == "god" or out.get("message"):
        final["message"] = out.get("message", "It is done.")
    messages.append(build_assistant_final(final))

    return {
        "id": f"tool-{example['id']}-errorcorrect",
        "source": "tool_training",
        "type": "error_correction",
        "messages": messages,
    }
def gen_error_scenario(scenario: Dict[str, Any], user_request: str,
                       player: str = "slingshooter08", mode: str = "sudo") -> Dict[str, Any]:
    """Build a fail-then-fix conversation from one predefined error scenario.

    Args:
        scenario: Dict with ``id``, ``wrong_command`` and ``correct_command``
            (``str.format`` templates taking ``player``), ``error_message``
            and ``reasoning``.
        user_request: Text of the user turn.
        player: Name substituted into both command templates.
        mode: ``"god"`` selects the God prompt and adds a fixed ``message``.

    Returns:
        A training record of type ``error_scenario``.
    """
    bad_cmd = scenario["wrong_command"].format(player=player)
    good_cmd = scenario["correct_command"].format(player=player)

    conversation = [
        build_system_message(mode),
        build_user_message(user_request),
        # Wrong attempt and the error it triggers.
        build_tool_call("rcon.execute", {"command": bad_cmd}),
        build_tool_result({"success": False, "result": scenario["error_message"]}),
        # Corrected attempt.
        build_tool_call("rcon.execute", {"command": good_cmd}),
        build_tool_result({"success": True, "result": simulate_rcon_success(good_cmd, player)}),
    ]

    verdict = {"risk_level": 3, "commands": [good_cmd], "reasoning": scenario["reasoning"]}
    if mode == "god":
        verdict["message"] = "It is done."
    conversation.append(build_assistant_final(verdict))

    return {
        "id": f"tool-scenario-{scenario['id']}",
        "source": "tool_training",
        "type": "error_scenario",
        "messages": conversation,
    }
def gen_wiki_lookup(user_request: str, query: str, wiki_content: str,
                    wiki_url: str, resulting_commands: List[str],
                    reasoning: str, player: str = "slingshooter08",
                    mode: str = "sudo") -> Dict[str, Any]:
    """Build a conversation where the model consults the wiki before executing.

    Args:
        user_request: Text of the user turn.
        query: Argument passed to ``minecraft.wiki_lookup``.
        wiki_content: Simulated wiki text returned by the tool.
        wiki_url: Simulated source URL returned by the tool.
        resulting_commands: Fully resolved commands to execute afterwards.
        reasoning: Reasoning string for the final answer.
        player: Name used in the simulated success messages.
        mode: ``"god"`` selects the God prompt and adds a fixed ``message``.

    Returns:
        A training record of type ``wiki_lookup``.
    """
    messages = [build_system_message(mode), build_user_message(user_request)]
    # Wiki lookup and its simulated answer.
    messages.append(build_tool_call("minecraft.wiki_lookup", {"query": query}))
    messages.append(build_tool_result({"content": wiki_content, "url": wiki_url}))
    # Execute the commands derived from the lookup.
    for cmd in resulting_commands:
        messages.append(build_tool_call("rcon.execute", {"command": cmd}))
        success_msg = simulate_rcon_success(cmd, player)
        messages.append(build_tool_result({"success": True, "result": success_msg}))

    final = {"risk_level": 3, "commands": resulting_commands, "reasoning": reasoning}
    if mode == "god":
        # The God prompt's answer format includes "message"; same filler as
        # gen_error_scenario.
        final["message"] = "It is done."
    messages.append(build_assistant_final(final))

    # Built-in hash() of a str is salted per interpreter run, which made ids
    # differ between runs; CRC32 of the UTF-8 bytes is stable.
    suffix = zlib.crc32(user_request.encode("utf-8")) % 10000
    return {
        "id": f"tool-wiki-{suffix:04d}",
        "source": "tool_training",
        "type": "wiki_lookup",
        "messages": messages,
    }
def gen_player_info_lookup(user_request: str, player: str,
                           resulting_commands: List[str], reasoning: str,
                           mode: str = "sudo") -> Dict[str, Any]:
    """Build a conversation where the model fetches player info before acting.

    Args:
        user_request: Text of the user turn.
        player: Player queried via ``world.player_info``.
        resulting_commands: ``str.format`` templates. Besides ``{player}``,
            ``{x}``, ``{y}``, ``{z}`` they may use the offset placeholders
            found in PLAYER_INFO_EXAMPLES (``{y_1}``..``{y_4}``,
            ``{z_minus_100}``, ``{x_m5}``, ``{y_p10}``, ...).
        reasoning: Reasoning string for the final answer.
        mode: ``"god"`` selects the God prompt and adds a fixed ``message``.

    Returns:
        A training record of type ``player_info_lookup``.
    """
    messages = [build_system_message(mode), build_user_message(user_request)]
    # Get player info.
    messages.append(build_tool_call("world.player_info", {"player": player}))
    pinfo = simulate_player_info(player)
    messages.append(build_tool_result(pinfo))

    # Substitute the reported position into the command templates. The field
    # set mirrors the one generate_all passes, so the file's own templates
    # resolve instead of raising KeyError on offsets such as {y_1}.
    pos = pinfo["position"]
    x, y, z = pos["x"], pos["y"], pos["z"]
    fields = {
        "player": player, "x": x, "y": y, "z": z,
        "y_1": y + 1, "y_2": y + 2, "y_3": y + 3, "y_4": y + 4,
        "z_minus_100": z - 100,
        "x_m5": x - 5, "x_p5": x + 5, "z_m5": z - 5, "z_p5": z + 5,
        "y_p5": y + 5, "y_p10": y + 10,
        "x_p3": x + 3, "x_m3": x - 3, "z_p3": z + 3, "z_m3": z - 3,
    }
    resolved_cmds = [cmd.format(**fields) for cmd in resulting_commands]

    for cmd in resolved_cmds:
        messages.append(build_tool_call("rcon.execute", {"command": cmd}))
        success_msg = simulate_rcon_success(cmd, player)
        messages.append(build_tool_result({"success": True, "result": success_msg}))

    final = {"risk_level": 3, "commands": resolved_cmds, "reasoning": reasoning}
    if mode == "god":
        # The God prompt's answer format includes "message"; same filler as
        # gen_error_scenario.
        final["message"] = "It is done."
    messages.append(build_assistant_final(final))

    # Built-in hash() of a str is salted per interpreter run; CRC32 is stable.
    suffix = zlib.crc32(user_request.encode("utf-8")) % 10000
    return {
        "id": f"tool-playerinfo-{suffix:04d}",
        "source": "tool_training",
        "type": "player_info_lookup",
        "messages": messages,
    }
def gen_server_state_check(user_request: str, resulting_commands: List[str],
                           reasoning: str, player: str = "slingshooter08",
                           mode: str = "sudo") -> Dict[str, Any]:
    """Build a conversation where the model checks server state before acting.

    Args:
        user_request: Text of the user turn.
        resulting_commands: Commands to execute. A command containing ``{p}``
            is expanded once per simulated online player (as generate_all does
            for SERVER_STATE_EXAMPLES); other commands are used verbatim.
        reasoning: Reasoning string for the final answer.
        player: Name used in simulated success messages for commands that are
            not expanded per player.
        mode: ``"god"`` selects the God prompt and adds a fixed ``message``.

    Returns:
        A training record of type ``server_state_check``.
    """
    messages = [build_system_message(mode), build_user_message(user_request)]
    # Check server state.
    messages.append(build_tool_call("world.server_state", {}))
    state = simulate_server_state()
    messages.append(build_tool_result(state))

    # Pair every concrete command with the player its simulated result should
    # name; previously "{p}" was sent to the server unexpanded.
    planned = []
    for cmd in resulting_commands:
        if "{p}" in cmd:
            planned.extend((cmd.format(p=name), name) for name in state["online_players"])
        else:
            planned.append((cmd, player))

    for cmd, target in planned:
        messages.append(build_tool_call("rcon.execute", {"command": cmd}))
        success_msg = simulate_rcon_success(cmd, target)
        messages.append(build_tool_result({"success": True, "result": success_msg}))

    resolved_cmds = [cmd for cmd, _ in planned]
    final = {"risk_level": 3, "commands": resolved_cmds, "reasoning": reasoning}
    if mode == "god":
        # The God prompt's answer format includes "message"; same filler as
        # gen_error_scenario.
        final["message"] = "It is done."
    messages.append(build_assistant_final(final))

    # Built-in hash() of a str is salted per interpreter run; CRC32 is stable.
    suffix = zlib.crc32(user_request.encode("utf-8")) % 10000
    return {
        "id": f"tool-serverstate-{suffix:04d}",
        "source": "tool_training",
        "type": "server_state_check",
        "messages": messages,
    }
# ---------------------------------------------------------------------------
# Handcrafted wiki/player/server lookup examples
# ---------------------------------------------------------------------------
# Handcrafted wiki-lookup conversations consumed by generate_all.
# Each entry provides the user request, the wiki query, the simulated wiki
# answer (content + url), the commands issued afterwards and the final reasoning.
# "commands" are str.format templates: {player} is substituted by generate_all,
# so literal braces in item components / NBT are written doubled ({{ and }}).
WIKI_LOOKUP_EXAMPLES = [
{
"user_request": "sudo enchant my trident with the best enchants",
"query": "trident enchantments 1.21",
"wiki_content": (
"Trident enchantments in Minecraft 1.21:\n"
"- Loyalty (1-3): Trident returns after being thrown\n"
"- Channeling (1): Summons lightning during thunderstorms\n"
"- Riptide (1-3): Launches player when thrown in water/rain\n"
"- Impaling (1-5): Extra damage to aquatic mobs\n"
"- Unbreaking (1-3): Increases durability\n"
"- Mending (1): Repairs with XP\n"
"Note: Loyalty and Riptide are mutually exclusive. Channeling and Riptide are mutually exclusive."
),
"wiki_url": "https://minecraft.wiki/w/Trident#Enchantments",
"commands": [
"give {player} minecraft:trident[enchantments={{loyalty:3,channeling:1,impaling:5,unbreaking:3,mending:1}}] 1"
],
"reasoning": "Looked up trident enchantments. Used loyalty+channeling (not riptide, since loyalty and riptide are mutually exclusive). Max levels on all.",
},
{
"user_request": "sudo what's the command to set a custom player head?",
"query": "player head command give 1.21",
"wiki_content": (
"To give a player head with a custom skin in 1.21:\n"
"give <player> minecraft:player_head[profile={name:\"<player_name>\"}] 1\n"
"This creates a head with the skin of the specified player."
),
"wiki_url": "https://minecraft.wiki/w/Player_Head",
"commands": [
'give {player} minecraft:player_head[profile={{name:"{player}"}}] 1'
],
"reasoning": "Looked up player head syntax for 1.21 component format.",
},
{
"user_request": "sudo how do I make a custom spawner?",
"query": "spawner command setblock 1.21",
"wiki_content": (
"To place a mob spawner in 1.21:\n"
"setblock <x> <y> <z> minecraft:spawner{SpawnData:{entity:{id:\"minecraft:<mob>\"}},Delay:20}\n"
"Or use /give for a spawner item."
),
"wiki_url": "https://minecraft.wiki/w/Monster_Spawner",
"commands": [
'setblock ~ ~1 ~ minecraft:spawner{{SpawnData:{{entity:{{id:"minecraft:zombie"}}}},Delay:20}}'
],
"reasoning": "Looked up spawner syntax. Using setblock with SpawnData NBT for zombie spawner.",
},
{
"user_request": "sudo what potion effects can I use for a speed build challenge?",
"query": "speed and haste effects minecraft 1.21",
"wiki_content": (
"Relevant effects for speed building:\n"
"- minecraft:speed (1-255): Increases movement speed\n"
"- minecraft:haste (1-255): Increases mining/attack speed\n"
"- minecraft:jump_boost (1-255): Increases jump height\n"
"Duration is in seconds. Amplifier 0 = level 1."
),
"wiki_url": "https://minecraft.wiki/w/Effect",
"commands": [
"effect give {player} minecraft:speed 600 2",
"effect give {player} minecraft:haste 600 2",
"effect give {player} minecraft:jump_boost 600 1",
],
"reasoning": "Looked up speed-related effects. Gave speed 3, haste 3, and jump boost 2 for 10 minutes.",
},
{
"user_request": "sudo give me a crossbow with multishot and quick charge",
"query": "crossbow enchantments multishot quick charge 1.21",
"wiki_content": (
"Crossbow enchantments in 1.21:\n"
"- Quick Charge (1-3): Reduces reload time\n"
"- Multishot (1): Fires 3 arrows at once\n"
"- Piercing (1-4): Arrows pass through entities\n"
"- Unbreaking (1-3): Durability\n"
"- Mending (1): XP repair\n"
"Note: Multishot and Piercing are mutually exclusive."
),
"wiki_url": "https://minecraft.wiki/w/Crossbow#Enchantments",
"commands": [
"give {player} minecraft:crossbow[enchantments={{multishot:1,quick_charge:3,unbreaking:3,mending:1}}] 1"
],
"reasoning": "Looked up crossbow enchantments. Multishot and piercing are exclusive; chose multishot as requested. Max quick charge.",
},
]
# Handcrafted player-info conversations consumed by generate_all.
# "commands" are str.format templates resolved against the simulated position:
#   {player}, {x}, {y}, {z}         -> player name and reported coordinates
#   {y_1} .. {y_4}                  -> y + 1 .. y + 4
#   {z_minus_100}                   -> z - 100
#   {x_m5}/{x_p5}, {z_m5}/{z_p5}    -> x -/+ 5, z -/+ 5
#   {x_m3}/{x_p3}, {z_m3}/{z_p3}    -> x -/+ 3, z -/+ 3
#   {y_p5}, {y_p10}                 -> y + 5, y + 10
PLAYER_INFO_EXAMPLES = [
{
"user_request": "sudo build a diamond block tower where I'm standing",
"player": "slingshooter08",
"commands": [
"setblock {x} {y} {z} minecraft:diamond_block",
"setblock {x} {y_1} {z} minecraft:diamond_block",
"setblock {x} {y_2} {z} minecraft:diamond_block",
"setblock {x} {y_3} {z} minecraft:diamond_block",
"setblock {x} {y_4} {z} minecraft:diamond_block",
],
"reasoning": "Got player position, then placed 5 diamond blocks in a tower at their location.",
},
{
"user_request": "sudo teleport me 100 blocks north",
"player": "slingshooter08",
"commands": ["tp {player} {x} {y} {z_minus_100}"],
"reasoning": "Got player position, then teleported 100 blocks north (negative Z direction).",
},
{
"user_request": "sudo put a glass dome over my head",
"player": "slingshooter08",
"commands": [
"fill {x_m5} {y} {z_m5} {x_p5} {y_p10} {z_p5} minecraft:glass hollow",
],
"reasoning": "Got player position to calculate dome coordinates. Used fill hollow with glass.",
},
{
"user_request": "sudo light up the area around me with glowstone",
"player": "slingshooter08",
"commands": [
"setblock {x_p3} {y_p5} {z} minecraft:glowstone",
"setblock {x_m3} {y_p5} {z} minecraft:glowstone",
"setblock {x} {y_p5} {z_p3} minecraft:glowstone",
"setblock {x} {y_p5} {z_m3} minecraft:glowstone",
],
"reasoning": "Got player position, placed glowstone lights at 4 cardinal points above the player.",
},
]
# Handcrafted server-state conversations consumed by generate_all.
# A command containing {p} is emitted once per player in the simulated
# "online_players" list; commands without {p} are used verbatim.
# NOTE(review): the first two reasoning strings assert "nighttime" / "rain",
# but the tool result comes from simulate_server_state(), which picks time and
# weather at random, so the reasoning can contradict the state shown.
SERVER_STATE_EXAMPLES = [
{
"user_request": "sudo make it daytime if it's nighttime",
"commands": ["time set day"],
"reasoning": "Checked server state: it was nighttime, so set time to day.",
},
{
"user_request": "sudo clear weather if it's raining",
"commands": ["weather clear"],
"reasoning": "Checked server state: weather was rain, so cleared it.",
},
{
"user_request": "sudo give everyone online a diamond",
"commands": [
"give {p} minecraft:diamond 1"
],
"reasoning": "Checked server state to get online player list, then gave each player a diamond.",
},
{
"user_request": "sudo heal everyone on the server",
"commands": [
"effect give {p} minecraft:instant_health 1 5"
],
"reasoning": "Checked server state for online players, then healed each one.",
},
]
# Natural-language user request for each error scenario, keyed by the
# scenario's "id" in ERROR_SCENARIOS. generate_all falls back to
# "sudo do something" for an id that has no entry here.
ERROR_SCENARIO_REQUESTS = {
"missing_prefix": "sudo give me a diamond sword",
"old_nbt_enchantments": "sudo give me a max enchanted diamond sword",
"invalid_effect_name": "sudo give me haste effect",
"wrong_item_bed": "sudo give me a bed",
"wrong_item_log": "sudo give me a stack of logs",
"count_wrong_position": "sudo give me 64 diamonds",
"effect_missing_give": "sudo give me speed",
"weather_storm": "sudo make it storm",
"gamemode_abbreviation": "sudo put me in creative",
"wrong_item_grass": "sudo give me some grass",
"summon_no_prefix": "sudo spawn a zombie near me",
"old_zombie_pigman": "sudo summon a zombie pigman",
}
# ---------------------------------------------------------------------------
# Format to Qwen3 chat template string
# ---------------------------------------------------------------------------
def format_qwen3(messages: List[Dict[str, str]]) -> str:
    """Render a message list the way the Qwen3 (ChatML) chat template does.

    Every turn becomes ``<|im_start|>{role}\\n{content}<|im_end|>``, with no
    newline inserted before ``<|im_end|>``. Tool results are not a role of
    their own in the template: each run of consecutive ``tool`` messages is
    emitted as a single ``user`` turn in which every result is wrapped in
    ``<tool_response>`` tags.

    Args:
        messages: Chat messages, each with ``role`` and ``content``.

    Returns:
        The rendered turns joined by newlines; ``""`` for an empty list.
    """
    rendered = []
    index = 0
    total = len(messages)
    while index < total:
        message = messages[index]
        if message["role"] != "tool":
            rendered.append(f"<|im_start|>{message['role']}\n{message['content']}<|im_end|>")
            index += 1
            continue

        # Collect the whole run of adjacent tool results into one user turn.
        responses = []
        while index < total and messages[index]["role"] == "tool":
            responses.append(f"<tool_response>\n{messages[index]['content']}\n</tool_response>")
            index += 1
        rendered.append("<|im_start|>user\n" + "\n".join(responses) + "<|im_end|>")
    return "\n".join(rendered)
# ---------------------------------------------------------------------------
# Main generation pipeline
# ---------------------------------------------------------------------------
def load_seed_data() -> List[Dict[str, Any]]:
    """Read the seed dataset at SEED_PATH as JSON Lines.

    Returns:
        One parsed object per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode explicitly as UTF-8 so parsing does not depend on the platform's
    # default encoding.
    with open(SEED_PATH, encoding="utf-8") as f:
        return [json.loads(stripped) for stripped in map(str.strip, f) if stripped]
def _stable_id_suffix(text: str) -> str:
    """Return a 4-digit id suffix for *text* that is identical on every run."""
    # Built-in hash() of a str is salted per interpreter run, so it cannot be
    # used for reproducible ids; CRC32 of the UTF-8 bytes is stable.
    digest = zlib.crc32(text.encode("utf-8")) % 10000
    return f"{digest:04d}"


def generate_all() -> Tuple[List[Dict[str, Any]], Dict[str, int]]:
    """Generate every tool-calling training example.

    Seeds ``random`` with 42, loads the seed dataset and builds six groups of
    examples: first-try successes, error corrections from ``negative_output``,
    predefined error scenarios (sudo and god variants), wiki lookups, player
    info lookups and server state checks.

    Returns:
        ``(examples, counts)`` where ``counts`` maps each example type to the
        number of records generated for it.
    """
    random.seed(42)
    seed_data = load_seed_data()
    all_examples: List[Dict[str, Any]] = []
    counts = {
        "command_success": 0,
        "error_correction": 0,
        "error_scenario": 0,
        "wiki_lookup": 0,
        "player_info_lookup": 0,
        "server_state_check": 0,
    }

    # --- 1. Command success examples from seed data ---
    for ex in seed_data:
        if ex.get("category", "") not in ("command_gen", "safety", "troubleshoot"):
            continue
        if not ex.get("output", {}).get("commands", []):
            continue
        # Decide mode: if it has a "message" field, treat as god.
        mode = "god" if ex.get("output", {}).get("message") else "sudo"
        result = gen_command_success(ex, mode=mode)
        if result:
            all_examples.append(result)
            counts["command_success"] += 1

    # --- 2. Error correction from negative_output examples ---
    for ex in seed_data:
        neg = ex.get("negative_output")
        if not (neg and neg.get("commands")):
            continue
        mode = "god" if ex.get("output", {}).get("message") else "sudo"
        result = gen_error_correction_from_negative(ex, mode=mode)
        if result:
            all_examples.append(result)
            counts["error_correction"] += 1

    # --- 3. Error scenario examples ---
    for scenario in ERROR_SCENARIOS:
        user_req = ERROR_SCENARIO_REQUESTS.get(scenario["id"], "sudo do something")
        player = random.choice(DEFAULT_PLAYERS)
        # Generate both sudo and god mode variants for each scenario; the mode
        # suffix keeps their ids distinct.
        for mode in ["sudo", "god"]:
            result = gen_error_scenario(scenario, user_req, player=player, mode=mode)
            if result:
                result["id"] += f"-{mode}"
                all_examples.append(result)
                counts["error_scenario"] += 1

    # --- 4. Wiki lookup examples ---
    for wex in WIKI_LOOKUP_EXAMPLES:
        player = random.choice(DEFAULT_PLAYERS)
        resolved_cmds = [c.format(player=player) for c in wex["commands"]]
        result = gen_wiki_lookup(
            user_request=wex["user_request"],
            query=wex["query"],
            wiki_content=wex["wiki_content"],
            wiki_url=wex["wiki_url"],
            resulting_commands=resolved_cmds,
            reasoning=wex["reasoning"],
            player=player,
        )
        if result:
            all_examples.append(result)
            counts["wiki_lookup"] += 1

    # --- 5. Player info lookup examples ---
    for pex in PLAYER_INFO_EXAMPLES:
        player = pex.get("player", "slingshooter08")
        # Only the simulated position is used; health and inventory in the
        # tool result below are fixed values.
        pos = simulate_player_info(player)["position"]
        x, y, z = pos["x"], pos["y"], pos["z"]
        # Every offset placeholder used by PLAYER_INFO_EXAMPLES.
        fields = {
            "player": player, "x": x, "y": y, "z": z,
            "y_1": y + 1, "y_2": y + 2, "y_3": y + 3, "y_4": y + 4,
            "z_minus_100": z - 100,
            "x_m5": x - 5, "x_p5": x + 5, "z_m5": z - 5, "z_p5": z + 5,
            "y_p5": y + 5, "y_p10": y + 10,
            "x_p3": x + 3, "x_m3": x - 3, "z_p3": z + 3, "z_m3": z - 3,
        }
        resolved_cmds = [cmd.format(**fields) for cmd in pex["commands"]]

        messages = [build_system_message("sudo")]
        messages.append(build_user_message(pex["user_request"]))
        messages.append(build_tool_call("world.player_info", {"player": player}))
        messages.append(build_tool_result({
            "health": 20.0,
            "position": pos,
            "inventory_summary": "Diamond gear, various items",
        }))
        for cmd in resolved_cmds:
            messages.append(build_tool_call("rcon.execute", {"command": cmd}))
            messages.append(build_tool_result({
                "success": True,
                "result": simulate_rcon_success(cmd, player),
            }))
        final = {"risk_level": 3, "commands": resolved_cmds, "reasoning": pex["reasoning"]}
        messages.append(build_assistant_final(final))
        all_examples.append({
            "id": f"tool-playerinfo-{_stable_id_suffix(pex['user_request'])}",
            "source": "tool_training",
            "type": "player_info_lookup",
            "messages": messages,
        })
        counts["player_info_lookup"] += 1

    # --- 6. Server state check examples ---
    # NOTE(review): time and weather in the simulated state are random, while
    # some SERVER_STATE_EXAMPLES reasoning strings assert night / rain; the
    # two can disagree in the generated record.
    for sex in SERVER_STATE_EXAMPLES:
        state = simulate_server_state()
        players = state["online_players"]
        messages = [build_system_message("sudo")]
        messages.append(build_user_message(sex["user_request"]))
        messages.append(build_tool_call("world.server_state", {}))
        messages.append(build_tool_result(state))

        # Pair each concrete command with the player its simulated result
        # should name, so a per-player "give" reports the right recipient
        # instead of always the first online player.
        fallback = players[0] if players else "player"
        planned = []
        for cmd in sex["commands"]:
            if "{p}" in cmd:
                planned.extend((cmd.format(p=p), p) for p in players)
            else:
                planned.append((cmd, fallback))

        for cmd, target in planned:
            messages.append(build_tool_call("rcon.execute", {"command": cmd}))
            messages.append(build_tool_result({
                "success": True,
                "result": simulate_rcon_success(cmd, target),
            }))
        resolved_cmds = [cmd for cmd, _ in planned]
        final = {"risk_level": 3, "commands": resolved_cmds, "reasoning": sex["reasoning"]}
        messages.append(build_assistant_final(final))
        all_examples.append({
            "id": f"tool-serverstate-{_stable_id_suffix(sex['user_request'])}",
            "source": "tool_training",
            "type": "server_state_check",
            "messages": messages,
        })
        counts["server_state_check"] += 1

    return all_examples, counts
def main():
    """Run the generator: build all examples, write the JSONL file, print a summary.

    Exits with status 1 when the seed dataset is missing. Each written record
    gains a ``qwen3_text`` field holding the rendered conversation.
    """
    banner = "=" * 60
    print(banner)
    print("Tool-Calling Training Data Generator")
    print(banner)
    print(f"\nSeed dataset: {SEED_PATH}")
    print(f"Output: {OUTPUT_PATH}")

    if not SEED_PATH.exists():
        print(f"\nERROR: Seed dataset not found at {SEED_PATH}")
        sys.exit(1)

    all_examples, counts = generate_all()

    # Write output: one JSON record per line, with both the structured
    # messages and the formatted Qwen3 text.
    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
    with OUTPUT_PATH.open("w") as out_file:
        for record in all_examples:
            record["qwen3_text"] = format_qwen3(record["messages"])
            out_file.write(json.dumps(record) + "\n")

    # Summary table, sorted by type name.
    total = len(all_examples)
    rule = f" {'-'*25} {'-'*6}"
    print(f"\nGenerated {total} tool-calling training examples:\n")
    print(f" {'Type':<25} {'Count':>6}")
    print(rule)
    for typ in sorted(counts):
        print(f" {typ:<25} {counts[typ]:>6}")
    print(rule)
    print(f" {'TOTAL':<25} {total:>6}")

    # Show one randomly chosen record as a sanity check.
    print("\n--- Sample validation ---")
    sample = random.choice(all_examples)
    role_chain = " -> ".join(m["role"] for m in sample["messages"])
    print(f" ID: {sample['id']}")
    print(f" Type: {sample['type']}")
    print(f" Turns: {len(sample['messages'])}")
    print(f" Roles: {role_chain}")
    print("\n Qwen3 text preview (first 500 chars):")
    print(f" {sample['qwen3_text'][:500]}")
    print(f"\nOutput written to: {OUTPUT_PATH}")


if __name__ == "__main__":
    main()