1200+ distilled gold examples, journal system, redstone mastery, safety awareness
Distilled Training Data (1,203 examples): - 341 initial gold (plugins, enchantments, builds, effects, god, errors) - 165 buildings + pipeline (100 structures built on dev, 65 request→query→act) - 24 safety-aware (worldborder, safe tp, intentional harm, gamemode checks) - 17 advanced logic (decanonized items, redstone gates, iterative builds) - 12 redstone mastery (NOT/OR/AND/XOR/RS-latch/T-flip-flop/comparator/clock) - 7 circuit verification and diagnosis - 1 compact comparator gates - 10 redstone methodology (build→test→save→recall→learn from mistakes) - 8 player journal usage - 29 creative+uncommon+pipeline+god with full tool chains Player Journal System: - agent/tools/player_journal.py — per-player text files (1-10 lines) - journal.read + journal.write tool schemas added - Cross-contaminated: God and Sudo share same journal per player - Includes sentiment, relationship, builds, preferences, skill level Redstone Engineering: - agent/prompts/redstone_rules.md — baked-in wall torch, dedicated lead, repeater rules - Learned from 4 iterations of 8-switch circuit: wall_torch on back face, not top - T-junction bypass prevention: dedicated lead wire between merge and NOT block - RCON limitation: can build circuits but cannot test them (lever toggle doesn't propagate) Training Data Cleaning: - 466 @s→@p fixes, 10 template commands removed - 12 outdated refusals replaced with correct plugin commands - Data de-duped across all sources Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,172 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Clean training data — fix known bad patterns before 0.6.0 training.
|
||||
|
||||
Fixes:
|
||||
- @s selector → @p (RCON has no executor entity)
|
||||
- Leading slash on commands
|
||||
- Template commands (remove entire example)
|
||||
- Old NBT enchant syntax (listed here, but no such fix is implemented in this script yet)
|
||||
- fill with trailing count
|
||||
- Generic bed/log → specific variants
|
||||
- steak → cooked_beef
|
||||
|
||||
Usage:
|
||||
python3 training/scripts/clean_training_data.py
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Repository root, taken as three directory levels above this script.
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent

# JSONL datasets that are cleaned in place, all under data/processed/.
FILES = [
    PROJECT_ROOT / "data" / "processed" / filename
    for filename in (
        "seed_dataset.jsonl",
        "tool_training.jsonl",
        "tool_training_v05.jsonl",
        "filtered_exploration.jsonl",
    )
]

# Run-wide counters, mutated by the cleaning functions and reported by main().
# NOTE(review): "fixed_old_nbt" is initialised here but nothing visible in this
# script increments or prints it.
stats = dict.fromkeys(
    (
        "files_processed",
        "examples_in",
        "examples_out",
        "removed_template",
        "fixed_at_s",
        "fixed_leading_slash",
        "fixed_old_nbt",
        "fixed_fill_count",
        "fixed_generic_items",
    ),
    0,
)
|
||||
|
||||
|
||||
def fix_command(cmd: str, player: str = "slingshooter08") -> str:
    """Fix known bad patterns in a single command string.

    Applied in order:
      1. strip one leading slash;
      2. rewrite the ``@s`` selector to ``@p`` (RCON has no executor entity);
      3. rewrite generic item ids (``minecraft:bed``, ``minecraft:log``,
         ``minecraft:steak``) to concrete ones;
      4. drop a trailing numeric count from ``fill`` commands.

    Each rule that changes the command bumps its counter in the module-level
    ``stats`` dict once per command.

    Args:
        cmd: The command to clean. Non-string values are returned untouched.
        player: Unused; kept so existing callers keep working.

    Returns:
        The cleaned command (or ``cmd`` itself when it is not a string).
    """
    if not isinstance(cmd, str):
        return cmd

    # Leading slash
    if cmd.startswith("/"):
        cmd = cmd[1:]
        stats["fixed_leading_slash"] += 1

    # @s → @p (RCON has no executor). The lookahead keeps longer tokens such
    # as "@steve" intact; only the bare selector (optionally followed by
    # "[...]", whitespace, punctuation or end of string) is rewritten.
    cmd, replaced = re.subn(r"@s(?![A-Za-z0-9_])", "@p", cmd)
    if replaced:
        stats["fixed_at_s"] += 1

    # Generic items. Matching on an identifier boundary catches the id at the
    # end of the command or before "[" / "{" as well, and never touches longer
    # ids that merely share the prefix (e.g. minecraft:bedrock).
    for generic, specific in (
        ("bed", "white_bed"),
        ("log", "oak_log"),
        ("steak", "cooked_beef"),
    ):
        cmd, replaced = re.subn(
            rf"minecraft:{generic}(?![A-Za-z0-9_])",
            f"minecraft:{specific}",
            cmd,
        )
        if replaced:
            stats["fixed_generic_items"] += 1

    # Fill with trailing count (e.g. "fill ... minecraft:stone 1")
    m = re.match(r'^(fill .+ minecraft:\w+(?:\[.*?\])?)\s+\d+$', cmd)
    if m:
        cmd = m.group(1)
        stats["fixed_fill_count"] += 1

    return cmd
|
||||
|
||||
|
||||
def fix_commands_in_obj(obj):
    """Recursively clean commands inside any nested dict/list structure.

    Rules by position:
      * values under ``commands`` / ``commands_generated`` /
        ``commands_executed`` are cleaned item by item with ``fix_command``
        when they are lists (other types are copied unchanged);
      * a string under ``command`` is cleaned with ``fix_command``;
      * a string under ``content`` gets ``@s`` → ``@p`` only when the text
        mentions "rcon" (case-insensitive);
      * any other string gets ``@s`` → ``@p`` unconditionally;
      * lists and dicts are rebuilt recursively; everything else is returned
        as-is.

    Only the bare ``@s`` selector is rewritten; tokens that merely start with
    ``@s`` (for example a player mention like ``@slingshooter08``) are left
    alone.

    Args:
        obj: Any JSON-like value.

    Returns:
        A cleaned copy for strings, lists and dicts; ``obj`` itself otherwise.
    """
    # "@s" not followed by an identifier character, i.e. the selector itself.
    at_s = r"@s(?![A-Za-z0-9_])"

    if isinstance(obj, str):
        # Fix @s in any string content (including tool call JSON)
        return re.sub(at_s, "@p", obj)

    if isinstance(obj, list):
        return [fix_commands_in_obj(item) for item in obj]

    if isinstance(obj, dict):
        result = {}
        for k, v in obj.items():
            if k in ("commands", "commands_generated", "commands_executed"):
                result[k] = [fix_command(c) for c in v] if isinstance(v, list) else v
            elif k == "command" and isinstance(v, str):
                result[k] = fix_command(v)
            elif k == "content" and isinstance(v, str):
                # Fix @s in message content (tool calls, system prompts), but
                # only in text that talks about RCON.
                result[k] = re.sub(at_s, "@p", v) if "rcon" in v.lower() else v
            else:
                result[k] = fix_commands_in_obj(v)
        return result

    return obj
|
||||
|
||||
|
||||
def has_template_commands(obj) -> bool:
    """Return True when the JSON form of ``obj`` mentions a template command.

    The whole example is serialised and lower-cased, then searched for the
    phrases "template search", "template pick" and "template build".
    """
    serialized = json.dumps(obj).lower()
    for marker in ("template search", "template pick", "template build"):
        if marker in serialized:
            return True
    return False
|
||||
|
||||
|
||||
def process_file(path: Path):
    """Clean one JSONL file in place.

    Reads every non-blank line as JSON, drops examples that contain template
    commands, runs the remaining examples through ``fix_commands_in_obj`` and
    rewrites the file with the cleaned examples. Updates the module-level
    ``stats`` counters and prints a one-line summary.

    Lines that are not valid JSON cannot be cleaned and are not written back;
    their number is reported so the loss is visible instead of silent.

    Args:
        path: JSONL file to clean. A missing file is reported and skipped.
    """
    if not path.exists():
        print(f" SKIP: {path.name} (not found)")
        return

    examples = []
    malformed = 0
    # The file is written back with ensure_ascii=False, so read and write it
    # explicitly as UTF-8 rather than relying on the locale default.
    with open(path, encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                continue
            try:
                examples.append(json.loads(line))
            except json.JSONDecodeError:
                malformed += 1

    stats["examples_in"] += len(examples)
    stats["files_processed"] += 1

    cleaned = []
    for ex in examples:
        # Remove template command examples entirely
        if has_template_commands(ex):
            stats["removed_template"] += 1
            continue

        # Fix all commands recursively
        cleaned.append(fix_commands_in_obj(ex))

    stats["examples_out"] += len(cleaned)

    # Write back
    with open(path, "w", encoding="utf-8") as f:
        for ex in cleaned:
            f.write(json.dumps(ex, ensure_ascii=False) + "\n")

    removed = len(examples) - len(cleaned)
    print(f" {path.name}: {len(examples)} → {len(cleaned)} ({removed} removed)")
    if malformed:
        print(f" WARNING: {path.name}: dropped {malformed} line(s) that were not valid JSON")
|
||||
|
||||
|
||||
def main():
    """Clean every dataset listed in FILES and print the accumulated counters."""
    print("Cleaning training data...\n")

    for jsonl_path in FILES:
        process_file(jsonl_path)

    total_in = stats["examples_in"]
    total_out = stats["examples_out"]

    print("\n" + "=" * 50)
    print(f"Files processed: {stats['files_processed']}")
    print(f"Examples: {total_in} → {total_out} ({total_in - total_out} removed)")
    print("\nFixes applied:")

    # (label, stats key) pairs, printed in this order.
    report_rows = (
        (" @s → @p: ", "fixed_at_s"),
        (" Leading slash: ", "fixed_leading_slash"),
        (" Template removed: ", "removed_template"),
        (" Fill trailing count: ", "fixed_fill_count"),
        (" Generic items: ", "fixed_generic_items"),
    )
    for label, key in report_rows:
        print(f"{label}{stats[key]}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,337 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Convert IGLU dataset to Mortdecai build training examples.
|
||||
|
||||
IGLU provides natural language instructions paired with block placement
|
||||
coordinates. We convert these to:
|
||||
1. Direct setblock/fill commands (for simple builds)
|
||||
2. script.write + script.execute flows (for complex builds)
|
||||
|
||||
Source: microsoft/iglu-datasets singleturn dataset
|
||||
Output: data/raw/iglu_build_training.jsonl
|
||||
|
||||
Usage:
|
||||
python3 training/scripts/convert_iglu_to_training.py
|
||||
"""
|
||||
|
||||
import csv
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
|
||||
sys.path.insert(0, str(PROJECT_ROOT))
|
||||
|
||||
from agent.tools.tool_schemas import qwen3_tools_block
|
||||
from agent.prompts.system_prompts import SYNTAX_RULES, RISK_GRADIENT
|
||||
|
||||
IGLU_DIR = PROJECT_ROOT / "data" / "external" / "iglu-repo" / "datasets" / "singleturn"
|
||||
OUTPUT_PATH = PROJECT_ROOT / "data" / "raw" / "iglu_build_training.jsonl"
|
||||
|
||||
TOOLS_BLOCK = qwen3_tools_block()
|
||||
SYSTEM = (
|
||||
"You are a Minecraft 1.21 command translator with script writing abilities.\n"
|
||||
"For complex builds (4+ blocks), write a mcfunction script. Validate first.\n"
|
||||
"For simple builds (1-3 blocks), use rcon.execute directly.\n"
|
||||
"PERMISSION LEVEL: 4 (generous).\n\n"
|
||||
"Return JSON: {\"risk_level\": <0-5>, \"commands\": [...], \"reasoning\": \"...\"}\n\n"
|
||||
+ SYNTAX_RULES + RISK_GRADIENT + "\n" + TOOLS_BLOCK
|
||||
)
|
||||
|
||||
# IGLU uses color IDs for blocks. Each ID corresponds to one Minecraft colour
# name; the block tables below are derived from this single mapping.
_IGLU_COLORS = {
    57: "blue",
    58: "light_blue",
    59: "green",
    60: "red",
    61: "orange",
    62: "purple",
    63: "yellow",
}

# Default mapping: IGLU color ID -> Minecraft wool block.
IGLU_BLOCK_MAP = {
    color_id: f"minecraft:{color}_wool" for color_id, color in _IGLU_COLORS.items()
}

# For variety, the same colours are also available as concrete and terracotta.
BLOCK_VARIANTS = {
    material: {
        color_id: f"minecraft:{color}_{material}"
        for color_id, color in _IGLU_COLORS.items()
    }
    for material in ("wool", "concrete", "terracotta")
}
|
||||
|
||||
PLAYERS = ["slingshooter08", "Ace13245", "TheBigBoss", "xXDragonSlayerXx", "CreeperKing99"]
|
||||
|
||||
|
||||
def sys_msg():
    """Build the system message that carries the module-level SYSTEM prompt."""
    message = {"role": "system"}
    message["content"] = SYSTEM
    return message
|
||||
|
||||
def user_msg(text):
    """Wrap ``text`` in a chat message with the "user" role."""
    return dict(role="user", content=text)
|
||||
|
||||
def tool_call(name, args):
    """Build an assistant message containing one ``<tool_call>`` block.

    The tool name and its arguments are serialised as a JSON object with the
    keys "name" and "arguments", placed on its own line between the tags.
    """
    payload = json.dumps({"name": name, "arguments": args})
    return {
        "role": "assistant",
        "content": "<tool_call>\n" + payload + "\n</tool_call>",
    }
|
||||
|
||||
def tool_result(data):
    """Build a "tool" role message whose content is ``data`` serialised as JSON."""
    serialized = json.dumps(data)
    return {"role": "tool", "content": serialized}
|
||||
|
||||
def final_response(resp):
    """Build the closing assistant message, with ``resp`` serialised as JSON."""
    serialized = json.dumps(resp)
    return {"role": "assistant", "content": serialized}
|
||||
|
||||
|
||||
def blocks_to_commands(blocks_to_place, blocks_to_remove, block_map, use_relative=True, offset=(0, 64, 0)):
    """Convert block coordinate lists to setblock/fill commands.

    Placed blocks are grouped by target block id. A group of four or more
    positions that exactly fills its bounding box becomes a single ``fill``;
    every other group becomes one ``setblock`` per position. Removals become
    ``setblock ... minecraft:air`` and are emitted after all placements.

    A removal at a position that is also being placed is skipped: ``setblock``
    already replaces whatever is there, and emitting the ``air`` command after
    the placement would erase the block that was just placed (this happens
    when a block changes colour between the two world states).

    Args:
        blocks_to_place: Iterable of ``(x, y, z, color_id)`` tuples.
        blocks_to_remove: Iterable of ``(x, y, z, color_id)`` tuples; the
            colour is ignored.
        block_map: Mapping from colour id to block id. Unknown ids fall back
            to ``minecraft:white_wool``.
        use_relative: Emit ``~``-relative coordinates when True, absolute
            coordinates otherwise.
        offset: Only ``offset[1]`` is used: it is subtracted from y when
            emitting relative coordinates. x and z are never shifted.

    Returns:
        List of command strings, placements first and removals last.
    """
    y_base = offset[1]

    def position(x, y, z):
        # Single place that decides between "~dx ~dy ~dz" and "x y z".
        if use_relative:
            return f"~{x} ~{y - y_base} ~{z}"
        return f"{x} {y} {z}"

    # Group placed blocks by block id for potential fill optimization.
    by_block = defaultdict(list)
    placed_positions = set()
    for x, y, z, color_id in blocks_to_place:
        by_block[block_map.get(color_id, "minecraft:white_wool")].append((x, y, z))
        placed_positions.add((x, y, z))

    commands = []
    for block, coords in by_block.items():
        if len(coords) > 3:
            xs, ys, zs = zip(*coords)
            low = (min(xs), min(ys), min(zs))
            high = (max(xs), max(ys), max(zs))
            box_volume = (
                (high[0] - low[0] + 1)
                * (high[1] - low[1] + 1)
                * (high[2] - low[2] + 1)
            )
            # Solid box: every position inside the bounding box is present.
            if box_volume == len(coords):
                commands.append(f"fill {position(*low)} {position(*high)} {block}")
                continue
        # Small group or not a clean box — individual setblocks.
        commands.extend(f"setblock {position(x, y, z)} {block}" for x, y, z in coords)

    # Remove blocks (skipping positions that were just overwritten above).
    for x, y, z, _ in blocks_to_remove:
        if (x, y, z) in placed_positions:
            continue
        commands.append(f"setblock {position(x, y, z)} minecraft:air")

    return commands
|
||||
|
||||
|
||||
def load_iglu_pairs():
    """Load instruction-to-build pairs from the IGLU single-turn dataset.

    Reads ``clarifying_questions_train.csv`` under ``IGLU_DIR`` and keeps the
    rows that have a non-empty instruction marked ``IsInstructionClear ==
    'Yes'``, an existing initial world file, and a game directory under
    ``target_world_states/builder-data``.

    Returns:
        A list of dicts with the keys ``instruction``, ``init_path``,
        ``target_path`` and ``game_id``. Empty when the CSV is missing.
    """
    csv_path = IGLU_DIR / "clarifying_questions_train.csv"
    if not csv_path.exists():
        print(f"CSV not found: {csv_path}")
        return []

    # Build target state index: game directory name -> its step files.
    target_dir = IGLU_DIR / "target_world_states" / "builder-data"
    targets = {}
    if target_dir.exists():
        for game_dir in target_dir.iterdir():
            if not game_dir.is_dir():
                continue
            # Sorted so the file picked below does not depend on the
            # filesystem's directory iteration order.
            step_files = sorted(p for p in game_dir.iterdir() if p.is_file())
            if step_files:
                targets[game_dir.name] = step_files

    pairs = []
    # newline="" lets the csv module handle quoted fields that contain line
    # breaks; the encoding is pinned so the result is the same on every host.
    with open(csv_path, newline="", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            game_id = row['GameId'].lower().replace(' ', '')
            instruction = row['InputInstruction'].strip()
            if not instruction or row.get('IsInstructionClear') != 'Yes':
                continue

            init_path = IGLU_DIR / row['InitializedWorldPath']
            if game_id in targets and init_path.exists():
                pairs.append({
                    'instruction': instruction,
                    'init_path': str(init_path),
                    # NOTE(review): always the first step file of the game,
                    # regardless of which step the CSV row refers to — confirm
                    # this is the intended target state.
                    'target_path': str(targets[game_id][0]),
                    'game_id': game_id,
                })

    return pairs
|
||||
|
||||
|
||||
def convert_pair_to_example(pair, idx, block_variant="wool"):
    """Convert one IGLU pair to a training example.

    The block sets of the initial and target world states are diffed; the
    difference is turned into commands and wrapped in a synthetic chat
    transcript. Up to four block changes are issued as individual
    ``rcon.execute`` calls, larger diffs go through a script
    validate → write → execute flow.

    NOTE(review): the SYSTEM prompt in this file describes scripts as being
    for "4+ blocks", while the threshold used here sends exactly four changes
    down the direct path — confirm which one is intended.

    Args:
        pair: Dict with ``instruction``, ``init_path`` and ``target_path``.
        idx: Running index, used for the example id and the script name.
        block_variant: Key into BLOCK_VARIANTS; unknown keys fall back to wool.

    Returns:
        The example dict, or None when the two states do not differ or no
        commands were produced.
    """
    block_map = BLOCK_VARIANTS.get(block_variant, BLOCK_VARIANTS["wool"])
    player = random.choice(PLAYERS)

    with open(pair['init_path']) as init_file:
        init_state = json.load(init_file)
    with open(pair['target_path']) as target_file:
        target_state = json.load(target_file)

    before = {tuple(b) for b in init_state.get('worldEndingState', {}).get('blocks', [])}
    after = {tuple(b) for b in target_state.get('worldEndingState', {}).get('blocks', [])}

    to_place = sorted(after - before)
    to_remove = sorted(before - after)
    if not (to_place or to_remove):
        return None

    total_changes = len(to_place) + len(to_remove)
    commands = blocks_to_commands(to_place, to_remove, block_map)
    if not commands:
        return None

    instruction = pair['instruction']
    lowered = instruction.lower()
    # Make it sound like a Minecraft player request
    prompt = random.choice([
        f"sudo {instruction}",
        f"sudo can you {lowered}",
        f"sudo please {lowered}",
        f"sudo I need you to {lowered}",
    ])

    msgs = [sys_msg(), user_msg(f"Player {player}: {prompt}")]
    is_script = total_changes > 4

    if is_script:
        # Script workflow for larger builds: validate, write, then execute
        # at the player.
        script_name = f"build_{idx:04d}"
        n_commands = len(commands)
        steps = (
            (
                "script.validate",
                {"commands": commands},
                {"valid": True, "total": n_commands, "passed": n_commands, "errors": []},
            ),
            (
                "script.write",
                {"name": script_name, "commands": commands, "description": instruction[:80]},
                {"ok": True, "path": f"mortdecai:{script_name}", "lines": n_commands},
            ),
            (
                "script.execute",
                {"name": script_name, "as_player": player},
                {
                    "ok": True,
                    "result": f"Executed {n_commands} commands from function mortdecai:{script_name}",
                },
            ),
        )
        for tool_name, arguments, outcome in steps:
            msgs.append(tool_call(tool_name, arguments))
            msgs.append(tool_result(outcome))

        resp = {
            "risk_level": 3,
            "commands": [f"function mortdecai:{script_name}"],
            "reasoning": (
                f"Complex build ({total_changes} block changes). "
                f"Wrote script '{script_name}' with {n_commands} commands. "
                f"Placed {len(to_place)}, removed {len(to_remove)}."
            ),
        }
    else:
        # Direct rcon.execute for small builds
        for cmd in commands:
            msgs.append(tool_call("rcon.execute", {"command": cmd}))
            msgs.append(tool_result({"success": True, "result": "Changed the block"}))
        resp = {
            "risk_level": 3,
            "commands": commands,
            "reasoning": f"Direct block placement: {len(to_place)} placed, {len(to_remove)} removed.",
        }

    msgs.append(final_response(resp))

    return {
        "id": f"iglu-build-{idx:05d}",
        "source": "iglu_dataset",
        "type": "build_script" if is_script else "build_direct",
        "block_changes": total_changes,
        "messages": msgs,
    }
|
||||
|
||||
|
||||
def main():
    """Convert every loaded IGLU pair and write the examples as JSONL.

    Block variants are rotated by pair index so the output mixes the
    materials in BLOCK_VARIANTS. Pairs for which ``convert_pair_to_example``
    returns nothing are counted as skipped. The result is written to
    OUTPUT_PATH, one JSON object per line.
    """
    print("Loading IGLU dataset...")
    pairs = load_iglu_pairs()
    print(f"Found {len(pairs)} instruction-build pairs")

    if not pairs:
        print("No data found. Make sure iglu-repo is cloned in data/external/")
        return

    examples = []
    skipped = 0

    # Process with variety — use different block variants
    variants = list(BLOCK_VARIANTS.keys())

    for idx, pair in enumerate(pairs):
        variant = variants[idx % len(variants)]
        ex = convert_pair_to_example(pair, idx, variant)
        if ex:
            examples.append(ex)
        else:
            skipped += 1

        if (idx + 1) % 500 == 0:
            print(f" Processed {idx+1}/{len(pairs)}, generated {len(examples)}")

    # Stats
    direct = sum(1 for e in examples if e['type'] == 'build_direct')
    script = sum(1 for e in examples if e['type'] == 'build_script')
    # max(..., 1) avoids dividing by zero when nothing was generated.
    avg_blocks = sum(e['block_changes'] for e in examples) / max(len(examples), 1)

    print(f"\nGenerated {len(examples)} examples (skipped {skipped} empty)")
    print(f" Direct (1-4 blocks): {direct}")
    print(f" Script (5+ blocks): {script}")
    print(f" Avg block changes: {avg_blocks:.1f}")

    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
    # ensure_ascii=False emits raw non-ASCII characters, so the file must be
    # opened as UTF-8 explicitly; the locale default may not be able to
    # encode them.
    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        for ex in examples:
            f.write(json.dumps(ex, ensure_ascii=False) + "\n")

    print(f"\nWritten to {OUTPUT_PATH}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,160 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Filter audit log data for training quality.
|
||||
|
||||
Keeps the full validator loop (generated → executed → RCON result) as the
|
||||
training signal. Quarantines empty outputs, system prompt leaks, and broken
|
||||
JSON/tellraw commands into a separate file for review.
|
||||
|
||||
Usage:
|
||||
python3 filter_audit_log.py [--input FILE] [--output FILE] [--stats]
|
||||
python3 filter_audit_log.py --stats # dry run, print breakdown only
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
|
||||
DEFAULT_INPUT = PROJECT_ROOT / "data" / "raw" / "training_audit_dev_latest.jsonl"
|
||||
DEFAULT_OUTPUT = PROJECT_ROOT / "data" / "processed" / "filtered_audit.jsonl"
|
||||
DEFAULT_QUARANTINE = PROJECT_ROOT / "data" / "quarantine" / "audit_quarantine.jsonl"
|
||||
|
||||
# Strings that indicate system prompt leakage in model output.
# Matching is case-insensitive (see is_prompt_leak), so entries may be written
# in any case.
PROMPT_LEAK_PATTERNS = [
    "you are a minecraft",
    "command translator",
    "player request vector",
    "you are an ai",
    "system prompt",
    "Context: Player is",
]


def is_prompt_leak(message: str) -> bool:
    """Return True when ``message`` contains any prompt-leak pattern.

    Both the message and each pattern are lower-cased before comparison, so a
    pattern written with capitals (such as "Context: Player is") matches too.

    Args:
        message: Model output text to inspect.

    Returns:
        True if at least one entry of PROMPT_LEAK_PATTERNS occurs in the
        message, ignoring case; False otherwise (including for empty text).
    """
    if not message:
        return False
    msg_lower = message.lower()
    return any(pat.lower() in msg_lower for pat in PROMPT_LEAK_PATTERNS)
|
||||
|
||||
|
||||
def has_broken_json(commands: list[str]) -> bool:
    """Report whether any command looks like broken tellraw/JSON output.

    A command is flagged when, after stripping whitespace, it either
      * starts like a fragment of split JSON (colon-backslash,
        comma-backslash, colon-brace or a comma), or
      * contains "tellraw" and its opening ``{``/``[`` count differs from its
        closing ``}``/``]`` count by more than one.
    """
    fragment_prefixes = (":\\", ",\\", ":{", ",")

    for raw in commands:
        text = raw.strip()

        if text.startswith(fragment_prefixes):
            return True

        if "tellraw" not in text:
            continue

        # Truncated tellraw: compare bracket totals, tolerating a difference
        # of one.
        n_open = sum(text.count(ch) for ch in "{[")
        n_close = sum(text.count(ch) for ch in "}]")
        if abs(n_open - n_close) > 1:
            return True

    return False
|
||||
|
||||
|
||||
def classify(record: dict) -> tuple[str, str]:
    """Classify a record as (keep|quarantine, reason).

    Quarantine reasons, checked first and in this order:
      * ``prompt_leak``  — the message matches ``is_prompt_leak``;
      * ``empty_output`` — no message and no generated commands;
      * ``broken_json``  — generated commands fail ``has_broken_json``.

    Keep reasons, in order:
      * ``validator_corrected`` — generated and executed commands differ;
      * ``validator_blocked``   — commands generated but none executed;
      * ``executed``            — generated and executed commands are equal;
      * ``god_rp``              — god mode with a message and no commands;
      * ``other``               — anything else.

    Missing or JSON-``null`` values for ``output``, ``message`` and the two
    command lists are treated as empty.

    Args:
        record: One audit-log record.

    Returns:
        ``(action, reason)`` where action is "keep" or "quarantine".
    """
    mode = record.get("mode", "")
    # `or` also covers keys that are present with a null value, which
    # .get(key, default) alone would pass through as None.
    output = record.get("output") or {}
    message = (output.get("message") or "").strip()
    cmds_gen = output.get("commands_generated") or []
    cmds_exe = output.get("commands_executed") or []

    # ── Quarantine ─────────────────────────────────────────────────
    # System prompt leak
    if is_prompt_leak(message):
        return "quarantine", "prompt_leak"

    # Completely empty output — teaches the model to produce nothing
    if not message and not cmds_gen:
        return "quarantine", "empty_output"

    # Broken JSON/tellraw commands — teaches bad syntax
    if cmds_gen and has_broken_json(cmds_gen):
        return "quarantine", "broken_json"

    # ── Keep ───────────────────────────────────────────────────────
    if cmds_gen:
        if not cmds_exe:
            # Generated but validator blocked — teaches what gets rejected
            return "keep", "validator_blocked"
        if cmds_gen != cmds_exe:
            # Validator correction: generated != executed (highest value)
            return "keep", "validator_corrected"
        # Commands executed (sudo or god mode with action)
        return "keep", "executed"

    # God mode with RP message, no commands — valid roleplay response
    if mode == "god" and message:
        return "keep", "god_rp"

    # Catch-all: keep with flag
    return "keep", "other"
|
||||
|
||||
|
||||
def main():
    """Filter an audit-log JSONL file into kept and quarantined records.

    Every non-blank input line is parsed as JSON and classified with
    ``classify``. A breakdown by reason is printed. Unless ``--stats`` is
    given, kept records are written to ``--output`` (without the internal
    ``_filter_*`` tags) and quarantined records to ``--quarantine`` (with the
    tags, for review).
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--input", type=Path, default=DEFAULT_INPUT)
    parser.add_argument("--output", type=Path, default=DEFAULT_OUTPUT)
    parser.add_argument("--quarantine", type=Path, default=DEFAULT_QUARANTINE,
                        help="Write quarantined records here for review")
    parser.add_argument("--stats", action="store_true", help="Print stats only, don't write")
    args = parser.parse_args()

    stats = {"keep": {}, "quarantine": {}}
    kept = []
    quarantined = []

    with open(args.input, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            record = json.loads(line)
            action, reason = classify(record)
            stats[action][reason] = stats[action].get(reason, 0) + 1
            record["_filter_action"] = action
            record["_filter_reason"] = reason
            if action == "keep":
                kept.append(record)
            else:
                quarantined.append(record)

    total = len(kept) + len(quarantined)

    def percent(count):
        # An empty input file would otherwise raise ZeroDivisionError.
        return 100 * count / total if total else 0.0

    print(f"Total records: {total}")
    print(f"Kept: {len(kept)} ({percent(len(kept)):.1f}%)")
    print(f"Quarantined: {len(quarantined)} ({percent(len(quarantined)):.1f}%)")
    print()

    # Per-reason breakdowns, most frequent reason first.
    for title, bucket in (("KEPT breakdown:", "keep"), ("QUARANTINED breakdown:", "quarantine")):
        print(title)
        for reason, count in sorted(stats[bucket].items(), key=lambda x: -x[1]):
            print(f" {reason:<25} {count:>5}")
        if bucket == "keep":
            print()

    if args.stats:
        return

    # Write kept records (strip internal filter tags)
    args.output.parent.mkdir(parents=True, exist_ok=True)
    with open(args.output, "w", encoding="utf-8") as f:
        for record in kept:
            out = {k: v for k, v in record.items() if not k.startswith("_filter_")}
            f.write(json.dumps(out) + "\n")
    print(f"\nWrote {len(kept)} records to {args.output}")

    # Write quarantined records (keep filter tags for review)
    args.quarantine.parent.mkdir(parents=True, exist_ok=True)
    with open(args.quarantine, "w", encoding="utf-8") as f:
        for record in quarantined:
            f.write(json.dumps(record) + "\n")
    print(f"Wrote {len(quarantined)} quarantined records to {args.quarantine}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,449 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Dataset merge pipeline for Mortdecai training.
|
||||
|
||||
Normalizes all dataset formats into the two schemas the trainer expects:
|
||||
1. `conversations` — [{role, content}, ...] for simple command gen
|
||||
2. `messages` + `qwen3_text` — multi-turn tool-calling with pre-formatted text
|
||||
|
||||
Handles deduplication, mix ratios, and outputs a single training-ready JSONL.
|
||||
|
||||
Usage:
|
||||
# Default merge with recommended ratios
|
||||
python3 merge_datasets.py
|
||||
|
||||
# Custom ratios (multipliers per source)
|
||||
python3 merge_datasets.py --ratios seed=2.0,tool=1.0,iglu=0.5
|
||||
|
||||
# Dry run — show stats without writing
|
||||
python3 merge_datasets.py --dry-run
|
||||
|
||||
# Include chat app exports
|
||||
python3 merge_datasets.py --include-chat-logs
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import hashlib
|
||||
import random
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from collections import Counter
|
||||
|
||||
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
|
||||
sys.path.insert(0, str(PROJECT_ROOT))
|
||||
|
||||
from agent.prompts.system_prompts import get_prompt, SYNTAX_RULES, RISK_GRADIENT
|
||||
|
||||
# ── Dataset sources ───────────────────────────────────────────────────────────
|
||||
|
||||
# Registry of dataset sources. Each entry gives the file path (relative to the
# project root), the name of its record format, the default sampling ratio and
# a short description. Only "chat_logs" carries the extra "optional" flag.
SOURCES = {
    "seed": dict(
        path="data/processed/seed_dataset.jsonl",
        format="seed",
        default_ratio=2.0,  # Oversample — keep seed dominant
        description="Core command gen with pos/neg pairs",
    ),
    "tool": dict(
        path="data/processed/tool_training.jsonl",
        format="tool_messages",
        default_ratio=1.0,
        description="Multi-turn tool-calling examples",
    ),
    "tool_v05": dict(
        path="data/processed/tool_training_v05.jsonl",
        format="tool_messages",
        default_ratio=1.5,  # High quality, oversample
        description="0.5.0 quality tool examples",
    ),
    "iglu": dict(
        path="data/raw/iglu_build_training.jsonl",
        format="tool_messages",
        default_ratio=0.8,
        description="IGLU building dataset",
    ),
    "plugin": dict(
        path="data/raw/plugin_training.jsonl",
        format="tool_messages",
        default_ratio=1.5,
        description="Plugin command examples",
    ),
    "exploration": dict(
        path="data/processed/filtered_exploration.jsonl",
        format="exploration",
        default_ratio=1.0,
        description="Wiki-grounded exploration",
    ),
    "self_play": dict(
        path="data/processed/self_play.jsonl",
        format="self_play",
        default_ratio=0.6,  # Large set, don't let it dominate
        description="Self-play generations",
    ),
    "audit": dict(
        path="data/processed/filtered_audit.jsonl",
        format="audit",
        default_ratio=0.5,  # Large set, needs dilution
        description="Filtered audit log data",
    ),
    "distilled": dict(
        path="data/processed/distilled.jsonl",
        format="seed",
        default_ratio=1.5,  # Gold standard from Claude
        description="Claude-distilled examples",
    ),
    "chat_logs": dict(
        path="data/chat_logs/training_export.jsonl",
        format="audit",
        default_ratio=2.0,  # Hand-curated via chat app
        description="Chat app training exports",
        optional=True,
    ),
}
|
||||
|
||||
# Also include all raw training files. Every entry is a .jsonl file directly
# under data/raw/, listed here by file stem.
RAW_TRAINING_FILES = [
    f"data/raw/{stem}.jsonl"
    for stem in (
        "advanced_commands_training",
        "biome_dimension_training",
        "chaos_event_training",
        "chaos_gaps_training",
        "command_reference_training",
        "cosmetic_xp_training",
        "dangerous_effects_training",
        "death_environment_training",
        "distance_projectile_training",
        "distance_scale_training",
        "enchant_order_errors",
        "enchantment_training",
        "entity_mob_training",
        "entity_targeting_training",
        "error_correction_training",
        "event_trigger_training",
        "execute_chain_training",
        "fall_safety_training",
        "gamerule_training",
        "kill_radius_training",
        "memory_training",
        "multiplayer_training",
        "multistep_training",
        "paper_training",
        "prod_pattern_fixes",
        "quantity_training",
        "recipe_training",
        "redstone_training",
        "revert_and_drops_training",
        "revert_format_training",
        "risk_hierarchy_training",
        "script_tool_training",
        "suffocation_training",
        "worldedit_training",
    )
]
|
||||
|
||||
# ── Format converters ─────────────────────────────────────────────────────────
|
||||
|
||||
SUDO_SYSTEM = get_prompt("sudo")
|
||||
GOD_SYSTEM = get_prompt("god")
|
||||
|
||||
|
||||
def _seed_to_conversations(record: dict) -> dict:
    """Convert a seed_dataset record into the ``conversations`` schema.

    The mode is inferred from the user message: a message starting with
    "pray " (case-insensitive) uses the god system prompt, anything else the
    sudo prompt. The assistant turn is a JSON object with ``commands`` and
    ``reasoning``; in god mode it always carries ``message`` as well (empty
    string when the record has none).
    """
    source_input = record.get("input", {})
    source_output = record.get("output", {})
    user_text = source_input.get("user_message", "")

    # Detect mode from prefix
    is_god = user_text.lower().startswith("pray ")
    system_prompt = GOD_SYSTEM if is_god else SUDO_SYSTEM

    # Build assistant response JSON
    reply = {
        "commands": source_output.get("commands", []),
        "reasoning": source_output.get("reasoning", ""),
    }
    if is_god:
        reply["message"] = source_output.get("message", "")

    turns = [
        {"role": "system", "content": "/no_think\n" + system_prompt},
        {"role": "user", "content": user_text},
        {"role": "assistant", "content": json.dumps(reply)},
    ]
    return {"conversations": turns}
|
||||
|
||||
|
||||
def _audit_to_conversations(record: dict) -> dict:
|
||||
"""Convert audit log format to conversations."""
|
||||
inp = record.get("input", {})
|
||||
out = record.get("output", {})
|
||||
mode = record.get("mode", "sudo")
|
||||
user_msg = inp.get("user_message", "")
|
||||
commands = out.get("commands_generated", []) or out.get("commands", [])
|
||||
message = out.get("message", "")
|
||||
|
||||
system = GOD_SYSTEM if mode == "god" else SUDO_SYSTEM
|
||||
|
||||
response = {"commands": commands}
|
||||
if message:
|
||||
response["message"] = message
|
||||
|
||||
return {
|
||||
"conversations": [
|
||||
{"role": "system", "content": "/no_think\n" + system},
|
||||
{"role": "user", "content": user_msg},
|
||||
{"role": "assistant", "content": json.dumps(response)},
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
def _self_play_to_conversations(record: dict) -> dict:
|
||||
"""Convert self_play format to conversations."""
|
||||
inp = record.get("input", {})
|
||||
out = record.get("output", {})
|
||||
user_msg = inp.get("user_message", "")
|
||||
commands = out.get("commands", [])
|
||||
reasoning = out.get("reasoning", "")
|
||||
message = out.get("message", record.get("message", ""))
|
||||
|
||||
if user_msg.lower().startswith("pray "):
|
||||
system = GOD_SYSTEM
|
||||
else:
|
||||
system = SUDO_SYSTEM
|
||||
|
||||
response = {"commands": commands, "reasoning": reasoning}
|
||||
if message:
|
||||
response["message"] = message
|
||||
|
||||
return {
|
||||
"conversations": [
|
||||
{"role": "system", "content": "/no_think\n" + system},
|
||||
{"role": "user", "content": user_msg},
|
||||
{"role": "assistant", "content": json.dumps(response)},
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
def _exploration_to_conversations(record: dict) -> dict:
|
||||
"""Convert exploration format to conversations."""
|
||||
inp = record.get("input", {})
|
||||
out = record.get("output", {})
|
||||
user_msg = inp.get("user_message", "") if isinstance(inp, dict) else str(inp)
|
||||
commands = out.get("commands", [])
|
||||
reasoning = out.get("reasoning", "")
|
||||
|
||||
response = {"commands": commands, "reasoning": reasoning}
|
||||
|
||||
return {
|
||||
"conversations": [
|
||||
{"role": "system", "content": "/no_think\n" + SUDO_SYSTEM},
|
||||
{"role": "user", "content": user_msg},
|
||||
{"role": "assistant", "content": json.dumps(response)},
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
def _tool_messages_passthrough(record: dict) -> dict:
|
||||
"""Tool training already has messages — pass through or use qwen3_text."""
|
||||
if "qwen3_text" in record:
|
||||
return {"text": record["qwen3_text"]}
|
||||
if "messages" in record:
|
||||
return {"conversations": record["messages"]}
|
||||
return None
|
||||
|
||||
|
||||
def _raw_training_to_conversations(record: dict) -> dict:
    """Convert a record from a raw training file.

    Raw training files use the same layout as the seed dataset, so the
    conversion is delegated to _seed_to_conversations.
    """
    converted = _seed_to_conversations(record)
    return converted
|
||||
|
||||
|
||||
# Maps a source's declared "format" to the function that converts one of its
# records into the training format.
CONVERTERS = {
    # {"input": {...}, "output": {...}} records; mode inferred from the prompt
    "seed": _seed_to_conversations,
    # records that already carry "qwen3_text" or "messages"
    "tool_messages": _tool_messages_passthrough,
    # audit-log records with an explicit "mode" field
    "audit": _audit_to_conversations,
    "self_play": _self_play_to_conversations,
    # always converted with the sudo system prompt
    "exploration": _exploration_to_conversations,
    # same layout as "seed"
    "raw_training": _raw_training_to_conversations,
}
|
||||
|
||||
|
||||
# ── Pipeline ──────────────────────────────────────────────────────────────────
|
||||
|
||||
def dedup_key(record: dict) -> str:
    """Return an MD5 hex digest identifying a record's training content.

    Key material, by record shape:
      * "text" records: the first 500 characters of the text.
      * "conversations" records: the first 200 characters of the last user
        message, a "|" separator, and the first 200 characters of the first
        assistant message that has non-empty content.
      * anything else: the first 500 characters of the record's JSON dump.

    Message content that is None (e.g. an assistant turn that only carries
    tool calls) or not a string is tolerated instead of raising, so a single
    passed-through tool conversation cannot abort the whole merge.
    """
    if "text" in record:
        content = _dedup_text(record["text"])[:500]
    elif "conversations" in record:
        user = ""
        asst = ""
        for msg in record["conversations"]:
            role = msg.get("role")
            if role == "user":
                user = _dedup_text(msg.get("content"))[:200]
            elif role == "assistant" and not asst:
                # Keep looking until an assistant turn has actual content.
                asst = _dedup_text(msg.get("content"))[:200]
        content = user + "|" + asst
    else:
        content = json.dumps(record)[:500]
    return hashlib.md5(content.encode()).hexdigest()


def _dedup_text(value) -> str:
    """Coerce message content to a string suitable for slicing and hashing."""
    if value is None:
        return ""
    if isinstance(value, str):
        return value
    # Structured content (lists/dicts): use a stable JSON rendering.
    return json.dumps(value, sort_keys=True)
|
||||
|
||||
|
||||
def load_and_convert(source_name: str, meta: dict, ratio: float) -> list:
    """Load a JSONL source, convert each record, then apply the sampling ratio.

    Args:
        source_name: Label used only in the missing-file warning.
        meta: Source description; reads "path" (relative to PROJECT_ROOT),
            "format" (a key of CONVERTERS) and the optional "optional" flag,
            which silences the missing-file warning.
        ratio: >1.0 oversamples (whole copies plus a random partial sample),
            <1.0 downsamples to at least one record, 1.0 keeps the records
            unchanged.

    Returns:
        The converted records after sampling; an empty list when the file is
        missing or yields no usable records.

    Blank lines, lines that are not valid JSON, and records the converter
    cannot handle (including JSON values that are not objects) are skipped.
    """
    path = PROJECT_ROOT / meta["path"]
    if not path.exists():
        if not meta.get("optional"):
            print(f" WARNING: {path} not found, skipping {source_name}")
        return []

    converter = CONVERTERS[meta["format"]]
    records = []

    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                converted = converter(json.loads(line))
            except (json.JSONDecodeError, KeyError, TypeError, AttributeError):
                # AttributeError: the line parsed, but not to a JSON object.
                continue
            if converted:
                records.append(converted)

    if not records:
        # Nothing to sample from; random.sample([], 1) would raise ValueError.
        return records

    # Apply ratio (oversample or downsample)
    if ratio > 1.0:
        full_copies = int(ratio)
        partial = ratio - full_copies
        oversampled = records * full_copies
        if partial > 0:
            oversampled.extend(random.sample(records, int(len(records) * partial)))
        records = oversampled
    elif ratio < 1.0:
        k = max(1, int(len(records) * ratio))
        records = random.sample(records, k)

    return records
|
||||
|
||||
|
||||
def main():
    """Merge all training sources into one shuffled, de-duplicated JSONL file.

    Steps: parse CLI options, seed the RNG, load every named source through
    load_and_convert (with per-source ratio overrides from --ratios), append
    the raw training files unless --no-include-raw is given, drop duplicates
    by dedup_key, shuffle, and write the result to --output (skipped with
    --dry-run). Progress and counts are printed along the way.
    """
    parser = argparse.ArgumentParser(description="Merge datasets for Mortdecai training")
    parser.add_argument("--output", type=Path,
                        default=PROJECT_ROOT / "data" / "processed" / "merged_training_v06.jsonl")
    parser.add_argument("--ratios", default="",
                        help="Override ratios: seed=2.0,tool=1.0,iglu=0.5")
    parser.add_argument("--dry-run", action="store_true",
                        help="Print stats without writing output")
    parser.add_argument("--include-chat-logs", action="store_true",
                        help="Include chat app training exports")
    parser.add_argument("--include-raw", dest="include_raw", action="store_true", default=True,
                        help="Include raw training files (default: true)")
    # store_true with default=True can never be turned off; provide the
    # explicit negative switch while keeping the original flag accepted.
    parser.add_argument("--no-include-raw", dest="include_raw", action="store_false",
                        help="Skip raw training files")
    parser.add_argument("--seed", type=int, default=42,
                        help="Random seed for reproducibility")
    args = parser.parse_args()

    random.seed(args.seed)

    # Parse ratio overrides ("name=value" pairs; empty pairs such as a
    # trailing comma are ignored).
    ratio_overrides = {}
    for pair in args.ratios.split(","):
        if not pair.strip():
            continue
        name, val = pair.split("=", 1)
        ratio_overrides[name.strip()] = float(val.strip())

    # Filter sources
    active_sources = dict(SOURCES)
    if not args.include_chat_logs:
        active_sources.pop("chat_logs", None)

    print("Mortdecai Dataset Merge Pipeline")
    print("=" * 60)
    print()

    all_records = []

    # Load named sources
    for name, meta in active_sources.items():
        ratio = ratio_overrides.get(name, meta["default_ratio"])
        records = load_and_convert(name, meta, ratio)

        # Raw line count is for the report only (includes blank/invalid lines).
        raw_count = 0
        path = PROJECT_ROOT / meta["path"]
        if path.exists():
            with open(path) as f:
                raw_count = sum(1 for _ in f)

        all_records.extend(records)
        print(f" {name:<20s} {raw_count:>6} raw x{ratio:.1f} = {len(records):>7} ({meta['description']})")

    # Load raw training files
    if args.include_raw:
        converter = CONVERTERS["raw_training"]
        raw_total = 0
        for filepath in RAW_TRAINING_FILES:
            path = PROJECT_ROOT / filepath
            if not path.exists():
                continue
            count = 0
            with open(path) as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        converted = converter(json.loads(line))
                    except (json.JSONDecodeError, KeyError, TypeError, AttributeError):
                        # Skip malformed records, but let KeyboardInterrupt
                        # and other unexpected errors propagate.
                        continue
                    if converted:
                        all_records.append(converted)
                        count += 1
            raw_total += count
        print(f" {'raw_files':<20s} {raw_total:>6} raw x1.0 = {raw_total:>7} ({len(RAW_TRAINING_FILES)} files)")

    print()
    print(f" Total before dedup: {len(all_records)}")

    # Deduplicate, keeping the first occurrence of each key
    seen = set()
    deduped = []
    for r in all_records:
        key = dedup_key(r)
        if key not in seen:
            seen.add(key)
            deduped.append(r)

    dupes_removed = len(all_records) - len(deduped)
    print(f" Duplicates removed: {dupes_removed}")
    print(f" Total after dedup: {len(deduped)}")

    # Count format split
    text_count = sum(1 for r in deduped if "text" in r)
    conv_count = sum(1 for r in deduped if "conversations" in r)
    print(f" Format: {conv_count} conversations, {text_count} pre-formatted text")

    # Shuffle
    random.shuffle(deduped)

    if args.dry_run:
        print("\n [DRY RUN] No output written.")
        return

    # Write
    args.output.parent.mkdir(parents=True, exist_ok=True)
    with open(args.output, "w") as f:
        for r in deduped:
            f.write(json.dumps(r, ensure_ascii=False) + "\n")

    print(f"\n Wrote {len(deduped)} examples to {args.output}")
    print(f" File size: {args.output.stat().st_size / 1e6:.1f} MB")


if __name__ == "__main__":
    main()
|
||||
@@ -1,11 +1,11 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Tool-focused self-play — exercises all 14 tools on a live dev server.
|
||||
Tool-focused self-play — exercises all 17 tools on a live dev server.
|
||||
|
||||
Unlike regular self-play (which tests command generation), this script
|
||||
specifically generates prompts that require tool use: script writing,
|
||||
memory operations, entity scanning, wiki lookups, and chained multi-tool
|
||||
flows. Runs on the dev server via RCON.
|
||||
memory operations, entity scanning, wiki/plugin/changelog/paper lookups,
|
||||
and chained multi-tool flows. Runs on the dev server via RCON.
|
||||
|
||||
The model responds, its tool calls get executed for real, and the full
|
||||
interaction (prompt + tool calls + results + final response) gets logged
|
||||
@@ -15,7 +15,11 @@ Usage:
|
||||
python3 tool_self_play.py --ollama-url http://192.168.0.179:11434 \\
|
||||
--rcon-host 192.168.0.112 --rcon-port 25578 --rounds 30
|
||||
|
||||
# Or via the scheduler preset
|
||||
# Load extra prompts from prayer bank
|
||||
python3 tool_self_play.py --prompt-bank data/raw/prayer_prompt_bank.jsonl
|
||||
|
||||
# Focus on weak categories only
|
||||
python3 tool_self_play.py --categories worldguard,coreprotect,luckperms
|
||||
"""
|
||||
|
||||
import argparse
|
||||
@@ -34,10 +38,91 @@ import requests
|
||||
from agent.tools.persistent_rcon import get_rcon
|
||||
|
||||
OUTPUT_DIR = PROJECT_ROOT / "data" / "raw" / "tool_selfplay"
|
||||
PROMPTS_DIR = PROJECT_ROOT / "training" / "prompts"
|
||||
|
||||
# ── Prompt categories that exercise specific tools ─────────────────────────
|
||||
# ── Template variables for prompt expansion ────────────────────────────────
|
||||
|
||||
PROMPTS = {
|
||||
# Substitution pools for "{placeholder}" tokens in prompt templates; the key
# is the placeholder name, the list holds the values it may expand to.
TEMPLATE_VARS = {
    "player": [
        "slingshooter08",
        "Ace13245",
        "TheBigBoss",
        "xXDragonSlayerXx",
    ],
    "target": [
        "Ace13245",
        "TheBigBoss",
        "xXDragonSlayerXx",
        "slingshooter08",
    ],
    "region": [
        "my-base",
        "spawn-zone",
        "pvp-arena",
        "vip-lounge",
        "farm-area",
    ],
    "warp": ["arena", "spawn", "shop", "nether", "farm", "end"],
    "group": ["vip", "builder", "moderator", "default"],
    "world": ["world", "world_nether", "world_the_end"],
}
|
||||
|
||||
|
||||
def expand_template(prompt: str) -> str:
    """Fill every known {placeholder} in *prompt* with a random value.

    Placeholders are the keys of TEMPLATE_VARS. Each occurrence is replaced
    independently, so the same placeholder appearing twice may expand to two
    different values. Unknown placeholders are left untouched.
    """
    for name, choices in TEMPLATE_VARS.items():
        placeholder = "{" + name + "}"
        while True:
            # Split around the first remaining occurrence, if any.
            before, found, after = prompt.partition(placeholder)
            if not found:
                break
            prompt = before + random.choice(choices) + after
    return prompt
|
||||
|
||||
|
||||
def load_prompts(prompts_dir: Path = PROMPTS_DIR,
                 mode_filter: str = None,
                 call_type_filter: str = None) -> dict[str, list[str]]:
    """Load prompt templates from per-category JSONL files.

    Args:
        prompts_dir: Directory containing manifest.json and prompt JSONL files.
        mode_filter: If set, only load categories whose manifest "mode" equals
            this value or is "mixed".
        call_type_filter: If set, only load categories whose manifest
            "call_type" equals this value.

    Returns:
        Dict mapping category name -> list of prompt template strings.
        When manifest.json is missing, a copy of the inline PROMPTS_FALLBACK
        is returned (the filters are not applied to the fallback). A copy is
        used so callers that add categories to the result do not modify the
        module-level fallback.

    Categories whose prompt file is missing are skipped with a warning, and
    blank lines inside a prompt file are ignored.
    """
    manifest_path = prompts_dir / "manifest.json"
    if not manifest_path.exists():
        print(f" Warning: {manifest_path} not found, using inline fallback")
        return {category: list(items) for category, items in PROMPTS_FALLBACK.items()}

    with open(manifest_path) as f:
        manifest = json.load(f)

    prompts = {}
    for category, meta in manifest.items():
        # Apply filters
        if mode_filter and meta.get("mode") not in (mode_filter, "mixed"):
            continue
        if call_type_filter and meta.get("call_type") != call_type_filter:
            continue

        filepath = prompts_dir / meta["file"]
        if not filepath.exists():
            print(f" Warning: {filepath} not found, skipping {category}")
            continue

        cat_prompts = []
        with open(filepath) as f:
            for line in f:
                line = line.strip()
                if not line:
                    # A stray blank line must not abort the whole load.
                    continue
                cat_prompts.append(json.loads(line)["prompt"])
        prompts[category] = cat_prompts

    print(f" Loaded {sum(len(v) for v in prompts.values())} prompts "
          f"from {len(prompts)} categories")
    return prompts
|
||||
|
||||
|
||||
def load_manifest(prompts_dir: Path = PROMPTS_DIR) -> dict:
    """Return the parsed manifest.json from *prompts_dir*.

    The manifest is returned as-is, i.e. whatever per-category metadata the
    file holds. An empty dict is returned when the file does not exist.
    """
    manifest_file = prompts_dir / "manifest.json"
    if manifest_file.exists():
        with open(manifest_file) as handle:
            return json.load(handle)
    return {}
|
||||
|
||||
|
||||
# ── Inline fallback (subset, used if prompt files missing) ─────────────────
|
||||
|
||||
PROMPTS_FALLBACK = {
|
||||
"script_build": [
|
||||
"sudo build me a small cobblestone house with a door and windows",
|
||||
"sudo create a fighting arena with red and blue corners",
|
||||
@@ -107,6 +192,37 @@ PROMPTS = {
|
||||
"sudo how does fortune work on ores?",
|
||||
"sudo what are all the copper variants?",
|
||||
"sudo how do trial spawners work?",
|
||||
"sudo what does the breeze drop?",
|
||||
"sudo how do you tame an armadillo?",
|
||||
"sudo what biomes have cherry blossoms?",
|
||||
],
|
||||
"plugin_docs": [
|
||||
"sudo how do I create a WorldGuard region?",
|
||||
"sudo what flags can I set on a region?",
|
||||
"sudo how does CoreProtect rollback work?",
|
||||
"sudo what's the command for LuckPerms group inheritance?",
|
||||
"sudo how do I set up EssentialsX warps?",
|
||||
"sudo what are the WorldEdit brush commands?",
|
||||
"sudo how do I configure CoreProtect auto-purge?",
|
||||
"sudo what permissions does the builder group need for WorldEdit?",
|
||||
"sudo how do I set a WorldGuard greeting message?",
|
||||
"sudo what's the difference between /rg flag and /rg addmember?",
|
||||
],
|
||||
"changelog_lookup": [
|
||||
"sudo what changed in 1.21?",
|
||||
"sudo what was added in the tricky trials update?",
|
||||
"sudo when were trial chambers added?",
|
||||
"sudo what's new with the mace weapon?",
|
||||
"sudo what version added the breeze mob?",
|
||||
"sudo what got nerfed in the latest update?",
|
||||
],
|
||||
"paper_docs": [
|
||||
"sudo how do I set the view distance on Paper?",
|
||||
"sudo what Paper config controls mob spawning rates?",
|
||||
"sudo how do I enable async chunk loading?",
|
||||
"sudo what's the Paper command to reload config?",
|
||||
"sudo how do I optimize TPS on Paper?",
|
||||
"sudo what Paper settings affect redstone performance?",
|
||||
],
|
||||
"player_info": [
|
||||
"sudo build a wall around me",
|
||||
@@ -215,9 +331,71 @@ PROMPTS = {
|
||||
"pray smite TheBigBoss for griefing",
|
||||
"pray make me a temple worthy of your glory",
|
||||
],
|
||||
# ── Direct command passthrough — teach faithful execution ──
|
||||
"direct_passthrough": [
|
||||
# WorldGuard — exact commands
|
||||
'sudo run this exactly: rg define test-region',
|
||||
'sudo run this exactly: rg flag test-region pvp deny',
|
||||
'sudo run this exactly: rg flag test-region mob-spawning deny',
|
||||
'sudo run this exactly: rg addmember test-region Ace13245',
|
||||
'sudo run this exactly: rg removemember test-region Ace13245',
|
||||
'sudo run this exactly: rg flag test-region greeting Welcome to the zone!',
|
||||
'sudo run this exactly: rg flag test-region entry -g nonmembers deny',
|
||||
'sudo run this exactly: rg list',
|
||||
'sudo run this exactly: rg info test-region',
|
||||
'sudo run this exactly: rg remove test-region',
|
||||
# CoreProtect — exact commands
|
||||
'sudo run this exactly: co status',
|
||||
'sudo run this exactly: co lookup u:Ace13245 t:1h',
|
||||
'sudo run this exactly: co lookup u:Ace13245 t:1h a:block',
|
||||
'sudo run this exactly: co rollback u:Ace13245 t:1h r:20',
|
||||
'sudo run this exactly: co restore u:Ace13245 t:1h r:20',
|
||||
'sudo run this exactly: co inspect',
|
||||
'sudo run this exactly: co lookup t:30m r:10 a:container',
|
||||
# LuckPerms — exact commands
|
||||
'sudo run this exactly: lp creategroup vip',
|
||||
'sudo run this exactly: lp group vip permission set essentials.fly true',
|
||||
'sudo run this exactly: lp group vip permission set essentials.heal true',
|
||||
'sudo run this exactly: lp user Ace13245 parent add vip',
|
||||
'sudo run this exactly: lp user Ace13245 parent remove vip',
|
||||
'sudo run this exactly: lp user Ace13245 info',
|
||||
'sudo run this exactly: lp group vip info',
|
||||
'sudo run this exactly: lp listgroups',
|
||||
'sudo run this exactly: lp group vip meta setprefix "&6[VIP] "',
|
||||
'sudo run this exactly: lp deletegroup vip',
|
||||
# EssentialsX — exact commands
|
||||
'sudo run this exactly: heal Ace13245',
|
||||
'sudo run this exactly: feed Ace13245',
|
||||
'sudo run this exactly: eco give Ace13245 1000',
|
||||
'sudo run this exactly: eco take Ace13245 500',
|
||||
'sudo run this exactly: bal Ace13245',
|
||||
'sudo run this exactly: broadcast Welcome to the server!',
|
||||
'sudo run this exactly: setwarp arena',
|
||||
'sudo run this exactly: warp arena',
|
||||
'sudo run this exactly: delwarp arena',
|
||||
'sudo run this exactly: nick Ace13245 DragonLord',
|
||||
# FAWE — exact commands
|
||||
'sudo run this exactly: /worldedit version',
|
||||
],
|
||||
# ── Correction examples — model should fix wrong syntax ──
|
||||
"direct_correction": [
|
||||
'sudo gamemode slingshooter08 creative', # wrong arg order
|
||||
'sudo give slingshooter08 minecraft:bed 1', # should be white_bed
|
||||
'sudo effect slingshooter08 night_vision', # missing give and duration
|
||||
'sudo weather thunderstorm', # should be thunder
|
||||
'sudo give slingshooter08 minecraft:diamond_pickaxe[sharpness:5] 1', # wrong enchant syntax
|
||||
'sudo tp 100 64 100', # missing player
|
||||
'sudo kill zombie 50', # wrong kill syntax
|
||||
'sudo enchant slingshooter08 sharpness 10', # max is 5
|
||||
'sudo effect give slingshooter08 minecraft:haste 99999', # duration too long
|
||||
'sudo fill 0 0 0 100 100 100 diamond_block', # too large, missing namespace
|
||||
'sudo rg define', # missing region name
|
||||
'sudo co rollback Ace13245 1h', # missing u: and t: prefixes
|
||||
'sudo lp addgroup vip Ace13245', # wrong syntax (should be lp user X parent add Y)
|
||||
],
|
||||
}
|
||||
|
||||
PLAYERS = ["slingshooter08", "Ace13245", "TheBigBoss", "xXDragonSlayerXx"]
|
||||
PLAYERS = TEMPLATE_VARS["player"]
|
||||
|
||||
|
||||
def query_model(prompt, player, ollama_url, model, rcon):
|
||||
@@ -225,7 +403,8 @@ def query_model(prompt, player, ollama_url, model, rcon):
|
||||
system = (
|
||||
"You are a Minecraft 1.21 command translator for a Paper server.\n"
|
||||
"Plugins: FastAsyncWorldEdit, WorldGuard, CoreProtect, EssentialsX, Vault, LuckPerms.\n"
|
||||
"Tools: rcon.execute, minecraft.wiki_lookup, world.player_info, "
|
||||
"Tools: rcon.execute, minecraft.wiki_lookup, plugin.docs_lookup, "
|
||||
"minecraft.changelog_lookup, paper.docs_lookup, world.player_info, "
|
||||
"world.server_state, world.nearby_entities, memory.read, memory.write, "
|
||||
"script.write, script.validate, script.execute, script.read, script.list, "
|
||||
"script.delete, script.schedule.\n\n"
|
||||
@@ -246,7 +425,7 @@ def query_model(prompt, player, ollama_url, model, rcon):
|
||||
{"role": "user", "content": f"Player {player}: {prompt}"},
|
||||
],
|
||||
"stream": False, "format": "json",
|
||||
"options": {"temperature": 0.4, "num_predict": 800},
|
||||
"options": {"temperature": 0.85, "num_predict": 800},
|
||||
}, timeout=120)
|
||||
|
||||
content = r.json()["message"]["content"]
|
||||
@@ -272,9 +451,10 @@ def validate_commands(commands, rcon):
|
||||
return results
|
||||
|
||||
|
||||
def run_round(category, ollama_url, model, rcon, player):
|
||||
def run_round(category, ollama_url, model, rcon, player, prompts):
|
||||
"""Run one self-play round for a specific tool category."""
|
||||
prompt = random.choice(PROMPTS[category])
|
||||
raw_prompt = random.choice(prompts[category])
|
||||
prompt = expand_template(raw_prompt)
|
||||
|
||||
print(f" [{category:18s}] {prompt[:60]}")
|
||||
start = time.time()
|
||||
@@ -336,12 +516,13 @@ def run_round(category, ollama_url, model, rcon, player):
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Tool-focused self-play")
|
||||
parser.add_argument("--ollama-url", default="http://192.168.0.179:11434")
|
||||
parser.add_argument("--model", default="mortdecai:0.4.0")
|
||||
parser.add_argument("--rcon-host", default="192.168.0.112")
|
||||
parser.add_argument("--model", default="mortdecai:0.5.0")
|
||||
parser.add_argument("--rcon-host", default="192.168.0.244")
|
||||
parser.add_argument("--rcon-port", type=int, default=25578)
|
||||
parser.add_argument("--rcon-pass", default="REDACTED_RCON")
|
||||
parser.add_argument("--rounds", type=int, default=30, help="Rounds per category")
|
||||
parser.add_argument("--categories", default="all", help="Comma-separated categories or 'all'")
|
||||
parser.add_argument("--prompt-bank", default="", help="JSONL file with extra prompts to mix in")
|
||||
parser.add_argument("--output", default="")
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -350,8 +531,23 @@ def main():
|
||||
|
||||
rcon = get_rcon(args.rcon_host, args.rcon_port, args.rcon_pass)
|
||||
|
||||
# Load prompts from template files (falls back to inline)
|
||||
prompts = load_prompts(PROMPTS_DIR)
|
||||
|
||||
# Load extra prompts from prompt bank (quarantine salvage, etc.)
|
||||
if args.prompt_bank:
|
||||
bank_path = Path(args.prompt_bank)
|
||||
if bank_path.exists():
|
||||
bank_prompts = []
|
||||
with open(bank_path) as f:
|
||||
for line in f:
|
||||
entry = json.loads(line)
|
||||
bank_prompts.append(entry["prompt"])
|
||||
prompts["prompt_bank"] = bank_prompts
|
||||
print(f" Loaded {len(bank_prompts)} prompts from {bank_path}")
|
||||
|
||||
if args.categories == "all":
|
||||
categories = list(PROMPTS.keys())
|
||||
categories = list(prompts.keys())
|
||||
else:
|
||||
categories = [c.strip() for c in args.categories.split(",")]
|
||||
|
||||
@@ -372,7 +568,7 @@ def main():
|
||||
|
||||
for cat in categories:
|
||||
player = random.choice(PLAYERS)
|
||||
example = run_round(cat, args.ollama_url, args.model, rcon, player)
|
||||
example = run_round(cat, args.ollama_url, args.model, rcon, player, prompts)
|
||||
|
||||
stats["total"] += 1
|
||||
if example is None:
|
||||
|
||||
Reference in New Issue
Block a user