9c2c9a2310
Distilled Training Data (1,203 examples): - 341 initial gold (plugins, enchantments, builds, effects, god, errors) - 165 buildings + pipeline (100 structures built on dev, 65 request→query→act) - 24 safety-aware (worldborder, safe tp, intentional harm, gamemode checks) - 17 advanced logic (decanonized items, redstone gates, iterative builds) - 12 redstone mastery (NOT/OR/AND/XOR/RS-latch/T-flip-flop/comparator/clock) - 7 circuit verification and diagnosis - 1 compact comparator gates - 10 redstone methodology (build→test→save→recall→learn from mistakes) - 8 player journal usage - 29 creative+uncommon+pipeline+god with full tool chains Player Journal System: - agent/tools/player_journal.py — per-player text files (1-10 lines) - journal.read + journal.write tool schemas added - Cross-contaminated: God and Sudo share same journal per player - Includes sentiment, relationship, builds, preferences, skill level Redstone Engineering: - agent/prompts/redstone_rules.md — baked-in wall torch, dedicated lead, repeater rules - Learned from 4 iterations of 8-switch circuit: wall_torch on back face, not top - T-junction bypass prevention: dedicated lead wire between merge and NOT block - RCON limitation: can build circuits but cannot test them (lever toggle doesn't propagate) Training Data Cleaning: - 466 @s→@p fixes, 10 template commands removed - 12 outdated refusals replaced with correct plugin commands - Data de-duped across all sources Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
611 lines
25 KiB
Python
611 lines
25 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Tool-focused self-play — exercises all 17 tools on a live dev server.
|
|
|
|
Unlike regular self-play (which tests command generation), this script
|
|
specifically generates prompts that require tool use: script writing,
|
|
memory operations, entity scanning, wiki/plugin/changelog/paper lookups,
|
|
and chained multi-tool flows. Runs on the dev server via RCON.
|
|
|
|
The model responds, its tool calls get executed for real, and the full
|
|
interaction (prompt + tool calls + results + final response) gets logged
|
|
as training data.
|
|
|
|
Usage:
|
|
python3 tool_self_play.py --ollama-url http://192.168.0.179:11434 \\
|
|
--rcon-host 192.168.0.112 --rcon-port 25578 --rounds 30
|
|
|
|
# Load extra prompts from prayer bank
|
|
python3 tool_self_play.py --prompt-bank data/raw/prayer_prompt_bank.jsonl
|
|
|
|
# Focus on weak categories only
|
|
python3 tool_self_play.py --categories worldguard,coreprotect,luckperms
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import random
|
|
import re
|
|
import sys
|
|
import time
|
|
from pathlib import Path
|
|
|
|
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
|
|
sys.path.insert(0, str(PROJECT_ROOT))
|
|
|
|
import requests
|
|
from agent.tools.persistent_rcon import get_rcon
|
|
|
|
OUTPUT_DIR = PROJECT_ROOT / "data" / "raw" / "tool_selfplay"
|
|
PROMPTS_DIR = PROJECT_ROOT / "training" / "prompts"
|
|
|
|
# ── Template variables for prompt expansion ────────────────────────────────
|
|
|
|
# Placeholder name -> candidate substitutions. A "{name}" token inside a prompt
# template is swapped for a random entry of the matching list by
# expand_template().
TEMPLATE_VARS = {
    "player": ["slingshooter08", "Ace13245", "TheBigBoss", "xXDragonSlayerXx"],
    "target": ["Ace13245", "TheBigBoss", "xXDragonSlayerXx", "slingshooter08"],
    "region": ["my-base", "spawn-zone", "pvp-arena", "vip-lounge", "farm-area"],
    "warp": ["arena", "spawn", "shop", "nether", "farm", "end"],
    "group": ["vip", "builder", "moderator", "default"],
    "world": ["world", "world_nether", "world_the_end"],
}
|
|
|
|
|
|
def expand_template(prompt: str,
                    variables: "dict[str, list[str]] | None" = None) -> str:
    """Replace {placeholder} tokens with random values.

    Every occurrence of a token gets its own independent random draw, so
    "{player} vs {player}" may expand to two different names.

    Args:
        prompt: Template text, possibly containing "{key}" tokens.
        variables: Mapping of placeholder name -> list of candidate values.
            Defaults to the module-level TEMPLATE_VARS.

    Returns:
        The prompt with every known token substituted. Tokens whose key is
        not in ``variables`` are left untouched.
    """
    if variables is None:
        variables = TEMPLATE_VARS

    for key, values in variables.items():
        token = "{" + key + "}"
        if token not in prompt:
            continue
        # Split once and stitch the pieces back together with a fresh random
        # value per gap. Unlike a "while token in prompt" loop this never
        # re-scans substituted text, so it terminates even when a candidate
        # value happens to contain its own token.
        head, *tail = prompt.split(token)
        rebuilt = [head]
        for piece in tail:
            rebuilt.append(random.choice(values))
            rebuilt.append(piece)
        prompt = "".join(rebuilt)
    return prompt
|
|
|
|
|
|
def load_prompts(prompts_dir: Path = PROMPTS_DIR,
                 mode_filter: str = None,
                 call_type_filter: str = None) -> dict[str, list[str]]:
    """Load prompt templates from per-category JSONL files.

    Args:
        prompts_dir: Directory containing manifest.json and prompt JSONL files.
        mode_filter: If set, only load categories whose manifest "mode" equals
            this value or is "mixed".
        call_type_filter: If set, only load categories whose manifest
            "call_type" equals this value.

    Returns:
        Dict mapping category name -> list of prompt template strings.
        When manifest.json is missing, a shallow copy of the inline
        PROMPTS_FALLBACK is returned (filters are not applied to it).

    Raises:
        json.JSONDecodeError: If the manifest or a non-blank JSONL line is
            not valid JSON.
        KeyError: If a manifest entry lacks "file" or a JSONL entry lacks
            "prompt".
    """
    manifest_path = prompts_dir / "manifest.json"
    if not manifest_path.exists():
        print(f" Warning: {manifest_path} not found, using inline fallback")
        # Copy so callers that add categories do not mutate the module-level
        # fallback table.
        return dict(PROMPTS_FALLBACK)

    with open(manifest_path, encoding="utf-8") as f:
        manifest = json.load(f)

    prompts = {}
    for category, meta in manifest.items():
        # Apply filters
        if mode_filter and meta.get("mode") not in (mode_filter, "mixed"):
            continue
        if call_type_filter and meta.get("call_type") != call_type_filter:
            continue

        filepath = prompts_dir / meta["file"]
        if not filepath.exists():
            print(f" Warning: {filepath} not found, skipping {category}")
            continue

        with open(filepath, encoding="utf-8") as f:
            # Blank lines (e.g. a trailing newline pair) are not JSON records;
            # skip them instead of failing the whole load.
            prompts[category] = [
                json.loads(line)["prompt"] for line in f if line.strip()
            ]

    print(f" Loaded {sum(len(v) for v in prompts.values())} prompts "
          f"from {len(prompts)} categories")
    return prompts
|
|
|
|
|
|
def load_manifest(prompts_dir: Path = PROMPTS_DIR) -> dict:
    """Load the prompt manifest with full metadata.

    Args:
        prompts_dir: Directory expected to contain manifest.json.

    Returns:
        The parsed manifest exactly as stored in the file, or an empty dict
        when manifest.json does not exist.

    Raises:
        json.JSONDecodeError: If the manifest is not valid JSON.
    """
    manifest_path = prompts_dir / "manifest.json"
    if not manifest_path.exists():
        return {}
    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    with open(manifest_path, encoding="utf-8") as f:
        return json.load(f)
|
|
|
|
|
|
# ── Inline fallback (subset, used if prompt files missing) ─────────────────
|
|
|
|
# Category name -> list of prompt strings. load_prompts() returns this table
# when the prompt manifest file is missing.
PROMPTS_FALLBACK = {
    "script_build": [
        "sudo build me a small cobblestone house with a door and windows",
        "sudo create a fighting arena with red and blue corners",
        "sudo make a nether portal room with soul lanterns",
        "sudo build a watchtower 15 blocks tall with a ladder",
        "sudo create a 9x9 wheat farm with water in the center",
        "sudo build an enchanting setup with bookshelves",
        "sudo make a mob grinder platform with water channels",
        "sudo create a trophy room with item frames",
        "sudo build a bridge 30 blocks long over this ravine",
        "sudo make a lighthouse with a glowstone top",
        "sudo create a hedge maze using oak leaves",
        "sudo build a dock with oak wood extending into the water",
        "sudo make an underground bunker with iron doors",
        "sudo create a garden with flowers and paths",
        "sudo build a market stall with a counter and signs",
    ],
    "script_schedule": [
        "sudo make cherry leaf particles fall around spawn forever",
        "sudo set up a scoreboard that tracks deaths on server load",
        "sudo make ambient campfire smoke particles at spawn every tick",
        "sudo create a function that heals everyone every 5 minutes",
        "sudo make it always rain XP orbs at spawn",
    ],
    "script_manage": [
        "sudo show me all scripts",
        "sudo what scripts are running on tick?",
        "sudo delete the test script",
        "sudo read me the arena script",
        "sudo what did I build last?",
    ],
    "memory_write": [
        "sudo remember this as my home",
        "sudo save this location as my base",
        "sudo remember my nether portal is here",
        "sudo my favorite item is a diamond pickaxe, remember that",
        "sudo save this as my farm",
        "sudo remember that Ace13245 is my friend",
        "sudo mark this spot as the village center",
        "sudo save this as my fishing spot",
    ],
    "memory_read": [
        "sudo tp me home",
        "sudo take me to my base",
        "sudo where's my nether portal?",
        "sudo what do you know about me?",
        "sudo tp me to my farm",
        "sudo where was I building?",
        "sudo do I have any saved locations?",
    ],
    "nearby_entities": [
        "sudo what mobs are near me?",
        "sudo kill all the zombies around me",
        "sudo how many animals are nearby?",
        "sudo clear hostile mobs in a 50 block radius",
        "sudo are there any creepers close to me?",
        "sudo kill the nearest skeleton",
        "sudo count everything within 30 blocks",
        "sudo protect me from nearby hostiles",
    ],
    "wiki_lookup": [
        "sudo what enchantments can go on a mace?",
        "sudo how do I craft a lodestone?",
        "sudo what food gives the best saturation?",
        "sudo what are the new 1.21 armor trim patterns?",
        "sudo what's the difference between smite and sharpness?",
        "sudo how does fortune work on ores?",
        "sudo what are all the copper variants?",
        "sudo how do trial spawners work?",
        "sudo what does the breeze drop?",
        "sudo how do you tame an armadillo?",
        "sudo what biomes have cherry blossoms?",
    ],
    "plugin_docs": [
        "sudo how do I create a WorldGuard region?",
        "sudo what flags can I set on a region?",
        "sudo how does CoreProtect rollback work?",
        "sudo what's the command for LuckPerms group inheritance?",
        "sudo how do I set up EssentialsX warps?",
        "sudo what are the WorldEdit brush commands?",
        "sudo how do I configure CoreProtect auto-purge?",
        "sudo what permissions does the builder group need for WorldEdit?",
        "sudo how do I set a WorldGuard greeting message?",
        "sudo what's the difference between /rg flag and /rg addmember?",
    ],
    "changelog_lookup": [
        "sudo what changed in 1.21?",
        "sudo what was added in the tricky trials update?",
        "sudo when were trial chambers added?",
        "sudo what's new with the mace weapon?",
        "sudo what version added the breeze mob?",
        "sudo what got nerfed in the latest update?",
    ],
    "paper_docs": [
        "sudo how do I set the view distance on Paper?",
        "sudo what Paper config controls mob spawning rates?",
        "sudo how do I enable async chunk loading?",
        "sudo what's the Paper command to reload config?",
        "sudo how do I optimize TPS on Paper?",
        "sudo what Paper settings affect redstone performance?",
    ],
    "player_info": [
        "sudo build a wall around me",
        "sudo teleport me 50 blocks up",
        "sudo place torches around me",
        "sudo create a beacon at my location",
        "sudo surround me with glass",
        "sudo set my spawn here",
        "sudo light up this cave around me",
    ],
    "server_state": [
        "sudo if it's night, make it day",
        "sudo give everyone online a golden apple",
        "sudo how many people are playing right now?",
        "sudo announce the current time and weather",
        "sudo clear the weather if it's raining",
    ],
    "chained": [
        "sudo save this location as home, then build a marker here",
        "sudo check what's near me and kill all hostiles, then give me resistance",
        "sudo look up the best sword enchantments and give me one",
        "sudo tp me home and heal me",
        "sudo build an arena and save it as a script I can rerun",
        "sudo check my health, if low heal me and give me food",
        "sudo what scripts do I have? run the arena one at my position",
        "sudo remember this spot, scan for mobs, kill hostiles, build a fort",
    ],
    # ── Plugin categories ──
    "worldguard": [
        "sudo protect this area as my base",
        "sudo make a no-pvp zone around spawn",
        "sudo prevent mob spawning in the village",
        "sudo add Ace13245 as a member of my region",
        "sudo block entry for non-members in the vault",
        "sudo allow TNT in the arena region",
        "sudo set a greeting message for my base region",
        "sudo list all protected regions",
        "sudo prevent creeper explosions globally",
        "sudo create a healing zone at spawn",
        "sudo remove the old-test region",
        "sudo make a safe zone with no fire spread",
    ],
    "coreprotect": [
        "sudo check who broke blocks near me",
        "sudo rollback griefing from the last hour",
        "sudo rollback what TheBigBoss did recently",
        "sudo who placed blocks around here today?",
        "sudo undo TNT damage from the last 2 hours",
        "sudo rollback all container theft recently",
        "sudo restore what was rolled back",
        "sudo check CoreProtect status",
        "sudo rollback fire damage near spawn",
        "sudo lookup what Ace13245 did in the last day",
    ],
    "essentialsx": [
        "sudo set my home here",
        "sudo tp me to my home",
        "sudo create a warp called arena",
        "sudo tp me to the arena warp",
        "sudo give Ace 1000 coins",
        "sudo check my balance",
        "sudo heal me",
        "sudo feed me",
        "sudo repair what I'm holding",
        "sudo set my nickname to DragonLord",
        "sudo give me god mode",
        "sudo toggle fly for me",
        "sudo broadcast a server message",
        "sudo set spawn point here",
        "sudo check when Ace was last online",
    ],
    "luckperms": [
        "sudo give me permission to fly",
        "sudo create a VIP group",
        "sudo add Ace to the VIP group",
        "sudo give VIP access to fly and heal",
        "sudo give me temporary VIP for 1 day",
        "sudo set VIP chat prefix to gold",
        "sudo create a builder group with WorldEdit",
        "sudo list all permission groups",
        "sudo check what permissions I have",
        "sudo remove TheBigBoss from VIP",
    ],
    "fawe": [
        "sudo make a glass sphere 10 blocks wide",
        "sudo hollow sphere of stone",
        "sudo cylinder of quartz 5 wide 10 tall",
        "sudo replace all stone with deepslate in my selection",
        "sudo smooth the terrain around here",
        "sudo drain all water within 20 blocks",
        "sudo build a sandstone pyramid 10 tall",
        "sudo hollow out the selected area",
        "sudo make walls around my selection with stone bricks",
        "sudo fill with a checkerboard pattern",
        "sudo stack my selection 5 times north",
        "sudo undo my last WorldEdit action",
    ],
    "plugin_combined": [
        "sudo create a protected pvp arena with WorldEdit and WorldGuard",
        "sudo rollback Ace's griefing and revoke his builder perms",
        "sudo set up a VIP lounge — build it, protect it, make a warp",
        "sudo give TheBigBoss a reward: money, items, and temp VIP",
        "sudo prepare the server for an event: announce, set arena flags, heal everyone",
        "pray someone destroyed my house, please restore it",
        "pray protect my village from monsters",
        "pray smite TheBigBoss for griefing",
        "pray make me a temple worthy of your glory",
    ],
    # ── Direct command passthrough — teach faithful execution ──
    "direct_passthrough": [
        # WorldGuard — exact commands
        'sudo run this exactly: rg define test-region',
        'sudo run this exactly: rg flag test-region pvp deny',
        'sudo run this exactly: rg flag test-region mob-spawning deny',
        'sudo run this exactly: rg addmember test-region Ace13245',
        'sudo run this exactly: rg removemember test-region Ace13245',
        'sudo run this exactly: rg flag test-region greeting Welcome to the zone!',
        'sudo run this exactly: rg flag test-region entry -g nonmembers deny',
        'sudo run this exactly: rg list',
        'sudo run this exactly: rg info test-region',
        'sudo run this exactly: rg remove test-region',
        # CoreProtect — exact commands
        'sudo run this exactly: co status',
        'sudo run this exactly: co lookup u:Ace13245 t:1h',
        'sudo run this exactly: co lookup u:Ace13245 t:1h a:block',
        'sudo run this exactly: co rollback u:Ace13245 t:1h r:20',
        'sudo run this exactly: co restore u:Ace13245 t:1h r:20',
        'sudo run this exactly: co inspect',
        'sudo run this exactly: co lookup t:30m r:10 a:container',
        # LuckPerms — exact commands
        'sudo run this exactly: lp creategroup vip',
        'sudo run this exactly: lp group vip permission set essentials.fly true',
        'sudo run this exactly: lp group vip permission set essentials.heal true',
        'sudo run this exactly: lp user Ace13245 parent add vip',
        'sudo run this exactly: lp user Ace13245 parent remove vip',
        'sudo run this exactly: lp user Ace13245 info',
        'sudo run this exactly: lp group vip info',
        'sudo run this exactly: lp listgroups',
        'sudo run this exactly: lp group vip meta setprefix "&6[VIP] "',
        'sudo run this exactly: lp deletegroup vip',
        # EssentialsX — exact commands
        'sudo run this exactly: heal Ace13245',
        'sudo run this exactly: feed Ace13245',
        'sudo run this exactly: eco give Ace13245 1000',
        'sudo run this exactly: eco take Ace13245 500',
        'sudo run this exactly: bal Ace13245',
        'sudo run this exactly: broadcast Welcome to the server!',
        'sudo run this exactly: setwarp arena',
        'sudo run this exactly: warp arena',
        'sudo run this exactly: delwarp arena',
        'sudo run this exactly: nick Ace13245 DragonLord',
        # FAWE — exact commands
        'sudo run this exactly: /worldedit version',
    ],
    # ── Correction examples — model should fix wrong syntax ──
    "direct_correction": [
        'sudo gamemode slingshooter08 creative',  # wrong arg order
        'sudo give slingshooter08 minecraft:bed 1',  # should be white_bed
        'sudo effect slingshooter08 night_vision',  # missing give and duration
        'sudo weather thunderstorm',  # should be thunder
        'sudo give slingshooter08 minecraft:diamond_pickaxe[sharpness:5] 1',  # wrong enchant syntax
        'sudo tp 100 64 100',  # missing player
        'sudo kill zombie 50',  # wrong kill syntax
        'sudo enchant slingshooter08 sharpness 10',  # max is 5
        'sudo effect give slingshooter08 minecraft:haste 99999',  # duration too long
        'sudo fill 0 0 0 100 100 100 diamond_block',  # too large, missing namespace
        'sudo rg define',  # missing region name
        'sudo co rollback Ace13245 1h',  # missing u: and t: prefixes
        'sudo lp addgroup vip Ace13245',  # wrong syntax (should be lp user X parent add Y)
    ],
}
|
|
|
|
# Pool of player names a self-play round is attributed to; this is the same
# list object as TEMPLATE_VARS["player"], not a copy.
PLAYERS = TEMPLATE_VARS["player"]
|
|
|
|
|
|
def query_model(prompt, player, ollama_url, model, rcon):
    """Send a prompt to the model and return its parsed JSON reply.

    Args:
        prompt: The user request text.
        player: Player name; sent to the model as "Player <name>: <prompt>".
        ollama_url: Base URL of the Ollama server ("/api/chat" is appended).
        model: Model name passed through in the request body.
        rcon: Unused here; kept so the call signature stays stable.

    Returns:
        The decoded JSON object produced by the model, or, on any failure
        (network error, HTTP error status, malformed or non-object JSON),
        a dict of the form {"error": <message>, "raw": <model text so far>}.
    """
    system = (
        "You are a Minecraft 1.21 command translator for a Paper server.\n"
        "Plugins: FastAsyncWorldEdit, WorldGuard, CoreProtect, EssentialsX, Vault, LuckPerms.\n"
        "Tools: rcon.execute, minecraft.wiki_lookup, plugin.docs_lookup, "
        "minecraft.changelog_lookup, paper.docs_lookup, world.player_info, "
        "world.server_state, world.nearby_entities, memory.read, memory.write, "
        "script.write, script.validate, script.execute, script.read, script.list, "
        "script.delete, script.schedule.\n\n"
        "Plugin commands: //set, //sphere, //cyl (FAWE), /rg define/flag (WorldGuard), "
        "/co rollback/inspect (CoreProtect), /home, /warp, /eco (EssentialsX), "
        "/lp user/group (LuckPerms).\n\n"
        "For complex builds (4+ commands), write a mcfunction script. "
        "For simple tasks, use rcon.execute directly.\n\n"
        "Return JSON: {\"commands\": [...], \"reasoning\": \"...\", \"message\": \"...\"}\n"
        "Use /no_think mode."
    )

    # Pre-bind so the error path can always report whatever text was received,
    # without probing the local namespace.
    content = ""
    try:
        r = requests.post(f"{ollama_url}/api/chat", json={
            "model": model,
            "messages": [
                {"role": "system", "content": "/no_think\n" + system},
                {"role": "user", "content": f"Player {player}: {prompt}"},
            ],
            "stream": False, "format": "json",
            "options": {"temperature": 0.85, "num_predict": 800},
        }, timeout=120)
        # Surface HTTP failures as such instead of as a KeyError on the
        # missing "message" field further down.
        r.raise_for_status()

        content = r.json()["message"]["content"]
        # Drop any <think>...</think> section before decoding the JSON.
        content = re.sub(r'<think>[\s\S]*?</think>\s*', '', content)
        parsed = json.loads(content)
    except Exception as e:
        # Deliberately broad: one bad round must not abort a long run.
        return {"error": str(e), "raw": content}

    if not isinstance(parsed, dict):
        # Valid JSON but not an object (e.g. a bare list); callers use .get().
        return {
            "error": f"expected a JSON object, got {type(parsed).__name__}",
            "raw": content,
        }
    return parsed
|
|
|
|
|
|
def validate_commands(commands, rcon, max_commands=12,
                      error_markers=("<--[HERE]", "Unknown", "Incorrect", "Expected")):
    """Run commands through RCON and capture results.

    Args:
        commands: Sequence of candidate commands. Only the first
            ``max_commands`` entries are considered; entries that are not
            non-blank strings are skipped.
        rcon: Object exposing ``command(cmd)`` that returns the reply text.
        max_commands: Upper bound on how many leading entries are considered.
        error_markers: Substrings whose presence in a reply marks the command
            as failed.

    Returns:
        A list of {"cmd", "result", "ok"} dicts, one per executed command.
        "result" is the reply truncated to 200 characters, or the exception
        text when the call raised (in which case "ok" is False).
    """
    results = []
    for cmd in commands[:max_commands]:
        if not isinstance(cmd, str) or not cmd.strip():
            continue
        try:
            reply = rcon.command(cmd)
            failed = any(marker in reply for marker in error_markers)
            results.append({"cmd": cmd, "result": reply[:200], "ok": not failed})
        except Exception as e:
            # A failing command is recorded as a failure, not fatal.
            results.append({"cmd": cmd, "result": str(e), "ok": False})
    return results
|
|
|
|
|
|
def run_round(category, ollama_url, model, rcon, player, prompts):
    """Run one self-play round for a specific tool category.

    Picks a random prompt template from ``prompts[category]``, expands its
    placeholders, queries the model, runs the returned commands through
    RCON and packages everything as one training example.

    Args:
        category: Key into ``prompts``.
        ollama_url: Base URL passed to query_model.
        model: Model name passed to query_model and recorded in metadata.
        rcon: RCON client passed to validate_commands.
        player: Player name the prompt is attributed to.
        prompts: Dict mapping category name -> list of prompt templates.

    Returns:
        The training-example dict, or None when the category has no prompts
        or the model call reported an error.
    """
    candidates = prompts.get(category)
    if not candidates:
        # Unknown or empty category: report instead of raising.
        print(f" [{category:18s}] no prompts available, skipping")
        return None

    prompt = expand_template(random.choice(candidates))

    print(f" [{category:18s}] {prompt[:60]}")
    start = time.time()

    response = query_model(prompt, player, ollama_url, model, rcon)
    elapsed = time.time() - start

    if not isinstance(response, dict):
        print(f" ERROR: unexpected response type {type(response).__name__}")
        return None
    if "error" in response:
        print(f" ERROR: {str(response['error'])[:80]}")
        return None

    # The model may return null or a bare string here; iterating a string
    # would execute it character by character, so anything that is not a
    # list is treated as "no commands".
    commands = response.get("commands")
    if not isinstance(commands, list):
        commands = []
    message = response.get("message", "")
    reasoning = response.get("reasoning", "")

    # Validate commands via RCON
    rcon_results = []
    if commands and all(isinstance(c, str) for c in commands):
        rcon_results = validate_commands(commands, rcon)

    ok_count = sum(1 for r in rcon_results if r["ok"])
    fail_count = len(rcon_results) - ok_count
    # Require at least one executed command: all() over an empty result list
    # would otherwise label a round with nothing run as a success.
    success = bool(rcon_results) and fail_count == 0

    if success:
        status = "OK"
    elif ok_count > 0:
        status = f"PARTIAL ({ok_count}/{ok_count+fail_count})"
    else:
        status = "FAIL"
    print(f" → {len(commands)} cmds, {status}, {elapsed:.1f}s")

    # Build training example
    example = {
        "id": f"tool-selfplay-{int(time.time())}-{random.randint(0,9999):04d}",
        "source": "tool_self_play",
        "category": category,
        "input": {
            "user_message": prompt,
            "server_context": {
                "server_type": "paper",
                "version": "1.21.x",
                "online_players": [player],
            },
        },
        "output": {
            "commands": commands,
            "message": message,
            "reasoning": reasoning,
        },
        "metadata": {
            "rcon_results": rcon_results,
            "all_success": success,
            "elapsed_seconds": round(elapsed, 2),
            "model": model,
            "tool_category": category,
        },
    }

    return example
|
|
|
|
|
|
def main():
    """Command-line entry point: run self-play rounds and log the examples.

    Each round runs every selected category once, in random order. Every
    example produced is appended to the output JSONL file immediately, so a
    crash loses at most the round in progress. A summary is printed at the
    end, and a progress line every five rounds.
    """
    parser = argparse.ArgumentParser(description="Tool-focused self-play")
    parser.add_argument("--ollama-url", default="http://192.168.0.179:11434")
    parser.add_argument("--model", default="mortdecai:0.5.0")
    parser.add_argument("--rcon-host", default="192.168.0.244")
    parser.add_argument("--rcon-port", type=int, default=25578)
    # NOTE(review): the default looks like a redacted placeholder; supply the
    # real password on the command line.
    parser.add_argument("--rcon-pass", default="REDACTED_RCON")
    parser.add_argument("--rounds", type=int, default=30, help="Rounds per category")
    parser.add_argument("--categories", default="all", help="Comma-separated categories or 'all'")
    parser.add_argument("--prompt-bank", default="", help="JSONL file with extra prompts to mix in")
    parser.add_argument("--output", default="")
    args = parser.parse_args()

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    output_path = args.output or str(OUTPUT_DIR / f"tool_selfplay_{int(time.time())}.jsonl")

    # Load prompts from template files (falls back to inline). Work on a copy
    # so adding the prompt bank never mutates the table load_prompts returned.
    prompts = dict(load_prompts(PROMPTS_DIR))

    # Load extra prompts from prompt bank (quarantine salvage, etc.)
    if args.prompt_bank:
        bank_path = Path(args.prompt_bank)
        if bank_path.exists():
            with open(bank_path, encoding="utf-8") as f:
                # Skip blank lines; they are not JSON records.
                bank_prompts = [
                    json.loads(line)["prompt"] for line in f if line.strip()
                ]
            if bank_prompts:
                prompts["prompt_bank"] = bank_prompts
            print(f" Loaded {len(bank_prompts)} prompts from {bank_path}")
        else:
            print(f" Warning: prompt bank {bank_path} not found, ignoring")

    if args.categories == "all":
        # Categories without prompts cannot produce a round.
        categories = [name for name, items in prompts.items() if items]
    else:
        requested = [c.strip() for c in args.categories.split(",") if c.strip()]
        categories = []
        for name in requested:
            if prompts.get(name):
                categories.append(name)
            else:
                print(f" Warning: unknown or empty category '{name}', skipping")

    if not categories:
        print("No categories to run; nothing to do.")
        return

    rcon = get_rcon(args.rcon_host, args.rcon_port, args.rcon_pass)

    print("Tool Self-Play")
    print(f" Model: {args.model} on {args.ollama_url}")
    print(f" RCON: {args.rcon_host}:{args.rcon_port}")
    print(f" Categories: {', '.join(categories)}")
    print(f" Rounds per category: {args.rounds}")
    print(f" Output: {output_path}")
    print()

    stats = {"total": 0, "success": 0, "partial": 0, "fail": 0, "error": 0}
    written = 0

    for round_num in range(args.rounds):
        print(f"\n── Round {round_num + 1}/{args.rounds} ──")
        random.shuffle(categories)

        for cat in categories:
            player = random.choice(PLAYERS)
            example = run_round(cat, args.ollama_url, args.model, rcon, player, prompts)

            stats["total"] += 1
            if example is None:
                stats["error"] += 1
                continue

            if example["metadata"]["all_success"]:
                stats["success"] += 1
            elif any(r["ok"] for r in example["metadata"].get("rcon_results", [])):
                stats["partial"] += 1
            else:
                stats["fail"] += 1

            # Write incrementally. ensure_ascii=False emits raw non-ASCII
            # characters, so the file encoding must be pinned to UTF-8.
            with open(output_path, "a", encoding="utf-8") as f:
                f.write(json.dumps(example, ensure_ascii=False) + "\n")
            written += 1

            time.sleep(0.3)

        # Progress report
        if (round_num + 1) % 5 == 0:
            rate = stats["success"] / max(stats["total"], 1) * 100
            print(f"\n Progress: {stats['total']} total, {rate:.0f}% success, "
                  f"{stats['partial']} partial, {stats['fail']} fail, {stats['error']} error")

    print(f"\n{'='*60}")
    print("Tool Self-Play Complete")
    print(f" Total: {stats['total']}")
    print(f" Success: {stats['success']} ({stats['success']/max(stats['total'],1)*100:.0f}%)")
    print(f" Partial: {stats['partial']}")
    print(f" Fail: {stats['fail']}")
    print(f" Error: {stats['error']}")
    print(f" Output: {output_path} ({written} examples)")
|
|
|
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|