GPU scheduler, 14-tool architecture, plugin deployment, event dispatcher
GPU Scheduler (gpu.sethpc.xyz): - Live dashboard with 4 GPUs, training monitor, loss sparklines - Preset-based job scheduler with 3 triggers (time, finish_training, cost) - Model selection per GPU, pipeline configuration - Tool self-play and training pipeline types - Behind Google OAuth, live-refresh without page reload Tool Architecture (14 tools): - 3 new tools: world.nearby_entities, memory.read, memory.write - 7 script.* tools: write, validate, execute, read, list, delete, schedule - ScriptManager: full mcfunction datapack CRUD with RCON validation - Training data: 1,430 tool examples (up from 1,159) Plugin Deployment (paper-ai-25567): - WorldGuard 7.0.12, CoreProtect CE 23.1, EssentialsX 2.21.2, Vault 1.7.3 - Fresh greenfield world reset - 104 RCON-validated plugin training examples Event Dispatcher: - Watches server log for deaths, joins, advancements, PvP kills - Configurable trigger probability and cooldowns per event type - Deployed to dev server, fires god_system prompts on events - 21 event-response training examples Training Infrastructure: - train_lora.py: --save-steps 50, --resume from checkpoint - run_training.sh: stops Ollama, activates conda, restarts after - Passwordless sudo for ollama services on steel141 - Dev server added to MCSManager with autoStart Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,414 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Tool-focused self-play — exercises all 14 tools on a live dev server.
|
||||
|
||||
Unlike regular self-play (which tests command generation), this script
|
||||
specifically generates prompts that require tool use: script writing,
|
||||
memory operations, entity scanning, wiki lookups, and chained multi-tool
|
||||
flows. Runs on the dev server via RCON.
|
||||
|
||||
The model responds, its tool calls get executed for real, and the full
|
||||
interaction (prompt + tool calls + results + final response) gets logged
|
||||
as training data.
|
||||
|
||||
Usage:
|
||||
python3 tool_self_play.py --ollama-url http://192.168.0.179:11434 \\
|
||||
--rcon-host 192.168.0.112 --rcon-port 25578 --rounds 30
|
||||
|
||||
# Or via the scheduler preset
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
# Three .parent hops: the project root is two directories above the directory
# that contains this script.
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
# Put the project root first on sys.path so the project-local
# `agent.tools.persistent_rcon` import below resolves when run as a script.
sys.path.insert(0, str(PROJECT_ROOT))
|
||||
|
||||
import requests
|
||||
from agent.tools.persistent_rcon import get_rcon
|
||||
|
||||
# Default directory for the generated .jsonl files (used by main() when
# --output is not given).
OUTPUT_DIR = PROJECT_ROOT / "data" / "raw" / "tool_selfplay"
|
||||
|
||||
# ── Prompt categories that exercise specific tools ─────────────────────────
#
# Maps a category name to the list of player chat prompts for that category.
# run_round() picks one prompt at random from the selected category, and the
# category names are the values accepted by the --categories CLI option.
# Category names suggest which tool each group targets, but nothing in this
# file enforces that the model actually uses that tool.

PROMPTS = {
    "script_build": [
        "sudo build me a small cobblestone house with a door and windows",
        "sudo create a fighting arena with red and blue corners",
        "sudo make a nether portal room with soul lanterns",
        "sudo build a watchtower 15 blocks tall with a ladder",
        "sudo create a 9x9 wheat farm with water in the center",
        "sudo build an enchanting setup with bookshelves",
        "sudo make a mob grinder platform with water channels",
        "sudo create a trophy room with item frames",
        "sudo build a bridge 30 blocks long over this ravine",
        "sudo make a lighthouse with a glowstone top",
        "sudo create a hedge maze using oak leaves",
        "sudo build a dock with oak wood extending into the water",
        "sudo make an underground bunker with iron doors",
        "sudo create a garden with flowers and paths",
        "sudo build a market stall with a counter and signs",
    ],
    "script_schedule": [
        "sudo make cherry leaf particles fall around spawn forever",
        "sudo set up a scoreboard that tracks deaths on server load",
        "sudo make ambient campfire smoke particles at spawn every tick",
        "sudo create a function that heals everyone every 5 minutes",
        "sudo make it always rain XP orbs at spawn",
    ],
    "script_manage": [
        "sudo show me all scripts",
        "sudo what scripts are running on tick?",
        "sudo delete the test script",
        "sudo read me the arena script",
        "sudo what did I build last?",
    ],
    "memory_write": [
        "sudo remember this as my home",
        "sudo save this location as my base",
        "sudo remember my nether portal is here",
        "sudo my favorite item is a diamond pickaxe, remember that",
        "sudo save this as my farm",
        "sudo remember that Ace13245 is my friend",
        "sudo mark this spot as the village center",
        "sudo save this as my fishing spot",
    ],
    "memory_read": [
        "sudo tp me home",
        "sudo take me to my base",
        "sudo where's my nether portal?",
        "sudo what do you know about me?",
        "sudo tp me to my farm",
        "sudo where was I building?",
        "sudo do I have any saved locations?",
    ],
    "nearby_entities": [
        "sudo what mobs are near me?",
        "sudo kill all the zombies around me",
        "sudo how many animals are nearby?",
        "sudo clear hostile mobs in a 50 block radius",
        "sudo are there any creepers close to me?",
        "sudo kill the nearest skeleton",
        "sudo count everything within 30 blocks",
        "sudo protect me from nearby hostiles",
    ],
    "wiki_lookup": [
        "sudo what enchantments can go on a mace?",
        "sudo how do I craft a lodestone?",
        "sudo what food gives the best saturation?",
        "sudo what are the new 1.21 armor trim patterns?",
        "sudo what's the difference between smite and sharpness?",
        "sudo how does fortune work on ores?",
        "sudo what are all the copper variants?",
        "sudo how do trial spawners work?",
    ],
    "player_info": [
        "sudo build a wall around me",
        "sudo teleport me 50 blocks up",
        "sudo place torches around me",
        "sudo create a beacon at my location",
        "sudo surround me with glass",
        "sudo set my spawn here",
        "sudo light up this cave around me",
    ],
    "server_state": [
        "sudo if it's night, make it day",
        "sudo give everyone online a golden apple",
        "sudo how many people are playing right now?",
        "sudo announce the current time and weather",
        "sudo clear the weather if it's raining",
    ],
    # Prompts that combine several steps in a single request.
    "chained": [
        "sudo save this location as home, then build a marker here",
        "sudo check what's near me and kill all hostiles, then give me resistance",
        "sudo look up the best sword enchantments and give me one",
        "sudo tp me home and heal me",
        "sudo build an arena and save it as a script I can rerun",
        "sudo check my health, if low heal me and give me food",
        "sudo what scripts do I have? run the arena one at my position",
        "sudo remember this spot, scan for mobs, kill hostiles, build a fort",
    ],
    # ── Plugin categories ──
    # NOTE(review): these assume the named plugins are installed on the target
    # server; the system prompt in query_model() lists FAWE and LuckPerms too —
    # confirm they are actually deployed, otherwise their commands will fail
    # RCON validation.
    "worldguard": [
        "sudo protect this area as my base",
        "sudo make a no-pvp zone around spawn",
        "sudo prevent mob spawning in the village",
        "sudo add Ace13245 as a member of my region",
        "sudo block entry for non-members in the vault",
        "sudo allow TNT in the arena region",
        "sudo set a greeting message for my base region",
        "sudo list all protected regions",
        "sudo prevent creeper explosions globally",
        "sudo create a healing zone at spawn",
        "sudo remove the old-test region",
        "sudo make a safe zone with no fire spread",
    ],
    "coreprotect": [
        "sudo check who broke blocks near me",
        "sudo rollback griefing from the last hour",
        "sudo rollback what TheBigBoss did recently",
        "sudo who placed blocks around here today?",
        "sudo undo TNT damage from the last 2 hours",
        "sudo rollback all container theft recently",
        "sudo restore what was rolled back",
        "sudo check CoreProtect status",
        "sudo rollback fire damage near spawn",
        "sudo lookup what Ace13245 did in the last day",
    ],
    "essentialsx": [
        "sudo set my home here",
        "sudo tp me to my home",
        "sudo create a warp called arena",
        "sudo tp me to the arena warp",
        "sudo give Ace 1000 coins",
        "sudo check my balance",
        "sudo heal me",
        "sudo feed me",
        "sudo repair what I'm holding",
        "sudo set my nickname to DragonLord",
        "sudo give me god mode",
        "sudo toggle fly for me",
        "sudo broadcast a server message",
        "sudo set spawn point here",
        "sudo check when Ace was last online",
    ],
    "luckperms": [
        "sudo give me permission to fly",
        "sudo create a VIP group",
        "sudo add Ace to the VIP group",
        "sudo give VIP access to fly and heal",
        "sudo give me temporary VIP for 1 day",
        "sudo set VIP chat prefix to gold",
        "sudo create a builder group with WorldEdit",
        "sudo list all permission groups",
        "sudo check what permissions I have",
        "sudo remove TheBigBoss from VIP",
    ],
    "fawe": [
        "sudo make a glass sphere 10 blocks wide",
        "sudo hollow sphere of stone",
        "sudo cylinder of quartz 5 wide 10 tall",
        "sudo replace all stone with deepslate in my selection",
        "sudo smooth the terrain around here",
        "sudo drain all water within 20 blocks",
        "sudo build a sandstone pyramid 10 tall",
        "sudo hollow out the selected area",
        "sudo make walls around my selection with stone bricks",
        "sudo fill with a checkerboard pattern",
        "sudo stack my selection 5 times north",
        "sudo undo my last WorldEdit action",
    ],
    # Multi-plugin requests. The last four entries start with "pray" instead
    # of "sudo" — presumably a second chat trigger; confirm against the agent.
    "plugin_combined": [
        "sudo create a protected pvp arena with WorldEdit and WorldGuard",
        "sudo rollback Ace's griefing and revoke his builder perms",
        "sudo set up a VIP lounge — build it, protect it, make a warp",
        "sudo give TheBigBoss a reward: money, items, and temp VIP",
        "sudo prepare the server for an event: announce, set arena flags, heal everyone",
        "pray someone destroyed my house, please restore it",
        "pray protect my village from monsters",
        "pray smite TheBigBoss for griefing",
        "pray make me a temple worthy of your glory",
    ],
}
|
||||
|
||||
# Player names; main() picks one at random for every prompt. The name is sent
# to the model as "Player <name>: ..." and recorded as the only online player
# in the training example.
# NOTE(review): many prompts are position-relative ("around me"); presumably
# these players need to be online on the dev server — confirm.
PLAYERS = ["slingshooter08", "Ace13245", "TheBigBoss", "xXDragonSlayerXx"]
|
||||
|
||||
|
||||
def query_model(prompt, player, ollama_url, model, rcon):
    """Send one prompt to the Ollama chat API and return the parsed JSON reply.

    Args:
        prompt: The player's chat message, e.g. "sudo tp me home".
        player: Player name; the user message is "Player <name>: <prompt>".
        ollama_url: Base URL of the Ollama server (without "/api/chat").
        model: Model tag to query.
        rcon: Not used by this function; kept so existing positional callers
            keep working.

    Returns:
        On success, the model's reply decoded into a dict. On any failure
        (connection/HTTP error, missing fields, malformed JSON, or JSON that
        is not an object) a dict ``{"error": <description>, "raw": <text>}``
        where ``raw`` is the reply text received so far, or "" if none.
    """
    system = (
        "You are a Minecraft 1.21 command translator for a Paper server.\n"
        "Plugins: FastAsyncWorldEdit, WorldGuard, CoreProtect, EssentialsX, Vault, LuckPerms.\n"
        "Tools: rcon.execute, minecraft.wiki_lookup, world.player_info, "
        "world.server_state, world.nearby_entities, memory.read, memory.write, "
        "script.write, script.validate, script.execute, script.read, script.list, "
        "script.delete, script.schedule.\n\n"
        "Plugin commands: //set, //sphere, //cyl (FAWE), /rg define/flag (WorldGuard), "
        "/co rollback/inspect (CoreProtect), /home, /warp, /eco (EssentialsX), "
        "/lp user/group (LuckPerms).\n\n"
        "For complex builds (4+ commands), write a mcfunction script. "
        "For simple tasks, use rcon.execute directly.\n\n"
        "Return JSON: {\"commands\": [...], \"reasoning\": \"...\", \"message\": \"...\"}\n"
        "Use /no_think mode."
    )

    # Defined before the try so the error path can always report what text
    # (if any) had been received when the failure happened.
    content = ""
    try:
        r = requests.post(f"{ollama_url}/api/chat", json={
            "model": model,
            "messages": [
                # "/no_think" is sent both as a prefix and inside the prompt.
                {"role": "system", "content": "/no_think\n" + system},
                {"role": "user", "content": f"Player {player}: {prompt}"},
            ],
            "stream": False, "format": "json",
            "options": {"temperature": 0.4, "num_predict": 800},
        }, timeout=120)
        # Surface HTTP failures (unknown model, server error) as such instead
        # of as a confusing KeyError on the missing "message" field.
        r.raise_for_status()

        reply_text = r.json()["message"]["content"]
        # Drop any <think>...</think> block the model emitted before the JSON.
        content = re.sub(r'<think>[\s\S]*?</think>\s*', '', reply_text)
        parsed = json.loads(content)
    except Exception as e:
        # Deliberately broad: one bad reply or network hiccup must not abort a
        # long self-play run. The caller treats an "error" key as a failed round.
        return {"error": str(e), "raw": content}

    # Valid JSON that is not an object (list, string, number) cannot carry the
    # "commands"/"message" fields the caller reads with .get().
    if not isinstance(parsed, dict):
        return {
            "error": f"expected a JSON object, got {type(parsed).__name__}",
            "raw": content,
        }
    return parsed
|
||||
|
||||
|
||||
def validate_commands(commands, rcon):
    """Execute commands over RCON and report the outcome of each one.

    Only the first 12 entries of ``commands`` are considered; entries that are
    not strings, or are blank, are skipped without being recorded.

    Args:
        commands: Sliceable sequence of command strings.
        rcon: Object exposing ``command(str)`` that returns the server reply.

    Returns:
        A list of ``{"cmd", "result", "ok"}`` dicts, one per executed command.
        ``result`` is the reply cut to 200 characters (or the exception text),
        and ``ok`` is False when the reply contains a known error marker or
        the call raised.
    """
    error_markers = ("<--[HERE]", "Unknown", "Incorrect", "Expected")
    outcomes = []

    for command in commands[:12]:
        if not (isinstance(command, str) and command.strip()):
            continue

        try:
            reply = rcon.command(command)
            failed = False
            for marker in error_markers:
                if marker in reply:
                    failed = True
                    break
            entry = {"cmd": command, "result": reply[:200], "ok": not failed}
        except Exception as exc:
            # Any failure while talking to RCON counts as a failed command.
            entry = {"cmd": command, "result": str(exc), "ok": False}

        outcomes.append(entry)

    return outcomes
|
||||
|
||||
|
||||
def run_round(category, ollama_url, model, rcon, player):
    """Run one self-play round for a category and build its training example.

    Picks a random prompt from ``PROMPTS[category]``, queries the model, runs
    the returned commands through RCON and packages everything as one record.

    Args:
        category: Key into PROMPTS.
        ollama_url: Base URL of the Ollama server.
        model: Model tag to query.
        rcon: RCON client passed through to validate_commands().
        player: Player name the prompt is attributed to.

    Returns:
        The training-example dict, or None when the model query failed.
        ``metadata["all_success"]`` is True only when at least one command was
        actually executed and every executed command succeeded.

    Raises:
        KeyError: If ``category`` is not a key of PROMPTS.
    """
    prompt = random.choice(PROMPTS[category])

    print(f" [{category:18s}] {prompt[:60]}")
    start = time.time()
    response = query_model(prompt, player, ollama_url, model, rcon)
    elapsed = time.time() - start

    # A non-dict reply is as unusable as an explicit error.
    if not isinstance(response, dict) or "error" in response:
        if isinstance(response, dict):
            detail = response["error"]
        else:
            detail = f"non-object reply: {response!r}"
        # str() because the model itself may have produced a non-string "error".
        print(f" ERROR: {str(detail)[:80]}")
        return None

    # `or []` also covers an explicit JSON null.
    commands = response.get("commands") or []
    message = response.get("message", "")
    reasoning = response.get("reasoning", "")

    # Only a list of strings is executable. A bare string would otherwise be
    # iterated character by character and each character sent to the server.
    well_formed = isinstance(commands, list) and all(
        isinstance(c, str) for c in commands
    )

    # Validate commands via RCON
    rcon_results = validate_commands(commands, rcon) if well_formed and commands else []

    # all() over an empty list is True, so require at least one executed
    # command; otherwise a reply consisting only of blank commands would be
    # labelled as a success.
    success = bool(rcon_results) and all(r["ok"] for r in rcon_results)

    ok_count = sum(1 for r in rcon_results if r["ok"])
    if success:
        status = "OK"
    elif ok_count > 0:
        status = f"PARTIAL ({ok_count}/{len(rcon_results)})"
    else:
        status = "FAIL"
    cmd_count = len(commands) if well_formed else 0
    print(f" → {cmd_count} cmds, {status}, {elapsed:.1f}s")

    # Build training example
    example = {
        "id": f"tool-selfplay-{int(time.time())}-{random.randint(0,9999):04d}",
        "source": "tool_self_play",
        "category": category,
        "input": {
            "user_message": prompt,
            "server_context": {
                "server_type": "paper",
                "version": "1.21.x",
                "online_players": [player],
            },
        },
        "output": {
            # Stored as returned by the model, even when malformed, so bad
            # replies remain inspectable in the output file.
            "commands": commands,
            "message": message,
            "reasoning": reasoning,
        },
        "metadata": {
            "rcon_results": rcon_results,
            "all_success": success,
            "elapsed_seconds": round(elapsed, 2),
            "model": model,
            "tool_category": category,
        },
    }

    return example
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: run the self-play loop and write JSONL output.

    Parses the CLI options, connects to RCON, then for each of ``--rounds``
    rounds visits every selected category once (in random order) with a random
    player from PLAYERS. Each example returned by run_round() is appended to
    the output file right away, so an interrupted run keeps what it produced.
    Prints a progress line every 5 rounds and a summary at the end.

    Exits with an argparse usage error when ``--categories`` is empty or names
    a category that is not in PROMPTS.
    """
    parser = argparse.ArgumentParser(description="Tool-focused self-play")
    parser.add_argument("--ollama-url", default="http://192.168.0.179:11434")
    parser.add_argument("--model", default="mortdecai:0.4.0")
    parser.add_argument("--rcon-host", default="192.168.0.112")
    parser.add_argument("--rcon-port", type=int, default=25578)
    parser.add_argument("--rcon-pass", default="REDACTED_RCON")
    parser.add_argument("--rounds", type=int, default=30, help="Rounds per category")
    parser.add_argument("--categories", default="all", help="Comma-separated categories or 'all'")
    parser.add_argument("--output", default="")
    args = parser.parse_args()

    # Resolve and check categories before connecting to anything, so a typo
    # fails immediately with a usage message instead of a KeyError mid-run.
    if args.categories == "all":
        categories = list(PROMPTS)
    else:
        categories = [c.strip() for c in args.categories.split(",") if c.strip()]
        unknown = [c for c in categories if c not in PROMPTS]
        if unknown:
            parser.error(
                f"unknown categories: {', '.join(unknown)} "
                f"(choose from: {', '.join(PROMPTS)})"
            )
        if not categories:
            parser.error("--categories must name at least one category")

    output_path = args.output or str(OUTPUT_DIR / f"tool_selfplay_{int(time.time())}.jsonl")
    # Create the directory of whichever path is actually written to.
    Path(output_path).resolve().parent.mkdir(parents=True, exist_ok=True)

    rcon = get_rcon(args.rcon_host, args.rcon_port, args.rcon_pass)

    print("Tool Self-Play")
    print(f" Model: {args.model} on {args.ollama_url}")
    print(f" RCON: {args.rcon_host}:{args.rcon_port}")
    print(f" Categories: {', '.join(categories)}")
    print(f" Rounds per category: {args.rounds}")
    print(f" Output: {output_path}")
    print()

    stats = {"total": 0, "success": 0, "partial": 0, "fail": 0, "error": 0}
    written = 0

    for round_num in range(args.rounds):
        print(f"\n── Round {round_num + 1}/{args.rounds} ──")
        random.shuffle(categories)

        for cat in categories:
            player = random.choice(PLAYERS)
            example = run_round(cat, args.ollama_url, args.model, rcon, player)

            stats["total"] += 1
            if example is None:
                stats["error"] += 1
                continue

            if example["metadata"]["all_success"]:
                stats["success"] += 1
            elif any(r["ok"] for r in example["metadata"].get("rcon_results", [])):
                stats["partial"] += 1
            else:
                stats["fail"] += 1

            # Write incrementally. Explicit UTF-8 because ensure_ascii=False
            # emits raw non-ASCII characters that a non-UTF-8 locale default
            # could not encode.
            with open(output_path, "a", encoding="utf-8") as f:
                f.write(json.dumps(example, ensure_ascii=False) + "\n")
            written += 1

            # Short pause between requests.
            time.sleep(0.3)

        # Progress report
        if (round_num + 1) % 5 == 0:
            rate = stats["success"] / max(stats["total"], 1) * 100
            print(f"\n Progress: {stats['total']} total, {rate:.0f}% success, "
                  f"{stats['partial']} partial, {stats['fail']} fail, {stats['error']} error")

    print(f"\n{'='*60}")
    print("Tool Self-Play Complete")
    print(f" Total: {stats['total']}")
    print(f" Success: {stats['success']} ({stats['success']/max(stats['total'],1)*100:.0f}%)")
    print(f" Partial: {stats['partial']}")
    print(f" Fail: {stats['fail']}")
    print(f" Error: {stats['error']}")
    print(f" Output: {output_path} ({written} examples)")
|
||||
|
||||
|
||||
# Run the self-play loop only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user