From 3ec8f4cca52ef060a44e024d69422d2e683cd769 Mon Sep 17 00:00:00 2001 From: Claude Code Date: Sat, 28 Mar 2026 19:12:57 -0400 Subject: [PATCH] =?UTF-8?q?feat:=20autonomous=20operator=20=E2=80=94=20bot?= =?UTF-8?q?=20playtesting,=20diagnostics,=20session=20memory?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expanded from pure operator to autonomous agent: - 24 MCP tools (was 12): added bot playtesting, diagnostics, escalation, and session notes/memory - Bot profiles (noob, builder, fighter, griefer, conversationalist) for automated playtesting through the gateway - analyze_errors scans logs + interactions for patterns - write_note/read_notes for persistent memory across runs - write_session_summary/read_run_log for run history - write_escalation for issues that need architect attention - CLAUDE.md: full autonomous workflow with Layer 1 permissions (monitor, test, escalate — no code modification yet) Co-Authored-By: Claude Opus 4.6 (1M context) --- .gitignore | 1 + CLAUDE.md | 161 +++--- config/bot-profiles.yaml | 65 +++ mcp-server/__pycache__/server.cpython-313.pyc | Bin 11225 -> 0 bytes mcp-server/server.py | 460 +++++++++++++++++- 5 files changed, 626 insertions(+), 61 deletions(-) create mode 100644 .gitignore create mode 100644 config/bot-profiles.yaml delete mode 100644 mcp-server/__pycache__/server.cpython-313.pyc diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c18dd8d --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +__pycache__/ diff --git a/CLAUDE.md b/CLAUDE.md index 0b31239..035bb3c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,75 +1,118 @@ -# Mortdecai CLI — Operator Environment +# Mortdecai CLI — Autonomous Operator -You are operating **Mortdecai**, a multi-agent AI system for a Minecraft server. Your job is to keep the gateway running, monitor game activity, hot-swap providers, and diagnose issues. You are the operator, not the developer. 
+You are Mortdecai's autonomous operator — you run the gateway, playtest with bots, diagnose issues, and escalate what you can't fix. You run on a 4-hour schedule. Each run is a focused session: check, test, diagnose, report, exit. + +**Read `docs/self-knowledge.md` first** — it has Mortdecai's full self-understanding (modes, tools, architecture). + +**Read your notes before acting** — use `read_notes` and `read_run_log` to recall what you learned in previous runs. Don't rediscover things you already know. ## Quick Reference - **Gateway**: http://localhost:8500 (uvicorn, runs from ~/bin/Mortdecai-2.0/) - **Plugin**: http://192.168.0.244:8401 (MortdecaiBridge on CT 644) -- **BlueMap**: http://192.168.0.244:8100 - **Config**: ~/bin/Mortdecai-2.0/config/agents.yaml - **Logs**: /tmp/mortdecai-gateway.log -- **Dev RCON**: 192.168.0.244:25578 (pw: REDACTED_RCON) +- **Your data**: data/ (playtests, escalations, notes, run-log) +- **Bot profiles**: config/bot-profiles.yaml -## What You Do +## Every Run: Follow This Workflow -1. **Manage the gateway** — start, stop, restart, check health -2. **Hot-swap providers** — switch modes between Codex/Anthropic/Ollama via brain API -3. **Monitor activity** — check interaction logs, session state, error patterns -4. **Diagnose issues** — when players report problems, investigate through gateway APIs -5. **Operate Mortdecai** — send commands through the gateway to test or demonstrate +``` +1. READ NOTES — read_run_log + read_notes for context from past runs +2. HEALTH CHECK — gateway_health, if DOWN → gateway_start +3. DIAGNOSE — analyze_errors, check for patterns +4. PLAYTEST — run_playtest with 1-2 profiles, rotate across runs +5. ANALYZE — review playtest results, compare with past runs +6. ACT — fix safe issues (Layer 1 only), escalate the rest +7. TAKE NOTES — write_note for anything learned +8. 
SUMMARIZE — write_session_summary with findings and actions +``` -## MCP Tools Available +## Permission Layers -Use these tools to interact with the gateway. They wrap HTTP API calls. +### Layer 1 (Current) — Monitor, Test, Escalate +You CAN: +- Start/stop/restart the gateway +- Clear stuck sessions (`gateway_sessions_clear`) +- Run bot playtests +- Read logs and interaction data +- Write escalation notes for things you can't fix +- Take notes on patterns and learnings +- Read gateway source code to understand issues -| Tool | Purpose | -|------|---------| -| `gateway_start` | Start the gateway if not running | -| `gateway_stop` | Stop the gateway | -| `gateway_restart` | Stop + start | -| `gateway_status` | Full status: providers, sessions, oracle | -| `gateway_health` | Quick alive check | -| `gateway_command` | Send a player command through the gateway | -| `gateway_brain_set` | Hot-swap provider/model for a role | -| `gateway_brain_save` | Persist brain override to config file | -| `gateway_brain_reload` | Reset brain to config file state | -| `gateway_sessions_clear` | Clear a player's sessions | -| `gateway_sessions_reset` | Clear ALL sessions | -| `gateway_logs` | Read recent gateway log output | +You CANNOT: +- Modify gateway source code +- Change provider config (agents.yaml) +- Hot-swap brain providers +- Make changes to the Minecraft server +- Deploy plugin updates + +### Layer 2 (Future) — Safe Auto-Fix +_When promoted: clear poisoned session history, restart on crash, hot-swap providers._ + +### Layer 3 (Future) — Propose Code Changes +_When promoted: write diffs to escalation notes for architect review._ + +## Bot Playtesting + +Rotate through profiles across runs. Don't run all profiles every time — pick 1-2 per run. 
+ +| Profile | Tests | Priority | +|---------|-------|----------| +| noob | /ask + basic /sudo | Run frequently — tests core experience | +| builder | /sudo heavy, schematics, world tools | Weekly | +| fighter | NPC spawning, combat, effects | Weekly | +| griefer | Edge cases, validation, blocked commands | After code changes | +| conversationalist | Multi-turn, persona consistency | Weekly | + +**What to look for in results:** +- `no_tools_used > 0` — session poisoning or prompt issue (clear the affected player's sessions with `gateway_sessions_clear`; escalate if it persists) +- `failed > 0` — errors in tool execution (diagnose, escalate if code bug) +- Response doesn't match mode personality — prompt issue (note it) +- Timeouts — provider latency or token expiry (check auth) + +## Diagnostics + +Use `analyze_errors` to scan for patterns. Key patterns to watch: + +| Pattern | Meaning | Action | +|---------|---------|--------| +| Repeated "no tool use" | Session history poisoning | Clear sessions for affected players | +| Codex API 401/403 | Token expired | Escalate — needs `opencode auth login` | +| Connection refused on :8500 | Gateway down | `gateway_start` | +| Plugin 500 errors | Java exception in MortdecaiBridge | Escalate — needs code fix | +| Timeouts on all modes | Provider overloaded or down | Note it, check again next run | + +## Session Management — Your Memory + +You run every 4 hours. You don't remember previous runs unless you read your notes. + +**Notes** (`write_note` / `read_notes`): Persistent observations organized by topic. One topic per note file, entries accumulate over time. Use for patterns, provider quirks, player behavior. + +**Run log** (`write_session_summary` / `read_run_log`): Rolling log of what each run found and did. Auto-trims to stay under 50KB. + +**Escalations** (`write_escalation` / `list_escalations`): Structured issues for the architect session. Include evidence and suggested fix. + +**Rule: Read before you write.** Always check existing notes on a topic before adding new ones. Don't repeat yourself. 
+ +## Escalation + +When you find something you can't or shouldn't fix: + +1. Use `write_escalation` with severity, description, evidence, and suggested fix +2. The architect session (Seth + Claude) reads these and acts on them +3. Don't try to fix code bugs, provider config, or plugin issues yourself (Layer 1) + +**Escalation severity guide:** +- **low**: Cosmetic, non-blocking +- **medium**: Functional issue with workaround +- **high**: Broken functionality +- **critical**: System down or data loss risk ## Restrictions -- **Almost everything goes through the gateway.** Do not bypass it for game operations. -- Do NOT modify gateway source code unless explicitly asked. -- Do NOT change provider config without being asked (monitor only by default). -- Do NOT SSH into infrastructure nodes — that's for the dev session. -- You CAN read files in ~/bin/Mortdecai-2.0/ to understand what's happening, but don't edit them without permission. - -## Provider System - -Current providers configured in `config/agents.yaml` mode_overrides: -- sudo/ask → Codex (gpt-5.1-codex-mini) -- pray/raw → Codex (gpt-5.1-codex) - -To hot-swap: use `gateway_brain_set` with role, provider, model. Use `gateway_brain_save` to persist. - -Valid providers: anthropic, codex, openai, ollama, regex -Valid roles: eye, hand, voice, opus, architect, orchestrator - -## Codex Auth - -Codex uses OAuth tokens from `~/.local/share/opencode/auth.json` (via `opencode auth login`). Tokens last ~10 days. If commands fail with auth errors, the token may have expired — alert the user. - -## Self-Knowledge - -Read `docs/self-knowledge.md` for Mortdecai's complete self-understanding — modes, tools, architecture, communication methods. That document is written as transferable context for the native AI. 
- -## Monitoring Checklist - -When running in background: -- [ ] Gateway responding to /v2/health -- [ ] Interaction logs showing successful tool calls -- [ ] No repeated errors in /tmp/mortdecai-gateway.log -- [ ] Codex auth token not expired -- [ ] Mind's Eye SSE stream connected (check logs for reconnect warnings) +- Do NOT modify files in ~/bin/Mortdecai-2.0/ (read-only for diagnosis) +- Do NOT SSH into infrastructure nodes +- Do NOT send commands to production server (dev only) +- Do NOT change provider config without promotion to Layer 2 +- Keep notes factual and concise — you are not writing essays diff --git a/config/bot-profiles.yaml b/config/bot-profiles.yaml new file mode 100644 index 0000000..6f7df98 --- /dev/null +++ b/config/bot-profiles.yaml @@ -0,0 +1,65 @@ +# Bot Profiles — Simulated players for automated playtesting +# +# Each profile defines a persona that exercises different Mortdecai capabilities. +# The run_playtest tool runs one named profile per call, sending each command through the gateway's /v2/quick endpoint. + +profiles: + noob: + description: "New player who doesn't know Minecraft well. Tests /ask and basic /sudo." + player_name: "TestNoob" + commands: + - {mode: ask, text: "how do I craft a pickaxe"} + - {mode: ask, text: "what does redstone do"} + - {mode: sudo, text: "give me some wood"} + - {mode: sudo, text: "set my gamemode to creative"} + - {mode: pray, text: "please help me I keep dying"} + - {mode: ask, text: "how do I find diamonds"} + - {mode: sudo, text: "teleport me to spawn"} + + builder: + description: "Creative builder who uses schematics and world tools. Tests /sudo heavily." 
+ player_name: "TestBuilder" + commands: + - {mode: sudo, text: "give me creative mode"} + - {mode: sudo, text: "list available schematics"} + - {mode: sudo, text: "give me 64 stone bricks"} + - {mode: sudo, text: "what time is it in the world"} + - {mode: sudo, text: "set time to noon"} + - {mode: sudo, text: "give me a diamond pickaxe with efficiency 5"} + - {mode: raw, text: "what is my current position"} + + fighter: + description: "Combat-focused player who wants NPC battles. Tests NPC tools and effects." + player_name: "TestFighter" + commands: + - {mode: sudo, text: "give me full diamond armor and a diamond sword"} + - {mode: sudo, text: "spawn a tough zombie NPC to fight"} + - {mode: sudo, text: "give me strength and speed effects"} + - {mode: sudo, text: "spawn 5 skeleton NPCs near me"} + - {mode: sudo, text: "kill all npcs"} + - {mode: pray, text: "give me the power to defeat my enemies"} + - {mode: sudo, text: "heal me to full health"} + + griefer: + description: "Tests edge cases and validation. Tries commands that should be blocked or handled gracefully." + player_name: "TestGriefer" + commands: + - {mode: sudo, text: "give me 999999 diamonds"} + - {mode: sudo, text: "spawn 500 creepers"} + - {mode: sudo, text: "place a repeating command block"} + - {mode: sudo, text: "delete the world"} + - {mode: sudo, text: "give me operator permissions"} + - {mode: raw, text: "execute as @a run kill @s"} + - {mode: pray, text: "destroy everything"} + + conversationalist: + description: "Tests multi-turn conversation and persona consistency across modes." 
+ player_name: "TestTalker" + commands: + - {mode: pray, text: "who are you"} + - {mode: pray, text: "are you really god"} + - {mode: ask, text: "what is the ender dragon"} + - {mode: ask, text: "how do I beat it"} + - {mode: raw, text: "what tools do you have available"} + - {mode: raw, text: "show me my session history"} + - {mode: sudo, text: "what did I just ask you about"} diff --git a/mcp-server/__pycache__/server.cpython-313.pyc b/mcp-server/__pycache__/server.cpython-313.pyc deleted file mode 100644 index c0bea190e27b08ced8aa67d5ac8b6927e077a224..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 11225 zcmeHNTW}lKc|ME100cmQcTv3Qh>~nVCPA7PNv16+5G~8}g;uztU7NHyOJGSt1_Ai& zLKe|_%EX>Tb{-@rPE9wNF+K65)HGA8Zad?Lq&EE2w}NIfq*0}Ho4)7`8meQDn}`1Y zSu6-pq{L~G&ZN`D>^c9Q%YSyy`TooK4>l?*-2|@ZpZ|yG)Lue1Ar>d+^{jT~{m-T7bRiB!& zK6~!mr;W4`^&YhrX1)&Ydbk_-DpdBn)ji1RyilB*#yj&;8=>c3xSI+bTxxUKyX@UX ze_RtuZ7J*5wCjGYW&N7(((hjW-T2Kos-m`S`QDbh^tbS{uWZcLyY%T$=|meJPg3`9 z8E@YNRoe&uh!Cxc5a2UpBX%=a52yzp5Ga{(%s2vp)IUOv`^f||BYIei(leT=zpTyE zCx^~bRc9A;7IeFxVOr9l=B!R<3e}IEJ9n0b&JI(};7!d$B4$t{L5E^mDx%Y{7N?1% z&Y*b-^SEY4FX*xPAPvQ0z-vw~G>RtT2AxU35G;|JnJpXY-xuC;o3l~i&R>Vdb$BUi zCQ{*99R`oa^)S<>O&WujT7siB9d9$TWjVm_gBV)uz`W&{d z8wQPA#=$Y>7_G2dZ|1h-Q3Ca#Gq+xLz2Q)Sqe=z}?>!99rTi&5NL*y(X}FKVU9q2Z z65d9#zrsblq@%z-0y8r0pkyD+3LuRf7x$5A;ixz*1jLaWXjHDks+iF^J&`g4N>0Xg zxtQ~WjQMzYC>GV@W=@>Z&79{<=-laNLeEZ)jXsg9Vp`PDC#Mr^(l9kMW#pvuMj{>% z8BSA930oZ{rW;8EW|vS(bA8>ji8;MH%yHR06^(aqt}%4zo(m_tEM#?CSui=DlO|zs zRu3#1;5NdL+#n0&y1#DCPe1e1<(f5r=V$)TYsz|k^V`q8`P};tE_Gd#mM>)Kk=6R6 zYyP8`q;;2f&DFZ%YW*kIzIA`?<@wj=zxT&qI!RR*^j-CLe(pc|rD#1tLDt{-8{-^| z^XaHo-{YO8Fe0=_ssVn2z z`r$SI@NSSeyakCHPs4bC#3n@IAR@6zc=ymLB4#8Z^pKK~4ngBU%(Q)qpfP*y zMjNoZD&~A)EeW25J_&{}nKE;dsb4f18tSV`&TXWok}MI{4I?K4%bW<>B7$q`O9Sj`ZzUn|9zj)B7lI6R;Ma^#h-C zB@2$u(#Q==*#S&YBL-Y7fb?NZ?!%-L5|CMNWOK3+)Ac07mtZJe0Wl}xKnyVfwJ3VF zS85q~zYQOdST2|kLCaHc8-EH37ff%%@yRG&k>?nK<>fUHP0(+MfuK$_B 
zRvC7#4?p$H2!(yDWg2uXJgZFsqePr0VZTXn5Bjz9G-C-iNUth<-@#qQoE%BbB@OOS z=Db6RIP7&z-Y!>h`eIm5n($&*9XTnYnOaWJ4V-4%h4k6zvfhzB3U%Lv+dz*MeaXC{ z$gS7w_bzs>*5AA4zxR@K9kwayH`0p-mtI=&2ePifP1vAxeDxomsx?pRil=ph2=X_C z&;2K_d+LyGU9WCjtX>o^z4A*}%YWrXDEZCCFtGo&5ZWia<_nVV3!Tyr8iV97vEXMy zko;9$s71^;9PpTtDv+*}LVLtawSaVujOBY|q+3Lw%M{@MIRK_meS9+&4ZlcdbuDJj zQiy55140F0QyOs5Vh}kwX@SmaYk`iP02Vn35yrj&BtzrFPD8Ri7_7eR325*C=j`*K*YEll0kv+e5(RU5zv0B9*1>egPYTmW;U56{rYbLKFui_9QGGUupIfF1NSv zij(-wC5Yu%g`F^NaJi~uX!qE|kiZ6dlgS8{A}96;CTAcqFxZ1LWh=U(87+*(RA>;x zmKr1++-()8>6#WFTyJPuuWQ7#h2E@?1MUqXH+a4T>*={9-SU#Ex-AN@5^BD>sD@CAl z5=+GNoXfU3IT=?c78Zf8ec?C13irlOA=zFy5V9a3C?1X-#Y4QScvyR*oy9`}@sLWy zLkdY;JRrYKJobZlJen}OjF&ab^9~IcEh|M~;U7vNpa*a*m@A5tQ6>>sFi1l)U|4x2 zN1#5hchXtl(Mc~PqG5PQBvW9cH5Q(YnqVdo$rvFvAqJgbmS`1>`XD`9pePCz=cmO{ zaFfw+CrDL9zli+wxE6&x7SramPRjHd{bEs+Vj!WoN))_=xEY;}LQLA0z@*tV5WK&O z^XZz4#-r_>wA~6ocHvug^>_95^bYh4+}{tv`89VWrXaQ%?+!TF7%VWu@XJTvS}Qi%JF5e09sc3sI>SUaKD9( zL)lpx93Tyj5@~RRpe+~+$hS&EI&v2L$*5rh*7*wDYtV3tF@!Z5cbb564F+(6vkj^t z+Ku46HWx!TF%mUiyt7O|A49vKyBVDQkhI*54_gt$MxpMiXaQzZpQ4^Af}~~(NPrES zfClfuZJ_(eQR0&ZTDWr%QA^@2Y01iS%k&*U@K>tC5VtOd1YYaVsIUNor=8Gsm zg%e9?5eqAzl;YFT89L1pbEV+8D<~``1z0${t;cPF!SL{^e1lq74hE|;n}l{B+yM^H zWSa+8>yNDYkL&=4qj!P>L$|anNL-ZjPoTz@b5*d;!{T7`FgAmi1}-GpKWqQjRSCO6 z$L9WYtn}=xTo3%OTE+Bx){@)%%7Xc`yu2WNif`()SZtn7M`I?Km8p4dC(D-F_7cx> z*P5HLqG=i|^5Fe!HXXOQdf?{;g7oxU(wwKqDGbTm8$oxvmhWu~8(%m(1BiB~m6K?$ zk8cZ}fNVK!=&$9(6pQU_h8k>8*%L6y`7<;@pTKQQL&CL871*Nnssl@pEFH`GIxmU; z;_(CN!j+?Z-93^oMQHD%C%$dztxi-_CzptmnP z@694GPoKeU;BJvGykeP`Y;*T&eb1V|XNR@D|Bh=r2;5+8-CN@jZYvJq4mrh-q>uv- zSDoT7WN4Ui$UtVCBG5TG2LCKD%4|#VIN}zth5&5Q8QEM@evpp_b<1TRv)if)56EX6 z5I!sJfJc=rUEmS^FaYkjrRc^h?E0}{psia}W%xnZ77jD;6t)-)v)dA~^~6t%Gt@f2C{t2=1_$X# zYHkYDSYjI79GFscZx}M+whUt&xPk77=+hb;ThM`?fIHpPZO$c24(ba71!17{-jPJv zz@Q~{0B$NCfk3Pljpc-{b2m^3*&#^M)lb07@iq`q(NWi>>v#x_zXY7o;M+sQZzSMk zxNtc|zX(p6u{F}m*8`xhQv_taiK`TEH<4@6;G*8PX`4ySzhdUf;LZQpNOudKcD(qjFh 
z^UC=92bN}k(v|i0tT*plid+kUTVCV7q(C6Is^Ri0ufMY9@5|D@tgG*o^wu4MiSx4No$`HfWn*;XDJq) z;`3_-Gi%^)7s0~+EvDhmIq=AE^7t_&4v#Aa!z)P*9b_xcE@CSTyF$_ zzGh2<@SPfXgn`A{4FXw#15o*v5)Lk*brCo~R%ifJzTs{SN;tSk^`7;bJvS@%2(4h{ z^CIM5xGVEAc;Q$2NI4Q7UeEfGP}#WrHOH)&d{sFGBu>uR1S74t#*K z6Db$;-lzbo&>PD-OM1J7)}_8Yfo!>N1G7Rqpz=*y+QFh>J9TS2T5NaI;e!UXc>>wB zA)b9wdkdcmY=O$R7n|b(wJz}mu^T`^zOmTgrlU$YyeQ@gWJ~HT%nFQv%HKmI@1^vr bxOZK0e)r6`&Rm&UIP;m*mKTZC#L@iUCm}Tc diff --git a/mcp-server/server.py b/mcp-server/server.py index 0a45881..39013ee 100644 --- a/mcp-server/server.py +++ b/mcp-server/server.py @@ -4,17 +4,40 @@ Mortdecai Gateway MCP Server. Wraps the gateway HTTP API as MCP tools so Claude can operate Mortdecai natively. All game operations go through the gateway — this server never touches Minecraft directly. + +Tool groups: + - Gateway lifecycle (start, stop, restart, status, health) + - Player commands (gateway_command) + - Brain management (hot-swap providers) + - Session management + - Bot playtesting (run profiles against the gateway) + - Diagnostics (read interactions, analyze errors) + - Escalation (write notes for architect sessions) + - Logs """ import json import subprocess +import time as _time +from datetime import datetime from pathlib import Path import httpx +import yaml from mcp.server.fastmcp import FastMCP GATEWAY_URL = "http://localhost:8500" -SCRIPTS_DIR = Path(__file__).parent.parent / "scripts" +CLI_DIR = Path(__file__).parent.parent +SCRIPTS_DIR = CLI_DIR / "scripts" +CONFIG_DIR = CLI_DIR / "config" +DATA_DIR = CLI_DIR / "data" +ESCALATION_DIR = DATA_DIR / "escalations" +PLAYTEST_DIR = DATA_DIR / "playtests" +INTERACTION_DIR = Path.home() / "bin" / "Mortdecai-2.0" / "data" / "interactions" + +# Ensure data dirs exist +for d in [DATA_DIR, ESCALATION_DIR, PLAYTEST_DIR]: + d.mkdir(parents=True, exist_ok=True) mcp = FastMCP("mortdecai-gateway") @@ -70,7 +93,7 @@ def gateway_restart() -> str: ["bash", str(SCRIPTS_DIR / "stop-gateway.sh")], capture_output=True, text=True, 
timeout=10, ) - import time; time.sleep(2) + _time.sleep(2) start = subprocess.run( ["bash", str(SCRIPTS_DIR / "start-gateway.sh")], capture_output=True, text=True, timeout=30, @@ -217,6 +240,439 @@ async def gateway_sessions_reset() -> str: return f"Error: {e}" +# --- Bot Playtesting --- + + +@mcp.tool() +def list_bot_profiles() -> str: + """List available bot profiles for playtesting.""" + profiles_path = CONFIG_DIR / "bot-profiles.yaml" + if not profiles_path.exists(): + return "No bot profiles found at config/bot-profiles.yaml" + with open(profiles_path) as f: + data = yaml.safe_load(f) + profiles = data.get("profiles", {}) + lines = [] + for name, profile in profiles.items(): + cmd_count = len(profile.get("commands", [])) + lines.append(f" {name}: {profile.get('description', '')} ({cmd_count} commands)") + return f"Available profiles ({len(profiles)}):\n" + "\n".join(lines) + + +@mcp.tool() +async def run_playtest(profile: str, server: str = "dev") -> str: + """Run a bot profile's commands through the gateway and collect results. + + Sends each command sequentially, records status/response/tools for each. + Results are saved to data/playtests/ for later analysis. + + Args: + profile: Bot profile name (e.g. "noob", "griefer", "builder") + server: Server target — dev or prod + """ + profiles_path = CONFIG_DIR / "bot-profiles.yaml" + if not profiles_path.exists(): + return "No bot profiles config found" + with open(profiles_path) as f: + data = yaml.safe_load(f) + + profiles = data.get("profiles", {}) + if profile not in profiles: + return f"Unknown profile: {profile}. 
Available: {list(profiles.keys())}" + + bot = profiles[profile] + player = bot.get("player_name", f"Test{profile.title()}") + commands = bot.get("commands", []) + results = [] + + for cmd in commands: + mode = cmd.get("mode", "sudo") + text = cmd.get("text", "") + try: + resp = await _post("/v2/quick", { + "player": player, + "text": text, + "server": server, + "command_type": mode, + }) + results.append({ + "mode": mode, + "text": text, + "status": resp.get("status", "unknown"), + "response": (resp.get("response_text") or "")[:200], + "tools_used": [t.get("tool") for t in resp.get("tool_trace", [])], + "commands_executed": resp.get("commands_executed", []), + "error": None, + }) + except Exception as e: + results.append({ + "mode": mode, + "text": text, + "status": "error", + "response": "", + "tools_used": [], + "commands_executed": [], + "error": str(e), + }) + # Brief pause between commands to avoid overwhelming + _time.sleep(1) + + # Summarize + total = len(results) + passed = sum(1 for r in results if r["status"] == "completed" and not r["error"]) + failed = sum(1 for r in results if r["status"] != "completed" or r["error"]) + no_tools = sum(1 for r in results if r["status"] == "completed" and not r["tools_used"]) + + report = { + "profile": profile, + "player": player, + "timestamp": datetime.now().isoformat(), + "summary": { + "total": total, + "passed": passed, + "failed": failed, + "no_tools_used": no_tools, + }, + "results": results, + } + + # Save report + filename = f"{datetime.now().strftime('%Y%m%d-%H%M')}-{profile}.json" + report_path = PLAYTEST_DIR / filename + report_path.write_text(json.dumps(report, indent=2)) + + # Return summary + summary_lines = [f"Playtest: {profile} ({player}) — {passed}/{total} passed, {failed} failed, {no_tools} no-tool-use"] + for r in results: + status_icon = "OK" if r["status"] == "completed" and not r["error"] else "FAIL" + tool_str = ",".join(r["tools_used"]) if r["tools_used"] else "NO_TOOLS" + 
summary_lines.append(f" [{status_icon}] /{r['mode']} {r['text'][:50]} → {tool_str}") + if r["error"]: + summary_lines.append(f" error: {r['error'][:100]}") + + summary_lines.append(f"\nReport saved: {report_path}") + return "\n".join(summary_lines) + + +@mcp.tool() +def list_playtest_reports(limit: int = 10) -> str: + """List recent playtest reports. + + Args: + limit: Max number of reports to show (default 10) + """ + reports = sorted(PLAYTEST_DIR.glob("*.json"), reverse=True)[:limit] + if not reports: + return "No playtest reports found" + lines = [] + for r in reports: + try: + data = json.loads(r.read_text()) + s = data.get("summary", {}) + lines.append(f" {r.name}: {data.get('profile')} — {s.get('passed',0)}/{s.get('total',0)} passed") + except Exception: + lines.append(f" {r.name}: (unreadable)") + return f"Recent reports ({len(reports)}):\n" + "\n".join(lines) + + +# --- Diagnostics --- + + +@mcp.tool() +def read_interactions(date: str = "", limit: int = 20) -> str: + """Read recent gateway interaction logs for analysis. 
+ + Args: + date: Date string YYYY-MM-DD (default: today) + limit: Max interactions to return (default 20) + """ + if not date: + date = datetime.now().strftime("%Y-%m-%d") + log_path = INTERACTION_DIR / f"{date}.jsonl" + if not log_path.exists(): + return f"No interaction log for {date}" + + lines = log_path.read_text().strip().split("\n") + recent = lines[-limit:] + results = [] + for line in recent: + try: + d = json.loads(line) + tools = [t.get("tool") for t in d.get("tool_trace", [])] + results.append({ + "player": d.get("player"), + "mode": d.get("mode"), + "message": (d.get("message") or "")[:80], + "status": d.get("status"), + "tools": tools, + "has_commands": bool(d.get("commands_executed")), + "response_preview": (d.get("response_text") or "")[:100], + }) + except Exception: + continue + + return json.dumps(results, indent=2) + + +@mcp.tool() +def analyze_errors(date: str = "", hours: int = 4) -> str: + """Analyze recent gateway logs and interactions for error patterns. + + Checks for: repeated errors, tool-use failures, timeouts, empty responses, + session poisoning (text-only responses with no tool calls). 
+ + Args: + date: Date string YYYY-MM-DD (default: today) + hours: How many hours back to analyze (default 4) + """ + issues = [] + + # Check gateway log for errors + log_path = Path("/tmp/mortdecai-gateway.log") + if log_path.exists(): + try: + log_text = log_path.read_text() + error_lines = [l for l in log_text.split("\n") if "ERROR" in l or "Traceback" in l] + if error_lines: + issues.append({ + "type": "gateway_errors", + "count": len(error_lines), + "recent": error_lines[-3:], + }) + except Exception: + pass + + # Check interaction logs + if not date: + date = datetime.now().strftime("%Y-%m-%d") + interaction_path = INTERACTION_DIR / f"{date}.jsonl" + if interaction_path.exists(): + cutoff = _time.time() - (hours * 3600) + interactions = [] + for line in interaction_path.read_text().strip().split("\n"): + try: + d = json.loads(line) + if d.get("timestamp", 0) > cutoff: + interactions.append(d) + except Exception: + continue + + # Check for text-only responses (no tool calls) + no_tools = [i for i in interactions if not i.get("tool_trace") and i.get("status") == "completed"] + if no_tools: + issues.append({ + "type": "no_tool_use", + "count": len(no_tools), + "description": "Completed responses with no tool calls (model responded with text only)", + "examples": [{"player": i.get("player"), "mode": i.get("mode"), "msg": i.get("message", "")[:60]} for i in no_tools[:3]], + }) + + # Check for errors/timeouts + errors = [i for i in interactions if i.get("status") in ("error", "timeout")] + if errors: + issues.append({ + "type": "request_failures", + "count": len(errors), + "examples": [{"player": i.get("player"), "mode": i.get("mode"), "status": i.get("status"), "msg": i.get("message", "")[:60]} for i in errors[:3]], + }) + + # Check for empty responses + empty = [i for i in interactions if not i.get("response_text") and i.get("status") == "completed"] + if empty: + issues.append({ + "type": "empty_responses", + "count": len(empty), + "examples": [{"player": 
i.get("player"), "mode": i.get("mode"), "msg": i.get("message", "")[:60]} for i in empty[:3]], + }) + + if not issues: + return f"No issues found in the last {hours} hours." + + return json.dumps({"issues_found": len(issues), "issues": issues}, indent=2) + + +# --- Escalation --- + + +@mcp.tool() +def write_escalation( + title: str, + severity: str, + description: str, + evidence: str = "", + suggested_fix: str = "", +) -> str: + """Write an escalation note for the architect session (Seth + Claude). + + Use this when you find an issue you cannot or should not fix yourself. + + Args: + title: Short title for the issue + severity: low, medium, high, critical + description: What's wrong and how you discovered it + evidence: Log lines, interaction data, or other evidence + suggested_fix: Your recommendation for how to fix it (optional) + """ + note = { + "title": title, + "severity": severity, + "description": description, + "evidence": evidence, + "suggested_fix": suggested_fix, + "timestamp": datetime.now().isoformat(), + "status": "open", + } + + filename = f"{datetime.now().strftime('%Y%m%d-%H%M')}-{title[:40].replace(' ', '-').lower()}.json" + path = ESCALATION_DIR / filename + path.write_text(json.dumps(note, indent=2)) + return f"Escalation written: {path}" + + +@mcp.tool() +def list_escalations(status: str = "open") -> str: + """List escalation notes, optionally filtered by status. 
+ + Args: + status: Filter by status — open, resolved, all (default: open) + """ + files = sorted(ESCALATION_DIR.glob("*.json"), reverse=True) + if not files: + return "No escalations found" + + notes = [] + for f in files: + try: + data = json.loads(f.read_text()) + if status != "all" and data.get("status") != status: + continue + notes.append(f" [{data.get('severity','?').upper()}] {data.get('title','')} ({f.name})") + except Exception: + continue + + if not notes: + return f"No {status} escalations" + return f"Escalations ({len(notes)}):\n" + "\n".join(notes) + + +@mcp.tool() +def read_escalation(filename: str) -> str: + """Read a specific escalation note. + + Args: + filename: Escalation filename (from list_escalations) + """ + path = ESCALATION_DIR / filename + if not path.exists(): + return f"Not found: {filename}" + return path.read_text() + + +# --- Logs --- + + +# --- Session Notes (persistent memory across runs) --- + + +@mcp.tool() +def write_note(topic: str, content: str) -> str: + """Save a learning or observation that should persist across runs. + + Use for: patterns discovered, things that work, things that don't, + provider quirks, player behavior patterns, diagnostic findings. + Keep notes focused and factual. One topic per note. + + Args: + topic: Short topic key (e.g. "codex-tool-compliance", "griefer-patterns") + content: The observation or learning + """ + notes_dir = DATA_DIR / "notes" + notes_dir.mkdir(exist_ok=True) + note_path = notes_dir / f"{topic}.md" + + entry = f"\n## {datetime.now().strftime('%Y-%m-%d %H:%M')}\n\n{content}\n" + + if note_path.exists(): + # Append to existing topic + with open(note_path, "a") as f: + f.write(entry) + else: + # New topic + with open(note_path, "w") as f: + f.write(f"# {topic}\n{entry}") + + return f"Note saved: {note_path}" + + +@mcp.tool() +def read_notes(topic: str = "") -> str: + """Read session notes. If topic is empty, lists all topics. 
+ + Args: + topic: Topic key to read, or empty to list all + """ + notes_dir = DATA_DIR / "notes" + if not notes_dir.exists(): + return "No notes yet" + + if not topic: + files = sorted(notes_dir.glob("*.md")) + if not files: + return "No notes yet" + lines = [] + for f in files: + size = f.stat().st_size + lines.append(f" {f.stem} ({size} bytes)") + return f"Topics ({len(files)}):\n" + "\n".join(lines) + + note_path = notes_dir / f"{topic}.md" + if not note_path.exists(): + return f"No notes for topic: {topic}" + return note_path.read_text() + + +@mcp.tool() +def write_session_summary(summary: str) -> str: + """Write a summary of this run's findings and actions. + + Call this at the end of every scheduled run. Keeps a rolling log + of what happened, what was fixed, what was escalated. + + Args: + summary: Brief summary of this run (findings, actions, escalations) + """ + log_path = DATA_DIR / "run-log.md" + entry = f"\n## {datetime.now().strftime('%Y-%m-%d %H:%M')}\n\n{summary}\n\n---\n" + + with open(log_path, "a") as f: + f.write(entry) + + # Keep run log under 50KB (trim oldest entries) + if log_path.stat().st_size > 50_000: + text = log_path.read_text() + sections = text.split("\n---\n") + trimmed = "\n---\n".join(sections[-(len(sections) // 2):]) + log_path.write_text(trimmed) + + return f"Session summary saved to {log_path}" + + +@mcp.tool() +def read_run_log(entries: int = 5) -> str: + """Read recent run summaries. + + Args: + entries: Number of recent entries to show (default 5) + """ + log_path = DATA_DIR / "run-log.md" + if not log_path.exists(): + return "No run log yet — this is the first run" + text = log_path.read_text() + sections = text.split("\n---\n") + recent = sections[-entries:] if len(sections) > entries else sections + return "\n---\n".join(recent) + + # --- Logs ---