From 35919e8d57d391f77229fa1f39eb0803f0af7346 Mon Sep 17 00:00:00 2001 From: Claude Code Date: Sat, 28 Mar 2026 19:38:03 -0400 Subject: [PATCH] =?UTF-8?q?data:=20first=20operator=20run=20=E2=80=94=204?= =?UTF-8?q?=20issues=20found,=203=20notes,=202=20escalations?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Noob playtest: 4/7 passed, 3 blocked (perms). Found: context bleed via Mind's Eye, opus slot misconfigured, player_message tag leaking, test players need perms. Co-Authored-By: Claude Opus 4.6 (1M context) --- data/escalations/20260328-1930-context-bleed.json | 9 +++++++++ .../20260328-1930-opus-slot-config.json | 9 +++++++++ data/notes/context-bleed.md | 5 +++++ data/notes/playtest-permissions.md | 5 +++++ data/notes/tag-leaking.md | 5 +++++ data/run-log.md | 14 ++++++++++++++ 6 files changed, 47 insertions(+) create mode 100644 data/escalations/20260328-1930-context-bleed.json create mode 100644 data/escalations/20260328-1930-opus-slot-config.json create mode 100644 data/notes/context-bleed.md create mode 100644 data/notes/playtest-permissions.md create mode 100644 data/notes/tag-leaking.md create mode 100644 data/run-log.md diff --git a/data/escalations/20260328-1930-context-bleed.json b/data/escalations/20260328-1930-context-bleed.json new file mode 100644 index 0000000..6b48aa8 --- /dev/null +++ b/data/escalations/20260328-1930-context-bleed.json @@ -0,0 +1,9 @@ +{ + "title": "Cross-session context bleed via Mind's Eye", + "severity": "medium", + "description": "Ask mode queried slingshooter08 instead of TestNoob after pray mode discovered that player was online. 
Session IDs are separate but Mind's Eye world context injection includes online player names, which the model targets instead of the requesting player.", + "evidence": "Noob playtest test 6: ask 'how do I find diamonds' — response referenced slingshooter08's position, not TestNoob's", + "suggested_fix": "Mind's Eye context injection should prioritize the requesting player, or clearly label other players as 'other online players' so the model doesn't confuse them with the requester.", + "timestamp": "2026-03-28T19:30:00", + "status": "open" +} diff --git a/data/escalations/20260328-1930-opus-slot-config.json b/data/escalations/20260328-1930-opus-slot-config.json new file mode 100644 index 0000000..1ae0667 --- /dev/null +++ b/data/escalations/20260328-1930-opus-slot-config.json @@ -0,0 +1,9 @@ +{ + "title": "Opus brain slot shows ollama provider with Claude model", + "severity": "medium", + "description": "Gateway status shows opus slot configured as provider=ollama, model=claude-opus-4-20250514. Ollama cannot run Claude models. This may be a stale override or config error.", + "evidence": "curl http://localhost:8500/v2/status — model_slots.opus shows provider=ollama", + "suggested_fix": "Check agents.yaml opus section and any in-memory overrides. Should be provider=anthropic or provider=codex.", + "timestamp": "2026-03-28T19:30:00", + "status": "open" +} diff --git a/data/notes/context-bleed.md b/data/notes/context-bleed.md new file mode 100644 index 0000000..93cd24f --- /dev/null +++ b/data/notes/context-bleed.md @@ -0,0 +1,5 @@ +# context-bleed + +## 2026-03-28 19:30 + +Cross-session context bleed observed: ask mode queried slingshooter08 (online player) instead of TestNoob after pray mode discovered that player. Session IDs are separate (TestNoob:ask vs TestNoob:pray) but Mind's Eye world context injection includes online player names, which the model then targets instead of the requesting player. This could cause confused responses in production. 
diff --git a/data/notes/playtest-permissions.md b/data/notes/playtest-permissions.md new file mode 100644 index 0000000..6a9371b --- /dev/null +++ b/data/notes/playtest-permissions.md @@ -0,0 +1,5 @@ +# playtest-permissions + +## 2026-03-28 19:30 + +Bot test players (TestNoob, TestBuilder, etc.) need permissions granted for all modes before playtesting. Use /raw to grant: perms_manage grant TestNoob sudo,pray,ask,raw. Without this, 3/7 noob commands are blocked by the permission check. diff --git a/data/notes/tag-leaking.md b/data/notes/tag-leaking.md new file mode 100644 index 0000000..bf62c18 --- /dev/null +++ b/data/notes/tag-leaking.md @@ -0,0 +1,5 @@ +# tag-leaking + +## 2026-03-28 19:30 + +`player_message` XML tags sometimes leak into response_text for /ask mode responses. The gateway's tag stripping (_parse_player_message) should catch these but doesn't in all cases. Observed on ask mode test: "what does redstone do". diff --git a/data/run-log.md b/data/run-log.md new file mode 100644 index 0000000..a2671f3 --- /dev/null +++ b/data/run-log.md @@ -0,0 +1,14 @@ +## 2026-03-28 19:30 — First Run + +**Gateway**: UP, v1.0.0-alpha, 10 sessions, TPS 20.0, 1 player online +**Playtest**: noob profile — 4/7 passed, 3 blocked (TestNoob lacks /sudo perms) + +**Issues found**: +1. [MEDIUM] Cross-session context bleed — ask mode queried slingshooter08 instead of TestNoob after pray mode discovered that player +2. [MEDIUM] Opus brain slot shows provider=ollama with model=claude-opus-4 — invalid combo +3. [LOW] `player_message` tags leaking into response_text on ask mode +4. [INFO] TestNoob needs /sudo perms for full playtest coverage + +**Actions**: None (Layer 1 — escalated all issues) + +---