#!/usr/bin/env python3
"""Deep diagnostic probes to understand training failure modes.

Running this script sends a fixed series of prompts to the Ollama server at
``OLLAMA_URL`` and prints a truncated view of each reply. Nothing is written
to disk; the output is meant to be read by a human.
"""
import json
import time

import requests

OLLAMA_URL = "http://192.168.0.141:11437"

# Delimiters of the model's reasoning section.
# NOTE(review): these literals were empty strings in the reviewed copy, which
# made `"" in r` always True and sliced 8 characters off every reply before
# JSON validation. The slice offset of 8 equals len("</think>"), so the tags
# were restored as below -- confirm against the original script.
THINK_OPEN = "<think>"
THINK_CLOSE = "</think>"


def query(model, messages, temp=0.1):
    """Send one non-streaming chat request and return the reply text.

    Args:
        model: Model tag passed through as the ``model`` field.
        messages: List of ``{"role": ..., "content": ...}`` dicts.
        temp: Sampling temperature placed in ``options``.

    Returns:
        The ``message.content`` string of the JSON response, ``"NO CONTENT"``
        when that field is absent, or ``"ERROR: <exception>"`` when the
        request or the decoding fails.
    """
    payload = {
        "model": model,
        "messages": messages,
        "stream": False,
        "options": {"temperature": temp, "num_predict": 256},
    }
    try:
        r = requests.post(f"{OLLAMA_URL}/api/chat", json=payload, timeout=120)
        data = r.json()
        return data.get("message", {}).get("content", "NO CONTENT")
    except Exception as e:
        # Deliberately broad: a failing probe must not abort the remaining
        # probes, so the failure is reported in place of the reply.
        return f"ERROR: {e}"


def _strip_think(text):
    """Return *text* stripped, with everything up to the first closing
    reasoning tag removed when such a tag is present."""
    text = text.strip()
    _, closing_tag, remainder = text.partition(THINK_CLOSE)
    if closing_tag:
        return remainder.strip()
    return text


# Probe 1: Does it remember ANY training signal?
# Use exact phrases from training data
print("=" * 80)
print("PROBE 1: Training signal detection (exact training phrases)")
print("=" * 80)

for model in ["mortdecai:0.6.0-9b", "mortdecai:latest"]:
    print(f"\n--- {model} ---")

    # Try the exact system prompt format from training
    r = query(model, [
        {"role": "system", "content": "/no_think\nYou are a Minecraft 1.21 command translator for a server admin.\nReturn ONLY JSON: {\"commands\": [\"cmd1\", \"cmd2\"], \"reasoning\": \"why\"}\nNo prose, no markdown, no labels, no leading slash on commands."},
        {"role": "user", "content": "give me a diamond sword"},
    ])
    print(f" Exact training format: {r[:200]}")

# Probe 2: Does /no_think suppress thinking?
print("\n" + "=" * 80)
print("PROBE 2: /no_think effect")
print("=" * 80)

for model in ["mortdecai:0.6.0-9b", "mortdecai:latest", "qwen3.5:latest", "qwen3.5:27b"]:
    print(f"\n--- {model} ---")
    r = query(model, [
        {"role": "system", "content": "/no_think\nReturn only: hello"},
        {"role": "user", "content": "say hello"},
    ])
    has_think = THINK_OPEN in r
    print(f" Has <think>: {has_think}")
    print(f" Response: {r[:150]}")

# Probe 3: Raw completion mode (no chat template) — use /api/generate
print("\n" + "=" * 80)
print("PROBE 3: Raw generate (no chat template)")
print("=" * 80)

for model in ["mortdecai:0.6.0-9b", "mortdecai:latest"]:
    print(f"\n--- {model} ---")
    payload = {
        "model": model,
        # The prompt ends mid-JSON so the model has to continue the
        # "commands" array rather than start a fresh answer.
        "prompt": (
            'You are a Minecraft command translator. '
            'Return ONLY JSON.\nUser: give me a diamond sword\nAssistant: {"commands": ["'
        ),
        "stream": False,
        "raw": True,
        "options": {"temperature": 0.1, "num_predict": 128},
    }
    try:
        r = requests.post(f"{OLLAMA_URL}/api/generate", json=payload, timeout=120)
        data = r.json()
        print(f" Raw completion: {data.get('response', 'NO RESPONSE')[:300]}")
    except Exception as e:
        # Best-effort, same as query(): report and move on to the next model.
        print(f" ERROR: {e}")

# Probe 4: Multi-turn — can we coerce it into JSON with a correction?
print("\n" + "=" * 80)
print("PROBE 4: Correction coercion (multi-turn)")
print("=" * 80)

for model in ["mortdecai:0.6.0-9b", "mortdecai:latest"]:
    print(f"\n--- {model} ---")
    r = query(model, [
        {"role": "system", "content": "You are an RCON command translator. You MUST respond with ONLY raw JSON, no markdown, no explanation. Format: {\"commands\": [...], \"reasoning\": \"...\"}"},
        {"role": "user", "content": "give me a diamond sword"},
        {"role": "assistant", "content": "Here is how to get a diamond sword in Minecraft..."},
        {"role": "user", "content": "NO. You must respond with ONLY JSON. No text. No markdown. Just raw JSON. Try again: give me a diamond sword"},
    ])
    print(f" After correction: {r[:300]}")

    # Check if JSON (ignoring any reasoning section before the answer)
    clean = _strip_think(r)
    try:
        json.loads(clean)
        print(" [JSON VALID]")
    except json.JSONDecodeError:
        print(" [JSON INVALID]")

# Probe 5: Does it know Mortdecai?
print("\n" + "=" * 80)
print("PROBE 5: Mortdecai awareness")
print("=" * 80)

for model in ["mortdecai:0.6.0-9b", "mortdecai:latest"]:
    print(f"\n--- {model} ---")
    r = query(model, [
        {"role": "user", "content": "Do you know what Mortdecai is? Have you been trained as a Minecraft AI? What is your model name?"},
    ])
    print(f" {r[:400]}")