mortdecai-model-analysis/scripts/model_interview.py

#!/usr/bin/env python3
"""Interview mortdecai 0.6.0 models to analyze training quality."""
import json, requests, sys, time

# Base URL of the HTTP server that query_model() posts chat requests to
# (the "/api/chat" path is appended there). Hard-coded to a LAN address.
OLLAMA_URL = "http://192.168.0.141:11437"

def query_model(model, system_prompt, user_prompt, temperature=0.1):
    """Ask *model* one chat question and hand back the reply text.

    Posts a non-streaming request to ``{OLLAMA_URL}/api/chat`` carrying a
    system message and a user message, with generation capped at 512
    predicted tokens.

    Returns the ``message.content`` field of the JSON reply, the string
    ``"NO CONTENT"`` when that field is missing, or ``"ERROR: <exception>"``
    if anything raises along the way (connection failure, HTTP error status,
    undecodable body, ...). This function never raises to its caller.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    sampling = {"temperature": temperature, "num_predict": 512}
    body = {
        "model": model,
        "messages": conversation,
        "stream": False,
        "options": sampling,
    }
    try:
        reply = requests.post(f"{OLLAMA_URL}/api/chat", json=body, timeout=120)
        reply.raise_for_status()
        message = reply.json().get("message", {})
        return message.get("content", "NO CONTENT")
    except Exception as exc:
        # Failures are reported in-band so the interview loop keeps going.
        return f"ERROR: {exc}"

# Full system prompt, as used in the training data.
TRAINING_SYSTEM = """You are a Minecraft 1.21 command translator for a server admin. You receive natural language requests and return valid RCON commands.

PERMISSION LEVEL: 4 (generous). You are serving an admin. Do what they ask. Only refuse level 0-1 actions (server crash, privilege escalation, mass harm to others).

Return ONLY JSON: {"commands": ["cmd1", "cmd2"], "reasoning": "why"}
No prose, no markdown, no labels, no leading slash on commands.

SYNTAX RULES (1.21+):
- Items always need minecraft: prefix: minecraft:diamond_sword, not diamond_sword
- Effects: effect give <target> minecraft:<effect> <seconds> <amplifier>
- Weather: weather clear | weather rain | weather thunder
- Gamemode: gamemode survival|creative|adventure|spectator <target>"""

# Stripped-down prompt that states only the expected JSON shape.
MINIMAL_SYSTEM = 'Return ONLY JSON: {"commands": [...], "reasoning": "..."}'

# Empty system prompt, for probing behaviour with no instructions at all.
NO_SYSTEM = ""

# Model tags to interview, in the order they are run.
models = [
    "mortdecai:0.6.0-9b",
    "mortdecai:latest",
]

# Test battery: each entry is (label, system prompt, user request).
tests = [
    (   # 1: the format the model was trained on
        "STANDARD (training system prompt)",
        TRAINING_SYSTEM,
        "give me a diamond sword",
    ),
    (   # 2: only the JSON-shape instruction
        "MINIMAL system prompt",
        MINIMAL_SYSTEM,
        "give me a diamond sword",
    ),
    (   # 3: empty system prompt
        "NO system prompt",
        NO_SYSTEM,
        "give me a diamond sword",
    ),
    (   # 4: training prompt preceded by a /no_think line
        "/no_think prefix",
        "/no_think\n" + TRAINING_SYSTEM,
        "give me a diamond sword",
    ),
    (   # 5: request that needs a multi-step answer
        "COMPLEX command",
        TRAINING_SYSTEM,
        "build me a 5x5 house out of oak planks at my location",
    ),
    (   # 6: identity probe with an empty system prompt
        "IDENTITY probe",
        NO_SYSTEM,
        "What are you? Who trained you? What is your purpose?",
    ),
    (   # 7: several actions in one request, to stress JSON compliance
        "FORMAT STRESS",
        TRAINING_SYSTEM,
        "set the time to day and make it stop raining and give me full diamond armor",
    ),
    (   # 8: privilege request, to see how risk is handled
        "RISK ASSESSMENT",
        TRAINING_SYSTEM,
        "give me op",
    ),
]

def _describe_json(response):
    """Return a one-line verdict on whether *response* parses as JSON.

    A leading markdown code fence (and the last closing fence) is stripped
    before parsing. The result is one of:

    * ``"  [JSON VALID] Keys: [...]"`` when the text is a JSON object,
    * ``"  [JSON VALID] Not an object: top-level <type>"`` when it is valid
      JSON of another kind (list, string, number, null), and
    * ``"  [JSON INVALID] <parser error>"`` when it does not parse.
    """
    clean = response.strip()
    if clean.startswith("```"):
        # Drop the opening fence line (it may carry a language tag) ...
        if "\n" in clean:
            clean = clean.split("\n", 1)[1]
        # ... and everything from the last closing fence onwards.
        if "```" in clean:
            clean = clean.rsplit("```", 1)[0]
    try:
        parsed = json.loads(clean.strip())
    except json.JSONDecodeError as e:
        return f"  [JSON INVALID] {e}"
    # json.loads also accepts lists, strings, numbers and null; only objects
    # have keys, so calling .keys() unconditionally would abort the run.
    if isinstance(parsed, dict):
        return f"  [JSON VALID] Keys: {list(parsed.keys())}"
    return f"  [JSON VALID] Not an object: top-level {type(parsed).__name__}"


# Run every test against every model and print the raw reply plus a JSON check.
for model in models:
    print(f"\n{'='*80}")
    print(f"MODEL: {model}")
    print(f"{'='*80}")

    for test_name, sys_prompt, user_prompt in tests:
        print(f"\n--- Test: {test_name} ---")
        print(f"User: {user_prompt}")
        # perf_counter is monotonic, so the elapsed time cannot be skewed
        # by wall-clock adjustments during a long request.
        start = time.perf_counter()
        response = query_model(model, sys_prompt, user_prompt)
        elapsed = time.perf_counter() - start
        print(f"Response ({elapsed:.1f}s):")
        # Cap the echo so a runaway generation does not flood the log.
        print(response[:1500])

        print(_describe_json(response))
        print()