#!/usr/bin/env python3
"""
validate_distilled.py — Execute distilled Claude responses on a live server via RCON.

Takes distilled.jsonl, executes each example's commands on the dev server,
captures RCON results, and writes validated training pairs to output.

This creates the strongest training signal: input → Claude's commands → actual server result.

Usage:
    python3 training/scripts/validate_distilled.py                    # run all
    python3 training/scripts/validate_distilled.py --dry-run          # preview
    python3 training/scripts/validate_distilled.py --max 10           # first 10 only
    python3 training/scripts/validate_distilled.py --rcon-host 192.168.0.244 --rcon-port 25578
"""

import argparse
import json
import re
import sys
import time
from pathlib import Path

from mcrcon import MCRcon

# Base directory: three ``.parent`` hops up from this file's resolved path.
ROOT = Path(__file__).resolve().parent.parent.parent
# Default input (distilled examples) and output (validated examples) JSONL files.
DISTILLED = ROOT.joinpath("data", "processed", "distilled.jsonl")
OUTPUT = ROOT.joinpath("data", "processed", "validated_distilled.jsonl")

# Case-insensitive patterns that is_real_error() looks for in an RCON reply.
# Note: 'Unknown item component' is already covered by 'Unknown item'; it is
# kept so the list stays exactly as it was.
RCON_ERRORS = [
    re.compile(text, re.I)
    for text in (
        r'Unknown or incomplete command',
        r'Incorrect argument',
        r'Expected .+ at position',
        r'Unknown item',
        r'Unknown item component',
        r'Invalid or unknown',
        r"Can't find element",
        r'Expected whitespace',
    )
]

# Expected "failures" that are actually fine (no player online, no entity,
# unloaded chunks). A reply matching any of these is never a real error.
BENIGN_ERRORS = [
    re.compile(text, re.I)
    for text in (
        r'No player was found',
        r'No entity was found',
        r'That position is not loaded',
    )
]


def is_real_error(result: str) -> bool:
    """Return True when an RCON reply signals a genuine command/syntax error.

    A reply counts as a real error only if it matches at least one pattern in
    ``RCON_ERRORS`` and matches none of the patterns in ``BENIGN_ERRORS``.
    """
    # A benign match always wins, whatever else the reply contains.
    if any(benign.search(result) for benign in BENIGN_ERRORS):
        return False
    return any(error.search(result) for error in RCON_ERRORS)


def is_benign_error(result: str) -> bool:
    """Return True when the reply matches any pattern in ``BENIGN_ERRORS``.

    These are the "no player / no entity / position not loaded" replies that
    the module treats as acceptable rather than as command failures.
    """
    return any(benign.search(result) for benign in BENIGN_ERRORS)


def execute_commands(commands: list, rcon_host: str, rcon_port: int, rcon_pass: str) -> list:
    """Send each command over one RCON session and classify every reply.

    Returns a list of dicts (not tuples), one per attempted command, with the
    keys ``command``, ``result`` (reply or exception text, cut to 200 chars),
    ``success``, ``benign_error`` and ``real_error``.

    A command that raises is recorded as a real error and the loop moves on to
    the next command. If the session itself raises (e.g. the connection cannot
    be opened), a single ``"(connection failed)"`` entry is appended to
    whatever was collected so far.
    """

    def failure(label, exc):
        # Shared shape for anything that raised instead of returning a reply.
        return {
            "command": label,
            "result": str(exc)[:200],
            "success": False,
            "benign_error": False,
            "real_error": True,
        }

    outcomes = []
    try:
        with MCRcon(rcon_host, rcon_pass, port=rcon_port) as session:
            for command in commands:
                try:
                    reply = session.command(command)
                    is_real = is_real_error(reply)
                    is_benign = is_benign_error(reply)
                    outcomes.append({
                        "command": command,
                        "result": reply[:200],
                        # Benign replies still count as success.
                        "success": not is_real,
                        "benign_error": is_benign,
                        "real_error": is_real,
                    })
                    # Short pause between commands (skipped when one raises).
                    time.sleep(0.2)
                except Exception as exc:
                    outcomes.append(failure(command, exc))
    except Exception as exc:
        outcomes.append(failure("(connection failed)", exc))
    return outcomes


def main():
    """Command-line entry point: validate distilled examples over RCON.

    Reads the JSONL file given by ``--input``, optionally keeps only the first
    ``--max`` examples, runs each example's ``output.commands`` through
    ``execute_commands`` and stores the outcome under the example's
    ``rcon_validation`` key. Every example (including ones without commands)
    is written to ``--output`` as JSONL, followed by a printed summary.

    With ``--dry-run`` only the command count is printed; no RCON connection is
    opened and nothing is written. If the initial RCON connectivity check
    fails, the process exits with status 1.
    """
    parser = argparse.ArgumentParser(description="Validate distilled responses via RCON")
    parser.add_argument("--input", default=str(DISTILLED))
    parser.add_argument("--output", default=str(OUTPUT))
    parser.add_argument("--rcon-host", default="192.168.0.244")
    parser.add_argument("--rcon-port", type=int, default=25578)
    parser.add_argument("--rcon-pass", default="REDACTED_RCON")
    parser.add_argument("--max", type=int, default=0, help="Max examples to process (0=all)")
    parser.add_argument("--dry-run", action="store_true")
    # Resetting is on by default; --no-reset-between is the only way to turn it
    # off (a lone store_true flag with default=True could never be disabled).
    parser.add_argument("--reset-between", dest="reset_between", action="store_true",
                        default=True,
                        help="Clear effects between examples (default: on)")
    parser.add_argument("--no-reset-between", dest="reset_between", action="store_false",
                        help="Do not clear effects between examples")
    args = parser.parse_args()

    # JSONL is read as UTF-8 regardless of the platform's locale encoding.
    with open(args.input, encoding="utf-8") as f:
        examples = [json.loads(line) for line in f if line.strip()]

    if args.max > 0:
        examples = examples[:args.max]

    total = len(examples)
    print(f"Validating {total} distilled examples")
    print(f"RCON: {args.rcon_host}:{args.rcon_port}")
    print(f"Output: {args.output}")

    if args.dry_run:
        total_cmds = sum(len((ex.get("output") or {}).get("commands") or []) for ex in examples)
        print(f"\n[DRY RUN] Would execute {total_cmds} commands across {total} examples")
        return

    # Test RCON before doing any work so a bad host/password fails fast.
    try:
        with MCRcon(args.rcon_host, args.rcon_pass, port=args.rcon_port) as rcon:
            print(f"RCON OK: {rcon.command('list')}")
    except Exception as e:
        print(f"RCON FAILED: {e}")
        sys.exit(1)

    validated = []
    stats = {"total": 0, "all_success": 0, "partial": 0, "all_fail": 0, "no_cmds": 0,
             "real_errors": 0, "benign_errors": 0}

    for i, ex in enumerate(examples):
        # Treat explicit nulls the same as missing keys.
        commands = (ex.get("output") or {}).get("commands") or []
        full_msg = (ex.get("input") or {}).get("user_message") or ""
        msg = full_msg[:50]  # display-only truncation
        # Decide the mode from the full message, not the truncated preview,
        # so a "pray" past the 50th character is still recognised.
        is_prayer = "pray" in full_msg.lower() or ex.get("source") == "prayer_log"
        mode = "god" if is_prayer else "sudo"

        stats["total"] += 1

        if not commands:
            stats["no_cmds"] += 1
            # Still valid — refusal or info-only response
            ex["rcon_validation"] = {"status": "no_commands", "results": []}
            validated.append(ex)
            print(f"  [{i+1}/{total}] ({mode}) {msg:50} [no cmds — kept]")
            continue

        # Execute
        results = execute_commands(commands, args.rcon_host, args.rcon_port, args.rcon_pass)

        real_errors = sum(1 for r in results if r["real_error"])
        benign = sum(1 for r in results if r["benign_error"])
        successes = sum(1 for r in results if r["success"])

        stats["real_errors"] += real_errors
        stats["benign_errors"] += benign

        if real_errors == 0:
            stats["all_success"] += 1
            status = "valid"
        elif real_errors < len(results):
            stats["partial"] += 1
            status = "partial"
        else:
            stats["all_fail"] += 1
            status = "invalid"

        # Tag the example with validation results
        ex["rcon_validation"] = {
            "status": status,
            "results": results,
            "real_errors": real_errors,
            "benign_errors": benign,
            "successes": successes,
        }
        validated.append(ex)

        flag = ""
        if real_errors > 0:
            flag = f" [FAIL:{real_errors}]"
            # Show first real error
            first_bad = next((r for r in results if r["real_error"]), None)
            if first_bad is not None:
                flag += f" {first_bad['command'][:30]}→{first_bad['result'][:40]}"
        elif benign > 0:
            flag = f" [benign:{benign}]"

        print(f"  [{i+1}/{total}] ({mode}) {msg:50} [{successes}/{len(results)} ok]{flag}")

        # Reset effects between examples
        if args.reset_between and mode == "god":
            try:
                with MCRcon(args.rcon_host, args.rcon_pass, port=args.rcon_port) as rcon:
                    rcon.command("effect clear @a")
            except Exception as e:
                # Best-effort cleanup: report and carry on, but let
                # KeyboardInterrupt/SystemExit propagate.
                print(f"    (effect reset failed: {e})")
            time.sleep(0.5)

        time.sleep(0.3)

    # Write output (UTF-8, since ensure_ascii=False emits raw non-ASCII text).
    out_path = Path(args.output)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with open(out_path, "w", encoding="utf-8") as f:
        for ex in validated:
            f.write(json.dumps(ex, ensure_ascii=False) + "\n")

    print(f"\n{'='*60}")
    print("Validation complete")
    print(f"  Total:          {stats['total']}")
    print(f"  All valid:      {stats['all_success']} ({stats['all_success']/max(stats['total'],1)*100:.1f}%)")
    print(f"  Partial:        {stats['partial']}")
    print(f"  All failed:     {stats['all_fail']}")
    print(f"  No commands:    {stats['no_cmds']}")
    print(f"  Real errors:    {stats['real_errors']}")
    print(f"  Benign errors:  {stats['benign_errors']} (would work with player online)")
    print(f"  Output:         {args.output}")


# Run the validator only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()