Files
Mortdecai/scripts/training_status_printer.py
T
Seth 029bd28a58 Gemini-powered prayer bots, POS cost printer, first LoRA training run
Prayer bots (ingame/prayer_bots.js):
- 3 Mineflayer bots that actively pray, sudo, and bug_log on dev server
- Gemini 2.5 Flash Lite generates diverse natural prompts on the fly
- Falls back to static pool if Gemini unavailable
- 15-45s interval per bot, 50/35/10/5 pray/sudo/bug/chat split

POS status printer (scripts/training_status_printer.py):
- Prints training data collection status to Epson TM-m30
- Tracks: dataset size, audit logs, bot activity, Gemini API cost, service status
- Triggers on $0.50 cost threshold (configurable), checks every 15 min
- --dry-run, --check, --force flags

Training:
- First LoRA run completed (233 examples, 3 epochs, loss 1.5→0.10)
- GGUF exported and loaded into Ollama as qwen3-8b-mc-lora on steel141
- Model is bad (expected) — hallucinating Chinese, leaking system prompt
- Deployed to dev server for live testing and data collection
- bf16 fix for Ampere GPU, system prompts included in training conversations

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-18 17:36:08 -04:00

308 lines
9.7 KiB
Python

#!/usr/bin/env python3
"""
training_status_printer.py — Prints training data collection status to the POS printer.
Tracks:
- Gemini API usage and estimated cost
- Training audit log growth
- Bot activity
- Model performance snapshot
Runs on a 4-hour interval via cron or direct invocation.
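Usage:
python3 training_status_printer.py # print only if the cost threshold has been crossed
python3 training_status_printer.py --force # print now, regardless of the threshold
python3 training_status_printer.py --check # show current cost and the next print threshold
python3 training_status_printer.py --dry-run # show what would print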
"""
import json
import os
import socket
import subprocess
import sys
import time
from datetime import datetime, timedelta
from pathlib import Path
# --- Config ---
# Address of the receipt printer; send_to_printer() opens a TCP connection
# to (PRINTER_IP, PRINTER_PORT) and writes the receipt bytes to it.
PRINTER_IP = "192.168.0.137"
PRINTER_PORT = 9100
# Number of characters used for the "=" / "-" separator rules on the receipt.
COLS = 57
# Gemini Flash Lite pricing (per 1M tokens, as of 2026-03)
# https://ai.google.dev/pricing
GEMINI_INPUT_COST_PER_M = 0.075 # $0.075 per 1M input tokens
GEMINI_OUTPUT_COST_PER_M = 0.30 # $0.30 per 1M output tokens
# Approximate: our prompt is ~300 tokens input, ~200 tokens output per call
# The cost figure is an estimate: (number of logged calls) x these per-call
# token counts. Actual token usage is not measured by this script.
EST_INPUT_TOKENS_PER_CALL = 300
EST_OUTPUT_TOKENS_PER_CALL = 200
# Gemini usage tracking file
# NOTE(review): not referenced by any function visible in this file —
# confirm whether it is still needed.
GEMINI_USAGE_FILE = "/var/log/mc_gemini_usage.json"
# Cost threshold for printing ($)
# A receipt is printed once the estimated cost has grown by at least this
# much since the last print (see should_print()).
COST_PRINT_THRESHOLD = 0.50
# Local JSON state file recording the estimated cost at the last print.
LAST_PRINT_COST_FILE = "/var/log/mc_training_last_print_cost.json"
# Remote paths (on CT 644 via pve112)
# These are only ever read through remote_cmd(), never opened locally.
DEV_AUDIT_LOG = "/var/log/mc_training_audit_dev.jsonl"
PROD_AUDIT_LOG = "/var/log/mc_training_audit.jsonl"
BOT_LOG = "/var/log/prayer_bots.log"
DEV_BUG_LOG = "/var/log/mc_aigod_dev_bug.log"
def remote_cmd(cmd, timeout=10):
    """Run a command on CT 644 via pve112 and return its stripped stdout.

    The text ``pct exec 644 -- <cmd>`` is handed to ``ssh pve112`` as one
    argument, so any pipes, redirections or quotes inside *cmd* are
    interpreted by the remote login shell on pve112 and never by a local
    shell.

    Args:
        cmd: Shell command text to append after ``pct exec 644 --``.
        timeout: Seconds to wait for ssh to finish before giving up.

    Returns:
        The command's stdout with surrounding whitespace removed. If the
        process cannot be started or times out, a string beginning with
        ``"ERROR: "`` is returned instead of raising. The exit status and
        stderr are not inspected.
    """
    # An argv list delivers exactly the same single string to ssh that the
    # former `ssh pve112 "..."` shell line did, but without a local shell
    # that could mangle `"`, `$` or backticks appearing in cmd.
    argv = ["ssh", "pve112", f"pct exec 644 -- {cmd}"]
    try:
        result = subprocess.run(
            argv,
            capture_output=True, text=True, timeout=timeout,
        )
    except Exception as e:
        # Deliberately best-effort: callers treat any non-numeric /
        # non-"active" reply as "no data" rather than aborting the report.
        return f"ERROR: {e}"
    return result.stdout.strip()
def _parse_line_count(raw):
    """Convert the text reply of a remote ``wc -l`` into an int, or 0."""
    # remote_cmd() may hand back "" or an "ERROR: ..." string.
    if not raw.isdigit():
        return 0
    try:
        return int(raw)
    except ValueError:
        # str.isdigit() accepts a few characters (e.g. superscript digits)
        # that int() rejects.
        return 0


def get_audit_stats():
    """Get training audit log stats from both servers.

    Counts the lines of the dev audit log, the prod audit log and the dev
    bug log on the remote container, in that order.

    Returns:
        A tuple ``(dev_count, prod_count, bug_count)`` of ints. A count is 0
        whenever the remote reply is not a plain non-negative number (file
        missing, ssh failure, timeout).
    """
    dev_count, prod_count, bug_count = (
        _parse_line_count(
            remote_cmd(f"wc -l {log_path} 2>/dev/null | cut -d' ' -f1")
        )
        for log_path in (DEV_AUDIT_LOG, PROD_AUDIT_LOG, DEV_BUG_LOG)
    )
    return dev_count, prod_count, bug_count
def get_bot_stats():
    """Get bot activity stats.

    Returns:
        A tuple ``(num_bots, num_sends, last_line)`` where

        * ``num_bots`` is the number of remote processes whose ``ps aux``
          line contains ``prayer_bots``,
        * ``num_sends`` is the number of lines in the bot log containing
          ``SEND``,
        * ``last_line`` is the first 60 characters of the last bot-log
          line, or ``"N/A"`` when that reply is empty.

        Either count is 0 when the remote reply is not an integer.
    """
    def _as_int(raw):
        # remote_cmd() returns "" or "ERROR: ..." on failure; int() then
        # raises ValueError, which simply means "no data".
        try:
            return int(raw)
        except ValueError:
            return 0

    bot_procs = remote_cmd("ps aux | grep prayer_bots | grep -v grep | wc -l")
    bot_last = remote_cmd(f"tail -1 {BOT_LOG} 2>/dev/null")
    bot_sends = remote_cmd(f"grep -c 'SEND' {BOT_LOG} 2>/dev/null")

    last_line = bot_last[:60] if bot_last else "N/A"
    return _as_int(bot_procs), _as_int(bot_sends), last_line
def get_gemini_usage():
    """Estimate Gemini API usage and cost from the remote bot log.

    Calls and errors are counted by grepping the bot log for
    ``Gemini.*Generated`` and ``Gemini.*Error`` lines. No local file is read
    or written here. Token totals and cost are estimates: the call count
    multiplied by the fixed per-call token assumptions and the per-million
    token prices configured at module level.

    Returns:
        A dict with keys ``calls``, ``errors``, ``est_input_tokens``,
        ``est_output_tokens`` and ``est_cost_usd``. Counts fall back to 0
        when the remote reply is not an integer.
    """
    def _as_int(raw):
        # "" or "ERROR: ..." from remote_cmd() means the count is unknown.
        try:
            return int(raw)
        except ValueError:
            return 0

    # Count Gemini calls from bot log
    calls = _as_int(remote_cmd(f"grep -c 'Gemini.*Generated' {BOT_LOG} 2>/dev/null"))
    errors = _as_int(remote_cmd(f"grep -c 'Gemini.*Error' {BOT_LOG} 2>/dev/null"))

    # Estimate cost
    total_input_tokens = calls * EST_INPUT_TOKENS_PER_CALL
    total_output_tokens = calls * EST_OUTPUT_TOKENS_PER_CALL
    input_cost = (total_input_tokens / 1_000_000) * GEMINI_INPUT_COST_PER_M
    output_cost = (total_output_tokens / 1_000_000) * GEMINI_OUTPUT_COST_PER_M
    total_cost = input_cost + output_cost

    return {
        "calls": calls,
        "errors": errors,
        "est_input_tokens": total_input_tokens,
        "est_output_tokens": total_output_tokens,
        "est_cost_usd": total_cost,
    }
def get_dataset_size():
    """Get current seed dataset size.

    Counts the non-blank lines of ``data/processed/seed_dataset.jsonl``,
    located relative to the directory one level above this script's folder.

    Returns:
        The number of non-blank lines, or 0 if the file cannot be located,
        opened or decoded.
    """
    try:
        path = Path(__file__).resolve().parent.parent / "data" / "processed" / "seed_dataset.jsonl"
        # JSON text is UTF-8; do not depend on the locale's default encoding.
        with open(path, encoding="utf-8") as f:
            return sum(1 for line in f if line.strip())
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError covers
        # UnicodeDecodeError from undecodable bytes.
        return 0
def get_service_status():
    """Query ``systemctl is-active`` for each AI God service unit.

    Returns:
        A dict mapping each service name (without the ``.service`` suffix)
        to the raw text returned by the remote command, in the order
        ``mc-aigod-paper``, ``mc-aigod-dev``, ``mc-aigod``.
    """
    units = ("mc-aigod-paper", "mc-aigod-dev", "mc-aigod")
    return {
        unit: remote_cmd(f"systemctl is-active {unit}.service 2>/dev/null")
        for unit in units
    }
def build_receipt():
    """Render the status report and return the printer byte stream.

    The report is drawn on an ``escpos`` ``Dummy`` printer object and the
    accumulated ``output`` of that object is returned; this function does
    not open a connection to the physical printer. Sections appear in this
    order: header with timestamp, dataset counts, bot activity, Gemini
    usage estimate, service status, footer, paper cut.
    """
    from escpos.printer import Dummy

    stamp = datetime.now()
    p = Dummy(profile="default")
    heavy_rule = "=" * COLS + "\n"
    light_rule = "-" * COLS + "\n"

    def heading(title):
        # Section titles are bold; everything after them is regular weight.
        p.set(font='b', align='left', bold=True)
        p.text(title)
        p.set(font='b', align='left', bold=False)

    # Header
    p.set(font='b', align='center', bold=True, height=2)
    p.text("MC AI TRAINING\n")
    p.set(font='b', align='center', bold=True, height=1)
    p.text("STATUS REPORT\n")
    p.set(font='b', align='center', bold=False)
    p.text(stamp.strftime("%Y-%m-%d %H:%M") + "\n")
    p.text(heavy_rule)

    # Dataset
    dataset_size = get_dataset_size()
    heading("DATASET\n")
    p.text(f" Seed examples: {dataset_size}\n")
    dev_audit, prod_audit, bug_count = get_audit_stats()
    for row in (
        f" Dev audit log: {dev_audit}\n",
        f" Prod audit log: {prod_audit}\n",
        f" Dev bug reports: {bug_count}\n",
        f" Total unprocessed: {dev_audit + prod_audit}\n",
    ):
        p.text(row)
    p.text(light_rule)

    # Bot activity
    num_bots, num_sends, last_msg = get_bot_stats()
    heading("BOT ACTIVITY\n")
    for row in (
        f" Active bots: {num_bots}\n",
        f" Total messages: {num_sends}\n",
        f" Last: {last_msg}\n",
    ):
        p.text(row)
    p.text(light_rule)

    # Gemini API
    gemini = get_gemini_usage()
    heading("GEMINI API (flash-lite)\n")
    for row in (
        f" Calls: {gemini['calls']}\n",
        f" Errors: {gemini['errors']}\n",
        f" Est input tokens: {gemini['est_input_tokens']:,}\n",
        f" Est output tokens: {gemini['est_output_tokens']:,}\n",
    ):
        p.text(row)
    # The cost line is emphasised the same way a section title is.
    heading(f" Est cost: ${gemini['est_cost_usd']:.4f}\n")
    p.text(light_rule)

    # Services
    statuses = get_service_status()
    heading("SERVICES\n")
    for svc, status in statuses.items():
        # Anything other than the exact reply "active" is shown as DOWN.
        if status == "active":
            indicator = "OK"
        else:
            indicator = "DOWN"
        p.text(f" {svc:22} [{indicator}]\n")
    p.text(light_rule)

    # Footer
    p.set(font='b', align='center', bold=False)
    p.text("Next print in 4 hours\n")
    p.text(heavy_rule)
    p.cut()
    return p.output
def send_to_printer(raw_bytes):
    """Deliver a prepared receipt byte stream to the TM-m30 over TCP.

    Opens a connection to ``(PRINTER_IP, PRINTER_PORT)`` with a 10 second
    timeout, writes all of *raw_bytes*, and closes the connection. Socket
    errors propagate to the caller.
    """
    endpoint = (PRINTER_IP, PRINTER_PORT)
    conn = socket.create_connection(endpoint, timeout=10)
    with conn:
        conn.sendall(raw_bytes)
def get_last_print_cost():
    """Get the cumulative cost at which we last printed.

    Reads the ``"cost"`` field from the JSON state file.

    Returns:
        The stored cost as a float, or 0.0 when the file is missing,
        unreadable, not valid JSON, not a JSON object, or holds a
        non-numeric cost.
    """
    try:
        with open(LAST_PRINT_COST_FILE) as f:
            state = json.load(f)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError for a corrupt/empty file.
        return 0.0

    if not isinstance(state, dict):
        return 0.0
    try:
        # Coerce so callers can always do arithmetic on the result, even if
        # the file holds null or a quoted number.
        return float(state.get("cost", 0.0))
    except (TypeError, ValueError):
        return 0.0
def save_last_print_cost(cost):
    """Record *cost* and the current local time in the JSON state file.

    The file is overwritten with an object holding ``"cost"`` and an ISO
    formatted ``"timestamp"``. I/O errors propagate to the caller.
    """
    with open(LAST_PRINT_COST_FILE, "w") as state_file:
        record = {"cost": cost, "timestamp": datetime.now().isoformat()}
        json.dump(record, state_file)
def should_print(current_cost):
    """Tell whether enough cost has accrued since the last print.

    Returns True when *current_cost* exceeds the cost recorded at the last
    print by at least ``COST_PRINT_THRESHOLD``.
    """
    accrued_since_print = current_cost - get_last_print_cost()
    return accrued_since_print >= COST_PRINT_THRESHOLD
def main():
    """Command-line entry point.

    Flags (looked up in ``sys.argv``; ``--check`` wins over ``--dry-run``):

    * ``--check``   print the current estimated cost, the cost at the last
      print and the next threshold, then exit.
    * ``--dry-run`` gather all stats and print them to stdout instead of
      the printer, then exit.
    * ``--force``   print a receipt even if the cost threshold has not been
      reached.

    Without flags a receipt is printed only when should_print() says the
    threshold has been crossed. After a successful print the current cost
    is stored as the new baseline.
    """
    dry_run = "--dry-run" in sys.argv
    force = "--force" in sys.argv
    check_only = "--check" in sys.argv

    gemini = get_gemini_usage()
    current_cost = gemini["est_cost_usd"]

    if check_only:
        last = get_last_print_cost()
        next_at = last + COST_PRINT_THRESHOLD
        print(f"Current cost: ${current_cost:.4f}")
        print(f"Last printed at: ${last:.4f}")
        print(f"Next print at: ${next_at:.4f}")
        print(f"Will print: {'YES' if should_print(current_cost) else 'NO'}")
        return

    if dry_run:
        print("=== DRY RUN — would print: ===\n")
        dataset_size = get_dataset_size()
        dev_audit, prod_audit, bug_count = get_audit_stats()
        num_bots, num_sends, last_msg = get_bot_stats()
        statuses = get_service_status()
        print(f"Dataset: {dataset_size} seed examples")
        print(f"Dev audit: {dev_audit} entries")
        print(f"Prod audit: {prod_audit} entries")
        print(f"Bug reports: {bug_count}")
        print(f"Bots: {num_bots} active, {num_sends} messages sent")
        print(f"Gemini: {gemini['calls']} calls, {gemini['errors']} errors, ${gemini['est_cost_usd']:.4f}")
        print(f"Services: {statuses}")
        print(f"Threshold: ${COST_PRINT_THRESHOLD} (would print: {should_print(current_cost) or force})")
        return

    if not force and not should_print(current_cost):
        print(f"[{datetime.now().isoformat()}] Cost ${current_cost:.4f} — threshold not reached (next at ${get_last_print_cost() + COST_PRINT_THRESHOLD:.4f})")
        return

    receipt = build_receipt()

    # Sending and state saving are handled separately so that a failed
    # state write after a successful print is not reported as a print
    # failure.
    try:
        send_to_printer(receipt)
    except Exception as e:
        print(f"[{datetime.now().isoformat()}] Print failed: {e}")
        return

    try:
        save_last_print_cost(current_cost)
    except Exception as e:
        # The receipt is already on paper; without the saved baseline the
        # next run may print again, so say so explicitly.
        print(f"[{datetime.now().isoformat()}] Receipt printed at ${current_cost:.4f}, but saving print state failed: {e}")
        return

    print(f"[{datetime.now().isoformat()}] Receipt printed at ${current_cost:.4f}")
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()