From de14f4a1c83a288d84bfcb37c03aaa1315e54335 Mon Sep 17 00:00:00 2001
From: Seth Freiberg
Date: Fri, 20 Mar 2026 00:54:29 -0400
Subject: [PATCH] 3-GPU overnight self-play: 3090 Ti + 2080 Ti + RTX 4000
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Round-robin load balancing across three Ollama instances:
- 141:11434 (RTX 3090 Ti 24GB)
- 141:11435 (RTX 2080 Ti 11GB) — new second instance
- 179:11434 (RTX 4000 16GB)

Each tier cycles to a different GPU. 3x throughput overnight.
Cycles: Tier 1 drills → Tier 2 self-critique → Tier 3 adversarial → repeat

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 training/scripts/overnight_selfplay.sh | 116 +++++++++++++++++++++++++
 1 file changed, 116 insertions(+)
 create mode 100755 training/scripts/overnight_selfplay.sh

diff --git a/training/scripts/overnight_selfplay.sh b/training/scripts/overnight_selfplay.sh
new file mode 100755
index 0000000..daf7bd1
--- /dev/null
+++ b/training/scripts/overnight_selfplay.sh
@@ -0,0 +1,116 @@
+#!/bin/bash
+# overnight_selfplay.sh — Run 3-tier self-play overnight on dev server
+# Cycles: Tier 1 (1hr) → Tier 2 (1hr) → Tier 3 (1hr) → repeat
+#
+# Usage: nohup ./overnight_selfplay.sh [MODEL] [RCON_HOST] [RCON_PORT] \
+#            > /var/log/selfplay_overnight.log 2>&1 &
+#
+# Kill with: pkill -f overnight_selfplay
+# SIGINT/SIGTERM are trapped so the in-flight self_play.py run is stopped too;
+# without the trap, pkill would end this wrapper but leave python3 running.
+
+set -u  # a typo'd variable name should abort, not expand to ""
+
+MODEL="${1:-mortdecai-v4}"
+RCON_HOST="${2:-192.168.0.244}"
+RCON_PORT="${3:-25578}"
+ROUNDS_PER_TIER=20  # ~20 rounds per hour at ~3min/round
+FAIL_BACKOFF_SECS=60  # pause after a failed tier so a dead backend cannot hot-loop
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+SELF_PLAY="$SCRIPT_DIR/self_play.py"
+OUTPUT_FILE="$SCRIPT_DIR/../../data/processed/self_play.jsonl"
+TIER_NAMES=("" "Command Drills" "Self-Critique" "Adversarial")  # indexed by tier number
+
+# Load balance between three Ollama instances / GPUs
+OLLAMA_URLS=("http://192.168.0.141:11434" "http://192.168.0.141:11435" "http://192.168.0.179:11434")
+# 141:11434 = RTX 3090 Ti (24GB)
+# 141:11435 = RTX 2080 Ti (11GB)
+# 179:11434 = RTX 4000 (16GB)
+URL_IDX=0
+OLLAMA_URL=""
+CHILD_PID=""
+
+# Pick the next Ollama endpoint round-robin and leave it in OLLAMA_URL.
+# NOTE: tiers run one at a time, so only the selected instance is busy at any
+# moment; with three URLs and three tiers, each tier always lands on the same
+# instance.
+next_ollama() {
+    OLLAMA_URL="${OLLAMA_URLS[$URL_IDX]}"
+    URL_IDX=$(( (URL_IDX + 1) % ${#OLLAMA_URLS[@]} ))
+}
+
+# Run "$@" as a background child and wait for it. A trapped signal interrupts
+# `wait` immediately, whereas a foreground command would delay the trap until
+# it exits. Returns the child's exit status.
+run_child() {
+    local status
+    "$@" &
+    CHILD_PID=$!
+    wait "$CHILD_PID"
+    status=$?
+    CHILD_PID=""
+    return "$status"
+}
+
+# Signal handler: stop the in-flight child, if any, then exit with code $1.
+shutdown() {
+    echo "=== Stopping self-play — $(date) ==="
+    if [[ -n "$CHILD_PID" ]]; then
+        kill "$CHILD_PID" 2>/dev/null
+    fi
+    exit "$1"
+}
+trap 'shutdown 130' INT
+trap 'shutdown 143' TERM
+
+if [[ ! -f "$SELF_PLAY" ]]; then
+    echo "ERROR: $SELF_PLAY not found" >&2
+    exit 1
+fi
+
+echo "=== Mortdecai Overnight Self-Play ==="
+echo "Model: $MODEL"
+echo "Ollama instances: ${OLLAMA_URLS[*]}"
+echo "RCON: $RCON_HOST:$RCON_PORT"
+echo "Rounds per tier: $ROUNDS_PER_TIER"
+echo "Cycle: Tier 1 (drills) → Tier 2 (self-critique) → Tier 3 (adversarial) → repeat"
+echo "Started: $(date)"
+echo "============================================"
+
+CYCLE=0
+while true; do
+    CYCLE=$((CYCLE + 1))
+    echo ""
+    echo "=== CYCLE $CYCLE — $(date) ==="
+
+    for TIER in 1 2 3; do
+        next_ollama
+        echo "--- Tier $TIER: ${TIER_NAMES[$TIER]} (using $OLLAMA_URL) ---"
+        run_child python3 "$SELF_PLAY" \
+            --model "$MODEL" \
+            --ollama-url "$OLLAMA_URL" \
+            --rcon-host "$RCON_HOST" \
+            --rcon-port "$RCON_PORT" \
+            --tier "$TIER" \
+            --rounds "$ROUNDS_PER_TIER" \
+            --output "$OUTPUT_FILE"
+        STATUS=$?
+        if (( STATUS == 0 )); then
+            echo "Tier $TIER done: $(date)"
+        else
+            # Without a pause, an unreachable Ollama/RCON host would make this
+            # loop respawn python3 continuously and flood the log.
+            echo "Tier $TIER FAILED (exit $STATUS) on $OLLAMA_URL: $(date); retrying in ${FAIL_BACKOFF_SECS}s" >&2
+            run_child sleep "$FAIL_BACKOFF_SECS"
+        fi
+    done
+
+    echo "=== CYCLE $CYCLE COMPLETE — $(date) ==="
+    # Test for the file first: in `wc -l < f 2>/dev/null` the input redirection
+    # fails before stderr is silenced, so the shell's error still reaches the log.
+    EXAMPLES=0
+    if [[ -f "$OUTPUT_FILE" ]]; then
+        EXAMPLES=$(wc -l < "$OUTPUT_FILE")
+    fi
+    echo "Self-play data: $EXAMPLES examples"
+done