#!/bin/bash
# overnight_selfplay.sh — Run 3-tier self-play overnight, all three GPUs in parallel.
# Each GPU runs a different tier simultaneously, then they rotate.
#
# Usage: nohup ./overnight_selfplay.sh [MODEL] [RCON_HOST] [RCON_PORT] \
#            > /var/log/selfplay_overnight.log 2>&1 &
# Kill with: pkill -f overnight_selfplay ; pkill -f self_play.py
#
# Positional arguments (all optional):
#   $1  model name passed to self_play.py   (default: mortdecai-v4)
#   $2  RCON host                           (default: 192.168.0.244)
#   $3  RCON port                           (default: 25578)
#
# Optional environment overrides:
#   ROUNDS_PER_TIER       value passed as --rounds to each tier   (default: 50)
#   FAILURE_BACKOFF_SECS  pause after a cycle in which every tier
#                         failed, so a dead backend cannot make
#                         the loop spin and flood the log         (default: 60)

# Catch misspelled variable names. `set -e` is deliberately not used: this is
# a long-running supervisor loop and one failing tier must not end the run.
set -u

die() {
  printf 'overnight_selfplay: %s\n' "$*" >&2
  exit 1
}

MODEL="${1:-mortdecai-v4}"
RCON_HOST="${2:-192.168.0.244}"
RCON_PORT="${3:-25578}"
ROUNDS_PER_TIER="${ROUNDS_PER_TIER:-50}"
FAILURE_BACKOFF_SECS="${FAILURE_BACKOFF_SECS:-60}"

SCRIPT_DIR="$(cd -- "$(dirname -- "$0")" && pwd)" \
  || die "cannot resolve script directory from '$0'"
SELF_PLAY="$SCRIPT_DIR/self_play.py"
OUTPUT="$SCRIPT_DIR/../../data/processed/self_play.jsonl"

# Three GPUs
GPU1="http://192.168.0.141:11434"  # RTX 3090 Ti
GPU2="http://192.168.0.141:11435"  # RTX 2080 Ti
GPU3="http://192.168.0.179:11434"  # RTX 4000

# Pre-flight: fail once, loudly, instead of looping forever on a broken setup.
command -v python3 >/dev/null 2>&1 || die "python3 not found in PATH"
[[ -f "$SELF_PLAY" ]] || die "self_play.py not found at $SELF_PLAY"
mkdir -p -- "$(dirname -- "$OUTPUT")" \
  || die "cannot create output directory for $OUTPUT"

echo "=== Mortdecai Overnight Self-Play (3 GPU Parallel) ==="
echo "Model: $MODEL"
echo "GPU1: $GPU1 (RTX 3090 Ti)"
echo "GPU2: $GPU2 (RTX 2080 Ti)"
echo "GPU3: $GPU3 (RTX 4000)"
echo "RCON: $RCON_HOST:$RCON_PORT"
echo "Rounds per tier: $ROUNDS_PER_TIER"
echo "All three tiers run simultaneously, rotating GPUs each cycle"
echo "Started: $(date)"
echo "============================================"

#######################################
# Print the number of lines currently in the output file.
# Globals:   OUTPUT (read)
# Outputs:   line count on stdout; 0 when the file does not exist yet
#######################################
count_examples() {
  # Test for the file explicitly: in `wc -l < file 2>/dev/null` the failing
  # `<` redirection is reported before `2>/dev/null` takes effect.
  if [[ -f "$OUTPUT" ]]; then
    wc -l < "$OUTPUT"
  else
    echo 0
  fi
}

#######################################
# Run one self-play tier against one Ollama endpoint, prefixing every output
# line with the tier number.
# Globals:   SELF_PLAY, MODEL, RCON_HOST, RCON_PORT, ROUNDS_PER_TIER,
#            OUTPUT (all read)
# Arguments: $1 - tier number (1, 2 or 3)
#            $2 - Ollama base URL to use for this tier
# Outputs:   prefixed child output plus start/end markers on stdout
# Returns:   exit status of self_play.py
#######################################
run_tier() {
  local tier=$1
  local gpu=$2
  local tier_names=("" "Drills" "Self-Critique" "Adversarial")
  local line
  local status

  echo "[Tier $tier - ${tier_names[$tier]}] Starting on $gpu at $(date)"

  # PYTHONUNBUFFERED keeps the child's output flowing line by line through the
  # pipe instead of arriving in large delayed blocks.
  # NOTE(review): all three tiers are given the same --output file; whether
  # concurrent writes to it are safe depends on self_play.py — confirm.
  PYTHONUNBUFFERED=1 python3 "$SELF_PLAY" \
    --model "$MODEL" \
    --ollama-url "$gpu" \
    --rcon-host "$RCON_HOST" \
    --rcon-port "$RCON_PORT" \
    --tier "$tier" \
    --rounds "$ROUNDS_PER_TIER" \
    --output "$OUTPUT" 2>&1 \
    | while IFS= read -r line || [[ -n "$line" ]]; do
        # IFS= and -r keep indentation and backslashes (e.g. in Python
        # tracebacks) intact; the `||` clause keeps a final unterminated line.
        printf '[Tier %s] %s\n' "$tier" "$line"
      done
  # Status of python3 (first pipeline stage), not of the prefixing loop.
  status=${PIPESTATUS[0]}

  if (( status == 0 )); then
    echo "[Tier $tier - ${tier_names[$tier]}] Done at $(date)"
  else
    echo "[Tier $tier - ${tier_names[$tier]}] Failed (exit $status) at $(date)"
  fi
  return "$status"
}

CYCLE=0
while true; do
  CYCLE=$((CYCLE + 1))
  echo ""
  echo "=== CYCLE $CYCLE — $(date) ==="
  echo "Examples so far: $(count_examples)"

  # Rotate GPU assignments each cycle
  case "$((CYCLE % 3))" in
    0) T1_GPU=$GPU1; T2_GPU=$GPU2; T3_GPU=$GPU3 ;;
    1) T1_GPU=$GPU2; T2_GPU=$GPU3; T3_GPU=$GPU1 ;;
    2) T1_GPU=$GPU3; T2_GPU=$GPU1; T3_GPU=$GPU2 ;;
  esac

  # Run all three tiers in parallel
  run_tier 1 "$T1_GPU" &
  PID1=$!
  run_tier 2 "$T2_GPU" &
  PID2=$!
  run_tier 3 "$T3_GPU" &
  PID3=$!

  # Wait for all three to finish, counting how many reported failure
  FAILED=0
  for pid in "$PID1" "$PID2" "$PID3"; do
    wait "$pid" || FAILED=$((FAILED + 1))
  done

  echo "=== CYCLE $CYCLE COMPLETE — $(date) ==="
  echo "Total examples: $(count_examples)"
  echo ""

  if (( FAILED > 0 )); then
    echo "WARNING: $FAILED of 3 tiers failed in cycle $CYCLE" >&2
  fi
  if (( FAILED == 3 )); then
    # Nothing succeeded: back off instead of restarting immediately.
    echo "All tiers failed; sleeping ${FAILURE_BACKOFF_SECS}s before next cycle" >&2
    sleep "$FAILURE_BACKOFF_SECS"
  fi
done