diff --git a/training/scripts/overnight_selfplay.sh b/training/scripts/overnight_selfplay.sh
index daf7bd1..78e05a1 100755
--- a/training/scripts/overnight_selfplay.sh
+++ b/training/scripts/overnight_selfplay.sh
@@ -1,59 +1,89 @@
 #!/bin/bash
-# overnight_selfplay.sh — Run 3-tier self-play overnight on dev server
-# Cycles: Tier 1 (1hr) → Tier 2 (1hr) → Tier 3 (1hr) → repeat
+# overnight_selfplay.sh — Run 3-tier self-play overnight, all three GPUs in parallel
+# Each GPU runs a different tier simultaneously, then they rotate.
 #
 # Usage: nohup ./overnight_selfplay.sh > /var/log/selfplay_overnight.log 2>&1 &
-#
-# Kill with: pkill -f overnight_selfplay
+# Kill with: pkill -f overnight_selfplay ; pkill -f self_play.py
 
 MODEL="${1:-mortdecai-v4}"
 RCON_HOST="${2:-192.168.0.244}"
 RCON_PORT="${3:-25578}"
-ROUNDS_PER_TIER=20 # ~20 rounds per hour at ~3min/round
+ROUNDS_PER_TIER=20
 SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+OUTPUT="$SCRIPT_DIR/../../data/processed/self_play.jsonl"
 
-# Load balance between three Ollama instances / GPUs
-OLLAMA_URLS=("http://192.168.0.141:11434" "http://192.168.0.141:11435" "http://192.168.0.179:11434")
-# 141:11434 = RTX 3090 Ti (24GB)
-# 141:11435 = RTX 2080 Ti (11GB)
-# 179:11434 = RTX 4000 (16GB)
-URL_IDX=0
+# Three GPUs
+GPU1="http://192.168.0.141:11434" # RTX 3090 Ti
+GPU2="http://192.168.0.141:11435" # RTX 2080 Ti
+GPU3="http://192.168.0.179:11434" # RTX 4000
 
-next_ollama() {
-    OLLAMA_URL="${OLLAMA_URLS[$URL_IDX]}"
-    URL_IDX=$(( (URL_IDX + 1) % ${#OLLAMA_URLS[@]} ))
-}
-
-echo "=== Mortdecai Overnight Self-Play ==="
+echo "=== Mortdecai Overnight Self-Play (3 GPU Parallel) ==="
 echo "Model: $MODEL"
-echo "Ollama instances: ${OLLAMA_URLS[*]}"
+echo "GPU1: $GPU1 (RTX 3090 Ti)"
+echo "GPU2: $GPU2 (RTX 2080 Ti)"
+echo "GPU3: $GPU3 (RTX 4000)"
 echo "RCON: $RCON_HOST:$RCON_PORT"
 echo "Rounds per tier: $ROUNDS_PER_TIER"
-echo "Cycle: Tier 1 (drills) → Tier 2 (self-critique) → Tier 3 (adversarial) → repeat"
+echo "All three tiers run simultaneously, rotating GPUs each cycle"
 echo "Started: $(date)"
 echo "============================================"
 
+# run_tier TIER GPU_URL
+#   Run self_play.py for one tier against one Ollama URL and prefix every
+#   output line with "[Tier N]" so the three interleaved streams in the
+#   shared log stay attributable.
+# Globals (read): SCRIPT_DIR MODEL RCON_HOST RCON_PORT ROUNDS_PER_TIER OUTPUT
+# NOTE(review): all tiers are handed the same --output path while running
+#   concurrently; confirm self_play.py's writes cannot interleave.
+run_tier() {
+    local tier=$1
+    local gpu=$2
+    local tier_names=("" "Drills" "Self-Critique" "Adversarial")
+    echo "[Tier $tier - ${tier_names[$tier]}] Starting on $gpu at $(date)"
+    # -u: unbuffered stdout; otherwise Python block-buffers when piped and
+    # tier output reaches the log in late bursts.
+    python3 -u "$SCRIPT_DIR/self_play.py" \
+        --model "$MODEL" \
+        --ollama-url "$gpu" \
+        --rcon-host "$RCON_HOST" \
+        --rcon-port "$RCON_PORT" \
+        --tier "$tier" \
+        --rounds "$ROUNDS_PER_TIER" \
+        --output "$OUTPUT" 2>&1 | while IFS= read -r line; do
+        # IFS= / -r keep leading whitespace and backslashes intact.
+        echo "[Tier $tier] $line"
+    done
+    echo "[Tier $tier - ${tier_names[$tier]}] Done at $(date)"
+}
+
 CYCLE=0
 while true; do
     CYCLE=$((CYCLE + 1))
     echo ""
     echo "=== CYCLE $CYCLE — $(date) ==="
+    # 2>/dev/null precedes the input redirect: redirections apply left to
+    # right, so this also silences the shell's error when $OUTPUT is missing.
+    echo "Examples so far: $(wc -l 2>/dev/null < "$OUTPUT" || echo 0)"
 
-    for TIER in 1 2 3; do
-        next_ollama
-        TIER_NAMES=("" "Command Drills" "Self-Critique" "Adversarial")
-        echo "--- Tier $TIER: ${TIER_NAMES[$TIER]} (using $OLLAMA_URL) ---"
-        python3 "$SCRIPT_DIR/self_play.py" \
-            --model "$MODEL" \
-            --ollama-url "$OLLAMA_URL" \
-            --rcon-host "$RCON_HOST" \
-            --rcon-port "$RCON_PORT" \
-            --tier "$TIER" \
-            --rounds "$ROUNDS_PER_TIER" \
-            --output "$SCRIPT_DIR/../../data/processed/self_play.jsonl"
-        echo "Tier $TIER done: $(date)"
-    done
+    # Rotate GPU assignments each cycle
+    case $((CYCLE % 3)) in
+        0) T1_GPU=$GPU1; T2_GPU=$GPU2; T3_GPU=$GPU3 ;;
+        1) T1_GPU=$GPU2; T2_GPU=$GPU3; T3_GPU=$GPU1 ;;
+        2) T1_GPU=$GPU3; T2_GPU=$GPU1; T3_GPU=$GPU2 ;;
+    esac
+
+    # Run all three tiers in parallel
+    run_tier 1 "$T1_GPU" &
+    PID1=$!
+    run_tier 2 "$T2_GPU" &
+    PID2=$!
+    run_tier 3 "$T3_GPU" &
+    PID3=$!
+
+    # Wait for all three to finish
+    wait "$PID1" "$PID2" "$PID3"
 
     echo "=== CYCLE $CYCLE COMPLETE — $(date) ==="
-    echo "Self-play data: $(wc -l < "$SCRIPT_DIR/../../data/processed/self_play.jsonl" 2>/dev/null || echo 0) examples"
+    echo "Total examples: $(wc -l 2>/dev/null < "$OUTPUT" || echo 0)"
+    echo ""
 done