Parallel 3-GPU self-play: all tiers run simultaneously
Each cycle runs all three tiers at the same time on different GPUs:
- Tier 1 (drills) on GPU A
- Tier 2 (self-critique) on GPU B
- Tier 3 (adversarial) on GPU C

GPU assignments rotate each cycle for even wear. 3x throughput vs sequential. RCON handles concurrent commands.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,59 +1,77 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
# overnight_selfplay.sh — Run 3-tier self-play overnight on dev server
|
# overnight_selfplay.sh — Run 3-tier self-play overnight, all three GPUs in parallel
|
||||||
# Cycles: Tier 1 (1hr) → Tier 2 (1hr) → Tier 3 (1hr) → repeat
|
# Each GPU runs a different tier simultaneously, then they rotate.
|
||||||
#
|
#
|
||||||
# Usage: nohup ./overnight_selfplay.sh > /var/log/selfplay_overnight.log 2>&1 &
|
# Usage: nohup ./overnight_selfplay.sh > /var/log/selfplay_overnight.log 2>&1 &
|
||||||
#
|
# Kill with: pkill -f overnight_selfplay ; pkill -f self_play.py
|
||||||
# Kill with: pkill -f overnight_selfplay
|
|
||||||
|
|
||||||
MODEL="${1:-mortdecai-v4}"
|
MODEL="${1:-mortdecai-v4}"
|
||||||
RCON_HOST="${2:-192.168.0.244}"
|
RCON_HOST="${2:-192.168.0.244}"
|
||||||
RCON_PORT="${3:-25578}"
|
RCON_PORT="${3:-25578}"
|
||||||
ROUNDS_PER_TIER=20 # ~20 rounds per hour at ~3min/round
|
ROUNDS_PER_TIER=20
|
||||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||||
|
OUTPUT="$SCRIPT_DIR/../../data/processed/self_play.jsonl"
|
||||||
|
|
||||||
# Load balance between three Ollama instances / GPUs
|
# Three GPUs
|
||||||
OLLAMA_URLS=("http://192.168.0.141:11434" "http://192.168.0.141:11435" "http://192.168.0.179:11434")
|
GPU1="http://192.168.0.141:11434" # RTX 3090 Ti
|
||||||
# 141:11434 = RTX 3090 Ti (24GB)
|
GPU2="http://192.168.0.141:11435" # RTX 2080 Ti
|
||||||
# 141:11435 = RTX 2080 Ti (11GB)
|
GPU3="http://192.168.0.179:11434" # RTX 4000
|
||||||
# 179:11434 = RTX 4000 (16GB)
|
|
||||||
URL_IDX=0
|
|
||||||
|
|
||||||
next_ollama() {
|
echo "=== Mortdecai Overnight Self-Play (3 GPU Parallel) ==="
|
||||||
OLLAMA_URL="${OLLAMA_URLS[$URL_IDX]}"
|
|
||||||
URL_IDX=$(( (URL_IDX + 1) % ${#OLLAMA_URLS[@]} ))
|
|
||||||
}
|
|
||||||
|
|
||||||
echo "=== Mortdecai Overnight Self-Play ==="
|
|
||||||
echo "Model: $MODEL"
|
echo "Model: $MODEL"
|
||||||
echo "Ollama instances: ${OLLAMA_URLS[*]}"
|
echo "GPU1: $GPU1 (RTX 3090 Ti)"
|
||||||
|
echo "GPU2: $GPU2 (RTX 2080 Ti)"
|
||||||
|
echo "GPU3: $GPU3 (RTX 4000)"
|
||||||
echo "RCON: $RCON_HOST:$RCON_PORT"
|
echo "RCON: $RCON_HOST:$RCON_PORT"
|
||||||
echo "Rounds per tier: $ROUNDS_PER_TIER"
|
echo "Rounds per tier: $ROUNDS_PER_TIER"
|
||||||
echo "Cycle: Tier 1 (drills) → Tier 2 (self-critique) → Tier 3 (adversarial) → repeat"
|
echo "All three tiers run simultaneously, rotating GPUs each cycle"
|
||||||
echo "Started: $(date)"
|
echo "Started: $(date)"
|
||||||
echo "============================================"
|
echo "============================================"
|
||||||
|
|
||||||
|
#######################################
# Run one self-play tier against a single Ollama endpoint and relay the
# output of self_play.py line by line, each line prefixed with "[Tier N]"
# so the three concurrently running tiers can be told apart in a shared log.
# Globals:   SCRIPT_DIR, MODEL, RCON_HOST, RCON_PORT, ROUNDS_PER_TIER,
#            OUTPUT (all read)
# Arguments: $1 - tier number (index into tier_names: 1, 2 or 3)
#            $2 - Ollama base URL passed to --ollama-url
# Outputs:   start/done banners and prefixed child output on stdout;
#            a failure notice on stderr when self_play.py exits non-zero
# Returns:   the exit status of self_play.py
#######################################
run_tier() {
  local tier=$1
  local gpu=$2
  local tier_names=("" "Drills" "Self-Critique" "Adversarial")
  local line rc

  echo "[Tier $tier - ${tier_names[$tier]}] Starting on $gpu at $(date)"

  # IFS= and -r keep whitespace and backslashes of the child output intact;
  # the `|| [[ -n ... ]]` clause keeps a final line that lacks a newline.
  python3 "$SCRIPT_DIR/self_play.py" \
    --model "$MODEL" \
    --ollama-url "$gpu" \
    --rcon-host "$RCON_HOST" \
    --rcon-port "$RCON_PORT" \
    --tier "$tier" \
    --rounds "$ROUNDS_PER_TIER" \
    --output "$OUTPUT" 2>&1 \
    | while IFS= read -r line || [[ -n "$line" ]]; do
        printf '[Tier %s] %s\n' "$tier" "$line"
      done
  # Status of the python stage, not of the relay loop; must be read
  # immediately after the pipeline before another command overwrites it.
  rc=${PIPESTATUS[0]}

  if (( rc != 0 )); then
    printf '[Tier %s] self_play.py exited with status %s\n' "$tier" "$rc" >&2
  fi

  echo "[Tier $tier - ${tier_names[$tier]}] Done at $(date)"
  return "$rc"
}
|
||||||
|
|
||||||
CYCLE=0
|
CYCLE=0
|
||||||
while true; do
|
while true; do
|
||||||
CYCLE=$((CYCLE + 1))
|
CYCLE=$((CYCLE + 1))
|
||||||
echo ""
|
echo ""
|
||||||
echo "=== CYCLE $CYCLE — $(date) ==="
|
echo "=== CYCLE $CYCLE — $(date) ==="
|
||||||
|
echo "Examples so far: $(wc -l < "$OUTPUT" 2>/dev/null || echo 0)"
|
||||||
|
|
||||||
for TIER in 1 2 3; do
|
# Rotate GPU assignments each cycle
|
||||||
next_ollama
|
case $((CYCLE % 3)) in
|
||||||
TIER_NAMES=("" "Command Drills" "Self-Critique" "Adversarial")
|
0) T1_GPU=$GPU1; T2_GPU=$GPU2; T3_GPU=$GPU3 ;;
|
||||||
echo "--- Tier $TIER: ${TIER_NAMES[$TIER]} (using $OLLAMA_URL) ---"
|
1) T1_GPU=$GPU2; T2_GPU=$GPU3; T3_GPU=$GPU1 ;;
|
||||||
python3 "$SCRIPT_DIR/self_play.py" \
|
2) T1_GPU=$GPU3; T2_GPU=$GPU1; T3_GPU=$GPU2 ;;
|
||||||
--model "$MODEL" \
|
esac
|
||||||
--ollama-url "$OLLAMA_URL" \
|
|
||||||
--rcon-host "$RCON_HOST" \
|
# Run all three tiers in parallel
|
||||||
--rcon-port "$RCON_PORT" \
|
run_tier 1 "$T1_GPU" &
|
||||||
--tier "$TIER" \
|
PID1=$!
|
||||||
--rounds "$ROUNDS_PER_TIER" \
|
run_tier 2 "$T2_GPU" &
|
||||||
--output "$SCRIPT_DIR/../../data/processed/self_play.jsonl"
|
PID2=$!
|
||||||
echo "Tier $TIER done: $(date)"
|
run_tier 3 "$T3_GPU" &
|
||||||
done
|
PID3=$!
|
||||||
|
|
||||||
|
# Wait for all three to finish
|
||||||
|
wait $PID1 $PID2 $PID3
|
||||||
|
|
||||||
echo "=== CYCLE $CYCLE COMPLETE — $(date) ==="
|
echo "=== CYCLE $CYCLE COMPLETE — $(date) ==="
|
||||||
echo "Self-play data: $(wc -l < "$SCRIPT_DIR/../../data/processed/self_play.jsonl" 2>/dev/null || echo 0) examples"
|
echo "Total examples: $(wc -l < "$OUTPUT" 2>/dev/null || echo 0)"
|
||||||
|
echo ""
|
||||||
done
|
done
|
||||||
|
|||||||
Reference in New Issue
Block a user