feat(bot): self-play harness with Casual and random baselines
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+278
-274
File diff suppressed because it is too large
Load Diff
+3
-1
@@ -12,9 +12,11 @@
|
|||||||
"test": "pnpm -r test",
|
"test": "pnpm -r test",
|
||||||
"dev:server": "pnpm --filter @blind-chess/server dev",
|
"dev:server": "pnpm --filter @blind-chess/server dev",
|
||||||
"dev:client": "pnpm --filter @blind-chess/client dev",
|
"dev:client": "pnpm --filter @blind-chess/client dev",
|
||||||
"typecheck": "pnpm -r typecheck"
|
"typecheck": "pnpm -r typecheck",
|
||||||
|
"selfplay": "tsx scripts/selfplay.ts"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
|
"tsx": "^4.21.0",
|
||||||
"typescript": "^5.6.0",
|
"typescript": "^5.6.0",
|
||||||
"vitest": "^3.0.0"
|
"vitest": "^3.0.0"
|
||||||
}
|
}
|
||||||
|
|||||||
Generated
+3
@@ -8,6 +8,9 @@ importers:
|
|||||||
|
|
||||||
.:
|
.:
|
||||||
devDependencies:
|
devDependencies:
|
||||||
|
tsx:
|
||||||
|
specifier: ^4.21.0
|
||||||
|
version: 4.21.0
|
||||||
typescript:
|
typescript:
|
||||||
specifier: ^5.6.0
|
specifier: ^5.6.0
|
||||||
version: 5.9.3
|
version: 5.9.3
|
||||||
|
|||||||
@@ -0,0 +1,195 @@
|
|||||||
|
#!/usr/bin/env tsx
|
||||||
|
/**
|
||||||
|
* Self-play harness for the Casual bot.
|
||||||
|
*
|
||||||
|
* Runs N games in-process (no HTTP). Reports stats and optionally writes a
|
||||||
|
* transcript per game. Supports CasualBrain on either color and a
|
||||||
|
* RandomBrain baseline for measuring Casual's strength.
|
||||||
|
*
|
||||||
|
* Usage:
|
||||||
|
* pnpm selfplay --games 100 --mode vanilla
|
||||||
|
* pnpm selfplay --white casual --black random --games 100 --mode vanilla
|
||||||
|
* pnpm selfplay --white random --black casual --games 100 --mode vanilla
|
||||||
|
* pnpm selfplay --games 50 --mode blind --transcripts
|
||||||
|
* pnpm selfplay --games 10 --seed 42
|
||||||
|
*/
|
||||||
|
import { mkdirSync, writeFileSync } from 'node:fs';
|
||||||
|
import { resolve } from 'node:path';
|
||||||
|
import { CasualBrain, BotDriver } from '../packages/server/src/bot/index.js';
|
||||||
|
import type { Brain, BrainAction, BrainInitArgs, BrainInput }
|
||||||
|
from '../packages/server/src/bot/brain.js';
|
||||||
|
import { createGame } from '../packages/server/src/games.js';
|
||||||
|
|
||||||
|
/** Brain implementations selectable with `--white` / `--black`. */
const BRAIN_KINDS = ['casual', 'random'] as const;

/** Game modes selectable with `--mode`. */
const GAME_MODES = ['blind', 'vanilla'] as const;

/** Parsed command-line options for one self-play run. */
interface Args {
  /** Brain that plays White. */
  white: 'casual' | 'random';
  /** Brain that plays Black. */
  black: 'casual' | 'random';
  /** Number of games to play. */
  games: number;
  /** Game mode handed to `createGame`. */
  mode: 'blind' | 'vanilla';
  /** Base seed; per-game brain seeds are derived from it. */
  seed: number;
  /** When true, one transcript file is written per game. */
  transcripts: boolean;
  /** Ply cap after which a game is reported as `maxply`. */
  maxPly: number;
}

/** Returns the value following a flag, or throws when the flag is the last token. */
function requireValue(argv: readonly string[], index: number, flag: string): string {
  const value = argv[index];
  if (value === undefined) throw new Error(`${flag} requires a value`);
  return value;
}

/** Returns `value` narrowed to one of `allowed`, or throws naming the valid choices. */
function parseChoice<T extends string>(flag: string, value: string, allowed: readonly T[]): T {
  const match = allowed.find((choice) => choice === value);
  if (match === undefined) {
    throw new Error(`${flag} must be one of ${allowed.join('|')}, got '${value}'`);
  }
  return match;
}

/** Parses a base-10 integer strictly (no trailing junk), optionally enforcing a lower bound. */
function parseInteger(flag: string, value: string, min?: number): number {
  if (!/^[+-]?\d+$/.test(value)) {
    throw new Error(`${flag} expects an integer, got '${value}'`);
  }
  const parsed = Number(value);
  if (!Number.isSafeInteger(parsed)) {
    throw new Error(`${flag} is out of range: '${value}'`);
  }
  if (min !== undefined && parsed < min) {
    throw new Error(`${flag} must be >= ${min}, got ${parsed}`);
  }
  return parsed;
}

/**
 * Parses the self-play command line.
 *
 * Defaults: casual vs. casual, 10 games, blind mode, seed 1, no transcripts,
 * 400-ply cap. `--help` / `-h` prints usage and exits with status 0.
 *
 * @param argv Argument tokens to parse; defaults to `process.argv.slice(2)`.
 * @returns The populated options.
 * @throws Error when a flag is missing its value, a numeric flag is not a
 *   valid integer (or is negative for `--games` / `--max-ply`), or a choice
 *   flag receives an unknown value.
 */
function parseArgs(argv: readonly string[] = process.argv.slice(2)): Args {
  const args: Args = {
    white: 'casual', black: 'casual',
    games: 10, mode: 'blind', seed: 1, transcripts: false, maxPly: 400,
  };
  for (let i = 0; i < argv.length; i++) {
    const flag = argv[i]!;
    switch (flag) {
      case '--white':
        args.white = parseChoice(flag, requireValue(argv, ++i, flag), BRAIN_KINDS);
        break;
      case '--black':
        args.black = parseChoice(flag, requireValue(argv, ++i, flag), BRAIN_KINDS);
        break;
      case '--games':
        args.games = parseInteger(flag, requireValue(argv, ++i, flag), 0);
        break;
      case '--mode':
        args.mode = parseChoice(flag, requireValue(argv, ++i, flag), GAME_MODES);
        break;
      case '--seed':
        args.seed = parseInteger(flag, requireValue(argv, ++i, flag));
        break;
      case '--max-ply':
        args.maxPly = parseInteger(flag, requireValue(argv, ++i, flag), 0);
        break;
      case '--transcripts':
        args.transcripts = true;
        break;
      case '--':
        // Argument separator that a package-manager script runner may forward; nothing to do.
        break;
      case '--help':
      case '-h':
        console.log('Usage: pnpm selfplay [--white casual|random] [--black casual|random]');
        console.log(' [--games N] [--mode blind|vanilla]');
        console.log(' [--seed N] [--max-ply N] [--transcripts]');
        process.exit(0);
      default:
        // Still non-fatal, but a typo such as `--game 100` no longer goes unnoticed.
        console.warn(`selfplay: ignoring unrecognized argument '${flag}'`);
    }
  }
  return args;
}
|
||||||
|
|
||||||
|
/**
 * Baseline brain that commits a uniformly chosen legal candidate.
 *
 * Randomness comes from a small seeded 32-bit generator, so two instances
 * built with the same seed produce the same sequence of picks.
 */
class RandomBrain implements Brain {
  /** Generator state, kept as an unsigned 32-bit integer. */
  private state: number;

  /** @param seed Initial generator state; coerced to an unsigned 32-bit integer. */
  constructor(seed: number) {
    this.state = seed >>> 0;
  }

  /** Advances the generator and returns the next value in [0, 1). */
  private rng(): number {
    this.state = (this.state + 0x6d2b79f5) >>> 0;
    const s = this.state;
    const mixed = Math.imul(s ^ (s >>> 15), s | 1);
    const folded = mixed ^ (mixed + Math.imul(mixed ^ (mixed >>> 7), mixed | 61));
    return ((folded ^ (folded >>> 14)) >>> 0) / 2 ** 32;
  }

  /** No setup is needed for the random baseline. */
  async init(_args: BrainInitArgs): Promise<void> {}

  /**
   * Picks one entry of `input.legalCandidates` at random and commits it.
   *
   * @throws Error (as a rejected promise) when there are no candidates.
   */
  async decide(input: BrainInput): Promise<BrainAction> {
    const { legalCandidates } = input;
    const count = legalCandidates.length;
    if (count === 0) throw new Error('no candidates');
    const { from, to, promotion } = legalCandidates[Math.floor(this.rng() * count)]!;
    return { type: 'commit', from, to, promotion };
  }
}
|
||||||
|
|
||||||
|
/**
 * Builds the brain for one side of a self-play game.
 *
 * @param kind Which implementation to construct.
 * @param seed Seed handed to the brain's constructor.
 * @returns A `CasualBrain` for `'casual'`, otherwise a `RandomBrain`.
 */
function makeBrain(kind: 'casual' | 'random', seed: number): Brain {
  if (kind === 'casual') {
    return new CasualBrain({ seed });
  }
  return new RandomBrain(seed);
}
|
||||||
|
|
||||||
|
/** Outcome record for a single self-play game, as produced by `runOneGame`. */
interface GameResult {
  /**
   * `'w'` / `'b'` for a decisive game, `'draw'` for a finished game with no
   * winner, `'maxply'` when the ply cap stopped the game, `'error'` when a
   * driver threw or failed to make progress.
   */
  result: 'w' | 'b' | 'draw' | 'maxply' | 'error';
  /** Why the game stopped: the game's end reason, `'max_ply'`, `'stuck'`, or an error message. */
  endReason: string;
  /** Number of half-moves played when the game stopped. */
  ply: number;
  /** Wall-clock duration of the game in milliseconds. */
  ms: number;
  /** One line per move played, plus `!!`-prefixed diagnostics on failure. */
  transcript: string[];
}
|
||||||
|
|
||||||
|
/**
 * Plays one in-process game between the two configured brains.
 *
 * Each iteration asks the driver of the side to move to act, then inspects
 * the move history to see whether a move was made. The loop ends when the
 * game leaves the `'active'` status or `args.maxPly` plies have been played.
 *
 * @param args Run options (mode, brain kinds, base seed, ply cap).
 * @param gameIdx Zero-based game index; used to derive distinct seeds for the
 *   two brains (`seed + 2 * gameIdx` for White, `+ 1` for Black).
 * @returns The game's outcome. A driver failure or a turn that makes no
 *   progress is reported as `result: 'error'` rather than thrown.
 */
async function runOneGame(args: Args, gameIdx: number): Promise<GameResult> {
  const startMs = Date.now();
  const transcript: string[] = [];

  const { game } = createGame({
    mode: args.mode, creatorSide: 'w', highlightingEnabled: false,
    vsAi: { brain: 'casual' },
  });
  // createGame already filled both slots when vsAi is set. Clear the
  // aiOpponent tag (this is a self-play game, not a vs-AI game) and flip
  // status to 'active' (no hello will arrive in self-play).
  game.aiOpponent = undefined;
  game.status = 'active';

  const wBrain = makeBrain(args.white, args.seed + gameIdx * 2);
  const bBrain = makeBrain(args.black, args.seed + gameIdx * 2 + 1);
  const wDriver = new BotDriver({ game, brain: wBrain, color: 'w' });
  const bDriver = new BotDriver({ game, brain: bBrain, color: 'b' });
  await wDriver.init();
  await bDriver.init();

  let ply = 0;
  while (game.status === 'active' && ply < args.maxPly) {
    // Capture the mover before the driver acts so the transcript labels the right side.
    const turn = game.chess.turn() as 'w' | 'b';
    const driver = turn === 'w' ? wDriver : bDriver;
    try {
      await driver.onStateChange();
    } catch (err: unknown) {
      // Anything can be thrown; only Error instances carry a usable `.message`.
      const message = err instanceof Error ? err.message : String(err);
      transcript.push(`!! error at ply ${ply}: ${message}`);
      return { result: 'error', endReason: message,
        ply, ms: Date.now() - startMs, transcript };
    }
    // Fetch the history once per turn; it supplies both the new ply count and the last SAN.
    const history = game.chess.history();
    const newPly = history.length;
    if (newPly === ply && game.status === 'active') {
      // Driver didn't move and game didn't end — defensive break.
      transcript.push(`!! stuck at ply ${ply} (${turn} to move)`);
      return { result: 'error', endReason: 'stuck',
        ply, ms: Date.now() - startMs, transcript };
    }
    if (newPly > ply) {
      const lastSan = history[newPly - 1];
      transcript.push(`${newPly}. ${turn === 'w' ? 'W' : 'B'}: ${lastSan}`);
    }
    ply = newPly;
  }

  const ms = Date.now() - startMs;
  if (game.status !== 'finished') {
    // The loop stopped without the game finishing, i.e. the ply cap was reached.
    return { result: 'maxply', endReason: 'max_ply', ply, ms, transcript };
  }
  const result: 'w' | 'b' | 'draw' = game.winner ?? 'draw';
  return { result, endReason: game.endReason ?? 'unknown', ply, ms, transcript };
}
|
||||||
|
|
||||||
|
/**
 * Formats a one-line scoreboard for a batch of games.
 *
 * @param rs Results collected so far; may be empty.
 * @returns Counts per outcome plus the mean ply count and mean duration,
 *   both rounded to whole numbers (reported as 0 for an empty batch).
 */
function summarize(rs: GameResult[]): string {
  const tally = { w: 0, b: 0, draw: 0, maxply: 0, error: 0 };
  let totalPly = 0;
  let totalMs = 0;
  for (const game of rs) {
    tally[game.result] += 1;
    totalPly += game.ply;
    totalMs += game.ms;
  }
  // Divide by at least 1 so an empty batch yields 0 instead of NaN.
  const divisor = Math.max(rs.length, 1);
  const avgPly = (totalPly / divisor).toFixed(0);
  const avgMs = (totalMs / divisor).toFixed(0);
  return [
    `W=${tally.w}`,
    `B=${tally.b}`,
    `D=${tally.draw}`,
    `MaxPly=${tally.maxply}`,
    `Err=${tally.error}`,
    `avgPly=${avgPly}`,
    `avgMs=${avgMs}`,
  ].join(' ');
}
|
||||||
|
|
||||||
|
/**
 * Entry point: parses the command line, plays the requested number of games
 * one after another, optionally writes a transcript file per game, and
 * prints progress lines plus a final summary broken down by end reason.
 */
async function main(): Promise<void> {
  const args = parseArgs();
  console.log(`selfplay: ${args.games} game(s), mode=${args.mode}, white=${args.white}, black=${args.black}, seed=${args.seed}`);

  // Transcripts go into a fresh timestamped directory below ./tmp/selfplay-runs.
  const outDir: string | null = args.transcripts
    ? resolve('tmp', 'selfplay-runs', String(Date.now()))
    : null;
  if (outDir) {
    mkdirSync(outDir, { recursive: true });
    console.log(`transcripts -> ${outDir}`);
  }

  const results: GameResult[] = [];
  for (let i = 0; i < args.games; i++) {
    const played = i + 1;
    const outcome = await runOneGame(args, i);
    results.push(outcome);

    if (outDir) {
      const fileName = `game-${String(played).padStart(4, '0')}.txt`;
      const header = `result=${outcome.result} reason=${outcome.endReason} ply=${outcome.ply} ms=${outcome.ms}`;
      writeFileSync(resolve(outDir, fileName), `${header}\n${outcome.transcript.join('\n')}\n`);
    }

    // Report every tenth game and always after the last one.
    const isLast = played === args.games;
    if (played % 10 === 0 || isLast) {
      console.log(`[${played}/${args.games}] ${summarize(results)}`);
    }
  }

  console.log('\n=== summary ===');
  console.log(summarize(results));

  // Count games per end reason and list the most frequent reasons first.
  const reasons = new Map<string, number>();
  let errorCount = 0;
  for (const { endReason, result } of results) {
    reasons.set(endReason, (reasons.get(endReason) ?? 0) + 1);
    if (result === 'error') errorCount += 1;
  }
  console.log('end reasons:');
  const byCountDesc = Array.from(reasons).sort(([, x], [, y]) => y - x);
  for (const [reason, count] of byCountDesc) {
    console.log(` ${reason}: ${count}`);
  }
  console.log('errors: ' + errorCount);
}

// Any failure that escapes main() is printed and turned into a non-zero exit status.
main().catch((err: unknown) => {
  console.error(err);
  process.exit(1);
});
|
||||||
Reference in New Issue
Block a user