da8f557219
GPU Scheduler (gpu.sethpc.xyz): - Live dashboard with 4 GPUs, training monitor, loss sparklines - Preset-based job scheduler with 3 triggers (time, finish_training, cost) - Model selection per GPU, pipeline configuration - Tool self-play and training pipeline types - Behind Google OAuth, live-refresh without page reload Tool Architecture (14 tools): - 3 new tools: world.nearby_entities, memory.read, memory.write - 7 script.* tools: write, validate, execute, read, list, delete, schedule - ScriptManager: full mcfunction datapack CRUD with RCON validation - Training data: 1,430 tool examples (up from 1,159) Plugin Deployment (paper-ai-25567): - WorldGuard 7.0.12, CoreProtect CE 23.1, EssentialsX 2.21.2, Vault 1.7.3 - Fresh greenfield world reset - 104 RCON-validated plugin training examples Event Dispatcher: - Watches server log for deaths, joins, advancements, PvP kills - Configurable trigger probability and cooldowns per event type - Deployed to dev server, fires god_system prompts on events - 21 event-response training examples Training Infrastructure: - train_lora.py: --save-steps 50, --resume from checkpoint - run_training.sh: stops Ollama, activates conda, restarts after - Passwordless sudo for ollama services on steel141 - Dev server added to MCSManager with autoStart Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
524 lines
11 KiB
JSON
524 lines
11 KiB
JSON
[
|
|
{
|
|
"id": "dd43e5ea",
|
|
"preset_id": "a78c48c1",
|
|
"preset_name": "Everyone Test",
|
|
"pipeline": "self_play",
|
|
"params": {
|
|
"model": "mortdecai:0.4.0",
|
|
"tiers": "drills,self_critique,adversarial",
|
|
"rounds_per_tier": "50",
|
|
"rcon_host": "192.168.0.244",
|
|
"rcon_port": "25578"
|
|
},
|
|
"gpus": [
|
|
"3090ti",
|
|
"2080ti",
|
|
"rtx4000"
|
|
],
|
|
"status": "failed",
|
|
"created_at": "2026-03-21T01:19:09Z",
|
|
"started_at": "2026-03-21T01:19:09Z",
|
|
"error": "'str' object cannot be interpreted as an integer",
|
|
"finished_at": "2026-03-21T01:19:09Z"
|
|
},
|
|
{
|
|
"id": "bc8112d9",
|
|
"preset_id": "a78c48c1",
|
|
"preset_name": "Everyone Test",
|
|
"pipeline": "self_play",
|
|
"params": {
|
|
"model": "mortdecai:0.4.0",
|
|
"tiers": "drills,self_critique,adversarial",
|
|
"rounds_per_tier": "50",
|
|
"rcon_host": "192.168.0.244",
|
|
"rcon_port": "25578"
|
|
},
|
|
"gpus": [
|
|
"3090ti",
|
|
"2080ti",
|
|
"rtx4000"
|
|
],
|
|
"status": "failed",
|
|
"created_at": "2026-03-21T01:21:03Z",
|
|
"started_at": "2026-03-21T01:21:03Z",
|
|
"error": "'str' object cannot be interpreted as an integer",
|
|
"finished_at": "2026-03-21T01:21:04Z"
|
|
},
|
|
{
|
|
"id": "be1265be",
|
|
"preset_id": "a78c48c1",
|
|
"preset_name": "Everyone Test",
|
|
"pipeline": "self_play",
|
|
"params": {
|
|
"model": "mortdecai:0.4.0",
|
|
"tiers": "drills,self_critique,adversarial",
|
|
"rounds_per_tier": "50",
|
|
"rcon_host": "192.168.0.244",
|
|
"rcon_port": "25578"
|
|
},
|
|
"gpus": [
|
|
"3090ti",
|
|
"2080ti",
|
|
"rtx4000"
|
|
],
|
|
"status": "cancelled",
|
|
"created_at": "2026-03-21T01:23:34Z",
|
|
"started_at": "2026-03-21T01:23:34Z"
|
|
},
|
|
{
|
|
"id": "2b895dcf",
|
|
"preset_id": "a78c48c1",
|
|
"preset_name": "Everyone Test",
|
|
"pipeline": "self_play",
|
|
"params": {
|
|
"model": "mortdecai:0.4.0",
|
|
"tiers": "1,2,3",
|
|
"rounds_per_tier": "50",
|
|
"rcon_host": "192.168.0.244",
|
|
"rcon_port": "25578",
|
|
"rcon_pass": "REDACTED_RCON"
|
|
},
|
|
"gpus": [
|
|
"3090ti",
|
|
"2080ti",
|
|
"rtx4000"
|
|
],
|
|
"status": "cancelled",
|
|
"created_at": "2026-03-21T01:28:31Z",
|
|
"started_at": "2026-03-21T01:28:31Z",
|
|
"gpu_assignments": {
|
|
"3090ti": [
|
|
"1"
|
|
],
|
|
"2080ti": [
|
|
"2"
|
|
],
|
|
"rtx4000": [
|
|
"3"
|
|
]
|
|
}
|
|
},
|
|
{
|
|
"id": "db75e2ba",
|
|
"preset_id": "06356764",
|
|
"preset_name": "Infer during training",
|
|
"pipeline": "self_play",
|
|
"params": {
|
|
"model": "mortdecai:0.4.0",
|
|
"tiers": "1,2,3",
|
|
"rounds_per_tier": "50",
|
|
"rcon_host": "192.168.0.244",
|
|
"rcon_port": "25578",
|
|
"rcon_pass": "REDACTED_RCON"
|
|
},
|
|
"gpus": [
|
|
"rtx4000"
|
|
],
|
|
"status": "cancelled",
|
|
"created_at": "2026-03-21T01:38:09Z",
|
|
"started_at": "2026-03-21T01:38:09Z",
|
|
"gpu_assignments": {
|
|
"rtx4000": [
|
|
"1",
|
|
"2",
|
|
"3"
|
|
]
|
|
}
|
|
},
|
|
{
|
|
"id": "d1581da3",
|
|
"preset_id": "9cc95c0a",
|
|
"preset_name": "Train",
|
|
"pipeline": "training",
|
|
"params": {
|
|
"base_model": "Qwen/Qwen3.5-9B",
|
|
"dataset": "auto",
|
|
"output_name": "mortdecai-0.5.0",
|
|
"epochs": "1",
|
|
"lr": "0.0001",
|
|
"batch_size": "2",
|
|
"grad_accum": "4",
|
|
"max_seq_len": "2048",
|
|
"save_steps": "50"
|
|
},
|
|
"gpus": [
|
|
"3090ti",
|
|
"2080ti"
|
|
],
|
|
"status": "failed",
|
|
"created_at": "2026-03-21T01:38:13Z",
|
|
"started_at": "2026-03-21T01:38:13Z",
|
|
"log_path": "training/train_run_mortdecai-0.5.0.log",
|
|
"progress": {
|
|
"active": false,
|
|
"loss_history": [
|
|
0.1309,
|
|
0.07891,
|
|
0.03225,
|
|
0.03791,
|
|
0.07594,
|
|
0.07748,
|
|
0.05243,
|
|
0.0536,
|
|
0.05368,
|
|
0.05622,
|
|
0.04548,
|
|
0.07975,
|
|
0.04655,
|
|
0.01792,
|
|
0.08467,
|
|
0.0151,
|
|
0.05061,
|
|
0.04185,
|
|
0.04518,
|
|
0.03152,
|
|
0.084,
|
|
0.06383,
|
|
0.04852,
|
|
0.0456,
|
|
0.05045,
|
|
0.05591,
|
|
0.06717,
|
|
0.05597,
|
|
0.04513,
|
|
0.04979,
|
|
0.02702,
|
|
0.04608,
|
|
0.04292,
|
|
0.04888,
|
|
0.09399,
|
|
0.03988,
|
|
0.02565,
|
|
0.05894,
|
|
0.03941,
|
|
0.04952,
|
|
0.0767,
|
|
0.0494,
|
|
0.1099,
|
|
0.03652,
|
|
0.05015,
|
|
0.07898,
|
|
0.05064,
|
|
0.03833,
|
|
0.04133,
|
|
0.03163,
|
|
0.09881,
|
|
0.05912,
|
|
0.05795,
|
|
0.02599,
|
|
0.09814,
|
|
0.04749,
|
|
0.0284,
|
|
0.06074,
|
|
0.04718,
|
|
0.03789,
|
|
0.08998,
|
|
0.04451,
|
|
0.05937,
|
|
0.04544,
|
|
0.06173,
|
|
0.04686,
|
|
0.05936,
|
|
0.0311,
|
|
0.03927,
|
|
0.08231,
|
|
0.02436,
|
|
0.05194,
|
|
0.04414,
|
|
0.03787,
|
|
0.0383,
|
|
0.0408,
|
|
0.04119,
|
|
0.03175,
|
|
0.08285,
|
|
0.05705,
|
|
0.02964,
|
|
0.0409,
|
|
0.03605,
|
|
0.04664,
|
|
0.04889,
|
|
0.03085,
|
|
0.05376,
|
|
0.0594,
|
|
0.0357,
|
|
0.0965,
|
|
0.04077,
|
|
0.07085,
|
|
0.0476,
|
|
0.04919,
|
|
0.03484,
|
|
0.02473,
|
|
0.07078,
|
|
0.08155,
|
|
0.05989,
|
|
0.06994,
|
|
0.07064
|
|
],
|
|
"pct": 47,
|
|
"current_step": 250,
|
|
"total_steps": 535,
|
|
"eta": "2:27:31",
|
|
"elapsed": "2:09:24",
|
|
"error": "OOM",
|
|
"latest_loss": 0.07064,
|
|
"learning_rate": "6.464e-05"
|
|
},
|
|
"error": "OOM",
|
|
"finished_at": "2026-03-21T01:38:49Z"
|
|
},
|
|
{
|
|
"id": "8e7909c4",
|
|
"preset_id": "06356764",
|
|
"preset_name": "Infer during training",
|
|
"pipeline": "self_play",
|
|
"params": {
|
|
"model": "mortdecai:0.4.0",
|
|
"tiers": "1,2,3",
|
|
"rounds_per_tier": "50",
|
|
"rcon_host": "192.168.0.244",
|
|
"rcon_port": "25578",
|
|
"rcon_pass": "REDACTED_RCON"
|
|
},
|
|
"gpus": [
|
|
"rtx4000"
|
|
],
|
|
"status": "running",
|
|
"created_at": "2026-03-21T01:48:09Z",
|
|
"started_at": "2026-03-21T01:48:09Z",
|
|
"gpu_assignments": {
|
|
"rtx4000": [
|
|
"1",
|
|
"2",
|
|
"3"
|
|
]
|
|
}
|
|
},
|
|
{
|
|
"id": "c72dfada",
|
|
"preset_id": "9cc95c0a",
|
|
"preset_name": "Train",
|
|
"pipeline": "training",
|
|
"params": {
|
|
"base_model": "Qwen/Qwen3.5-9B",
|
|
"dataset": "auto",
|
|
"output_name": "mortdecai-0.5.0",
|
|
"epochs": "1",
|
|
"lr": "0.0001",
|
|
"batch_size": "2",
|
|
"grad_accum": "4",
|
|
"max_seq_len": "2048",
|
|
"save_steps": "50"
|
|
},
|
|
"gpus": [
|
|
"3090ti",
|
|
"2080ti"
|
|
],
|
|
"status": "failed",
|
|
"created_at": "2026-03-21T01:48:14Z",
|
|
"started_at": "2026-03-21T01:48:14Z",
|
|
"log_path": "training/train_run_mortdecai-0.5.0.log",
|
|
"progress": {
|
|
"active": false,
|
|
"loss_history": [
|
|
0.1309,
|
|
0.07891,
|
|
0.03225,
|
|
0.03791,
|
|
0.07594,
|
|
0.07748,
|
|
0.05243,
|
|
0.0536,
|
|
0.05368,
|
|
0.05622,
|
|
0.04548,
|
|
0.07975,
|
|
0.04655,
|
|
0.01792,
|
|
0.08467,
|
|
0.0151,
|
|
0.05061,
|
|
0.04185,
|
|
0.04518,
|
|
0.03152,
|
|
0.084,
|
|
0.06383,
|
|
0.04852,
|
|
0.0456,
|
|
0.05045,
|
|
0.05591,
|
|
0.06717,
|
|
0.05597,
|
|
0.04513,
|
|
0.04979,
|
|
0.02702,
|
|
0.04608,
|
|
0.04292,
|
|
0.04888,
|
|
0.09399,
|
|
0.03988,
|
|
0.02565,
|
|
0.05894,
|
|
0.03941,
|
|
0.04952,
|
|
0.0767,
|
|
0.0494,
|
|
0.1099,
|
|
0.03652,
|
|
0.05015,
|
|
0.07898,
|
|
0.05064,
|
|
0.03833,
|
|
0.04133,
|
|
0.03163,
|
|
0.09881,
|
|
0.05912,
|
|
0.05795,
|
|
0.02599,
|
|
0.09814,
|
|
0.04749,
|
|
0.0284,
|
|
0.06074,
|
|
0.04718,
|
|
0.03789,
|
|
0.08998,
|
|
0.04451,
|
|
0.05937,
|
|
0.04544,
|
|
0.06173,
|
|
0.04686,
|
|
0.05936,
|
|
0.0311,
|
|
0.03927,
|
|
0.08231,
|
|
0.02436,
|
|
0.05194,
|
|
0.04414,
|
|
0.03787,
|
|
0.0383,
|
|
0.0408,
|
|
0.04119,
|
|
0.03175,
|
|
0.08285,
|
|
0.05705,
|
|
0.02964,
|
|
0.0409,
|
|
0.03605,
|
|
0.04664,
|
|
0.04889,
|
|
0.03085,
|
|
0.05376,
|
|
0.0594,
|
|
0.0357,
|
|
0.0965,
|
|
0.04077,
|
|
0.07085,
|
|
0.0476,
|
|
0.04919,
|
|
0.03484,
|
|
0.02473,
|
|
0.07078,
|
|
0.08155,
|
|
0.05989,
|
|
0.06994,
|
|
0.07064
|
|
],
|
|
"pct": 47,
|
|
"current_step": 250,
|
|
"total_steps": 535,
|
|
"eta": "2:27:31",
|
|
"elapsed": "2:09:24",
|
|
"error": "OOM",
|
|
"latest_loss": 0.07064,
|
|
"learning_rate": "6.464e-05"
|
|
},
|
|
"error": "OOM",
|
|
"finished_at": "2026-03-21T01:48:50Z"
|
|
},
|
|
{
|
|
"id": "28691b1d",
|
|
"preset_id": "9cc95c0a",
|
|
"preset_name": "Train",
|
|
"pipeline": "training",
|
|
"params": {
|
|
"base_model": "Qwen/Qwen3.5-9B",
|
|
"dataset": "auto",
|
|
"output_name": "mortdecai-0.5.0",
|
|
"epochs": "1",
|
|
"lr": "0.0001",
|
|
"batch_size": "2",
|
|
"grad_accum": "4",
|
|
"max_seq_len": "2048",
|
|
"save_steps": "50"
|
|
},
|
|
"gpus": [
|
|
"3090ti",
|
|
"2080ti"
|
|
],
|
|
"status": "failed",
|
|
"created_at": "2026-03-21T01:51:47Z",
|
|
"started_at": "2026-03-21T01:51:47Z",
|
|
"log_path": "training/train_run_mortdecai-0.5.0.log",
|
|
"progress": {
|
|
"active": false,
|
|
"loss_history": [],
|
|
"error": "crashed"
|
|
},
|
|
"error": "crashed",
|
|
"finished_at": "2026-03-21T01:52:23Z"
|
|
},
|
|
{
|
|
"id": "adff373a",
|
|
"preset_id": "9cc95c0a",
|
|
"preset_name": "Train",
|
|
"pipeline": "training",
|
|
"params": {
|
|
"base_model": "Qwen/Qwen3.5-9B",
|
|
"dataset": "auto",
|
|
"output_name": "mortdecai-0.5.0",
|
|
"epochs": "1",
|
|
"lr": "0.0001",
|
|
"batch_size": "2",
|
|
"grad_accum": "4",
|
|
"max_seq_len": "2048",
|
|
"save_steps": "50"
|
|
},
|
|
"gpus": [
|
|
"3090ti",
|
|
"2080ti"
|
|
],
|
|
"status": "running",
|
|
"created_at": "2026-03-21T02:05:09Z",
|
|
"started_at": "2026-03-21T02:05:09Z",
|
|
"log_path": "training/train_run_mortdecai-0.5.0.log",
|
|
"progress": {
|
|
"active": true,
|
|
"loss_history": [],
|
|
"pct": 2,
|
|
"current_step": 9,
|
|
"total_steps": 548,
|
|
"eta": "3:19:36",
|
|
"elapsed": "03:21"
|
|
}
|
|
},
|
|
{
|
|
"id": "32cc3363",
|
|
"preset_id": "06356764",
|
|
"preset_name": "Infer during training",
|
|
"pipeline": "self_play",
|
|
"params": {
|
|
"model": "mortdecai:0.4.0",
|
|
"tiers": "1,2,3",
|
|
"rounds_per_tier": "50",
|
|
"rcon_host": "192.168.0.244",
|
|
"rcon_port": "25578",
|
|
"rcon_pass": "REDACTED_RCON"
|
|
},
|
|
"gpus": [
|
|
"rtx4000"
|
|
],
|
|
"status": "running",
|
|
"created_at": "2026-03-21T02:10:15Z",
|
|
"started_at": "2026-03-21T02:10:15Z"
|
|
}
|
|
] |