Mortdecai/data/scheduler/jobs.json

[
  {
    "id": "dd43e5ea",
    "preset_id": "a78c48c1",
    "preset_name": "Everyone Test",
    "pipeline": "self_play",
    "params": {
      "model": "mortdecai:0.4.0",
      "tiers": "drills,self_critique,adversarial",
      "rounds_per_tier": "50",
      "rcon_host": "192.168.0.244",
      "rcon_port": "25578"
    },
    "gpus": [
      "3090ti",
      "2080ti",
      "rtx4000"
    ],
    "status": "failed",
    "created_at": "2026-03-21T01:19:09Z",
    "started_at": "2026-03-21T01:19:09Z",
    "error": "'str' object cannot be interpreted as an integer",
    "finished_at": "2026-03-21T01:19:09Z"
  },
  {
    "id": "bc8112d9",
    "preset_id": "a78c48c1",
    "preset_name": "Everyone Test",
    "pipeline": "self_play",
    "params": {
      "model": "mortdecai:0.4.0",
      "tiers": "drills,self_critique,adversarial",
      "rounds_per_tier": "50",
      "rcon_host": "192.168.0.244",
      "rcon_port": "25578"
    },
    "gpus": [
      "3090ti",
      "2080ti",
      "rtx4000"
    ],
    "status": "failed",
    "created_at": "2026-03-21T01:21:03Z",
    "started_at": "2026-03-21T01:21:03Z",
    "error": "'str' object cannot be interpreted as an integer",
    "finished_at": "2026-03-21T01:21:04Z"
  },
  {
    "id": "be1265be",
    "preset_id": "a78c48c1",
    "preset_name": "Everyone Test",
    "pipeline": "self_play",
    "params": {
      "model": "mortdecai:0.4.0",
      "tiers": "drills,self_critique,adversarial",
      "rounds_per_tier": "50",
      "rcon_host": "192.168.0.244",
      "rcon_port": "25578"
    },
    "gpus": [
      "3090ti",
      "2080ti",
      "rtx4000"
    ],
    "status": "cancelled",
    "created_at": "2026-03-21T01:23:34Z",
    "started_at": "2026-03-21T01:23:34Z"
  },
  {
    "id": "2b895dcf",
    "preset_id": "a78c48c1",
    "preset_name": "Everyone Test",
    "pipeline": "self_play",
    "params": {
      "model": "mortdecai:0.4.0",
      "tiers": "1,2,3",
      "rounds_per_tier": "50",
      "rcon_host": "192.168.0.244",
      "rcon_port": "25578",
      "rcon_pass": "REDACTED_RCON"
    },
    "gpus": [
      "3090ti",
      "2080ti",
      "rtx4000"
    ],
    "status": "cancelled",
    "created_at": "2026-03-21T01:28:31Z",
    "started_at": "2026-03-21T01:28:31Z",
    "gpu_assignments": {
      "3090ti": [
        "1"
      ],
      "2080ti": [
        "2"
      ],
      "rtx4000": [
        "3"
      ]
    }
  },
  {
    "id": "db75e2ba",
    "preset_id": "06356764",
    "preset_name": "Infer during training",
    "pipeline": "self_play",
    "params": {
      "model": "mortdecai:0.4.0",
      "tiers": "1,2,3",
      "rounds_per_tier": "50",
      "rcon_host": "192.168.0.244",
      "rcon_port": "25578",
      "rcon_pass": "REDACTED_RCON"
    },
    "gpus": [
      "rtx4000"
    ],
    "status": "cancelled",
    "created_at": "2026-03-21T01:38:09Z",
    "started_at": "2026-03-21T01:38:09Z",
    "gpu_assignments": {
      "rtx4000": [
        "1",
        "2",
        "3"
      ]
    }
  },
  {
    "id": "d1581da3",
    "preset_id": "9cc95c0a",
    "preset_name": "Train",
    "pipeline": "training",
    "params": {
      "base_model": "Qwen/Qwen3.5-9B",
      "dataset": "auto",
      "output_name": "mortdecai-0.5.0",
      "epochs": "1",
      "lr": "0.0001",
      "batch_size": "2",
      "grad_accum": "4",
      "max_seq_len": "2048",
      "save_steps": "50"
    },
    "gpus": [
      "3090ti",
      "2080ti"
    ],
    "status": "failed",
    "created_at": "2026-03-21T01:38:13Z",
    "started_at": "2026-03-21T01:38:13Z",
    "log_path": "training/train_run_mortdecai-0.5.0.log",
    "progress": {
      "active": false,
      "loss_history": [
        0.1309,
        0.07891,
        0.03225,
        0.03791,
        0.07594,
        0.07748,
        0.05243,
        0.0536,
        0.05368,
        0.05622,
        0.04548,
        0.07975,
        0.04655,
        0.01792,
        0.08467,
        0.0151,
        0.05061,
        0.04185,
        0.04518,
        0.03152,
        0.084,
        0.06383,
        0.04852,
        0.0456,
        0.05045,
        0.05591,
        0.06717,
        0.05597,
        0.04513,
        0.04979,
        0.02702,
        0.04608,
        0.04292,
        0.04888,
        0.09399,
        0.03988,
        0.02565,
        0.05894,
        0.03941,
        0.04952,
        0.0767,
        0.0494,
        0.1099,
        0.03652,
        0.05015,
        0.07898,
        0.05064,
        0.03833,
        0.04133,
        0.03163,
        0.09881,
        0.05912,
        0.05795,
        0.02599,
        0.09814,
        0.04749,
        0.0284,
        0.06074,
        0.04718,
        0.03789,
        0.08998,
        0.04451,
        0.05937,
        0.04544,
        0.06173,
        0.04686,
        0.05936,
        0.0311,
        0.03927,
        0.08231,
        0.02436,
        0.05194,
        0.04414,
        0.03787,
        0.0383,
        0.0408,
        0.04119,
        0.03175,
        0.08285,
        0.05705,
        0.02964,
        0.0409,
        0.03605,
        0.04664,
        0.04889,
        0.03085,
        0.05376,
        0.0594,
        0.0357,
        0.0965,
        0.04077,
        0.07085,
        0.0476,
        0.04919,
        0.03484,
        0.02473,
        0.07078,
        0.08155,
        0.05989,
        0.06994,
        0.07064
      ],
      "pct": 47,
      "current_step": 250,
      "total_steps": 535,
      "eta": "2:27:31",
      "elapsed": "2:09:24",
      "error": "OOM",
      "latest_loss": 0.07064,
      "learning_rate": "6.464e-05"
    },
    "error": "OOM",
    "finished_at": "2026-03-21T01:38:49Z"
  },
  {
    "id": "8e7909c4",
    "preset_id": "06356764",
    "preset_name": "Infer during training",
    "pipeline": "self_play",
    "params": {
      "model": "mortdecai:0.4.0",
      "tiers": "1,2,3",
      "rounds_per_tier": "50",
      "rcon_host": "192.168.0.244",
      "rcon_port": "25578",
      "rcon_pass": "REDACTED_RCON"
    },
    "gpus": [
      "rtx4000"
    ],
    "status": "running",
    "created_at": "2026-03-21T01:48:09Z",
    "started_at": "2026-03-21T01:48:09Z",
    "gpu_assignments": {
      "rtx4000": [
        "1",
        "2",
        "3"
      ]
    }
  },
  {
    "id": "c72dfada",
    "preset_id": "9cc95c0a",
    "preset_name": "Train",
    "pipeline": "training",
    "params": {
      "base_model": "Qwen/Qwen3.5-9B",
      "dataset": "auto",
      "output_name": "mortdecai-0.5.0",
      "epochs": "1",
      "lr": "0.0001",
      "batch_size": "2",
      "grad_accum": "4",
      "max_seq_len": "2048",
      "save_steps": "50"
    },
    "gpus": [
      "3090ti",
      "2080ti"
    ],
    "status": "failed",
    "created_at": "2026-03-21T01:48:14Z",
    "started_at": "2026-03-21T01:48:14Z",
    "log_path": "training/train_run_mortdecai-0.5.0.log",
    "progress": {
      "active": false,
      "loss_history": [
        0.1309,
        0.07891,
        0.03225,
        0.03791,
        0.07594,
        0.07748,
        0.05243,
        0.0536,
        0.05368,
        0.05622,
        0.04548,
        0.07975,
        0.04655,
        0.01792,
        0.08467,
        0.0151,
        0.05061,
        0.04185,
        0.04518,
        0.03152,
        0.084,
        0.06383,
        0.04852,
        0.0456,
        0.05045,
        0.05591,
        0.06717,
        0.05597,
        0.04513,
        0.04979,
        0.02702,
        0.04608,
        0.04292,
        0.04888,
        0.09399,
        0.03988,
        0.02565,
        0.05894,
        0.03941,
        0.04952,
        0.0767,
        0.0494,
        0.1099,
        0.03652,
        0.05015,
        0.07898,
        0.05064,
        0.03833,
        0.04133,
        0.03163,
        0.09881,
        0.05912,
        0.05795,
        0.02599,
        0.09814,
        0.04749,
        0.0284,
        0.06074,
        0.04718,
        0.03789,
        0.08998,
        0.04451,
        0.05937,
        0.04544,
        0.06173,
        0.04686,
        0.05936,
        0.0311,
        0.03927,
        0.08231,
        0.02436,
        0.05194,
        0.04414,
        0.03787,
        0.0383,
        0.0408,
        0.04119,
        0.03175,
        0.08285,
        0.05705,
        0.02964,
        0.0409,
        0.03605,
        0.04664,
        0.04889,
        0.03085,
        0.05376,
        0.0594,
        0.0357,
        0.0965,
        0.04077,
        0.07085,
        0.0476,
        0.04919,
        0.03484,
        0.02473,
        0.07078,
        0.08155,
        0.05989,
        0.06994,
        0.07064
      ],
      "pct": 47,
      "current_step": 250,
      "total_steps": 535,
      "eta": "2:27:31",
      "elapsed": "2:09:24",
      "error": "OOM",
      "latest_loss": 0.07064,
      "learning_rate": "6.464e-05"
    },
    "error": "OOM",
    "finished_at": "2026-03-21T01:48:50Z"
  },
  {
    "id": "28691b1d",
    "preset_id": "9cc95c0a",
    "preset_name": "Train",
    "pipeline": "training",
    "params": {
      "base_model": "Qwen/Qwen3.5-9B",
      "dataset": "auto",
      "output_name": "mortdecai-0.5.0",
      "epochs": "1",
      "lr": "0.0001",
      "batch_size": "2",
      "grad_accum": "4",
      "max_seq_len": "2048",
      "save_steps": "50"
    },
    "gpus": [
      "3090ti",
      "2080ti"
    ],
    "status": "failed",
    "created_at": "2026-03-21T01:51:47Z",
    "started_at": "2026-03-21T01:51:47Z",
    "log_path": "training/train_run_mortdecai-0.5.0.log",
    "progress": {
      "active": false,
      "loss_history": [],
      "error": "crashed"
    },
    "error": "crashed",
    "finished_at": "2026-03-21T01:52:23Z"
  },
  {
    "id": "adff373a",
    "preset_id": "9cc95c0a",
    "preset_name": "Train",
    "pipeline": "training",
    "params": {
      "base_model": "Qwen/Qwen3.5-9B",
      "dataset": "auto",
      "output_name": "mortdecai-0.5.0",
      "epochs": "1",
      "lr": "0.0001",
      "batch_size": "2",
      "grad_accum": "4",
      "max_seq_len": "2048",
      "save_steps": "50"
    },
    "gpus": [
      "3090ti",
      "2080ti"
    ],
    "status": "running",
    "created_at": "2026-03-21T02:05:09Z",
    "started_at": "2026-03-21T02:05:09Z",
    "log_path": "training/train_run_mortdecai-0.5.0.log",
    "progress": {
      "active": true,
      "loss_history": [],
      "pct": 2,
      "current_step": 9,
      "total_steps": 548,
      "eta": "3:19:36",
      "elapsed": "03:21"
    }
  },
  {
    "id": "32cc3363",
    "preset_id": "06356764",
    "preset_name": "Infer during training",
    "pipeline": "self_play",
    "params": {
      "model": "mortdecai:0.4.0",
      "tiers": "1,2,3",
      "rounds_per_tier": "50",
      "rcon_host": "192.168.0.244",
      "rcon_port": "25578",
      "rcon_pass": "REDACTED_RCON"
    },
    "gpus": [
      "rtx4000"
    ],
    "status": "running",
    "created_at": "2026-03-21T02:10:15Z",
    "started_at": "2026-03-21T02:10:15Z"
  }
]