Add LoRA training scripts and fix bake-off token budget

- training/scripts/train_lora.py: Unsloth QLoRA trainer for qwen3:8b
- training/scripts/train_lora.sh: Launch script for steel141 RTX 3090 Ti
- eval/bakeoff.py: Raised token budget (400->1500); the old limit caused qwen3 models to exhaust tokens on thinking. Added --no-think flag
- agent/serve.py: Default model changed to gemma3n:e4b

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-18 10:40:18 -04:00
parent 6fbab8045c
commit 48b627d498
4 changed files with 240 additions and 5 deletions
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+# LoRA fine-tuning launcher for qwen3:8b on steel141
+#
+# Prerequisites (already installed):
+#   conda activate mc-train  (or use full path below)
+#   Python 3.11, PyTorch 2.10+cu124, Unsloth 2026.3.5
+#
+# Usage (any extra arguments are forwarded to train_lora.py):
+#   ssh steel141
+#   cd ~/mc-ai-training
+#   bash train_lora.sh
+#
+# Or from CT 629:
+#   ssh steel141 "cd ~/mc-ai-training && bash train_lora.sh"
+
+set -euo pipefail
+
+PYTHON=~/miniconda3/envs/mc-train/bin/python
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+[[ -x "$PYTHON" ]] || { echo "error: no executable Python at $PYTHON" >&2; exit 1; }
+
+# Use RTX 3090 Ti (GPU 0 in PyTorch ordering)
+export CUDA_VISIBLE_DEVICES=0
+
+echo "=== Minecraft AI LoRA Training ==="
+echo "Python: $("$PYTHON" --version)"
+echo "GPU: RTX 3090 Ti (24GB)"
+echo "Base model: Qwen/Qwen3-8B"
+echo "Dataset: seed_dataset.jsonl"
+echo ""
+exec "$PYTHON" "$SCRIPT_DIR/train_lora.py" "$@"