Add LoRA training scripts and fix bake-off token budget
- training/scripts/train_lora.py: Unsloth QLoRA trainer for qwen3:8b
- training/scripts/train_lora.sh: Launch script for steel141 RTX 3090 Ti
- eval/bakeoff.py: Raised the token budget (400->1500); the old budget caused qwen3 models to exhaust their tokens on thinking. Added --no-think flag
- agent/serve.py: Default model changed to gemma3n:e4b

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Executable
+31
@@ -0,0 +1,31 @@
|
||||
#!/usr/bin/env bash
# LoRA fine-tuning launcher for qwen3:8b on steel141
#
# Prerequisites (already installed):
#   conda activate mc-train  (or use full path below)
#   Python 3.11, PyTorch 2.10+cu124, Unsloth 2026.3.5
#
# Usage:
#   ssh steel141
#   cd ~/mc-ai-training
#   bash train_lora.sh
#
# Or from CT 629:
#   ssh steel141 "cd ~/mc-ai-training && bash train_lora.sh"
#
# All command-line arguments are forwarded unchanged to train_lora.py.

set -euo pipefail

# Interpreter of the mc-train conda env, addressed by full path so the env
# does not have to be activated before launching.
PYTHON=~/miniconda3/envs/mc-train/bin/python
readonly PYTHON

# Directory containing this script; train_lora.py is expected next to it.
# CDPATH is cleared for the cd so it cannot print extra output into the
# command substitution.
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
readonly SCRIPT_DIR
readonly TRAIN_SCRIPT="${SCRIPT_DIR}/train_lora.py"

# Fail early with a clear message instead of printing an empty version in the
# banner and dying later at exec.
if [[ ! -x "$PYTHON" ]]; then
  printf 'error: Python interpreter not found or not executable: %s\n' \
    "$PYTHON" >&2
  exit 1
fi
if [[ ! -f "$TRAIN_SCRIPT" ]]; then
  printf 'error: training script not found: %s\n' "$TRAIN_SCRIPT" >&2
  exit 1
fi

# Use RTX 3090 Ti (GPU 0 in PyTorch ordering)
export CUDA_VISIBLE_DEVICES=0

# Capture the version in its own assignment: a failing command substitution
# inside an echo argument would not trigger `set -e`, an assignment does.
python_version="$("$PYTHON" --version)"

echo "=== Minecraft AI LoRA Training ==="
echo "Python: ${python_version}"
echo "GPU: RTX 3090 Ti (24GB)"
echo "Base model: Qwen/Qwen3-8B"
echo "Dataset: seed_dataset.jsonl"
echo ""

# Replace this shell with the trainer so signals and the exit status go
# straight to/from the Python process.
exec "$PYTHON" "$TRAIN_SCRIPT" "$@"
|
||||
Reference in New Issue
Block a user