1,542 seed + 1,159 tool-calling examples, async processing, validator tracking
New knowledge baked in:
- Enchantments (60): all 1.21 enchants, mutual exclusions, max levels, component syntax
- WorldEdit (45): //set, //replace, //sphere, //stack, selection, brushes
- Paper server (55): gamerules, permissions, plugins, scoreboard, moderation
- Cosmetics/XP (42): title, tellraw, playsound, particle, xp, effect mechanics
- Quantity boundaries (32): item tier caps, greedy→stingy, humble→generous

Training infrastructure:
- train_lora.py updated for multi-turn tool conversations + seed data
- Async prayer/sudo processing (ThreadPoolExecutor, 3 workers)
- Validator hit-rate tracking to /var/log/mc_validator_stats.json

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -49,7 +49,7 @@ def get_system_prompt(mode: str) -> str:
|
||||
return "You are a Minecraft 1.21 command translator. Return JSON: {\"commands\": [...], \"reasoning\": \"...\"}"
|
||||
|
||||
|
||||
def load_dataset(path: str) -> list:
|
||||
def load_seed_dataset(path: str) -> list:
|
||||
"""Load seed dataset and format for SFT training with system prompts and mode awareness."""
|
||||
examples = []
|
||||
with open(path) as f:
|
||||
@@ -101,6 +101,45 @@ def load_dataset(path: str) -> list:
|
||||
return examples
|
||||
|
||||
|
||||
def load_tool_dataset(path: str) -> list:
    """Load multi-turn tool-calling training data from a JSONL file.

    Each non-blank line is parsed as one JSON object and mapped as follows:

    - a record with a ``"messages"`` key becomes
      ``{"conversations": <messages>}``;
    - otherwise, a record with a ``"qwen3_text"`` key becomes
      ``{"text": <qwen3_text>}`` (already-formatted text);
    - a record with neither key is skipped.

    Args:
        path: Location of the JSONL file.

    Returns:
        The mapped examples, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    examples = []
    # The data contains non-ASCII text, so read it as UTF-8 explicitly
    # instead of relying on the platform's default encoding.
    with open(path, encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                continue
            ex = json.loads(line)

            # "messages" takes precedence when a record carries both keys.
            if "messages" in ex:
                examples.append({"conversations": ex["messages"]})
            elif "qwen3_text" in ex:
                examples.append({"text": ex["qwen3_text"]})

    return examples
|
||||
|
||||
|
||||
def load_dataset(seed_path: str, tool_path: str = None) -> list:
    """Build the combined training set from the seed and tool-calling files.

    Seed examples are always loaded via ``load_seed_dataset``. Tool-calling
    examples are appended (via ``load_tool_dataset``) only when ``tool_path``
    is given and the file exists. A count for each source is printed.

    Args:
        seed_path: Path handed to ``load_seed_dataset``.
        tool_path: Optional path to the tool-calling data file.

    Returns:
        Seed examples followed by any tool-calling examples.
    """
    merged = load_seed_dataset(seed_path)
    print(f" Seed examples: {len(merged)}")

    tool_file_available = bool(tool_path) and os.path.exists(tool_path)
    if not tool_file_available:
        # No path given or file missing: report it and return seed data only.
        print(f" Tool examples: 0 (no file)")
        return merged

    extra = load_tool_dataset(tool_path)
    print(f" Tool examples: {len(extra)}")
    merged.extend(extra)
    return merged
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="LoRA fine-tuning for Minecraft AI")
|
||||
parser.add_argument("--model", default="Qwen/Qwen3-8B", help="Base model from HuggingFace")
|
||||
@@ -122,6 +161,7 @@ def main():
|
||||
|
||||
if not args.dataset:
|
||||
args.dataset = str(project_root / "data" / "processed" / "seed_dataset.jsonl")
|
||||
tool_dataset = str(project_root / "data" / "processed" / "tool_training.jsonl")
|
||||
if not args.output:
|
||||
args.output = str(project_root / "training" / "checkpoints" / "qwen3-8b-mc-lora")
|
||||
|
||||
@@ -135,10 +175,10 @@ def main():
|
||||
print(f"Max seq len: {args.max_seq_len}")
|
||||
print()
|
||||
|
||||
# Load dataset
|
||||
print("Loading dataset...")
|
||||
train_data = load_dataset(args.dataset)
|
||||
print(f" {len(train_data)} training examples loaded")
|
||||
# Load dataset (seed + tool-calling)
|
||||
print("Loading datasets...")
|
||||
train_data = load_dataset(args.dataset, tool_dataset)
|
||||
print(f" Total: {len(train_data)} training examples")
|
||||
|
||||
if args.dry_run:
|
||||
print("\n[DRY RUN] Would load model and train. Exiting.")
|
||||
@@ -177,13 +217,27 @@ def main():
|
||||
dataset = Dataset.from_list(train_data)
|
||||
|
||||
def formatting_func(examples):
|
||||
"""Format conversations for the chat template."""
|
||||
"""Format conversations for the chat template. Handles both:
|
||||
- 'conversations': list of role/content dicts → apply chat template
|
||||
- 'text': pre-formatted Qwen3 text (tool-calling examples) → pass through
|
||||
"""
|
||||
texts = []
|
||||
for convos in examples["conversations"]:
|
||||
text = tokenizer.apply_chat_template(
|
||||
convos, tokenize=False, add_generation_prompt=False
|
||||
)
|
||||
texts.append(text)
|
||||
convos_list = examples.get("conversations", [])
|
||||
text_list = examples.get("text", [])
|
||||
for i in range(len(convos_list)):
|
||||
convos = convos_list[i]
|
||||
pre_text = text_list[i] if i < len(text_list) else None
|
||||
if pre_text:
|
||||
# Pre-formatted tool-calling example
|
||||
texts.append(pre_text)
|
||||
elif convos:
|
||||
# Standard conversation → apply chat template
|
||||
text = tokenizer.apply_chat_template(
|
||||
convos, tokenize=False, add_generation_prompt=False
|
||||
)
|
||||
texts.append(text)
|
||||
else:
|
||||
texts.append("")
|
||||
return {"text": texts}
|
||||
|
||||
dataset = dataset.map(formatting_func, batched=True)
|
||||
|
||||
Reference in New Issue
Block a user