1,542 seed + 1,159 tool-calling examples, async processing, validator tracking

New knowledge baked in:
- Enchantments (60): all 1.21 enchants, mutual exclusions, max levels, component syntax
- WorldEdit (45): //set, //replace, //sphere, //stack, selection, brushes
- Paper server (55): gamerules, permissions, plugins, scoreboard, moderation
- Cosmetics/XP (42): title, tellraw, playsound, particle, xp, effect mechanics
- Quantity boundaries (32): item tier caps, greedy→stingy, humble→generous

Training infrastructure:
- train_lora.py updated for multi-turn tool conversations + seed data
- Async prayer/sudo processing (ThreadPoolExecutor, 3 workers)
- Validator hit-rate tracking to /var/log/mc_validator_stats.json

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-19 19:03:30 -04:00
parent ee764cd22a
commit 750cf15c79
6 changed files with 469 additions and 11 deletions
@@ -49,7 +49,7 @@ def get_system_prompt(mode: str) -> str:
        return "You are a Minecraft 1.21 command translator. Return JSON: {\"commands\": [...], \"reasoning\": \"...\"}"


-def load_dataset(path: str) -> list:
+def load_seed_dataset(path: str) -> list:
    """Load seed dataset and format for SFT training with system prompts and mode awareness."""
    examples = []
    with open(path) as f:
@@ -101,6 +101,45 @@ def load_dataset(path: str) -> list:
    return examples


+def load_tool_dataset(path: str) -> list:
+    """Load multi-turn tool-calling training data from a JSONL file.
+
+    Each non-blank line is a JSON object in one of two shapes:
+
+    - ``{"messages": [...]}``: a multi-turn conversation
+      (user → assistant tool_call → tool result → ... → final response).
+      Emitted as ``{"conversations": messages}``.
+    - ``{"qwen3_text": "..."}``: text already rendered in Qwen3 chat format
+      with tool_call tags. Emitted as ``{"text": qwen3_text}``.
+
+    Records carrying neither key are skipped; the number skipped is printed
+    so malformed data does not disappear silently.
+
+    Args:
+        path: Path to the JSONL file.
+
+    Returns:
+        A list of dicts, each holding either a ``conversations`` or a
+        ``text`` key, in file order.
+
+    Raises:
+        OSError: If the file cannot be opened.
+        json.JSONDecodeError: If a non-blank line is not valid JSON.
+    """
+    examples = []
+    skipped = 0
+    # Explicit UTF-8: the data contains non-ASCII text and must not depend
+    # on the platform's default locale encoding.
+    with open(path, encoding="utf-8") as f:
+        for line in f:
+            if not line.strip():
+                continue
+            ex = json.loads(line)
+
+            # 'messages' takes precedence when a record carries both fields.
+            if "messages" in ex:
+                examples.append({"conversations": ex["messages"]})
+            elif "qwen3_text" in ex:
+                examples.append({"text": ex["qwen3_text"]})
+            else:
+                skipped += 1
+
+    if skipped:
+        print(f"  WARNING: skipped {skipped} tool records with no 'messages' or 'qwen3_text'")
+
+    return examples
+
+
+def load_dataset(seed_path: str, tool_path: str = None) -> list:
+    """Load and merge all training datasets.
+
+    Seed examples are loaded first; tool-calling examples are appended when
+    ``tool_path`` is given and the file exists. A per-source count is printed.
+
+    The merged rows are then given a uniform set of keys. Tool examples may
+    carry only ``text`` while other rows carry only ``conversations``, and
+    ``Dataset.from_list`` derives its columns from the first row alone, so
+    a key missing from that row would be dropped for every row.
+
+    Args:
+        seed_path: Path to the seed JSONL dataset.
+        tool_path: Optional path to the tool-calling JSONL dataset.
+
+    Returns:
+        The merged list of example dicts (seed first, then tool examples).
+    """
+    examples = load_seed_dataset(seed_path)
+    print(f"  Seed examples:  {len(examples)}")
+
+    if tool_path and os.path.exists(tool_path):
+        tool_examples = load_tool_dataset(tool_path)
+        print(f"  Tool examples:  {len(tool_examples)}")
+        examples.extend(tool_examples)
+    else:
+        print("  Tool examples:  0 (no file)")
+
+    # Pad only with keys that at least one row actually uses, so a
+    # single-format dataset keeps exactly the columns it had before.
+    used_keys = [
+        key for key in ("conversations", "text")
+        if any(key in ex for ex in examples)
+    ]
+    for ex in examples:
+        for key in used_keys:
+            ex.setdefault(key, None)
+
+    return examples
+
+
 def main():
    parser = argparse.ArgumentParser(description="LoRA fine-tuning for Minecraft AI")
    parser.add_argument("--model", default="Qwen/Qwen3-8B", help="Base model from HuggingFace")
@@ -122,6 +161,7 @@ def main():

    if not args.dataset:
        args.dataset = str(project_root / "data" / "processed" / "seed_dataset.jsonl")
+    tool_dataset = str(project_root / "data" / "processed" / "tool_training.jsonl")
    if not args.output:
        args.output = str(project_root / "training" / "checkpoints" / "qwen3-8b-mc-lora")

@@ -135,10 +175,10 @@ def main():
    print(f"Max seq len: {args.max_seq_len}")
    print()

-    # Load dataset
-    print("Loading dataset...")
-    train_data = load_dataset(args.dataset)
-    print(f"  {len(train_data)} training examples loaded")
+    # Load dataset (seed + tool-calling)
+    print("Loading datasets...")
+    train_data = load_dataset(args.dataset, tool_dataset)
+    print(f"  Total: {len(train_data)} training examples")

    if args.dry_run:
        print("\n[DRY RUN] Would load model and train. Exiting.")
@@ -177,13 +217,27 @@ def main():
    dataset = Dataset.from_list(train_data)

    def formatting_func(examples):
-        """Format conversations for the chat template."""
+        """Render one batch of examples into training text.
+
+        Each row is handled according to which field it carries:
+        - 'text': pre-formatted Qwen3 text (tool-calling examples) → pass through
+        - 'conversations': list of role/content dicts → apply chat template
+        - neither: an empty string, so the output keeps one entry per row
+
+        Either column may be absent from the batch or hold None for a row.
+        """
        texts = []
-        for convos in examples["conversations"]:
-            text = tokenizer.apply_chat_template(
-                convos, tokenize=False, add_generation_prompt=False
-            )
-            texts.append(text)
+        convos_list = list(examples.get("conversations") or [])
+        text_list = list(examples.get("text") or [])
+        # Size the loop by whichever column is present; driving it from
+        # 'conversations' alone would emit nothing for a text-only batch.
+        batch_size = max(len(convos_list), len(text_list))
+        convos_list += [None] * (batch_size - len(convos_list))
+        text_list += [None] * (batch_size - len(text_list))
+        for convos, pre_text in zip(convos_list, text_list):
+            if pre_text:
+                # Pre-formatted tool-calling example
+                texts.append(pre_text)
+            elif convos:
+                # Standard conversation → apply chat template
+                texts.append(
+                    tokenizer.apply_chat_template(
+                        convos, tokenize=False, add_generation_prompt=False
+                    )
+                )
+            else:
+                texts.append("")
        return {"text": texts}

    dataset = dataset.map(formatting_func, batched=True)