docs: scrub PII/IPs from gpu-bakeoff

- Rename host alias matt-strix -> strix-halo (removes third-party name).
- Move host URLs to env-var lookup (OLLAMA_*_URL), drop hardcoded IPs from harness source. Defaults: steel141 keeps localhost; pve197 and strix-halo require their env var to be set before use.
- Update doc: remove the Tailscale IP and LAN-IP references, describe access paths without specific addresses.
- Rename runs/matt-strix -> runs/strix-halo and patch the host field in each JSON.

The harness is still functional for the original author (set the env vars) and safe to share without leaking routable addresses.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-20 05:50:52 -04:00
parent 22af59756f
commit 91842f30cb
9 changed files with 43 additions and 21 deletions
@@ -0,0 +1,81 @@
+{
+  "host": "strix-halo",
+  "gpu": "AMD Strix Halo iGPU",
+  "vram_gb": null,
+  "model_alias": "gemma4:26b",
+  "model_tag": "gemma4:26b",
+  "prompt_key": "long",
+  "prompt_chars": 1614,
+  "num_predict": 256,
+  "num_ctx": 4096,
+  "runs": [
+    {
+      "prompt_tokens": 319,
+      "prompt_eval_ms": 22.5,
+      "prefill_tok_per_s": 14201.36,
+      "output_tokens": 256,
+      "eval_ms": 4883.4,
+      "decode_tok_per_s": 52.42,
+      "load_ms": 151.1,
+      "total_ms": 5120.3,
+      "harness_wall_s": 5.186,
+      "done_reason": "length"
+    },
+    {
+      "prompt_tokens": 319,
+      "prompt_eval_ms": 22.1,
+      "prefill_tok_per_s": 14448.45,
+      "output_tokens": 256,
+      "eval_ms": 4881.1,
+      "decode_tok_per_s": 52.45,
+      "load_ms": 159.1,
+      "total_ms": 5124.5,
+      "harness_wall_s": 5.18,
+      "done_reason": "length"
+    },
+    {
+      "prompt_tokens": 319,
+      "prompt_eval_ms": 22.3,
+      "prefill_tok_per_s": 14326.07,
+      "output_tokens": 256,
+      "eval_ms": 4885.3,
+      "decode_tok_per_s": 52.4,
+      "load_ms": 155.4,
+      "total_ms": 5128.9,
+      "harness_wall_s": 5.192,
+      "done_reason": "length"
+    }
+  ],
+  "warmup": {
+    "prompt_tokens": 319,
+    "prompt_eval_ms": 265.0,
+    "prefill_tok_per_s": 1203.86,
+    "output_tokens": 256,
+    "eval_ms": 4880.6,
+    "decode_tok_per_s": 52.45,
+    "load_ms": 159.8,
+    "total_ms": 5368.3,
+    "harness_wall_s": 5.429,
+    "done_reason": "length"
+  },
+  "summary": {
+    "prefill_tok_per_s": {
+      "min": 14201.36,
+      "median": 14326.07,
+      "max": 14448.45,
+      "n": 3
+    },
+    "decode_tok_per_s": {
+      "min": 52.4,
+      "median": 52.42,
+      "max": 52.45,
+      "n": 3
+    },
+    "total_ms": {
+      "min": 5120.3,
+      "median": 5124.5,
+      "max": 5128.9,
+      "n": 3
+    }
+  }
+}
@@ -0,0 +1,81 @@
+{
+  "host": "strix-halo",
+  "gpu": "AMD Strix Halo iGPU",
+  "vram_gb": null,
+  "model_alias": "gemma4:26b",
+  "model_tag": "gemma4:26b",
+  "prompt_key": "short",
+  "prompt_chars": 78,
+  "num_predict": 256,
+  "num_ctx": 4096,
+  "runs": [
+    {
+      "prompt_tokens": 28,
+      "prompt_eval_ms": 21.9,
+      "prefill_tok_per_s": 1278.99,
+      "output_tokens": 256,
+      "eval_ms": 4754.7,
+      "decode_tok_per_s": 53.84,
+      "load_ms": 172.3,
+      "total_ms": 5008.5,
+      "harness_wall_s": 5.057,
+      "done_reason": "length"
+    },
+    {
+      "prompt_tokens": 28,
+      "prompt_eval_ms": 21.9,
+      "prefill_tok_per_s": 1275.71,
+      "output_tokens": 256,
+      "eval_ms": 4755.7,
+      "decode_tok_per_s": 53.83,
+      "load_ms": 151.6,
+      "total_ms": 4988.3,
+      "harness_wall_s": 5.043,
+      "done_reason": "length"
+    },
+    {
+      "prompt_tokens": 28,
+      "prompt_eval_ms": 22.0,
+      "prefill_tok_per_s": 1271.11,
+      "output_tokens": 256,
+      "eval_ms": 4757.6,
+      "decode_tok_per_s": 53.81,
+      "load_ms": 154.4,
+      "total_ms": 4993.2,
+      "harness_wall_s": 5.048,
+      "done_reason": "length"
+    }
+  ],
+  "warmup": {
+    "prompt_tokens": 28,
+    "prompt_eval_ms": 93.1,
+    "prefill_tok_per_s": 300.9,
+    "output_tokens": 256,
+    "eval_ms": 4756.6,
+    "decode_tok_per_s": 53.82,
+    "load_ms": 2272.4,
+    "total_ms": 7250.0,
+    "harness_wall_s": 7.341,
+    "done_reason": "length"
+  },
+  "summary": {
+    "prefill_tok_per_s": {
+      "min": 1271.11,
+      "median": 1275.71,
+      "max": 1278.99,
+      "n": 3
+    },
+    "decode_tok_per_s": {
+      "min": 53.81,
+      "median": 53.83,
+      "max": 53.84,
+      "n": 3
+    },
+    "total_ms": {
+      "min": 4988.3,
+      "median": 4993.2,
+      "max": 5008.5,
+      "n": 3
+    }
+  }
+}