docs: scrub PII/IPs from gpu-bakeoff
- Rename host alias matt-strix -> strix-halo (removes third-party name).
- Move host URLs to env-var lookup (OLLAMA_*_URL) and drop hardcoded IPs from the harness source. Defaults: steel141 keeps localhost; pve197 and strix-halo require their env var to be set before use.
- Update doc: remove the Tailscale IP and LAN-IP references; describe access paths without specific addresses.
- Rename runs/matt-strix -> runs/strix-halo and patch the host field in each JSON.

The harness is still functional for the original author (set the env vars) and safe to share without leaking routable addresses.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -5,7 +5,7 @@ three hosts:
|
||||
|
||||
- steel141 : RTX 3090 Ti (24 GB GDDR6X, compute 8.6, ~1008 GB/s)
|
||||
- pve197 : Tesla V100-PCIE-32GB (32 GB HBM2, compute 7.0, ~900 GB/s)
|
||||
- matt-strix: AMD Strix Halo iGPU (shared LPDDR5X, ~256 GB/s)
|
||||
- strix-halo: AMD Strix Halo iGPU (shared LPDDR5X, ~256 GB/s)
|
||||
|
||||
Per (host, model, prompt_length), runs 1 warmup + N measurement runs,
|
||||
records Ollama's canonical timing fields, and writes one JSON trace to
|
||||
@@ -15,6 +15,13 @@ All three Ollama servers are polled via HTTP; no SSH required. All
|
||||
timings come from Ollama's own /api/generate response fields so wall-
|
||||
clock jitter between the harness and the server is excluded.
|
||||
|
||||
Host URLs are resolved from environment variables so routable addresses
|
||||
don't live in source. Set these before running against non-local hosts:
|
||||
|
||||
OLLAMA_STEEL141_URL=http://127.0.0.1:11434
|
||||
OLLAMA_PVE197_URL=http://<lan-ip>:11434
|
||||
OLLAMA_STRIX_URL=http://<tailscale-ip>:11434
|
||||
|
||||
Invocation:
|
||||
python3 harness.py --host steel141 --model gemma4:26b --prompt short
|
||||
python3 harness.py all # runs the full planned matrix
|
||||
@@ -24,6 +31,7 @@ from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import urllib.request
|
||||
@@ -31,16 +39,30 @@ from pathlib import Path
|
||||
|
||||
|
||||
HOSTS = {
|
||||
"steel141": {"url": "http://127.0.0.1:11434", "gpu": "RTX 3090 Ti", "vram_gb": 24},
|
||||
"pve197": {"url": "http://192.168.0.179:11434", "gpu": "Tesla V100-PCIE-32GB", "vram_gb": 32},
|
||||
"matt-strix": {"url": "http://100.117.155.64:11434", "gpu": "AMD Strix Halo iGPU", "vram_gb": None},
|
||||
"steel141": {"url_env": "OLLAMA_STEEL141_URL", "default_url": "http://127.0.0.1:11434",
|
||||
"gpu": "RTX 3090 Ti", "vram_gb": 24},
|
||||
"pve197": {"url_env": "OLLAMA_PVE197_URL", "default_url": None,
|
||||
"gpu": "Tesla V100-PCIE-32GB", "vram_gb": 32},
|
||||
"strix-halo": {"url_env": "OLLAMA_STRIX_URL", "default_url": None,
|
||||
"gpu": "AMD Strix Halo iGPU", "vram_gb": None},
|
||||
}
|
||||
|
||||
# Per-host model tag mapping. matt-strix uses gemma4:31b, the others
|
||||
|
||||
def _host_url(host: str) -> str:
    """Return the Ollama base URL configured for *host*.

    The environment variable named by the host's ``url_env`` entry takes
    precedence when it holds a non-empty value; otherwise the host's
    ``default_url`` entry is used.

    Raises:
        KeyError: if *host* is not a key of ``HOSTS``.
        RuntimeError: if neither the environment variable nor the default
            provides a non-empty URL.
    """
    entry = HOSTS[host]
    env_name = entry["url_env"]

    # An env var set to the empty string is treated the same as unset.
    resolved = os.environ.get(env_name)
    if not resolved:
        resolved = entry["default_url"]

    if resolved:
        return resolved
    raise RuntimeError(
        f"host {host!r} has no URL — set ${env_name} in env"
    )
|
||||
|
||||
|
||||
# Per-host model tag mapping. strix-halo uses gemma4:31b, the others
|
||||
# use gemma4:31b-it-q4_K_M — identical weights, different tags.
|
||||
MODEL_ALIASES = {
|
||||
"gemma4:26b": {"steel141": "gemma4:26b", "pve197": "gemma4:26b", "matt-strix": "gemma4:26b"},
|
||||
"gemma4:31b": {"steel141": "gemma4:31b-it-q4_K_M", "pve197": "gemma4:31b-it-q4_K_M", "matt-strix": "gemma4:31b"},
|
||||
"gemma4:26b": {"steel141": "gemma4:26b", "pve197": "gemma4:26b", "strix-halo": "gemma4:26b"},
|
||||
"gemma4:31b": {"steel141": "gemma4:31b-it-q4_K_M", "pve197": "gemma4:31b-it-q4_K_M", "strix-halo": "gemma4:31b"},
|
||||
# V100-only edge case — only 32 GB host has headroom for the Q8 MoE.
|
||||
"gemma4:26b-q8": {"pve197": "gemma4:26b-a4b-it-q8_0"},
|
||||
}
|
||||
@@ -151,7 +173,7 @@ def run_matrix(
|
||||
return {"host": host, "model_alias": model_alias, "skipped": "model not available on host"}
|
||||
|
||||
prompt = PROMPTS[prompt_key]
|
||||
url = host_cfg["url"]
|
||||
url = _host_url(host)
|
||||
|
||||
trace = {
|
||||
"host": host,
|
||||
|
||||
Reference in New Issue
Block a user