Add opt-in model update endpoint + API key support

Gateway: POST /admin/update-model downloads new GGUF and reloads. Disabled by default — requires ALLOW_MODEL_UPDATES=true in .env. Matt controls whether remote model updates are allowed. Self-play: --api-key flag for authenticated gateway connections. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-20 19:39:50 -04:00
parent f470f052aa
commit 0b37d7de79
2 changed files with 43 additions and 0 deletions
@@ -4,3 +4,4 @@ GPU_TDP_WATTS=54
 SYSTEM_OVERHEAD_WATTS=30
 ELECTRICITY_RATE=0.15
 SPENDING_CAP=10.00
 ALLOW_MODEL_UPDATES=false
@@ -252,8 +252,50 @@ class GatewayHandler(BaseHTTPRequestHandler):
        length = int(self.headers.get("Content-Length", 0))
        body = json.loads(self.rfile.read(length)) if length > 0 else None
        # Model update endpoint — downloads new GGUF and reloads
        if self.path == "/admin/update-model" and body:
            self._handle_model_update(body)
            return
        self._proxy_to_ollama(self.path, body)
    def _handle_model_update(self, body):
        """Download a new GGUF from a URL and register it with Ollama.

        Request: {"url": "https://mortdec.ai/dl/...", "name": "mortdecai-v5"}

        This is opt-in — the gateway operator must enable
        ALLOW_MODEL_UPDATES=true in the environment.

        Args:
            body: Parsed JSON request body. Expected to be a dict with a
                required "url" (http/https) and an optional "name"
                (defaults to "mortdecai-latest").

        Responses (all sent through ``self._send_json``; nothing is returned):
            403: model updates are disabled.
            400: body is not a JSON object, "url" is missing or not an
                http(s) URL, or "name" is not a safe model/file name.
            200: the download and ``ollama create`` both succeeded.
            500: the download, the file write, or ``ollama create`` failed.
        """
        import re
        import subprocess

        if os.environ.get("ALLOW_MODEL_UPDATES", "false").lower() != "true":
            self._send_json(403, {"error": "Model updates disabled. Set ALLOW_MODEL_UPDATES=true in .env to enable."})
            return

        # The caller hands over whatever json.loads produced, which may be a
        # list or scalar; reject those instead of crashing on body.get().
        if not isinstance(body, dict):
            self._send_json(400, {"error": "request body must be a JSON object"})
            return

        url = body.get("url")
        name = body.get("name", "mortdecai-latest")
        if not url:
            self._send_json(400, {"error": "url is required"})
            return
        if not isinstance(url, str) or not url.lower().startswith(("http://", "https://")):
            self._send_json(400, {"error": "url must be an http(s) URL"})
            return

        # `name` is request-controlled and ends up both in a file path and in
        # the docker/ollama argv. Forbid path separators (directory traversal)
        # and a leading "-" or "." (option injection / hidden files).
        if not isinstance(name, str) or not re.fullmatch(r"[A-Za-z0-9][A-Za-z0-9._:-]*", name):
            self._send_json(400, {"error": "name may only contain letters, digits, '.', '_', ':' and '-'"})
            return

        # Download GGUF. Presumably the host-side "models/" directory is what
        # the Ollama container sees as "/models" — TODO confirm the mount.
        final_path = f"models/{name}.gguf"
        # Stream into a sibling temp file so a failed or interrupted download
        # never leaves a truncated .gguf at the real path.
        partial_path = final_path + ".part"
        try:
            print(f"Downloading model from {url}...")
            # The context manager releases the connection even on errors;
            # with stream=True it is otherwise held until the body is consumed.
            with requests.get(url, stream=True, timeout=600) as r:
                r.raise_for_status()
                with open(partial_path, "wb") as f:
                    # 1 MiB chunks: GGUF files are large, tiny chunks only add
                    # Python-level loop overhead.
                    for chunk in r.iter_content(chunk_size=1024 * 1024):
                        f.write(chunk)
            os.replace(partial_path, final_path)

            # Load the model into Ollama.
            # NOTE(review): no Modelfile is written here; this relies on an
            # existing /models/Modelfile inside the container, which presumably
            # must already reference the downloaded GGUF — confirm.
            subprocess.run(
                ["docker", "exec", "mortdecai-ollama", "ollama", "create", name, "-f", "/models/Modelfile"],
                timeout=120, check=True
            )
            self._send_json(200, {"status": "ok", "model": name, "message": "Model updated and loaded"})
        except Exception as e:
            # Top-level boundary for this endpoint: report any failure to the
            # client as a 500 after discarding the partial download, if any.
            try:
                os.remove(partial_path)
            except OSError:
                pass
            self._send_json(500, {"error": f"Update failed: {e}"})
    def _serve_dashboard(self):
        """Simple HTML dashboard showing usage stats."""
        with _stats_lock: