def _handle_model_update(self, body):
    """Download a new GGUF from a URL and register it with Ollama.

    Request body: {"url": "https://mortdec.ai/dl/...", "name": "mortdecai-v5"}

    This is opt-in: the gateway operator must set ALLOW_MODEL_UPDATES=true.

    Args:
        body: Decoded JSON request body. Must be an object containing
            "url" (required, http/https) and optionally "name"
            (defaults to "mortdecai-latest").

    Responses (sent through ``self._send_json``):
        403 -- updates are disabled by configuration.
        400 -- body is not an object, "url" is missing/invalid, or "name"
               contains characters that are unsafe in a file name.
        200 -- download and ``ollama create`` both succeeded.
        500 -- download, file write, or ``ollama create`` failed.

    NOTE(review): this method performs no caller authentication itself;
    confirm the endpoint is protected before enabling it. It also does not
    restrict which host the URL points at, so an enabled endpoint can be
    used to make the gateway fetch arbitrary http(s) URLs.
    """
    # Function-scope imports: only this opt-in code path needs them.
    import re
    import subprocess
    from urllib.parse import urlsplit

    if os.environ.get("ALLOW_MODEL_UPDATES", "false").lower() != "true":
        self._send_json(403, {"error": "Model updates disabled. Set ALLOW_MODEL_UPDATES=true in .env to enable."})
        return

    # Valid JSON may also be a list/string/number; .get() would crash on it.
    if not isinstance(body, dict):
        self._send_json(400, {"error": "request body must be a JSON object"})
        return

    url = body.get("url")
    name = body.get("name", "mortdecai-latest")
    if not url:
        self._send_json(400, {"error": "url is required"})
        return

    # Reject non-http(s) schemes up front with a clear 400 instead of a 500.
    try:
        parts = urlsplit(url) if isinstance(url, str) else None
    except ValueError:
        parts = None
    if parts is None or parts.scheme not in ("http", "https") or not parts.netloc:
        self._send_json(400, {"error": "url must be an http(s) URL"})
        return

    # `name` becomes part of a file path, so forbid separators and "..":
    # otherwise {"name": "../../x"} could overwrite files outside models/.
    if not isinstance(name, str) or not re.fullmatch(r"[A-Za-z0-9][A-Za-z0-9._-]{0,127}", name):
        self._send_json(400, {"error": "name may only contain letters, digits, '.', '_' and '-'"})
        return

    final_path = f"models/{name}.gguf"
    # Stream into a sidecar file and rename on success so that a failed or
    # interrupted download never replaces a working model with a truncated one.
    partial_path = f"{final_path}.part"

    try:
        print(f"Downloading model from {url}...")
        # The context manager releases the connection even if writing fails.
        with requests.get(url, stream=True, timeout=600) as r:
            r.raise_for_status()
            with open(partial_path, "wb") as f:
                # 1 MiB chunks: GGUF files are large, 8 KiB writes are wasteful.
                for chunk in r.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)
        os.replace(partial_path, final_path)

        # NOTE(review): no Modelfile is generated here; this relies on an
        # existing /models/Modelfile inside the container that presumably
        # points at the downloaded GGUF -- confirm it matches `name`, and
        # that ./models is mounted at /models in mortdecai-ollama.
        subprocess.run(
            ["docker", "exec", "mortdecai-ollama", "ollama", "create", name, "-f", "/models/Modelfile"],
            timeout=120, check=True
        )

        self._send_json(200, {"status": "ok", "model": name, "message": "Model updated and loaded"})
    except Exception as e:
        # Handler boundary: report any failure to the client as a 500.
        try:
            os.remove(partial_path)
        except OSError:
            pass  # nothing left to clean up (already renamed or never created)
        self._send_json(500, {"error": f"Update failed: {e}"})