Add opt-in model update endpoint + API key support
Gateway: POST /admin/update-model downloads new GGUF and reloads. Disabled by default — requires ALLOW_MODEL_UPDATES=true in .env. Matt controls whether remote model updates are allowed. Self-play: --api-key flag for authenticated gateway connections. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -4,3 +4,4 @@ GPU_TDP_WATTS=54
|
|||||||
SYSTEM_OVERHEAD_WATTS=30
|
SYSTEM_OVERHEAD_WATTS=30
|
||||||
ELECTRICITY_RATE=0.15
|
ELECTRICITY_RATE=0.15
|
||||||
SPENDING_CAP=10.00
|
SPENDING_CAP=10.00
|
||||||
|
ALLOW_MODEL_UPDATES=false
|
||||||
|
|||||||
+42
@@ -252,8 +252,50 @@ class GatewayHandler(BaseHTTPRequestHandler):
|
|||||||
length = int(self.headers.get("Content-Length", 0))
|
length = int(self.headers.get("Content-Length", 0))
|
||||||
body = json.loads(self.rfile.read(length)) if length > 0 else None
|
body = json.loads(self.rfile.read(length)) if length > 0 else None
|
||||||
|
|
||||||
|
# Model update endpoint — downloads new GGUF and reloads
|
||||||
|
if self.path == "/admin/update-model" and body:
|
||||||
|
self._handle_model_update(body)
|
||||||
|
return
|
||||||
|
|
||||||
self._proxy_to_ollama(self.path, body)
|
self._proxy_to_ollama(self.path, body)
|
||||||
|
|
||||||
|
def _handle_model_update(self, body):
    """Download a new GGUF from a URL and register it with Ollama.

    Request: {"url": "https://mortdec.ai/dl/...", "name": "mortdecai-v5"}

    This is opt-in — the gateway operator must enable ALLOW_MODEL_UPDATES=true.

    Args:
        body: Parsed JSON request body. Must be an object with a required
            "url" (http/https) and an optional "name" (defaults to
            "mortdecai-latest").

    Responses (sent via ``self._send_json``):
        403 -- model updates are not enabled in the environment.
        400 -- body is not an object, "url" is missing/invalid, or "name"
               contains characters that are unsafe in a file name.
        500 -- the download or the ``ollama create`` step failed.
        200 -- the file was downloaded and ``ollama create`` succeeded.
    """
    if os.environ.get("ALLOW_MODEL_UPDATES", "false").lower() != "true":
        self._send_json(403, {"error": "Model updates disabled. Set ALLOW_MODEL_UPDATES=true in .env to enable."})
        return

    # A JSON array/string/number body has no .get(); reject it up front
    # instead of crashing outside the error handler below.
    if not isinstance(body, dict):
        self._send_json(400, {"error": "request body must be a JSON object"})
        return

    url = body.get("url")
    name = body.get("name", "mortdecai-latest")
    if not url:
        self._send_json(400, {"error": "url is required"})
        return
    if not isinstance(url, str) or not url.lower().startswith(("http://", "https://")):
        self._send_json(400, {"error": "url must be an http(s) URL"})
        return

    # `name` is untrusted and ends up in a file path and a command line.
    # Forbid path separators (directory traversal) and require an
    # alphanumeric first character (so it can never look like a CLI flag).
    import re
    if not isinstance(name, str) or not re.fullmatch(r"[A-Za-z0-9][A-Za-z0-9._:-]*", name):
        self._send_json(400, {"error": "name may only contain letters, digits, '.', '_', ':' and '-'"})
        return

    import subprocess

    # Written relative to the gateway's working directory.
    # NOTE(review): presumably this directory is mounted at /models inside
    # the mortdecai-ollama container -- confirm against the compose file.
    final_path = f"models/{name}.gguf"
    partial_path = final_path + ".part"

    try:
        # Download GGUF to a temporary file so an interrupted transfer
        # never leaves a truncated model under the final name.
        print(f"Downloading model from {url}...")
        with requests.get(url, stream=True, timeout=600) as r:
            r.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        f.write(chunk)
        os.replace(partial_path, final_path)

        # Register the model with Ollama from the existing Modelfile.
        # NOTE(review): nothing here writes /models/Modelfile; it is assumed
        # to already exist and to reference the downloaded GGUF -- confirm.
        subprocess.run(
            ["docker", "exec", "mortdecai-ollama", "ollama", "create", name, "-f", "/models/Modelfile"],
            timeout=120, check=True
        )
    except Exception as e:
        # Request-handler boundary: report any failure to the client.
        try:
            os.remove(partial_path)
        except OSError:
            pass  # nothing was written, or it was already moved into place
        self._send_json(500, {"error": f"Update failed: {e}"})
        return

    self._send_json(200, {"status": "ok", "model": name, "message": "Model updated and loaded"})
|
||||||
|
|
||||||
def _serve_dashboard(self):
|
def _serve_dashboard(self):
|
||||||
"""Simple HTML dashboard showing usage stats."""
|
"""Simple HTML dashboard showing usage stats."""
|
||||||
with _stats_lock:
|
with _stats_lock:
|
||||||
|
|||||||
Reference in New Issue
Block a user