Mortdecai Gateway — authenticated Ollama proxy with power metering

- API key auth on all inference endpoints
- Power/cost tracking: GPU TDP × inference time × electricity rate
- Spending cap enforcement
- Web dashboard with live stats
- Docker compose for AMD ROCm (Strix Halo) or NVIDIA
- Auto-setup script with GGUF loading
- Tested against local Ollama

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-20 19:26:43 -04:00
commit c5865feb35
7 changed files with 561 additions and 0 deletions
+62
View File
@@ -0,0 +1,62 @@
# NOTE: Compose v2 treats the top-level `version` key as informational only
# and may warn that it is obsolete; it is kept here for older docker-compose
# releases.
version: "3.8"
# Mortdecai Inference Gateway
# Deploy on any machine with an Ollama-compatible GPU.
#
# Usage:
#   docker compose up -d
#   # Dashboard at http://localhost:8434/dashboard
#
# For AMD ROCm (Strix Halo, RX 7000, etc.):
#   The ollama service uses the ollama/ollama:rocm image and passes /dev/kfd
#   and /dev/dri through to the container. Ensure ROCm drivers are installed
#   on the host.
#
# For NVIDIA:
#   Requires nvidia-container-toolkit installed on the host. Swap the ollama
#   service's `devices` section for the commented-out `deploy` block that
#   follows it.
# Two containers: `ollama` serves the models and `gateway` (built from this
# directory) sits in front of it, reaching it through OLLAMA_URL.
services:
  ollama:
    image: ollama/ollama:rocm
    container_name: mortdecai-ollama
    restart: unless-stopped
    ports:
      # Published on the host's loopback interface only, so other machines
      # cannot reach Ollama directly. The gateway does not rely on this
      # mapping; it connects over the Compose network (see OLLAMA_URL below).
      - "127.0.0.1:11434:11434"
    volumes:
      - ollama-data:/root/.ollama
      - ./models:/models
    # AMD ROCm device pass-through.
    devices:
      - /dev/kfd:/dev/kfd
      - /dev/dri:/dev/dri
    environment:
      # Listen on all interfaces inside the container so the gateway
      # container can connect to it.
      - OLLAMA_HOST=0.0.0.0:11434
    # For NVIDIA, replace 'devices' above with:
    # deploy:
    #   resources:
    #     reservations:
    #       devices:
    #         - driver: nvidia
    #           count: all
    #           capabilities: [gpu]
    # NOTE(review): the `:rocm` image tag is AMD-specific; an NVIDIA host
    # presumably needs a different image tag as well - confirm.

  gateway:
    build: .
    container_name: mortdecai-gateway
    restart: unless-stopped
    ports:
      # The only port published on all host interfaces.
      - "8434:8434"
    environment:
      - OLLAMA_URL=http://ollama:11434
      # SECURITY: the fallback key below is committed to the repository and
      # therefore public. Set API_KEY in the environment before exposing
      # port 8434 beyond a trusted network.
      - API_KEY=${API_KEY:-mk_mortdecai_default}
      - GATEWAY_PORT=8434
      # Power/cost metering inputs. Each one can be overridden from the
      # environment Compose runs in; the value after `:-` is the default.
      # NOTE(review): units are presumably watts, currency per kWh and a
      # currency amount - confirm against the gateway code.
      - GPU_TDP_WATTS=${GPU_TDP_WATTS:-54}
      - SYSTEM_OVERHEAD_WATTS=${SYSTEM_OVERHEAD_WATTS:-30}
      - ELECTRICITY_RATE=${ELECTRICITY_RATE:-0.15}
      - SPENDING_CAP=${SPENDING_CAP:-10.00}
      # Lives on the gateway-data volume mounted at /data below.
      - STATS_FILE=/data/stats.json
    volumes:
      - gateway-data:/data
    # Short-form depends_on controls start order only; it does not wait for
    # Ollama to be ready to serve requests.
    depends_on:
      - ollama
# Named volumes referenced by the services: ollama-data is mounted at
# /root/.ollama in the ollama container, gateway-data at /data in the gateway.
volumes:
  ollama-data: {}
  gateway-data: {}