c5865feb35
- API key auth on all inference endpoints - Power/cost tracking: GPU TDP × inference time × electricity rate - Spending cap enforcement - Web dashboard with live stats - Docker compose for AMD ROCm (Strix Halo) or NVIDIA - Auto-setup script with GGUF loading - Tested against local Ollama Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
63 lines
1.6 KiB
YAML
63 lines
1.6 KiB
YAML
version: "3.8"

# Mortdecai Inference Gateway
# Deploy on any machine with an Ollama-compatible GPU.
#
# Usage:
#   docker compose up -d
#   # Dashboard at http://localhost:8434/dashboard
#
# For AMD ROCm (Strix Halo, RX 7000, etc.):
#   Uses the ollama/ollama:rocm image below.
#   Ensure ROCm drivers are installed on the host.
#
# For NVIDIA:
#   Requires nvidia-container-toolkit installed on the host.
#   See the commented 'deploy' stanza in the ollama service.

services:
  # Model server. The gateway talks to it over the Compose network
  # using the service name (see OLLAMA_URL in the gateway service).
  ollama:
    image: ollama/ollama:rocm
    container_name: mortdecai-ollama
    restart: unless-stopped
    ports:
      # Published on the host loopback interface only, so it is not
      # reachable from other machines. The gateway does not use this
      # mapping; it connects to ollama:11434 on the Compose network.
      - "127.0.0.1:11434:11434"
    volumes:
      - ollama-data:/root/.ollama
      - ./models:/models
    devices:
      # GPU device nodes passed through to the container for ROCm.
      - /dev/kfd:/dev/kfd
      - /dev/dri:/dev/dri
    environment:
      # Listen on all interfaces inside the container so the gateway
      # container can connect.
      - OLLAMA_HOST=0.0.0.0:11434
    # For NVIDIA, replace 'devices' above with the stanza below.
    # NOTE(review): the image above is pinned to the :rocm tag; an
    # NVIDIA host presumably also needs a non-ROCm image tag
    # (e.g. ollama/ollama) - confirm against the Ollama Docker docs.
    # deploy:
    #   resources:
    #     reservations:
    #       devices:
    #         - driver: nvidia
    #           count: all
    #           capabilities: [gpu]

  # Authenticated front end, built from the Dockerfile in this directory.
  gateway:
    build: .
    container_name: mortdecai-gateway
    restart: unless-stopped
    ports:
      # The only port published on all host interfaces.
      - "8434:8434"
    environment:
      - OLLAMA_URL=http://ollama:11434
      # NOTE(review): the fallback key is committed in this file, so it
      # is not a secret. Set API_KEY (shell or .env) before exposing
      # port 8434 beyond a trusted network.
      - API_KEY=${API_KEY:-mk_mortdecai_default}
      - GATEWAY_PORT=8434
      # Inputs for the gateway's power/cost tracking and spending cap.
      # Units and exact semantics are defined by the gateway code, which
      # is not part of this file. Each can be overridden from the
      # environment; the value after ':-' is the default.
      - GPU_TDP_WATTS=${GPU_TDP_WATTS:-54}
      - SYSTEM_OVERHEAD_WATTS=${SYSTEM_OVERHEAD_WATTS:-30}
      - ELECTRICITY_RATE=${ELECTRICITY_RATE:-0.15}
      - SPENDING_CAP=${SPENDING_CAP:-10.00}
      # Stored on the gateway-data volume mounted at /data below.
      - STATS_FILE=/data/stats.json
    volumes:
      - gateway-data:/data
    depends_on:
      # Controls start order only; there is no healthcheck, so this
      # does not wait for Ollama to be ready to serve requests.
      - ollama

# Named volumes with default settings.
volumes:
  ollama-data: {}
  gateway-data: {}