Files
mortdecai-gateway/docker-compose.yml
T
Seth df9f623943 Fully automated setup: downloads GGUF, loads model, tests inference
Setup script now:
1. Generates API key
2. Starts Docker containers
3. Downloads GGUF from mortdec.ai automatically (~5.3GB)
4. Creates Ollama model with correct chat template
5. Runs test inference
6. Prints connection details for Seth

Matt just runs ./setup.sh — no manual file copying.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-20 19:33:39 -04:00

63 lines
1.6 KiB
YAML

version: "3.8"
# NOTE(review): current Docker Compose releases treat the top-level `version`
# key as obsolete (informational only) and print a warning; confirm whether any
# older tooling still needs it before removing.
#
# Mortdecai Inference Gateway
# Deploy on any machine with an Ollama-compatible GPU.
#
# Usage:
#   docker compose up -d
#   # Dashboard at http://localhost:8434/dashboard
#
# For AMD ROCm (Strix Halo, RX 7000, etc.):
#   The ollama service uses the ollama/ollama:rocm image and maps /dev/kfd and
#   /dev/dri into the container. Ensure ROCm drivers are installed on the host.
#
# For NVIDIA:
#   Requires nvidia-container-toolkit installed on the host. See the
#   commented-out 'deploy' stanza under the ollama service.
services:
  # Ollama inference backend. The :rocm tag is the AMD GPU build of the image.
  ollama:
    image: ollama/ollama:rocm
    container_name: mortdecai-ollama
    restart: unless-stopped
    ports:
      # Published on the host's loopback interface only, so the Ollama API is
      # not reachable from other machines. The gateway does not use this
      # mapping; it reaches Ollama over the Compose network via OLLAMA_URL.
      - "127.0.0.1:11434:11434"
    volumes:
      - ollama-data:/root/.ollama  # named volume, declared at the bottom
      - ./models:/models:ro  # host ./models directory, mounted read-only
    devices:
      # AMD GPU device nodes passed through to the container.
      - /dev/kfd:/dev/kfd
      - /dev/dri:/dev/dri
    environment:
      # Bind on all container interfaces so the gateway container can connect.
      - OLLAMA_HOST=0.0.0.0:11434
    # For NVIDIA, change 'image' above to ollama/ollama (no :rocm tag) and
    # replace 'devices' above with:
    # deploy:
    #   resources:
    #     reservations:
    #       devices:
    #         - driver: nvidia
    #           count: all
    #           capabilities: [gpu]

  # Gateway service; its image is built from this directory (build context ".").
  gateway:
    build: .
    container_name: mortdecai-gateway
    restart: unless-stopped
    ports:
      # The only port published on all host interfaces.
      - "8434:8434"
    environment:
      # Ollama is addressed by its Compose service name on the internal network.
      - OLLAMA_URL=http://ollama:11434
      # For the ${VAR:-default} entries below, the default applies when the
      # variable is unset or empty in the shell environment / .env file.
      #
      # WARNING: the API_KEY fallback is a fixed, publicly known value. Because
      # port 8434 is published on all interfaces, always set API_KEY (shell or
      # .env) before exposing this host. To make a missing key a hard error,
      # use ${API_KEY:?API_KEY must be set} instead.
      - API_KEY=${API_KEY:-mk_mortdecai_default}
      - GATEWAY_PORT=8434
      - GPU_TDP_WATTS=${GPU_TDP_WATTS:-54}
      - SYSTEM_OVERHEAD_WATTS=${SYSTEM_OVERHEAD_WATTS:-30}
      - ELECTRICITY_RATE=${ELECTRICITY_RATE:-0.15}
      - SPENDING_CAP=${SPENDING_CAP:-10.00}
      # Path lies inside the gateway-data volume mounted at /data below.
      - STATS_FILE=/data/stats.json
    volumes:
      - gateway-data:/data
    # Short-form depends_on only orders container start-up; it does not wait
    # for Ollama to be ready to serve requests.
    depends_on:
      - ollama

# Named volumes with default driver settings.
volumes:
  ollama-data: {}
  gateway-data: {}