Add Anthropic API provider, short God messages, budget tracking
- llm_provider config: "anthropic" or "ollama" (default)
- Anthropic call with cost tracking and budget cap
- Auto-fallback to Ollama when budget exhausted
- God message prompt: "1-2 sentences max, Old Testament telegram"

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
+73
-2
@@ -1967,7 +1967,9 @@ def build_message_system_prompt(config) -> str:
|
|||||||
"You are God in a Minecraft server. Write a single spoken message to all players.\n"
|
"You are God in a Minecraft server. Write a single spoken message to all players.\n"
|
||||||
"You will be told what action was taken (if any) in response to a player's prayer.\n"
|
"You will be told what action was taken (if any) in response to a player's prayer.\n"
|
||||||
"Respond with ONLY the message text — no JSON, no quotes, no formatting.\n"
|
"Respond with ONLY the message text — no JSON, no quotes, no formatting.\n"
|
||||||
"Match the language the player prayed in. If they prayed in Spanish, respond in Spanish.\n\n"
|
"Match the language the player prayed in. If they prayed in Spanish, respond in Spanish.\n"
|
||||||
|
"KEEP IT SHORT — 1-2 sentences max. This appears in Minecraft chat which has limited space.\n"
|
||||||
|
"Be punchy and dramatic, not verbose. Think Old Testament telegram.\n\n"
|
||||||
)
|
)
|
||||||
if _GOD_SOUL:
|
if _GOD_SOUL:
|
||||||
base += "Your identity and voice are defined by your soul:\n" + _GOD_SOUL + "\n\n"
|
base += "Your identity and voice are defined by your soul:\n" + _GOD_SOUL + "\n\n"
|
||||||
@@ -1986,7 +1988,13 @@ def build_message_system_prompt(config) -> str:
|
|||||||
def _llm_call(model: str, system: str, user: str, config: dict,
|
def _llm_call(model: str, system: str, user: str, config: dict,
|
||||||
fmt = None, temperature: float = 0.85,
|
fmt = None, temperature: float = 0.85,
|
||||||
max_tokens: int = 400, timeout: int = 60) -> str:
|
max_tokens: int = 400, timeout: int = 60) -> str:
|
||||||
"""Single Ollama chat call. Returns raw content string."""
|
"""LLM call — routes to Anthropic API or Ollama based on config."""
|
||||||
|
provider = config.get("llm_provider", "ollama")
|
||||||
|
|
||||||
|
if provider == "anthropic":
|
||||||
|
return _anthropic_call(model, system, user, config, temperature, max_tokens, timeout)
|
||||||
|
|
||||||
|
# Default: Ollama
|
||||||
payload = {
|
payload = {
|
||||||
"model": model,
|
"model": model,
|
||||||
"messages": [
|
"messages": [
|
||||||
@@ -2006,6 +2014,69 @@ def _llm_call(model: str, system: str, user: str, config: dict,
|
|||||||
return r.json()["message"]["content"]
|
return r.json()["message"]["content"]
|
||||||
|
|
||||||
|
|
||||||
|
# --- Anthropic API cost tracking ---

# Running estimate (USD) of Anthropic spend since this module was loaded.
# Every read and write must happen while holding _anthropic_cost_lock.
_anthropic_total_cost = 0.0
_anthropic_cost_lock = threading.Lock()


def _get_anthropic_cost():
    """Return the current accumulated Anthropic cost estimate.

    The value is read while holding ``_anthropic_cost_lock`` so that it is
    never observed in the middle of an update made under the same lock.
    """
    _anthropic_cost_lock.acquire()
    try:
        return _anthropic_total_cost
    finally:
        _anthropic_cost_lock.release()
|
||||||
|
|
||||||
|
def _anthropic_call(model: str, system: str, user: str, config: dict,
                    temperature: float = 0.85, max_tokens: int = 400,
                    timeout: int = 60) -> str:
    """Call the Anthropic Messages API, tracking estimated cost against a budget.

    When the accumulated cost has already reached ``config["anthropic_budget"]``
    (default 5.00), the request is sent to Ollama's ``/api/chat`` endpoint
    instead, using ``config["fallback_model"]``, then ``config["model"]``,
    then ``"gemma3n:e4b"`` as the model name.

    Args:
        model: Anthropic model name placed in the request body.
        system: System prompt text.
        user: Content of the single user message.
        config: Settings dict. Reads ``anthropic_api_key``,
            ``anthropic_budget`` and, on fallback, ``ollama_url``,
            ``fallback_model`` and ``model``.
        temperature: Sampling temperature forwarded to whichever backend runs.
        max_tokens: Output token cap (``max_tokens`` for Anthropic,
            ``num_predict`` for Ollama).
        timeout: Timeout in seconds passed to ``requests.post``.

    Returns:
        The text of the first content block of the Anthropic response, or
        the message content of the Ollama response when falling back.

    Raises:
        requests.HTTPError: If the backend answers with an error status
            (raised by ``raise_for_status``).
        KeyError: If the fallback path is taken and ``config`` has no
            ``ollama_url``, or if a response lacks the expected fields.
    """
    global _anthropic_total_cost

    budget = config.get("anthropic_budget", 5.00)

    # Take a snapshot under the lock and release it straight away. The
    # fallback request below can block for up to `timeout` seconds, and
    # holding the lock that long would stall every other caller as well as
    # _get_anthropic_cost().
    with _anthropic_cost_lock:
        spent = _anthropic_total_cost

    if spent >= budget:
        log.warning(f"Anthropic budget exhausted (${spent:.4f} >= ${budget:.2f}). Falling back to Ollama.")
        # Fall back to Ollama
        payload = {
            "model": config.get("fallback_model", config.get("model", "gemma3n:e4b")),
            "messages": [{"role": "system", "content": system}, {"role": "user", "content": user}],
            "stream": False,
            "options": {"temperature": temperature, "num_predict": max_tokens},
        }
        r = requests.post(f"{config['ollama_url']}/api/chat", json=payload, timeout=timeout)
        r.raise_for_status()
        return r.json()["message"]["content"]

    headers = {
        "x-api-key": config.get("anthropic_api_key", ""),
        "anthropic-version": "2023-06-01",
        "content-type": "application/json",
    }
    body = {
        "model": model,
        "max_tokens": max_tokens,
        "system": system,
        "messages": [{"role": "user", "content": user}],
        "temperature": temperature,
    }

    r = requests.post("https://api.anthropic.com/v1/messages", headers=headers, json=body, timeout=timeout)
    r.raise_for_status()
    data = r.json()

    input_tokens = data["usage"]["input_tokens"]
    output_tokens = data["usage"]["output_tokens"]

    # Track cost (Haiku pricing). NOTE: the per-million-token rates are fixed
    # here, so the estimate is only meaningful when `model` is priced like
    # Haiku.
    cost = (input_tokens / 1_000_000) * 0.80 + (output_tokens / 1_000_000) * 4.00

    # Record the spend before touching the response content, so that a reply
    # with an unexpected content shape still counts against the budget.
    with _anthropic_cost_lock:
        previous_total = _anthropic_total_cost
        _anthropic_total_cost += cost
        new_total = _anthropic_total_cost

    # Log once each time the total crosses a $0.50 boundary, and on every
    # call once 90% of the budget has been used. Comparing the half-dollar
    # bucket before and after avoids logging on every call while the total
    # is still below one cent.
    crossed_half_dollar = int(new_total * 2) > int(previous_total * 2)
    if crossed_half_dollar or new_total >= budget * 0.9:
        log.info(f"Anthropic cost: ${new_total:.4f} / ${budget:.2f}")

    return data["content"][0]["text"]
|
||||||
|
|
||||||
|
|
||||||
def _gateway_enabled(config) -> bool:
    """Return the truthiness of the ``use_langgraph_gateway`` config entry.

    A missing key is treated as ``False``.
    """
    flag = config.get("use_langgraph_gateway", False)
    return True if flag else False
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user