#!/usr/bin/env python3
"""
minecraft_env.py — Gymnasium environment wrapping a mineflayer bot.

The bot runs in a Node.js subprocess, communicating via stdin/stdout JSON.
The Python Gym env sends actions and receives observations at ~600ms ticks.

Usage:
    from minecraft_env import MinecraftCombatEnv
    env = MinecraftCombatEnv()
    obs, info = env.reset()
    while True:
        action = env.action_space.sample()  # or policy(obs)
        obs, reward, terminated, truncated, info = env.step(action)
"""

import json
import operator
import os
import queue
import signal
import subprocess
import threading
import time
from pathlib import Path

import gymnasium as gym
import numpy as np
from gymnasium import spaces

INGAME_DIR = Path(__file__).resolve().parent.parent.parent / "ingame"


def _as_number(value, default):
    """Coerce a JSON field to ``float``, falling back only when unusable.

    Unlike ``value or default`` this keeps a legitimate ``0`` (dead, starving,
    y == 0) instead of silently replacing it with the default. ``None``,
    non-numeric values and NaN/inf yield ``float(default)``.
    """
    try:
        number = float(value)
    except (TypeError, ValueError):
        return float(default)
    return number if np.isfinite(number) else float(default)


class MinecraftCombatEnv(gym.Env):
    """Minecraft combat survival environment via mineflayer bot.

    Commands are written one per line to the bot's stdin (an action name,
    ``observe`` or ``quit``); the bot answers with one JSON object per line on
    stdout. A background thread drains stdout into a queue so that every read
    can honour a real timeout.
    """

    metadata = {"render_modes": ["human"], "render_fps": 2}

    # Discrete actions
    ACTIONS = ["forward", "fight", "flee", "eat", "sprint", "idle"]

    # Indices of the actions that get reward shaping, derived from ACTIONS so
    # the two can never drift apart.
    _FIGHT = ACTIONS.index("fight")
    _FLEE = ACTIONS.index("flee")
    _EAT = ACTIONS.index("eat")
    _IDLE = ACTIONS.index("idle")

    # Hostile mob types for reward calculation.
    # NOTE(review): not consulted by the code below, which relies on the
    # per-mob "hostile" flag sent by the bot.
    HOSTILE = {
        "zombie", "husk", "skeleton", "creeper", "spider", "cave_spider",
        "witch", "enderman", "drowned", "stray", "phantom",
        "parched", "camel_husk", "slime", "magma_cube",
    }

    # Substrings that mark an inventory entry as edible.
    FOOD_KEYWORDS = ("beef", "bread", "pork", "chicken", "apple", "potato", "cod")

    MAX_HP = 20.0
    MAX_FOOD = 20.0
    MOB_VIEW_RANGE = 24.0    # distance that maps to 1.0 in the observation
    MELEE_RANGE = 6.0        # a hostile closer than this counts as "nearby"
    MAX_Y = 320.0
    RESPONSE_TIMEOUT = 5.0   # seconds to wait for the reply to a command
    SPAWN_TIMEOUT = 30.0     # seconds to wait for the bot to join the world

    def __init__(
        self,
        host="192.168.0.244",
        port=25568,
        username="RLBot",
        max_steps=600,   # 600 ticks × 0.6s = 6 minutes per episode
        tick_rate=0.6,   # seconds per tick (sword cooldown rate)
        render_mode=None,
    ):
        super().__init__()
        self.host = host
        self.port = port
        self.username = username
        self.max_steps = max_steps
        self.tick_rate = tick_rate
        self.render_mode = render_mode

        # Observation space: 13 floats normalized to [0, 1]
        # [hp, food, nearest_mob_dist, nearest_mob_angle, mob_count,
        #  has_sword, has_armor, has_food, is_day, on_water,
        #  y_level_norm, damage_taken_this_tick, is_fleeing]
        self.observation_space = spaces.Box(
            low=0.0, high=1.0, shape=(13,), dtype=np.float32
        )

        # Action space: 6 discrete actions
        self.action_space = spaces.Discrete(len(self.ACTIONS))

        # Internal state
        self.proc = None
        self._reader = None   # thread draining the bot's stdout
        self._lines = None    # queue of raw stdout lines; None marks EOF
        self.step_count = 0
        self.total_reward = 0
        self.kills = 0
        self.prev_hp = 20.0
        self.prev_food = 20
        self.alive = False
        self.last_obs = None

    # ------------------------------------------------------------------
    # Subprocess plumbing
    # ------------------------------------------------------------------

    def _start_bot(self):
        """Start the mineflayer bot subprocess and its stdout reader thread."""
        # Always tear down the previous bot, even one that already exited,
        # so its pipes and reader thread are released.
        self._stop_bot()

        bot_script = INGAME_DIR / "rl_bot.js"
        self.proc = subprocess.Popen(
            ["node", str(bot_script), self.host, str(self.port), self.username],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            text=True,
            bufsize=1,  # line buffered
        )
        # The stream and queue are passed explicitly so a lingering reader
        # from an older bot can never feed the new queue.
        self._lines = queue.Queue()
        self._reader = threading.Thread(
            target=self._pump_stdout,
            args=(self.proc.stdout, self._lines),
            daemon=True,
        )
        self._reader.start()

    @staticmethod
    def _pump_stdout(stream, sink):
        """Copy lines from ``stream`` into ``sink``; push ``None`` at EOF."""
        try:
            for line in iter(stream.readline, ""):
                sink.put(line)
        except (OSError, ValueError):
            # Pipe broke or was closed underneath us: treat as end of stream.
            pass
        finally:
            sink.put(None)

    def _stop_bot(self):
        """Stop the bot subprocess, reap it and release its pipes."""
        proc, self.proc = self.proc, None
        reader, self._reader = self._reader, None
        if proc is None:
            return

        try:
            if proc.poll() is None:
                proc.stdin.write("quit\n")
                proc.stdin.flush()
            proc.wait(timeout=3)
        except (OSError, ValueError, subprocess.TimeoutExpired):
            # Polite shutdown failed: force it, and reap so no zombie is left.
            try:
                proc.kill()
                proc.wait(timeout=3)
            except (OSError, subprocess.TimeoutExpired):
                pass

        if reader is not None:
            reader.join(timeout=1.0)

        try:
            proc.stdin.close()
        except (OSError, ValueError):
            pass
        # Closing stdout while the reader is still blocked in readline()
        # could block on the buffer lock, so only close once it has finished.
        if reader is None or not reader.is_alive():
            try:
                proc.stdout.close()
            except (OSError, ValueError):
                pass

    def _read_json(self, timeout, accept=None):
        """Return the next JSON object from the bot, or ``None``.

        Blank lines, non-JSON lines, non-object JSON values and objects
        rejected by ``accept`` are skipped. ``None`` is returned when
        ``timeout`` seconds elapse, the bot's stdout reaches EOF, or no bot
        has been started.
        """
        if self._lines is None:
            return None

        deadline = time.monotonic() + timeout
        while True:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                return None
            try:
                line = self._lines.get(timeout=remaining)
            except queue.Empty:
                return None
            if line is None:
                # EOF: put the marker back so later reads fail fast as well.
                self._lines.put(None)
                return None

            line = line.strip()
            if not line:
                continue
            try:
                message = json.loads(line)
            except json.JSONDecodeError:
                continue
            if not isinstance(message, dict):
                continue
            if accept is None or accept(message):
                return message

    def _send(self, cmd):
        """Send a command to the bot and read the JSON response.

        Returns the first JSON object received within ``RESPONSE_TIMEOUT``
        seconds, or ``None`` if the bot is not running, the pipe is broken,
        or no reply arrives in time.
        """
        if self.proc is None or self.proc.poll() is not None:
            return None
        try:
            self.proc.stdin.write(cmd + "\n")
            self.proc.stdin.flush()
        except (OSError, ValueError):  # BrokenPipeError is an OSError
            return None
        return self._read_json(self.RESPONSE_TIMEOUT)

    @staticmethod
    def _is_spawn_message(message):
        """Tell whether ``message`` signals that the bot is in the world."""
        return message.get("event") == "ready" or "hp" in message

    # ------------------------------------------------------------------
    # State decoding
    # ------------------------------------------------------------------

    @staticmethod
    def _hostile_distances(data):
        """Return the distance of every mob flagged hostile in ``data``.

        A hostile mob without a usable ``dist`` is reported as infinitely far
        away instead of raising.
        """
        mobs = data.get("mobs") or []
        if not isinstance(mobs, (list, tuple)):
            return []
        return [
            _as_number(mob.get("dist"), float("inf"))
            for mob in mobs
            if isinstance(mob, dict) and mob.get("hostile", False)
        ]

    def _parse_observation(self, data):
        """Convert JSON bot state to numpy observation vector.

        Returns a float32 vector of 13 features clipped to [0, 1]; all zeros
        when ``data`` is empty or carries no ``hp`` field. Reads
        ``self.prev_hp`` (for the damage feature) but does not modify it.
        """
        if not data or "hp" not in data:
            return np.zeros(13, dtype=np.float32)

        current_hp = _as_number(data.get("hp"), 0.0)
        current_food = _as_number(data.get("food"), 0.0)

        # Nearest hostile mob
        distances = self._hostile_distances(data)
        if distances:
            nearest_dist = min(min(distances) / self.MOB_VIEW_RANGE, 1.0)
        else:
            nearest_dist = 1.0  # no mob = max distance
        # Placeholder: the relative angle is not computed yet.
        nearest_angle = 0.5
        mob_count = min(len(distances) / 10.0, 1.0)

        # Inventory flags. A non-string inventory is flattened to text so the
        # substring checks work on it too.
        inv = data.get("inv") or ""
        if isinstance(inv, (list, tuple)):
            inv = " ".join(str(item) for item in inv)
        elif not isinstance(inv, str):
            inv = str(inv)
        has_sword = 1.0 if "sword" in inv else 0.0
        has_armor = 1.0 if (data.get("armor") or "none") != "none" else 0.0
        has_food = 1.0 if any(word in inv for word in self.FOOD_KEYWORDS) else 0.0

        # World state
        is_day = 1.0 if data.get("day", True) else 0.0
        on_water = 1.0 if data.get("below", "") == "water" else 0.0

        # Y level (normalize: 0=bedrock, 320=max → 0-1)
        pos = data.get("pos")
        y = _as_number(pos.get("y"), 64.0) if isinstance(pos, dict) else 64.0
        y_norm = min(max(y, 0.0), self.MAX_Y) / self.MAX_Y

        # Damage taken this tick
        prev_hp = _as_number(self.prev_hp, self.MAX_HP)
        damage = max(0.0, prev_hp - current_hp) / self.MAX_HP

        # Is currently fleeing (HP < 5)
        is_fleeing = 1.0 if current_hp < 5 else 0.0

        obs = np.array([
            current_hp / self.MAX_HP,
            current_food / self.MAX_FOOD,
            nearest_dist,
            nearest_angle,
            mob_count,
            has_sword,
            has_armor,
            has_food,
            is_day,
            on_water,
            y_norm,
            damage,
            is_fleeing,
        ], dtype=np.float32)
        # Keep the vector inside the declared Box even on odd bot values.
        return np.clip(obs, 0.0, 1.0)

    def _calc_reward(self, data, action):
        """Calculate reward from state transition.

        Updates ``self.kills``, ``self.prev_hp`` and ``self.prev_food`` as a
        side effect. Returns -100.0 without touching any state when ``data``
        is empty or has no ``hp`` field.
        """
        if not data or "hp" not in data:
            return -100.0  # lost connection = death equivalent

        hp = _as_number(data.get("hp"), 0.0)
        food = int(_as_number(data.get("food"), self.MAX_FOOD))
        prev_hp = _as_number(self.prev_hp, self.MAX_HP)
        prev_food = int(_as_number(self.prev_food, self.MAX_FOOD))

        # Survival reward: +1 per tick alive
        reward = 1.0

        # Damage penalty: -2 per HP lost
        reward -= max(0.0, prev_hp - hp) * 2.0

        # Death penalty
        if hp <= 0 or data.get("died", False):
            reward -= 100.0

        # Kill reward. A state without a kill counter leaves the running
        # total unchanged rather than resetting it to zero.
        known_kills = int(_as_number(self.kills, 0))
        new_kills = int(_as_number(data.get("kills"), known_kills))
        if new_kills > known_kills:
            reward += (new_kills - known_kills) * 10.0
        self.kills = new_kills

        if action == self._EAT:
            # Eating when hungry: good
            if prev_food < 14 and food > prev_food:
                reward += 5.0
            # Eating when full: wasted action
            if prev_food >= 18:
                reward -= 1.0

        hostile_nearby = any(
            dist < self.MELEE_RANGE for dist in self._hostile_distances(data)
        )

        # Fighting when no mobs nearby: wasted
        if action == self._FIGHT and not hostile_nearby:
            reward -= 0.5

        # Fleeing when HP is low and mobs nearby: good decision
        if action == self._FLEE and hp < 8 and hostile_nearby:
            reward += 3.0

        # Idle penalty (doing nothing when threats exist)
        if action == self._IDLE and hostile_nearby:
            reward -= 2.0

        # Update state
        self.prev_hp = hp
        self.prev_food = food

        return reward

    # ------------------------------------------------------------------
    # Gymnasium API
    # ------------------------------------------------------------------

    def reset(self, seed=None, options=None):
        """Reset the environment — reconnect bot and start new episode.

        Returns ``(obs, info)``. ``info["raw"]`` is the last JSON object
        received from the bot, or ``None`` if it never answered; in that case
        ``obs`` is all zeros.
        """
        super().reset(seed=seed)

        self._start_bot()

        # Wait for bot to spawn
        data = self._read_json(self.SPAWN_TIMEOUT, accept=self._is_spawn_message)

        if data is None:
            # Fallback: send observe. Only worth waiting if the bot is alive.
            if self.proc is not None and self.proc.poll() is None:
                time.sleep(3)
            data = self._send("observe")
        elif "hp" not in data:
            # A bare "ready" event carries no state; ask for a real one so the
            # first observation is not all zeros.
            state = self._send("observe")
            if state and "hp" in state:
                data = state

        self.step_count = 0
        self.total_reward = 0.0
        self.kills = int(_as_number(data.get("kills"), 0)) if data else 0
        self.prev_hp = _as_number(data.get("hp"), self.MAX_HP) if data else self.MAX_HP
        self.prev_food = int(_as_number(data.get("food"), self.MAX_FOOD)) if data else 20
        # Not alive if the bot never produced any message.
        self.alive = bool(data)

        obs = self._parse_observation(data)
        self.last_obs = obs
        info = {"raw": data}

        return obs, info

    def step(self, action):
        """Execute one action and return (obs, reward, terminated, truncated, info).

        Raises ``IndexError`` for an action outside ``range(len(ACTIONS))``
        and ``TypeError`` for a non-integer action. The episode terminates
        when the bot dies or when no valid state could be obtained.
        """
        # operator.index accepts Python and NumPy integers but rejects floats.
        action_index = operator.index(action)
        if not 0 <= action_index < len(self.ACTIONS):
            # Negative values would otherwise silently wrap around.
            raise IndexError(f"action {action_index} out of range")

        self.step_count += 1
        action_name = self.ACTIONS[action_index]

        # Send action to bot
        data = self._send(action_name)

        # Wait for game tick
        time.sleep(self.tick_rate)

        # Get observation after action
        if data is None or "hp" not in data:
            obs_data = self._send("observe")
        else:
            obs_data = data

        obs = self._parse_observation(obs_data)
        reward = self._calc_reward(obs_data, action_index)
        self.total_reward += reward

        # Check termination. A missing state is scored as a death, so end
        # the episode instead of paying -100 on every remaining step.
        connection_lost = not obs_data or "hp" not in obs_data
        died = not connection_lost and (
            _as_number(obs_data.get("hp"), 0.0) <= 0
            or bool(obs_data.get("died", False))
        )
        terminated = connection_lost or died
        if terminated:
            self.alive = False

        # Check truncation (max steps)
        truncated = self.step_count >= self.max_steps

        info = {
            "raw": obs_data,
            "step": self.step_count,
            "total_reward": self.total_reward,
            "kills": self.kills,
            "alive": self.alive,
            "connection_lost": connection_lost,
        }

        self.last_obs = obs

        if self.render_mode == "human":
            self.render()

        return obs, reward, terminated, truncated, info

    def render(self):
        """Print current state."""
        if self.last_obs is not None:
            hp = self.last_obs[0] * 20
            food = self.last_obs[1] * 20
            mob_dist = self.last_obs[2] * 24
            # round() guards against float32 error truncating e.g. 2.9999 to 2.
            mob_count = int(round(float(self.last_obs[4]) * 10))
            print(f" Step {self.step_count}: HP={hp:.0f} Food={food:.0f} "
                  f"Mobs={mob_count}@{mob_dist:.0f}b Kills={self.kills} "
                  f"R={self.total_reward:.1f}")

    def close(self):
        """Clean up."""
        self._stop_bot()