├── .gitignore ├── README.md ├── requirements.txt ├── core.py ├── pokemon_controller.py └── game.py /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | __pycache__/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Setup 2 | 3 | _Tested on macOS only._ 4 | 5 | 1. Create a `.env` file with your Claude API key `CLAUDE_KEY={{key}}` 6 | 2. Install the SameBoy GameBoy emulator https://sameboy.github.io/ 7 | 3. Download and run the Pokemon Yellow ROM https://www.emulatorgames.net/roms/gameboy-color/pokemon-yellow-version/ 8 | 4. Add `WINDOW_TITLE={{title of the emulator window}}` to the `.env` file 9 | 5. Create a Python virtual env `python3 -m venv venv` and activate it `source venv/bin/activate` 10 | 6. Install the dependencies `pip install -r requirements.txt` 11 | 7. Run the `core.py` script `python core.py` 12 | 8. Keep the emulator window in view and focused while the script runs (screenshots are taken from the screen and key presses are sent to the active window) 13 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | annotated-types==0.7.0 2 | anthropic==0.47.2 3 | anyio==4.8.0 4 | beautifulsoup4==4.13.3 5 | certifi==2025.1.31 6 | charset-normalizer==3.4.1 7 | distro==1.9.0 8 | h11==0.14.0 9 | httpcore==1.0.7 10 | httpx==0.28.1 11 | idna==3.10 12 | jiter==0.8.2 13 | MouseInfo==0.1.3 14 | pillow==11.1.0 15 | PyAutoGUI==0.9.54 16 | pydantic==2.10.6 17 | pydantic_core==2.27.2 18 | pygame==2.6.1 19 | PyGetWindow==0.0.9 20 | PyMsgBox==1.0.9 21 | pyobjc-core==11.0 22 | pyobjc-framework-Cocoa==11.0 23 | pyobjc-framework-Quartz==11.0 24 | pyperclip==1.9.0 25 | PyRect==0.2.0 26 | PyScreeze==1.0.1 27 | python-dotenv==1.0.1 28 | pytweening==1.2.0 29 | requests==2.32.3 30 | rubicon-objc==0.5.0 31 | sniffio==1.3.1 32 | soupsieve==2.6 33 | typing_extensions==4.12.2 34 | urllib3==2.3.0 35 | wikipedia==1.4.0 36
# --------------------------------------------------------------------------------
# /core.py:
# --------------------------------------------------------------------------------
# Game loop that lets Claude play Pokémon Yellow: every turn a screenshot of
# the emulator is sent to Claude, and the GameBoy button Claude selects through
# the `pokemon_controller` tool is pressed on the emulator.
from dotenv import load_dotenv
import os
from anthropic import Anthropic
import time
from pokemon_controller import PokemonController, read_image_to_base64

load_dotenv()

# Both values come from the environment (typically the .env file).
WINDOW_TITLE = os.getenv("WINDOW_TITLE") or "Pokemon - Yellow Version (UE) [C][!].gbc"
CLAUDE_KEY = os.getenv("CLAUDE_KEY")
client = Anthropic(api_key=CLAUDE_KEY)

# Initialize the Pokémon controller
# You may need to adjust these values based on your emulator position
# Set region=None to capture the entire screen, or provide coordinates for the emulator window
controller = PokemonController(region=None, window_title=WINDOW_TITLE)

# Define the Pokémon controller tool: a single "action" argument restricted to
# the eight GameBoy buttons.
pokemon_tool = {
    "name": "pokemon_controller",
    "description": "Control the Pokémon game using button presses.",
    "input_schema": {
        "type": "object",
        "properties": {
            "action": {
                "type": "string",
                "enum": ["up", "down", "left", "right", "a", "b", "start", "select"],
                "description": "The button to press on the GameBoy."
            }
        },
        "required": ["action"]
    }
}


def make_image_message():
    """Capture the emulator screen and wrap it in a user message.

    Returns:
        dict: A ``user`` message whose content is one base64-encoded JPEG
        image block holding the current screenshot.
    """
    # Capture the current state of the emulator
    screenshot_file = controller.capture_screen()
    game_state = read_image_to_base64(screenshot_file)
    return {
        "role": "user",
        "content": [{
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": "image/jpeg",
                "data": game_state,
            },
        }]
    }


# Initialize messages for Claude
messages = [
    {"role": "user", "content": """You are playing Pokémon Yellow on Game Boy. You will be given screenshots of the game, and you need to decide which button to press to progress in the game.

Available buttons:
- Up, Down, Left, Right: Move around the game world
- A: Interact with NPCs or objects, select menu options, confirm choices
- B: Cancel, exit menus, speed up text
- Start: Open the main menu
- Select: Used rarely for specific functions

Your goal is to explore the game, battle trainers, capture Pokémon, and eventually become the Pokémon League Champion.

Analyze each screenshot carefully to determine what's happening in the game, and choose the most appropriate button to press. The player is always in the center of the screen.
Here are some tips:
- Doors into buildings are always visible, if you want to enter a building, you have to find the door and walk to it."""},
]

# Game loop
running = True
max_turns = 100  # Limit the number of turns
turn = 0

while running and turn < max_turns:
    messages.append({"role": "user", "content": "What button would you like to press next? Analyze the current game state and make your decision."})
    messages.append(make_image_message())

    # Get Claude's next move
    response = client.messages.create(
        model="claude-3-7-sonnet-20250219",
        messages=messages,
        max_tokens=1000,
        tools=[pokemon_tool]
    )

    # Print Claude's thinking
    for content in response.content:
        if content.type == "text":
            print(f"Claude: {content.text}")

    # Add Claude's response to messages
    messages.append({"role": "assistant", "content": response.content})

    # Process tool use. Every tool_use block is answered with a tool_result
    # (also for an unknown tool or a failed press); an unanswered tool_use
    # would make the next API request invalid.
    tool_results = []
    for content in response.content:
        if content.type != "tool_use":
            continue

        if content.name == "pokemon_controller":
            action = content.input.get("action")
            print(f"Claude chose to press: {action}")

            # Execute the button press and report what actually happened
            pressed = controller.press_button(action)
            if pressed:
                outcome = f"Button {action} pressed successfully."
            else:
                outcome = f"Button {action} could not be pressed."
        else:
            pressed = False
            outcome = f"Unknown tool: {content.name}"

        tool_results.append({
            "type": "tool_result",
            "tool_use_id": content.id,
            "content": outcome,
            "is_error": not pressed,
        })

    # Add the results back to Claude in a single user message
    if tool_results:
        messages.append({"role": "user", "content": tool_results})

    # Increment turn counter
    turn += 1

    # Small delay between turns
    time.sleep(1.0)

# Final message when the game loop ends
if turn >= max_turns:
    print(f"Game session ended after {max_turns} turns.")
else:
    print("Game session ended.")

# --------------------------------------------------------------------------------
# /pokemon_controller.py:
# --------------------------------------------------------------------------------
import pyautogui
import time
import os
import base64
from PIL import ImageGrab, Image
import Quartz
import pygetwindow as gw  # Add this import for window management


def getWindowByTitle(title: str):
    """Return the first on-screen window whose name contains ``title``.

    Args:
        title: Substring to look for in the window names.

    Returns:
        The window-info entry reported by Quartz for the first match.

    Raises:
        Exception: If no on-screen window name contains ``title``.
    """
    # Source: https://stackoverflow.com/questions/5286274/front-most-window-using-cgwindowlistcopywindowinfo
    windows = Quartz.CGWindowListCopyWindowInfo(
        Quartz.kCGWindowListExcludeDesktopElements | Quartz.kCGWindowListOptionOnScreenOnly,
        Quartz.kCGNullWindowID,
    )
    for win in windows:
        # `or ''` guards against an entry whose name is present but empty/None.
        if title in (win.get(Quartz.kCGWindowName, '') or ''):
            return win
    raise Exception('Could not find an active window.')  # Temporary hack.


class PokemonController:
    """Capture the emulator screen and send GameBoy button presses to it."""

    def __init__(self, window_title=None, region=None):
        """
        Initialize the Pokémon controller.

        Args:
            window_title (str): Title of the emulator window (used to locate it)
            region (tuple): Screen region to capture (left, top, right, bottom) - used as fallback
        """
        self.window_title = window_title
        self.region = region
        self.key_delay = 0.1  # Delay between key presses

        # GameBoy control mapping (GameBoy button -> keyboard key)
        self.controls = {
            'up': 'up',
            'down': 'down',
            'left': 'left',
            'right': 'right',
            'a': 'x',  # Typically X is mapped to A on emulators
            'b': 'z',  # Typically Z is mapped to B on emulators
            'start': 'enter',
            'select': 'backspace'
        }

    def find_window(self):
        """Find and return the emulator window, or None if it cannot be found."""
        if not self.window_title:
            return None

        try:
            # Get all windows and find the one that matches our title
            window = getWindowByTitle(self.window_title)
            if window:
                return window

            print(f"No window with title containing '{self.window_title}' found.")
        except Exception as e:
            # Best effort: a missing window is reported, not fatal.
            print(f"Error finding window: {str(e)}")

        return None

    def capture_screen(self, filename='emulator_screen.jpg'):
        """Capture the emulator window in RGB mode and save as JPEG.

        Falls back to ``self.region`` (or the whole screen when no region is
        set) if the window cannot be located or captured.

        Args:
            filename (str): Path the JPEG screenshot is written to.

        Returns:
            str: ``filename``.
        """
        screenshot = None

        # Try to capture the specific window
        window = self.find_window()
        if window:
            try:
                # NOTE: the window is not activated here, so it must already
                # be visible on screen; the pause only gives it time to settle.
                time.sleep(0.2)
                left, top, width, height = gw.getWindowGeometry(self.window_title)
                # Capture the window
                screenshot = ImageGrab.grab(bbox=(int(left), int(top), int(left + width), int(top + height)))
                print(f"Captured window: {self.window_title}")
            except Exception as e:
                print(f"Error capturing window: {str(e)}")

        # Fallback to region or full screen if window capture failed
        if screenshot is None:
            if self.region:
                screenshot = ImageGrab.grab(bbox=self.region)
                print("Captured specified region")
            else:
                screenshot = ImageGrab.grab()
                print("Captured entire screen")

        # Convert to RGB mode (in case it was captured as RGBA)
        screenshot = screenshot.convert('RGB')

        # Save the screenshot as JPEG
        screenshot.save(filename, 'JPEG')
        return filename

    def press_button(self, button, hold_time=0.1):
        """
        Press a button on the emulator

        Args:
            button (str): Button to press ('up', 'down', 'left', 'right', 'a', 'b', 'start', 'select')
            hold_time (float): How long to hold the button down

        Returns:
            bool: True if the key was pressed, False for an unknown button.
        """
        # NOTE: the window is only looked up, not activated; key presses go to
        # whichever window currently has focus.
        if self.find_window():
            time.sleep(0.1)  # Short delay before sending keys

        if button not in self.controls:
            print(f"Unknown button: {button}")
            return False

        key = self.controls[button]
        pyautogui.keyDown(key)
        try:
            time.sleep(hold_time)
        finally:
            # Always release the key, even if the wait is interrupted,
            # so it never stays stuck down.
            pyautogui.keyUp(key)
        time.sleep(self.key_delay)  # Short delay after button press
        return True

    def execute_action(self, action):
        """
        Execute a higher-level game action

        Args:
            action (dict): Action details including type and parameters.
                Supported types: 'button_press' (key 'button'), 'sequence'
                (key 'buttons') and 'navigate' (keys 'direction', 'steps').

        Returns:
            bool: True if every button press succeeded; False if any press
            failed or the action type is unknown.
        """
        action_type = action.get('type', '')

        if action_type == 'button_press':
            return self.press_button(action.get('button', ''))

        if action_type == 'sequence':
            # Press every button (no short-circuit), then report overall success
            results = [self.press_button(button) for button in action.get('buttons', [])]
            return all(results)

        if action_type == 'navigate':
            # Navigate in a direction for the requested number of steps
            direction = action.get('direction', '')
            steps = action.get('steps', 1)
            results = [self.press_button(direction) for _ in range(steps)]
            return all(results)

        return False


def read_image_to_base64(image_path):
    """
    Reads an image file and converts it to a base64 encoded string.

    Args:
        image_path (str): Path to the image file

    Returns:
        str: Base64 encoded string representation of the image

    Raises:
        Exception: If the file cannot be read; the original error is chained
        as the cause.
    """
    try:
        with open(image_path, 'rb') as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')
    except Exception as e:
        raise Exception(f"Error reading image file: {str(e)}") from e

# --------------------------------------------------------------------------------
# /game.py:
# --------------------------------------------------------------------------------
import pygame
import random
import os
import time

# Initialize pygame
pygame.init()

# Game constants
WIDTH, HEIGHT = 800, 600
PLAYER_RADIUS = 15
ENEMY_RADIUS = 15
FOOD_RADIUS = 10
PLAYER_SPEED = 20
ENEMY_SPEED = 3
NUM_ENEMIES = 3
NUM_FOOD = 5

# Colors
BLUE = (0, 0, 255)
RED = (255, 0, 0)
GREEN = (0, 255, 0)
BACKGROUND = (255, 255, 255)
WHITE = (255, 255, 255)


class Player:
    """The player-controlled circle."""

    def __init__(self, x, y):
        """Place the player at (x, y) with a score of zero."""
        self.x = x
        self.y = y
        self.radius = PLAYER_RADIUS
        self.speed = PLAYER_SPEED
        self.score = 0

    def move(self, direction):
        """Move one step in ``direction``, clamped to the window.

        Args:
            direction (str): "up", "down", "left" or "right"; any other
                value leaves the position unchanged.
        """
        if direction == "up":
            self.y = max(self.radius, self.y - self.speed)
        elif direction == "down":
            self.y = min(HEIGHT - self.radius, self.y + self.speed)
        elif direction == "left":
            self.x = max(self.radius, self.x - self.speed)
        elif direction == "right":
            self.x = min(WIDTH - self.radius, self.x + self.speed)

    def draw(self, screen):
        """Draw the player as a blue circle on ``screen``."""
        pygame.draw.circle(screen, BLUE, (self.x, self.y), self.radius)

    def collides_with(self, other):
        """Return True if this circle overlaps ``other``.

        ``other`` must expose ``x``, ``y`` and ``radius``.
        """
        distance = ((self.x - other.x) ** 2 + (self.y - other.y) ** 2) ** 0.5
        return distance < (self.radius + other.radius)
class Enemy:
    """A red circle that wanders around and periodically heads for the player."""

    def __init__(self):
        """Spawn at a random position inside the window, facing a random way."""
        self.x = random.randint(ENEMY_RADIUS, WIDTH - ENEMY_RADIUS)
        self.y = random.randint(ENEMY_RADIUS, HEIGHT - ENEMY_RADIUS)
        self.radius = ENEMY_RADIUS
        self.speed = ENEMY_SPEED
        self.direction = random.choice(["up", "down", "left", "right"])
        self.direction_change_timer = 0

    def move(self, player):
        """Advance one step, occasionally picking a new direction.

        Args:
            player: Object with ``x`` and ``y``; only read when a new
                direction is chosen.
        """
        # Simple AI: Sometimes move randomly, sometimes chase player
        self.direction_change_timer += 1

        # Re-pick the direction after more than 60 calls, or at random
        # (1% chance per call).
        if self.direction_change_timer > 60 or random.random() < 0.01:
            if random.random() < 0.7:  # 70% chance to chase player
                # Close the horizontal gap first; only when x already matches
                # is the vertical gap considered.
                if self.x < player.x:
                    self.direction = "right"
                elif self.x > player.x:
                    self.direction = "left"
                elif self.y < player.y:
                    self.direction = "down"
                elif self.y > player.y:
                    self.direction = "up"
            else:  # 30% chance to move randomly
                self.direction = random.choice(["up", "down", "left", "right"])
            self.direction_change_timer = 0

        # Move in the current direction, clamped to the window
        if self.direction == "up":
            self.y = max(self.radius, self.y - self.speed)
        elif self.direction == "down":
            self.y = min(HEIGHT - self.radius, self.y + self.speed)
        elif self.direction == "left":
            self.x = max(self.radius, self.x - self.speed)
        elif self.direction == "right":
            self.x = min(WIDTH - self.radius, self.x + self.speed)

    def draw(self, screen):
        """Draw the enemy as a red circle on ``screen``."""
        pygame.draw.circle(screen, RED, (self.x, self.y), self.radius)


class Food:
    """A green circle the player collects for one point."""

    def __init__(self):
        """Spawn at a random position inside the window."""
        self.x = random.randint(FOOD_RADIUS, WIDTH - FOOD_RADIUS)
        self.y = random.randint(FOOD_RADIUS, HEIGHT - FOOD_RADIUS)
        self.radius = FOOD_RADIUS

    def draw(self, screen):
        """Draw the food as a green circle on ``screen``."""
        pygame.draw.circle(screen, GREEN, (self.x, self.y), self.radius)


class Game:
    """Window, entities and per-step logic of the dodge-and-collect game."""

    # Text must contrast with the white BACKGROUND; drawing it in white made
    # the score and the "GAME OVER" message invisible.
    TEXT_COLOR = (0, 0, 0)

    def __init__(self):
        """Open the window and create the player, enemies and food."""
        self.screen = pygame.display.set_mode((WIDTH, HEIGHT))
        pygame.display.set_caption("Claude's Game")
        self.clock = pygame.time.Clock()
        self.running = True
        self.game_over = False
        self.player = Player(WIDTH // 2, HEIGHT // 2)
        self.enemies = [Enemy() for _ in range(NUM_ENEMIES)]
        self.foods = [Food() for _ in range(NUM_FOOD)]
        self.font = pygame.font.SysFont(None, 36)

    def handle_events(self):
        """Stop the game when the window is closed."""
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                self.running = False

    def update(self, direction=None):
        """Advance the game state by one step.

        Args:
            direction (str | None): Direction to move the player in, or
                None to leave the player where it is.
        """
        if self.game_over:
            return

        # Move the player if a direction is provided
        if direction:
            self.player.move(direction)

        # Move enemies; touching any of them ends the game
        for enemy in self.enemies:
            enemy.move(self.player)

            if self.player.collides_with(enemy):
                self.game_over = True

        # Iterate over a copy because eaten food is removed from the list
        for food in self.foods[:]:
            if self.player.collides_with(food):
                self.foods.remove(food)
                self.player.score += 1
                self.foods.append(Food())  # Add new food

    def draw(self):
        """Render the current frame: food, enemies, player, score, game over."""
        self.screen.fill(BACKGROUND)

        # Draw food
        for food in self.foods:
            food.draw(self.screen)

        # Draw enemies
        for enemy in self.enemies:
            enemy.draw(self.screen)

        # Draw player
        self.player.draw(self.screen)

        # Draw score
        score_text = self.font.render(f"Score: {self.player.score}", True, self.TEXT_COLOR)
        self.screen.blit(score_text, (10, 10))

        # Draw game over message if applicable
        if self.game_over:
            game_over_text = self.font.render("GAME OVER", True, self.TEXT_COLOR)
            self.screen.blit(game_over_text, (WIDTH // 2 - 80, HEIGHT // 2 - 18))

        pygame.display.flip()

    def take_screenshot(self, filename):
        """Save the current window contents to ``filename``."""
        pygame.image.save(self.screen, filename)

    def run_claude_step(self, direction=None):
        """
        Run a single step of the game controlled by Claude

        Args:
            direction (str | None): Direction to move the player this step.

        Returns: game state (running, game_over, score)
        """
        if not self.running:
            return False, self.game_over, self.player.score

        self.handle_events()
        self.update(direction)
        self.draw()
        self.clock.tick(30)  # Cap at 30 FPS

        return self.running, self.game_over, self.player.score

# --------------------------------------------------------------------------------