├── 04_ReinforcementLearning ├── .gitignore ├── ple │ ├── __init__.py │ ├── games │ │ ├── doom │ │ │ ├── __init__.py │ │ │ ├── assets │ │ │ │ ├── README.md │ │ │ │ └── cfg │ │ │ │ │ ├── take_cover.cfg │ │ │ │ │ ├── defend_the_line.cfg │ │ │ │ │ ├── deadly_corridor.cfg │ │ │ │ │ ├── health_gathering.cfg │ │ │ │ │ ├── defend_the_center.cfg │ │ │ │ │ ├── basic.cfg │ │ │ │ │ ├── my_way_home.cfg │ │ │ │ │ ├── predict_position.cfg │ │ │ │ │ └── deathmatch.cfg │ │ │ └── doom.py │ │ ├── utils │ │ │ ├── __init__.py │ │ │ └── vec2d.py │ │ ├── flappybird │ │ │ └── assets │ │ │ │ ├── base.png │ │ │ │ ├── pipe-red.png │ │ │ │ ├── pipe-green.png │ │ │ │ ├── background-day.png │ │ │ │ ├── background-night.png │ │ │ │ ├── bluebird-midflap.png │ │ │ │ ├── bluebird-upflap.png │ │ │ │ ├── redbird-downflap.png │ │ │ │ ├── redbird-midflap.png │ │ │ │ ├── redbird-upflap.png │ │ │ │ ├── bluebird-downflap.png │ │ │ │ ├── yellowbird-midflap.png │ │ │ │ ├── yellowbird-upflap.png │ │ │ │ └── yellowbird-downflap.png │ │ ├── monsterkong │ │ │ ├── assets │ │ │ │ ├── coin1.png │ │ │ │ ├── coin2.png │ │ │ │ ├── coin3.png │ │ │ │ ├── coin4.png │ │ │ │ ├── coin5.png │ │ │ │ ├── left.png │ │ │ │ ├── left2.png │ │ │ │ ├── right.png │ │ │ │ ├── still.png │ │ │ │ ├── ladder.png │ │ │ │ ├── right2.png │ │ │ │ ├── background.png │ │ │ │ ├── monster0.png │ │ │ │ ├── monster01.png │ │ │ │ ├── monster1.png │ │ │ │ ├── monster11.png │ │ │ │ ├── monster2.png │ │ │ │ ├── monster21.png │ │ │ │ ├── monster3.png │ │ │ │ ├── monster31.png │ │ │ │ ├── princess.png │ │ │ │ ├── wood_block.png │ │ │ │ ├── fireballdown.png │ │ │ │ ├── fireballleft.png │ │ │ │ ├── fireballright.png │ │ │ │ ├── monsterstill0.png │ │ │ │ ├── monsterstill1.png │ │ │ │ ├── monsterstill10.png │ │ │ │ ├── monsterstill11.png │ │ │ │ └── asset_credits.txt │ │ │ ├── ladder.py │ │ │ ├── wall.py │ │ │ ├── LICENSE │ │ │ ├── onBoard.py │ │ │ ├── coin.py │ │ │ ├── person.py │ │ │ ├── player.py │ │ │ ├── fireball.py │ │ │ ├── monsterPerson.py │ │ │ └── __init__.py │ │ ├── base │ │ │ ├── __init__.py │ │ │ ├── doomwrapper.py │ │ │ └── pygamewrapper.py │ │ ├── __init__.py │ │ ├── primitives.py │ │ ├── catcher.py │ │ ├── waterworld.py │ │ ├── puckworld.py │ │ ├── raycastmaze.py │ │ ├── pixelcopter.py │ │ ├── raycast.py │ │ ├── snake.py │ │ └── pong.py │ └── ple.py ├── agents │ ├── screen_capture.png │ ├── Catcher.py │ └── Maze.py ├── requirements.txt └── README.MD ├── 02_Optimization ├── OPServer.jar └── README.md ├── 03_Validation ├── abalone_app.xlsx ├── abalone_dataset.xlsx └── README.md ├── 01_Preprocessing ├── diabetes_app.xlsx ├── diabetes_dataset.xlsx ├── diabetes_csv.py ├── diabetes_xlsx.py ├── README.md └── diabetes_app.csv ├── LICENSE ├── README.md ├── 05_IOL_H1 └── README.md └── .gitignore /04_ReinforcementLearning/.gitignore: -------------------------------------------------------------------------------- 1 | .*/ 2 | -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/__init__.py: -------------------------------------------------------------------------------- 1 | from .ple import PLE 2 | -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/doom/__init__.py: -------------------------------------------------------------------------------- 1 | from .doom import Doom 2 | -------------------------------------------------------------------------------- /02_Optimization/OPServer.jar: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/02_Optimization/OPServer.jar -------------------------------------------------------------------------------- /03_Validation/abalone_app.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/03_Validation/abalone_app.xlsx -------------------------------------------------------------------------------- /01_Preprocessing/diabetes_app.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/01_Preprocessing/diabetes_app.xlsx -------------------------------------------------------------------------------- /03_Validation/abalone_dataset.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/03_Validation/abalone_dataset.xlsx -------------------------------------------------------------------------------- /01_Preprocessing/diabetes_dataset.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/01_Preprocessing/diabetes_dataset.xlsx -------------------------------------------------------------------------------- /04_ReinforcementLearning/agents/screen_capture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/agents/screen_capture.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/doom/assets/README.md: -------------------------------------------------------------------------------- 1 | The cfg files and wad files were taken from ViZDoom. 
2 | Source: https://github.com/Marqt/ViZDoom 3 | -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/utils/__init__.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def percent_round_int(percent, x): 5 | return np.round(percent * x).astype(int) 6 | -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/flappybird/assets/base.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/flappybird/assets/base.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/monsterkong/assets/coin1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/monsterkong/assets/coin1.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/monsterkong/assets/coin2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/monsterkong/assets/coin2.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/monsterkong/assets/coin3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/monsterkong/assets/coin3.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/monsterkong/assets/coin4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/monsterkong/assets/coin4.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/monsterkong/assets/coin5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/monsterkong/assets/coin5.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/monsterkong/assets/left.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/monsterkong/assets/left.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/monsterkong/assets/left2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/monsterkong/assets/left2.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/monsterkong/assets/right.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/monsterkong/assets/right.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/monsterkong/assets/still.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/monsterkong/assets/still.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/flappybird/assets/pipe-red.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/flappybird/assets/pipe-red.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/monsterkong/assets/ladder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/monsterkong/assets/ladder.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/monsterkong/assets/right2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/monsterkong/assets/right2.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/flappybird/assets/pipe-green.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/flappybird/assets/pipe-green.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/monsterkong/assets/background.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/monsterkong/assets/background.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/monsterkong/assets/monster0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/monsterkong/assets/monster0.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/monsterkong/assets/monster01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/monsterkong/assets/monster01.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/monsterkong/assets/monster1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/monsterkong/assets/monster1.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/monsterkong/assets/monster11.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/monsterkong/assets/monster11.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/monsterkong/assets/monster2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/monsterkong/assets/monster2.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/monsterkong/assets/monster21.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/monsterkong/assets/monster21.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/monsterkong/assets/monster3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/monsterkong/assets/monster3.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/monsterkong/assets/monster31.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/monsterkong/assets/monster31.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/monsterkong/assets/princess.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/monsterkong/assets/princess.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/monsterkong/assets/wood_block.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/monsterkong/assets/wood_block.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/monsterkong/assets/fireballdown.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/monsterkong/assets/fireballdown.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/monsterkong/assets/fireballleft.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/monsterkong/assets/fireballleft.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/base/__init__.py: -------------------------------------------------------------------------------- 1 | from .pygamewrapper import PyGameWrapper 2 | try: 3 | from .doomwrapper import DoomWrapper 4 | except: 5 | print("couldn't import doomish") 6 | -------------------------------------------------------------------------------- 
/04_ReinforcementLearning/ple/games/flappybird/assets/background-day.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/flappybird/assets/background-day.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/flappybird/assets/background-night.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/flappybird/assets/background-night.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/flappybird/assets/bluebird-midflap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/flappybird/assets/bluebird-midflap.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/flappybird/assets/bluebird-upflap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/flappybird/assets/bluebird-upflap.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/flappybird/assets/redbird-downflap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/flappybird/assets/redbird-downflap.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/flappybird/assets/redbird-midflap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/flappybird/assets/redbird-midflap.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/flappybird/assets/redbird-upflap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/flappybird/assets/redbird-upflap.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/monsterkong/assets/fireballright.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/monsterkong/assets/fireballright.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/monsterkong/assets/monsterstill0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/monsterkong/assets/monsterstill0.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/monsterkong/assets/monsterstill1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/monsterkong/assets/monsterstill1.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/monsterkong/assets/monsterstill10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/monsterkong/assets/monsterstill10.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/monsterkong/assets/monsterstill11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/monsterkong/assets/monsterstill11.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/flappybird/assets/bluebird-downflap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/flappybird/assets/bluebird-downflap.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/flappybird/assets/yellowbird-midflap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/flappybird/assets/yellowbird-midflap.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/flappybird/assets/yellowbird-upflap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/flappybird/assets/yellowbird-upflap.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/flappybird/assets/yellowbird-downflap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aydanomachado/mlclass/HEAD/04_ReinforcementLearning/ple/games/flappybird/assets/yellowbird-downflap.png -------------------------------------------------------------------------------- /04_ReinforcementLearning/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.17.0 2 | Pillow==6.2.2 3 | -e git+https://github.com/CarlosW1998/PyGame-Learning-Environment@47c77f7d37667a5865bc8e1a811eac09d3f19aa4#egg=ple 4 | pygame==1.9.6 5 | -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/monsterkong/assets/asset_credits.txt: -------------------------------------------------------------------------------- 1 | Princess sprite is from Jason-Em. Source: http://opengameart.org/content/bushly-and-princess-sera 2 | Hero sprite is from Arachne. Source: https://forums.tigsource.com/index.php?topic=9147.0 3 | Fireball sprite is from Matheus Carvalho. Source: http://opengameart.org/content/fireball-vector 4 | Monster sprite is from dogchicken. 
Source: http://opengameart.org/content/cute-monster-sprite-sheet
5 |
--------------------------------------------------------------------------------
/04_ReinforcementLearning/ple/games/__init__.py:
--------------------------------------------------------------------------------
1 | try:
2 |     from ple.games.doom import Doom
3 | except:
4 |     print("Couldn't import doom")
5 | from ple.games.catcher import Catcher
6 | from ple.games.flappybird import FlappyBird
7 | from ple.games.monsterkong import MonsterKong
8 | from ple.games.pixelcopter import Pixelcopter
9 | from ple.games.pong import Pong
10 | from ple.games.puckworld import PuckWorld
11 | from ple.games.raycastmaze import RaycastMaze
12 | from ple.games.snake import Snake
13 | from ple.games.waterworld import WaterWorld
14 |
--------------------------------------------------------------------------------
/04_ReinforcementLearning/ple/games/utils/vec2d.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 |
4 | class vec2d():
5 |
6 |     def __init__(self, pos):
7 |         self.x = pos[0]
8 |         self.y = pos[1]
9 |
10 |     def __add__(self, o):
11 |         x = self.x + o.x
12 |         y = self.y + o.y
13 |
14 |         return vec2d((x, y))
15 |
16 |     def __eq__(self, o):
17 |         return self.x == o.x and self.y == o.y
18 |
19 |     def normalize(self):
20 |         norm = math.sqrt(self.x * self.x + self.y * self.y)
21 |         self.x /= norm
22 |         self.y /= norm
23 |
--------------------------------------------------------------------------------
/04_ReinforcementLearning/ple/games/doom/assets/cfg/take_cover.cfg:
--------------------------------------------------------------------------------
1 | doom_map = map01
2 |
3 | # Rewards
4 | living_reward = 1
5 |
6 | # Rendering options
7 | screen_resolution = RES_320X240
8 | render_hud = false
9 | render_crosshair = false
10 | render_weapon = false
11 | render_decals = false
12 | render_particles = false
13 | window_visible = true
14 |
15 | # Available buttons
16 | available_buttons =
17 | {
18 | MOVE_LEFT
19 | MOVE_RIGHT
20 | }
21 |
22 | # Game variables that will be in the state
23 | available_game_variables = { HEALTH }
24 |
25 | # Change it if you wish.
26 | doom_skill = 4
27 |
--------------------------------------------------------------------------------
/04_ReinforcementLearning/ple/games/doom/doom.py:
--------------------------------------------------------------------------------
1 | import os
2 | from ..base.doomwrapper import DoomWrapper
3 |
4 | class Doom(DoomWrapper):
5 |
6 |     def __init__(self, scenario="basic"):
7 |         cfg_file = "assets/cfg/%s.cfg" % scenario
8 |         scenario_file = "%s.wad" % scenario
9 |         width = 320
10 |         height = 240
11 |
12 |         package_directory = os.path.dirname(os.path.abspath(__file__))
13 |         cfg_file = os.path.join(package_directory, cfg_file)
14 |
15 |         DoomWrapper.__init__(self, width, height,
16 |                              cfg_file, scenario_file)
17 |
--------------------------------------------------------------------------------
/04_ReinforcementLearning/README.MD:
--------------------------------------------------------------------------------
1 | # PyGame-Learning-Environment
2 |
3 | **PyGame Learning Environment (PLE)** is a learning environment, mimicking the [Arcade Learning Environment](https://github.com/mgbellemare/Arcade-Learning-Environment) interface, allowing a quick start to Reinforcement Learning in Python. The goal of PLE is to allow practitioners to focus on the design of models and experiments instead of environment design.
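
A minimal interaction loop, condensed from `agents/Catcher.py` further down in this repository, gives a feel for the interface; the random action choice and the 1000-frame budget are placeholders, not part of PLE itself:

```python
# Minimal PLE loop, condensed from agents/Catcher.py in this repository.
# The random policy and the 1000-frame budget are illustrative placeholders.
import random
from ple import PLE
from ple.games.catcher import Catcher

game = Catcher(width=256, height=256, init_lives=3)
p = PLE(game, fps=30, display_screen=True)
p.init()

actions = p.getActionSet()
reward = 0.0
for _ in range(1000):
    if p.game_over():
        p.reset_game()
    state = game.getGameState()      # dict of player/fruit positions
    action = random.choice(actions)  # stand-in for a learned policy
    reward = p.act(action)           # apply the action, collect the reward
```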
4 |
5 | ## Installation
6 |
7 | PLE requires the following dependencies:
8 | * numpy
9 | * pygame
10 | * pillow
11 |
12 | Install with pip:
13 |
14 | ```bash
15 |
16 | pip install -r requirements.txt
17 | ```
--------------------------------------------------------------------------------
/04_ReinforcementLearning/ple/games/monsterkong/ladder.py:
--------------------------------------------------------------------------------
1 | __author__ = 'Batchu Vishal'
2 |
3 | import pygame
4 | from .onBoard import OnBoard
5 |
6 | '''
7 | This class defines all our ladders in the game.
8 | Currently not much is done here, but we can add features such as ladder climb sounds etc. here
9 | '''
10 |
11 |
12 | class Ladder(OnBoard):
13 |
14 |     def __init__(self, raw_image, position):
15 |         super(Ladder, self).__init__(raw_image, position)
16 |
17 |     # Update the ladder image
18 |     def updateImage(self, raw_image):
19 |         self.image = raw_image
20 |         self.image = pygame.transform.scale(self.image, (15, 15))
21 |
--------------------------------------------------------------------------------
/04_ReinforcementLearning/ple/games/monsterkong/wall.py:
--------------------------------------------------------------------------------
1 | __author__ = 'Batchu Vishal'
2 | from .onBoard import OnBoard
3 | import pygame
4 |
5 | '''
6 | This class defines all our walls in the game.
7 | Currently not much is done here, but we can add traps to certain walls such as spiked walls etc. to damage the player
8 | '''
9 |
10 |
11 | class Wall(OnBoard):
12 |
13 |     def __init__(self, raw_image, position):
14 |         super(Wall, self).__init__(raw_image, position)
15 |
16 |     # Update the wall image
17 |     def updateImage(self, raw_image):
18 |         self.image = raw_image
19 |         self.image = pygame.transform.scale(self.image, (15, 15))
20 |
--------------------------------------------------------------------------------
/04_ReinforcementLearning/ple/games/doom/assets/cfg/defend_the_line.cfg:
--------------------------------------------------------------------------------
1 | # Rewards
2 | death_penalty = 1
3 |
4 | # Rendering options
5 | screen_resolution = RES_320X240
6 | render_hud = true
7 | render_crosshair = false
8 | render_weapon = true
9 | render_decals = false
10 | render_particles = false
11 | window_visible = true
12 |
13 | # make episodes start after 10 tics (after unholstering the gun)
14 | episode_start_time = 10
15 |
16 |
17 | # Available buttons
18 | available_buttons =
19 | {
20 | TURN_LEFT
21 | TURN_RIGHT
22 | ATTACK
23 | }
24 |
25 | # Game variables that will be in the state
26 | available_game_variables = { AMMO2 HEALTH }
27 |
28 | mode = PLAYER
29 | doom_skill = 3
30 |
--------------------------------------------------------------------------------
/04_ReinforcementLearning/ple/games/doom/assets/cfg/deadly_corridor.cfg:
--------------------------------------------------------------------------------
1 | # Skill 5 is recommended for the scenario to be a challenge.
2 | doom_skill = 5
3 |
4 | # Rewards
5 | death_penalty = 100
6 |
7 | # Rendering options
8 | screen_resolution = RES_320X240
9 | render_hud = true
10 | render_crosshair = false
11 | render_weapon = true
12 | render_decals = false
13 | render_particles = false
14 | window_visible = true
15 |
16 | episode_timeout = 2100
17 |
18 | # Available buttons
19 | available_buttons =
20 | {
21 | MOVE_LEFT
22 | MOVE_RIGHT
23 | ATTACK
24 | MOVE_FORWARD
25 | MOVE_BACKWARD
26 | TURN_LEFT
27 | TURN_RIGHT
28 | }
29 |
30 | # Game variables that will be in the state
31 | available_game_variables = { HEALTH }
32 |
33 | mode = PLAYER
34 |
--------------------------------------------------------------------------------
/04_ReinforcementLearning/ple/games/doom/assets/cfg/health_gathering.cfg:
--------------------------------------------------------------------------------
1 | doom_map = map01
2 |
3 | # Rewards
4 | # Each step is good for you!
5 | living_reward = 1
6 | # And death is not!
7 | death_penalty = 100
8 |
9 | # Rendering options
10 | screen_resolution = RES_320X240
11 | render_hud = false
12 | render_crosshair = false
13 | render_weapon = false
14 | render_decals = false
15 | render_particles = false
16 | window_visible = true
17 |
18 | # make episodes finish after 2100 actions (tics)
19 | episode_timeout = 2100
20 |
21 | # Available buttons
22 | available_buttons =
23 | {
24 | TURN_LEFT
25 | TURN_RIGHT
26 | MOVE_FORWARD
27 | }
28 |
29 | # Game variables that will be in the state
30 | available_game_variables = { HEALTH }
31 |
32 | mode = PLAYER
33 |
--------------------------------------------------------------------------------
/04_ReinforcementLearning/ple/games/doom/assets/cfg/defend_the_center.cfg:
--------------------------------------------------------------------------------
1 | # Rewards
2 | death_penalty = 1
3 |
4 | # Rendering options
5 | screen_resolution = RES_320X240
6 | render_hud = true
7 | render_crosshair = false
8 | render_weapon = true
9 | render_decals = false
10 | render_particles = false
11 | window_visible = true
12 |
13 | # make episodes start after 10 tics (after unholstering the gun)
14 | episode_start_time = 10
15 |
16 | # make episodes finish after 2100 actions (tics)
17 | episode_timeout = 2100
18 |
19 | # Available buttons
20 | available_buttons =
21 | {
22 | TURN_LEFT
23 | TURN_RIGHT
24 | ATTACK
25 | }
26 |
27 | # Game variables that will be in the state
28 | available_game_variables = { AMMO2 HEALTH }
29 |
30 | mode = PLAYER
31 | doom_skill = 3
32 |
--------------------------------------------------------------------------------
/04_ReinforcementLearning/ple/games/doom/assets/cfg/basic.cfg:
--------------------------------------------------------------------------------
1 | doom_map = map01
2 |
3 | # Rewards
4 | living_reward = -1
5 |
6 | # Rendering options
7 | screen_resolution = RES_320X240
8 | render_hud = true
9 | render_crosshair = false
10 | render_weapon = true
11 | render_decals = false
12 | render_particles = false
13 | window_visible = true
14 |
15 | # make episodes start after 14 tics (after unholstering the gun)
16 | episode_start_time = 14
17 |
18 | # make episodes finish after 300 actions (tics)
19 | episode_timeout = 300
20 |
21 | # Available buttons
22 | available_buttons =
23 | {
24 | MOVE_LEFT
25 | MOVE_RIGHT
26 | ATTACK
27 | }
28 |
29 | # Game variables that will be in the state
30 | available_game_variables = { AMMO2 }
31 |
32 | mode = PLAYER
33 | doom_skill = 5
34 |
--------------------------------------------------------------------------------
/04_ReinforcementLearning/ple/games/doom/assets/cfg/my_way_home.cfg: -------------------------------------------------------------------------------- 1 | # Rewards 2 | living_reward = -0.0001 3 | 4 | # Rendering options 5 | screen_resolution = RES_320X240 6 | render_hud = false 7 | render_crosshair = false 8 | render_weapon = true 9 | render_decals = false 10 | render_particles = false 11 | window_visible = true 12 | 13 | # make episodes start after 10 tics (after unholstering the gun) 14 | episode_start_time = 10 15 | 16 | # make episodes finish after 2100 actions (tics) 17 | episode_timeout = 2100 18 | 19 | # Available buttons 20 | available_buttons = 21 | { 22 | TURN_LEFT 23 | TURN_RIGHT 24 | MOVE_FORWARD 25 | MOVE_LEFT 26 | MOVE_RIGHT 27 | } 28 | 29 | # Game variables that will be in the state 30 | available_game_variables = { AMMO0 } 31 | 32 | mode = PLAYER 33 | doom_skill = 5 34 | -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/doom/assets/cfg/predict_position.cfg: -------------------------------------------------------------------------------- 1 | doom_map = map01 2 | 3 | # Rewards 4 | living_reward = -0.001 5 | 6 | # Rendering options 7 | screen_resolution = RES_320X240 8 | render_hud = false 9 | render_crosshair = false 10 | render_weapon = true 11 | render_decals = false 12 | render_particles = false 13 | window_visible = true 14 | 15 | # make episodes start after 16 tics (after producing the rocket launcher) 16 | episode_start_time = 16 17 | 18 | # make episodes finish after 300 actions (tics) 19 | episode_timeout = 300 20 | 21 | # Available buttons 22 | available_buttons = 23 | { 24 | TURN_LEFT 25 | TURN_RIGHT 26 | ATTACK 27 | } 28 | 29 | # Empty list is allowed, in case you are lazy. 
30 | available_game_variables = { }
31 |
32 | mode = PLAYER
33 | doom_skill = 1
34 |
--------------------------------------------------------------------------------
/04_ReinforcementLearning/agents/Catcher.py:
--------------------------------------------------------------------------------
1 | from ple.games.catcher import Catcher
2 | from ple import PLE
3 | import numpy as np
4 | import random
5 | class RandomAgent:
6 |     def __init__(self, actions):
7 |         self.actions = actions
8 |
9 |     def pickAction(self, state, reward):
10 |         return random.choice(self.actions)
11 | '''
12 | State Format:
13 | {
14 |     'player_x': int,
15 |     'player_vel': float,
16 |     'fruit_x': int,
17 |     'fruit_y': int
18 | }
19 | Actions:
20 | [97, 100, None]
21 | '''
22 |
23 | game = Catcher(width=256, height=256, init_lives=3)
24 |
25 | p = PLE(game, fps=30, display_screen=True, force_fps=False)
26 | p.init()
27 |
28 | agent = RandomAgent(p.getActionSet())
29 | nb_frames = 1000
30 | reward = 0.0
31 |
32 | print(game.getGameState())
33 | print(p.getActionSet())
34 |
35 | for f in range(nb_frames):
36 |     if p.game_over():  # check if the game is over
37 |         p.reset_game()
38 |
39 |     state = game.getGameState()
40 |     action = agent.pickAction(state, reward)
41 |     reward = p.act(action)
--------------------------------------------------------------------------------
/04_ReinforcementLearning/ple/games/doom/assets/cfg/deathmatch.cfg:
--------------------------------------------------------------------------------
1 | # Rendering options
2 | screen_resolution = RES_320X240
3 | render_hud = true
4 | render_crosshair = false
5 | render_weapon = true
6 | render_decals = false
7 | render_particles = false
8 | window_visible = true
9 |
10 | # make episodes finish after 4200 actions (tics)
11 | episode_timeout = 4200
12 |
13 | # Available buttons
14 | available_buttons = {
15 | ATTACK
16 | SPEED
17 | STRAFE
18 |
19 | MOVE_RIGHT
20 | MOVE_LEFT
21 | MOVE_BACKWARD
22 | MOVE_FORWARD
23 | TURN_RIGHT
24 | TURN_LEFT
25 |
26 | SELECT_WEAPON1
27 | SELECT_WEAPON2
28 | SELECT_WEAPON3
29 | SELECT_WEAPON4
30 | SELECT_WEAPON5
31 | SELECT_WEAPON6
32 |
33 | SELECT_NEXT_WEAPON
34 | SELECT_PREV_WEAPON
35 |
36 | LOOK_UP_DOWN_DELTA
37 | TURN_LEFT_RIGHT_DELTA
38 | MOVE_LEFT_RIGHT_DELTA
39 | }
40 |
41 | # Game variables that will be in the state
42 | available_game_variables = {
43 | KILLCOUNT
44 | HEALTH
45 | ARMOR
46 | SELECTED_WEAPON
47 | SELECTED_WEAPON_AMMO
48 | }
49 |
50 | mode = PLAYER
51 |
--------------------------------------------------------------------------------
/04_ReinforcementLearning/ple/games/monsterkong/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 | Copyright (c) 2016 Batchu Vishal
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
5 |
6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
7 |
8 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Aydano Machado 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /04_ReinforcementLearning/agents/Maze.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ple import PLE 3 | from ple.games.raycastmaze import RaycastMaze 4 | 5 | 6 | class NaiveAgent(): 7 | """ 8 | This is our naive agent. It picks actions at random! 9 | """ 10 | 11 | def __init__(self, actions): 12 | self.actions = actions 13 | 14 | def pickAction(self, reward, obs): 15 | return self.actions[np.random.randint(0, len(self.actions))] 16 | 17 | ################################### 18 | game = RaycastMaze( 19 | map_size=6 20 | ) # create our game 21 | 22 | fps = 30 # fps we want to run at 23 | frame_skip = 2 24 | num_steps = 1 25 | force_fps = False # slower speed 26 | display_screen = True 27 | 28 | reward = 0.0 29 | max_noops = 20 30 | nb_frames = 15000 31 | 32 | # make a PLE instance. 33 | p = PLE(game, fps=fps, frame_skip=frame_skip, num_steps=num_steps, 34 | force_fps=force_fps, display_screen=display_screen) 35 | 36 | # our Naive agent! 37 | agent = NaiveAgent(p.getActionSet()) 38 | 39 | # init agent and game. 
40 | p.init()
41 |
42 | # let's do a random number of NOOPs
43 | for i in range(np.random.randint(0, max_noops)):
44 |     reward = p.act(p.NOOP)
45 |
46 | # start our training loop
47 | for f in range(nb_frames):
48 |     # if the game is over
49 |     if p.game_over():
50 |         p.reset_game()
51 |
52 |     obs = p.getScreenRGB()
53 |     action = agent.pickAction(reward, obs)
54 |     reward = p.act(action)
55 |
56 |     if f % 50 == 0:
57 |         p.saveScreen("screen_capture.png")
58 |
--------------------------------------------------------------------------------
/04_ReinforcementLearning/ple/games/monsterkong/onBoard.py:
--------------------------------------------------------------------------------
1 | __author__ = 'Batchu Vishal'
2 | import pygame
3 |
4 |
5 | class OnBoard(pygame.sprite.Sprite):
6 |     '''
7 |     This class defines all inanimate objects that we need to display on our board.
8 |     Any object that is on the board and not a person comes under this class (e.g. Coins, Ladders, Walls, etc.)
9 |     Sets up the image and its position for all its child classes.
10 |     '''
11 |
12 |     def __init__(self, raw_image, position):
13 |         pygame.sprite.Sprite.__init__(self)
14 |         self.__position = position
15 |         self.image = raw_image
16 |         self.image = pygame.transform.scale(self.image,
17 |                                             (15, 15))  # Image and Rect required for the draw function on sprites
18 |         self.rect = self.image.get_rect()
19 |         self.rect.center = self.__position
20 |
21 |     # Getters and Setters
22 |     def setCenter(self, position):
23 |         self.rect.center = position
24 |
25 |     def getPosition(self):
26 |         return self.__position
27 |
28 |     def setPosition(self, position):
29 |         self.__position = position
30 |
31 |     # Update Image, this is an abstract method, needs to be implemented in the
32 |     # subclass with whatever size required
33 |     def updateImage(self, raw_image):  # Abstract Method
34 |         raise NotImplementedError("Subclass must implement this")
35 |
36 |     # Modify the size of the image
37 |     def modifySize(self, raw_image, height, width):
38 |         self.image = raw_image
39 |         self.image = pygame.transform.scale(self.image, (width, height))
40 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Machine Learning - IC/UFAL
2 |
3 | Repository for the Machine Learning course of the Computer Science and Computer Engineering programs at the Institute of Computing of the Federal University of Alagoas (IC/UFAL).
4 |
5 | The course aims to present the main paradigms of machine learning, covering a variety of algorithms and techniques. It also includes a range of hands-on activities in which the main algorithms are implemented and tested on synthetic and real data, providing a closer view of problem solving and the application of machine learning.
6 |
7 | If you are taking the course, we encourage every activity that strengthens your learning, such as forming study groups and discussing the topics covered, the exercises and assignments, and so on. However, the answers and solutions you submit must be your own work. For the programming exercises you may talk with other students and discuss specific algorithms and their properties; we only ask that you do not look at any source code written by someone else, nor pass your solution on to another student.
8 |
9 | For everything sent to us, we assume you are following the honor code below.
10 |
11 | ## Honor Code
12 |
13 | >"As a member of this course's community, I will not take part in or tolerate academic dishonesty."
14 |
15 | ## License
16 |
17 | All the solutions I developed for this repository are under the MIT license. See the LICENSE file for details.
18 |
19 | ## Copyright
20 |
21 | Copyright (c) 2018 [Aydano Machado](http://www.aydanomachado.com).
22 |
--------------------------------------------------------------------------------
/01_Preprocessing/diabetes_csv.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Activity for practicing data preprocessing.
5 |
6 | Builds a predictive model for diabetes and submits it for performance evaluation
7 | on the server.
8 |
9 | @author: Aydano Machado
10 | """
11 |
12 | import pandas as pd
13 | from sklearn.neighbors import KNeighborsClassifier
14 | import requests
15 |
16 | print('\n - Reading the file with the diabetes dataset')
17 | data = pd.read_csv('diabetes_dataset.csv')
18 |
19 | # Creating X and y for the machine learning algorithm.
20 | print(' - Creating X and y for the learning algorithm from the diabetes_dataset file')
21 | # To change the columns considered, just edit the array below.
22 | feature_cols = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness',
23 |                 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']
24 | X = data[feature_cols]
25 | y = data.Outcome
26 |
27 | # Building the predictive model for this dataset
28 | print(' - Building the predictive model')
29 | neigh = KNeighborsClassifier(n_neighbors=3)
30 | neigh.fit(X, y)
31 |
32 | # Making predictions on the application file
33 | print(' - Applying the model and sending predictions to the server')
34 | data_app = pd.read_csv('diabetes_app.csv')
35 | data_app = data_app[feature_cols]
36 | y_pred = neigh.predict(data_app)
37 |
38 | # Sending the model's predictions to the server
39 | URL = "https://aydanomachado.com/mlclass/01_Preprocessing.php"
40 |
41 | # TODO: replace with your team's key here
42 | DEV_KEY = "COLOCAR_SUA_KEY_AQUI"
43 |
44 | # JSON payload to be sent to the server
45 | data = {'dev_key': DEV_KEY,
46 |         'predictions': pd.Series(y_pred).to_json(orient='values')}
47 |
48 | # Sending the request and saving the response object
49 | r = requests.post(url=URL, data=data)
50 |
51 | # Extracting and printing the response text
52 | pastebin_url = r.text
53 | print(" - Server response:\n", r.text, "\n")
--------------------------------------------------------------------------------
/01_Preprocessing/diabetes_xlsx.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Activity for practicing data preprocessing.
5 |
6 | Builds a predictive model for diabetes and submits it for performance evaluation
7 | on the server.
8 |
9 | @author: Aydano Machado
10 | """
11 |
12 | import pandas as pd
13 | from sklearn.neighbors import KNeighborsClassifier
14 | import requests
15 |
16 | print('\n - Reading the file with the diabetes dataset')
17 | data = pd.read_excel('diabetes_dataset.xlsx')
18 |
19 | # Creating X and y for the machine learning algorithm.
20 | print(' - Creating X and y for the learning algorithm from the diabetes_dataset file')
21 | # To change the columns considered, just edit the array below.
22 | feature_cols = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness',
23 |                 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']
24 | X = data[feature_cols]
25 | y = data.Outcome
26 |
27 | # Building the predictive model for this dataset
28 | print(' - Building the predictive model')
29 | neigh = KNeighborsClassifier(n_neighbors=3)
30 | neigh.fit(X, y)
31 |
32 | # Making predictions on the application file
33 | print(' - Applying the model and sending predictions to the server')
34 | data_app = pd.read_excel('diabetes_app.xlsx')
35 | data_app = data_app[feature_cols]
36 | y_pred = neigh.predict(data_app)
37 |
38 | # Sending the model's predictions to the server
39 | URL = "https://aydanomachado.com/mlclass/01_Preprocessing.php"
40 |
41 | # TODO: replace with your team's key here
42 | DEV_KEY = "COLOCAR_SUA_KEY_AQUI"
43 |
44 | # JSON payload to be sent to the server
45 | data = {'dev_key': DEV_KEY,
46 |         'predictions': pd.Series(y_pred).to_json(orient='values')}
47 |
48 | # Sending the request and saving the response object
49 | r = requests.post(url=URL, data=data)
50 |
51 | # Extracting and printing the response text
52 | pastebin_url = r.text
53 | print(" - Server response:\n", r.text, "\n")
--------------------------------------------------------------------------------
/04_ReinforcementLearning/ple/games/monsterkong/coin.py:
--------------------------------------------------------------------------------
1 | __author__ = 'Batchu Vishal'
2 | import pygame
3 | import os
4 | from .onBoard import OnBoard
5 |
6 |
7 | class Coin(OnBoard):
8 |     """
9 |     This class defines all our coins.
10 |     Each coin will increase our score by an amount of 'value'.
11 |     We animate each coin with 5 images.
12 |     A coin inherits from the OnBoard class since we will use it as an inanimate object on our board.
13 | """ 14 | 15 | def __init__(self, raw_image, position, _dir): 16 | OnBoard.__init__(self, raw_image, position) 17 | self.__coinAnimState = 0 # Initialize animation state to 0 18 | self.IMAGES = { 19 | "coin1": pygame.transform.scale(pygame.image.load(os.path.join(_dir, 'assets/coin1.png')), (15, 15)).convert_alpha(), 20 | "coin2": pygame.transform.scale(pygame.image.load(os.path.join(_dir, 'assets/coin2.png')), (15, 15)).convert_alpha(), 21 | "coin3": pygame.transform.scale(pygame.image.load(os.path.join(_dir, 'assets/coin3.png')), (15, 15)).convert_alpha(), 22 | "coin4": pygame.transform.scale(pygame.image.load(os.path.join(_dir, 'assets/coin4.png')), (15, 15)).convert_alpha(), 23 | "coin5": pygame.transform.scale(pygame.image.load(os.path.join(_dir, 'assets/coin5.png')), (15, 15)).convert_alpha() 24 | } 25 | 26 | # Update the image of the coin 27 | def updateImage(self, raw_image): 28 | self.image = raw_image 29 | 30 | # Animate the coin 31 | def animateCoin(self): 32 | self.__coinAnimState = (self.__coinAnimState + 1) % 25 33 | if self.__coinAnimState / 5 == 0: 34 | self.updateImage(self.IMAGES["coin1"]) 35 | if self.__coinAnimState / 5 == 1: 36 | self.updateImage(self.IMAGES["coin2"]) 37 | if self.__coinAnimState / 5 == 2: 38 | self.updateImage(self.IMAGES["coin3"]) 39 | if self.__coinAnimState / 5 == 3: 40 | self.updateImage(self.IMAGES["coin4"]) 41 | if self.__coinAnimState / 5 == 4: 42 | self.updateImage(self.IMAGES["coin5"]) 43 | -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/monsterkong/person.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Batchu Vishal' 2 | import pygame 3 | 4 | ''' 5 | This class defines all living things in the game, ex.Donkey Kong, Player etc 6 | Each of these objects can move in any direction specified. 7 | ''' 8 | 9 | 10 | class Person(pygame.sprite.Sprite): 11 | 12 | def __init__(self, raw_image, position, width, height): 13 | super(Person, self).__init__() 14 | self.width = width 15 | self.height = height 16 | self.__position = position 17 | self.image = raw_image 18 | self.image = pygame.transform.scale( 19 | self.image, (width, height)).convert_alpha() 20 | self.rect = self.image.get_rect() 21 | self.rect.center = self.__position 22 | 23 | ''' 24 | We set these as abstract methods since this class does not have a speed variable set, but we want all the child classes to 25 | set a movement speed and they should have setters and getters for this movement speed. 
26 |     '''
27 |
28 |     def getSpeed(self):  # Abstract method
29 |         raise NotImplementedError("Subclass must implement this")
30 |
31 |     def setSpeed(self, speed):  # Abstract method
32 |         raise NotImplementedError("Subclass must implement this")
33 |
34 |     # Getters and Setters
35 |     def setCenter(self, position):
36 |         self.rect.center = position
37 |
38 |     def getPosition(self):
39 |         return self.__position
40 |
41 |     def setPosition(self, position):
42 |         self.__position = position
43 |
44 |     # Move the person in the horizontal ("H") or vertical ("V") axis
45 |     def updateWH(self, raw_image, direction, value, width, height):
46 |         if direction == "H":
47 |             self.__position = (self.__position[0] + value, self.__position[1])
48 |         if direction == "V":
49 |             self.__position = (self.__position[0], self.__position[1] + value)
50 |         self.image = raw_image
51 |         # Update the image to the specified width and height
52 |         #self.image = pygame.transform.scale(self.image, (width, height))
53 |         self.rect.center = self.__position
54 |
55 |     # When you only need to update vertically
56 |     def updateY(self, value):
57 |         self.__position = (self.__position[0], self.__position[1] + value)
58 |         self.rect.center = self.__position
59 |
60 |     # Given a collider list, just check if the person instance collides with
61 |     # any of them
62 |     def checkCollision(self, colliderGroup):
63 |         Colliders = pygame.sprite.spritecollide(self, colliderGroup, False)
64 |         return Colliders
65 |
66 |     # This is another abstract function, and it must be implemented in child
67 |     # classes inheriting from this class
68 |     def continuousUpdate(self, GroupList, GroupList2):
69 |         # continuousUpdate that gets called frequently for collision checks,
70 |         # movement etc.
71 |         raise NotImplementedError("Subclass must implement this")
72 |
--------------------------------------------------------------------------------
/01_Preprocessing/README.md:
--------------------------------------------------------------------------------
1 | # Activity 01 - Preprocessing
2 |
3 | For everything sent to us, we assume you are following the honor code below.
4 |
5 | ## Honor Code
6 |
7 | >"As a member of this course's community, I will not take part in or tolerate academic dishonesty."
8 |
9 | ## Activity goal
10 | *Practice data preprocessing for the k-NN algorithm*
11 |
12 | ## Activity description
13 | In this activity we provide two versions of the same Python program, which reads a database of women descended from the Pima people. According to [Wikipedia](https://en.wikipedia.org/wiki/Pima_people): "The Pima are a people native to the United States of America who lived on the banks of the Gila and Salt rivers, in the southern part of the state of Arizona."
14 |
15 | The first version of the Python program (`diabetes_csv.py`) works with data files in CSV format and the second with files in Excel format (`diabetes_xlsx.py`), so you can choose whichever format you prefer for working with the data and doing your preprocessing.
16 |
17 | The only change that needs to be made to the Python program is inserting the key you received at the indicated spot, replacing the quoted text with your team's key.
18 |
19 | ```python
20 | DEV_KEY = "COLOCAR_SUA_KEY_AQUI"
21 | ```
22 |
23 | Once that is done the program is complete, but not yet functional: there are problems in the data that must be preprocessed away so that the data satisfies everything the k-NN algorithm needs in order to work well. A sketch of typical steps appears right below.
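
Two steps almost always matter for k-NN: handling missing values (distances cannot be computed over NaNs) and putting all attributes on a comparable scale (otherwise attributes with large ranges, such as Insulin, dominate the Euclidean distance). Here is a minimal sketch, assuming pandas and the columns listed further down; the median imputation and min-max scaling are illustrative assumptions, not the required solution:

```python
# Illustrative preprocessing sketch for k-NN (not the required solution).
import pandas as pd

data = pd.read_csv('diabetes_dataset.csv')

feature_cols = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness',
                'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']

# Assumption: fill missing values with each column's median,
# since k-NN cannot handle NaNs.
data[feature_cols] = data[feature_cols].fillna(data[feature_cols].median())

# Assumption: min-max scale every feature to [0, 1] so that no single
# attribute dominates the Euclidean distance used by k-NN.
X = data[feature_cols]
X = (X - X.min()) / (X.max() - X.min())
y = data['Outcome']
```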
24 |
25 | The team's task is to preprocess the data, modifying the files in the chosen format, so that the data is in the best possible shape for the k-NN algorithm to work on.
26 |
27 | Assuming the chosen format is xlsx (Excel), the program `diabetes_xlsx.py` reads the file `diabetes_dataset.xlsx` and stores it in the arrays `X` and `y`. It then builds the predictive model using k-NN with `k = 3` and uses that model to classify the data found in the file `diabetes_app.xlsx`. These predictions are sent to the server, which computes the accuracy achieved, then returns and records the best performance obtained by the team.
28 |
29 | ## Dataset description
30 |
31 | This dataset, titled Pima Indians Diabetes Data Set, originally comes from the National Institute of Diabetes and Digestive and Kidney Diseases, and the goal is to predict whether a patient has diabetes. The selected patients are women at least 21 years old of Pima Indian heritage. The information in the database is described below.
32 |
33 | #### Dataset attributes:
34 | 1. **Pregnancies**: number of times pregnant
35 | 2. **Glucose**: plasma glucose concentration at 2 hours in an oral glucose tolerance test
36 | 3. **BloodPressure**: diastolic blood pressure (mm Hg)
37 | 4. **SkinThickness**: triceps skin fold thickness (mm)
38 | 5. **Insulin**: 2-hour serum insulin (mu U/ml)
39 | 6. **BMI**: body mass index (weight in kg / (height in m)^2)
40 | 7. **DiabetesPedigreeFunction**: diabetes pedigree function
41 | 8. **Age**: age (years)
42 | 9. **Outcome**: class variable (0 or 1) for diabetes
43 |
44 | ## Installing Python and its dependencies
45 |
46 | An easy way to install the latest version of Python with all the dependencies needed for machine learning and data manipulation is to download the [Anaconda](https://www.anaconda.com/download/) distribution.
47 |
48 |
--------------------------------------------------------------------------------
/05_IOL_H1/README.md:
--------------------------------------------------------------------------------
1 | # Activity 05 - Classifier evaluation
2 |
3 | For everything sent to us, we assume you are following the honor code below.
4 |
5 | ## Honor Code
6 |
7 | >"As a member of this course's community, I will not take part in or tolerate academic dishonesty."
8 |
9 | ## Activity goal
10 | *Practice the methodology and techniques for evaluating classifiers*
11 |
12 | ## Activity description
13 | The team's task is to build and validate one or more predictive models, using the methodology and techniques seen in class, so as to guarantee their power to generalize.
14 |
15 | In this activity you may use any learning algorithm, even ones not yet covered in class, such as SVMs, neural networks, RBFs, etc. One important detail: understanding the algorithm is necessary, because the best-ranked teams will, as in the other activities, have to share what they learned, and that includes the algorithm used.
16 |
17 | To submit the activity you can use the same template programs used in Activity 01 - Preprocessing for reading the data and sending the results, making the appropriate changes, for example changing the submission URL to https://aydanomachado.com/mlclass/05_Validation.php, as in the sketch right below.
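
A minimal submission sketch, adapted from `01_Preprocessing/diabetes_csv.py`; only the URL changes for this activity, and `DEV_KEY` and `y_pred` below are placeholders:

```python
# Submission sketch adapted from 01_Preprocessing/diabetes_csv.py.
# DEV_KEY and y_pred are placeholders; only the URL differs here.
import pandas as pd
import requests

URL = "https://aydanomachado.com/mlclass/05_Validation.php"
DEV_KEY = "COLOCAR_SUA_KEY_AQUI"  # replace with your team's key
y_pred = [0, 1, 0]  # placeholder: your model's predictions for h1_app

data = {'dev_key': DEV_KEY,
        'predictions': pd.Series(y_pred).to_json(orient='values')}
r = requests.post(url=URL, data=data)
print(" - Server response:\n", r.text, "\n")
```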
18 | 
19 | **Attention:** in this activity only **1 submission every 12h** will be allowed, because the goal is to properly validate the model before it is submitted.
20 | 
21 | Still following the mold of Activity 01, the files `h1_dataset.xlsx` or `h1_dataset.csv` must be used to build and validate the predictive model (classifier), and the files `h1_app.xlsx` or `h1_app.csv` are used to test the model, with the predictions sent to the server, where the performance of the model you built, meaning its accuracy, will be recorded.
22 | 
23 | ## Description of the database
24 | 
25 | This dataset was structured from a study on the implantation of intraocular lenses to correct senile cataract using the phacoemulsification technique.
26 | 
27 | The base consists of information about the patient and the anatomy of the patient's eye, as well as the data of the implanted lens.
28 | 
29 | The goal is to predict the success of the surgery relative to what was planned, represented by the label column.
30 | 
31 | For a quick introduction to the subject, here are two Wikipedia links; for a more detailed and accurate view, look for other sources:
32 | - Cataract ([Wikipédia, a enciclopédia livre](https://pt.wikipedia.org/wiki/Catarata))
33 | - Cataract surgery ([Wikipédia, a enciclopédia livre](https://pt.wikipedia.org/wiki/Cirurgia_de_catarata))
34 | 
35 | 
36 | #### Dataset attributes:
37 | 1. **Registro_x**: identifier record; **must not be used to make the prediction**
38 | 2. **Idade**: the patient's age at the time of surgery
39 | 3. **Sexo**: M and F
40 | 4. **Modelo_x**: model of the implanted intraocular lens
41 | 5. **SF**: a constant representing the "surgeon factor"; it tries to capture the variables not directly computed that affect the surgery's outcome
42 | 6. **Dioptria**: power of the implanted lens
43 | 7. **ACD**: anterior chamber depth of the eye
44 | 8. **AL**: axial length of the eye
45 | 9. **WTW**: white-to-white distance of the eye
46 | 10. **K1**: keratometry of the flattest meridian of the anterior surface of the eye
47 | 11. **K2**: keratometry of the steepest meridian of the anterior surface of the eye
48 | 12. **K**: mean keratometry of the anterior surface of the eye (average of K1 and K2)
49 | 13. **label**: 0 and 1, the variable to be predicted; represents the surgical outcome
--------------------------------------------------------------------------------
/04_ReinforcementLearning/ple/games/monsterkong/player.py:
--------------------------------------------------------------------------------
1 | __author__ = 'Batchu Vishal'
2 | from .person import Person
3 | 
4 | '''
5 | This class defines our player.
6 | It inherits from the Person class since a Player is also a person.
7 | We specialize the person by adding capabilities such as jumping.
8 | '''
9 | 
10 | 
11 | class Player(Person):
12 | 
13 |     def __init__(self, raw_image, position, width, height):
14 |         super(Player, self).__init__(raw_image, position, width, height)
15 |         self.isJumping = 0
16 |         self.onLadder = 0
17 |         self.currentJumpSpeed = 0
18 |         self.__gravity = 0.85  # Gravity affecting the jump velocity of the player
19 |         self.__speed = 5  # Movement speed of the player
20 | 
21 |     # Getters and Setters
22 |     def getSpeed(self):
23 |         return self.__speed
24 | 
25 |     def setSpeed(self):
26 |         return self.__speed
27 | 
28 |     # This manages the player's jump
29 |     # (only the player can jump)
30 |     def continuousUpdate(self, wallGroupList, ladderGroupList):
31 |         # Only gets run when the player is not on the ladder
32 |         if self.onLadder == 0:
33 |             wallsCollided = self.checkCollision(wallGroupList)
34 | 
35 |             # If the player is not jumping
36 |             if self.isJumping == 0:
37 |                 # We move down a little and check if we collide with anything
38 |                 self.updateY(2)
39 |                 laddersCollided = self.checkCollision(ladderGroupList)
40 |                 wallsCollided = self.checkCollision(wallGroupList)
41 |                 self.updateY(-2)
42 |                 # If we are not colliding with anything below, then we start a
43 |                 # jump with 0 speed so that we just fall down
44 |                 if len(wallsCollided) == 0 and len(laddersCollided) == 0:
45 |                     self.isJumping = 1
46 |                     self.currentJumpSpeed = 0
47 | 
48 |             # If the player is jumping
49 |             if self.isJumping:
50 |                 if wallsCollided:
51 |                     # If you collide with a wall while jumping and it's below
52 |                     # you, then you stop the jump
53 |                     if wallsCollided[0].getPosition()[1] > self.getPosition()[
54 |                             1]:  # wallsize/2 and charsize/2 and +1
55 |                         self.isJumping = 0
56 |                         self.setPosition(((self.getPosition()[0], wallsCollided[0].getPosition()[
57 |                             1] - (self.height + 1))))  # Wall size/2 and charactersize/2 and +1
58 |                         # print "HIT FLOOR"
59 |                     # If you collide with a wall while jumping and it's above
60 |                     # you, then you hit the ceiling, so the jump speed is set
61 |                     # to 0 and the player falls down
62 |                     elif wallsCollided[0].getPosition()[1] < self.getPosition()[1]:
63 |                         self.currentJumpSpeed = 0
64 |                         self.setPosition((self.getPosition()[0], wallsCollided[
65 |                             0].getPosition()[1] + (self.height + 1)))
66 |                         # print "HIT TOP"
67 |                 self.setCenter(self.getPosition())
68 |                 # If he is still jumping (i.e. hasn't touched the floor yet)
69 |                 if self.isJumping:
70 |                     # Move him by currentJumpSpeed (upward while it is positive)
71 |                     self.updateY(-self.currentJumpSpeed)
72 |                     self.setCenter(self.getPosition())
73 |                     self.currentJumpSpeed -= self.__gravity  # Affect the jump speed with gravity
74 |                     if self.currentJumpSpeed < -8:
75 |                         self.currentJumpSpeed = -8
76 | 
--------------------------------------------------------------------------------
/03_Validation/README.md:
--------------------------------------------------------------------------------
1 | # Activity 03 - Classifier evaluation
2 | 
3 | For everything that is sent to us, we assume you are following the honor code below.
4 | 
5 | ## Honor Code
6 | 
7 | >"As a member of this course's community, I will not take part in nor tolerate academic dishonesty".
8 | 
9 | ## Goal of the activity
10 | *Practice the methodology and techniques for evaluating classifiers*
11 | 
12 | ## Description of the activity
13 | The team's task is to build and validate one or more predictive models so as to guarantee their generalization power, using the methodology and techniques seen in class. A sketch of one such local validation follows.
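As one concrete way to validate locally, a cross-validation sketch; the column names (`Type`, `Sex`) and the xlsx layout are assumptions to check against the real file:

```python
import pandas as pd
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.neighbors import KNeighborsClassifier

# Assumed layout: a "Type" class column plus the attributes listed below.
df = pd.read_excel("abalone_dataset.xlsx")
X = pd.get_dummies(df.drop(columns=["Type"]))  # one-hot encode "Sex"
y = df["Type"]

# 10-fold stratified cross-validation: estimate generalization locally
# before spending one of the rate-limited server submissions.
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
scores = cross_val_score(KNeighborsClassifier(n_neighbors=3), X, y, cv=cv)
print("accuracy: %.3f +/- %.3f" % (scores.mean(), scores.std()))
```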
14 | 
15 | In this activity you may use any learning algorithm, even ones not yet covered in class, such as SVMs, neural networks, RBFs, etc. One important detail: understanding the algorithm is required, because the best-ranked teams will, as in the other activities, have to share what they learned, and that includes the algorithm used.
16 | 
17 | To submit the activity you may reuse the same template programs used to read and send results in Activity 01 - Preprocessing, making the necessary changes, for example pointing the submission URL at https://aydanomachado.com/mlclass/03_Validation.php.
18 | 
19 | **Attention:** in this activity only **1 submission every 12h** will be allowed, because the goal is to properly validate the model before it is submitted.
20 | 
21 | Still following the mold of Activity 01, the files `abalone_dataset.xlsx` or `abalone_dataset.csv` must be used to build and validate the predictive model (classifier), and the files `abalone_app.xlsx` or `abalone_app.csv` are used to test the model, with the predictions sent to the server, where the performance of the model you built, meaning its accuracy, will be recorded.
22 | 
23 | ## Description of the database
24 | 
25 | This dataset was modified from the base found at the [UCI Machine Learning Repository: Abalone Data Set](http://archive.ics.uci.edu/ml/datasets/Abalone).
26 | It was originally used in the study Warwick J Nash, Tracy L Sellers, Simon R Talbot, Andrew J Cawthorn and Wes B Ford (1994) "The Population Biology of Abalone (Haliotis species) in Tasmania. I. Blacklip Abalone (H. rubra) from the North Coast and Islands of Bass Strait", Sea Fisheries Division, Technical Report No. 48 (ISSN 1034-3288).
27 | 
28 | The base consists of information about a mollusk called abalone, and the classifier's goal is to identify the specimen's type (among classes I, II, and III) using the information provided and detailed below.
29 | 
30 | #### Abalone
31 | (Source: [Wikipédia, a enciclopédia livre](https://pt.wikipedia.org/wiki/Abalone))
32 | "Haliotis (popularly known in Portuguese and English as abalone, also in English as ear shell or ormer, in Spanish as oreja de mar and abulone, in French as oreille de mer, in Italian as abaloni, and in German as seeohren) is a genus of marine gastropod mollusks of the family Haliotidae and the only catalogued genus of this family. It was proposed by Linnaeus in 1758 and contains several species in coastal waters of almost the whole world. In gastronomy, abalone is a prized mollusk in Asian countries. Its dimensions range from two to thirty centimeters."
33 | 
34 | #### Dataset attributes:
35 | 1. **Sex**: M, F, and I (infant)
36 | 2. **Length**: longest shell measurement, in mm
37 | 3. **Diameter**: diameter in mm, perpendicular to the Length measurement
38 | 4. **Height**: height in mm, with the meat inside the shell
39 | 5. **Whole weight**: weight in grams of the whole abalone
40 | 6. **Shucked weight**: weight in grams of the meat
41 | 7. **Viscera weight**: weight in grams of the viscera after draining
42 | 8. **Shell weight**: weight in grams of the shell after drying
43 | 9. **Type**: class variable (1, 2, or 3) for the abalone
44 | 
--------------------------------------------------------------------------------
/02_Optimization/README.md:
--------------------------------------------------------------------------------
1 | # Activity 02 - Optimizing an antenna for a radiation pattern
2 | 
3 | For everything that is sent to us, we assume you are following the honor code below.
4 | 
5 | ## Honor Code
6 | 
7 | >"As a member of this course's community, I will not take part in nor tolerate academic dishonesty".
8 | 
9 | ## Goal of the activity
10 | *Practice iterative-improvement search algorithms on optimization problems*
11 | 
12 | ## Description of the activity
13 | This activity consists of building agents whose goal is to find the best configuration of an antenna, so as to obtain the highest gain from the radiation pattern emitted/received by its layout.
14 | 
15 | For that, we suggest implementing Hill Climbing and its variations, as well as Simulated Annealing and Genetic Algorithms.
16 | 
17 | There are several examples of this class of algorithms being applied to practical problems, such as the automatic design of antennas for NASA's ST5 spacecraft (Figure 1).
18 | 
19 | The antenna shape shown in Figure 1 was found by an evolutionary design program, built to produce the best possible radiation pattern. Such a convoluted shape would hardly be drawn by a human, and the results of the implemented algorithms were the best obtained, outperforming the human designers.
20 | 
21 | 
22 | [Image: Antena]
23 | 
24 | Figure 1 - Antenna created for NASA's ST5 spacecraft.
25 | 
26 | 
27 | ## Abstraction, representation, description, and testing of the antenna
28 | The antenna we will manipulate is a simplified model, represented using the spherical coordinate system as inspiration, shown in Figure 2.
29 | 
30 | A point P(r, φ, θ) in this spherical coordinate system is composed of a radius r plus two angles, φ and θ, that locate the point, as indicated in Figure 2.
31 | 
32 | 
33 | [Image: Coordenadas Esféricas]
34 | 
35 | Figure 2 - Spherical coordinate system.
36 | 
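For intuition, a small helper that maps a point P(r, φ, θ) to Cartesian coordinates; this is a sketch assuming the physics convention (θ measured from the z-axis, φ in the x-y plane), since the text does not pin the convention down:

```python
import math

def spherical_to_cartesian(r, phi_deg, theta_deg):
    # Assumed convention: theta from the z-axis, phi from the x-axis.
    phi = math.radians(phi_deg)
    theta = math.radians(theta_deg)
    x = r * math.sin(theta) * math.cos(phi)
    y = r * math.sin(theta) * math.sin(phi)
    z = r * math.cos(theta)
    return (x, y, z)
```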
37 | 
38 | Our antenna is made up of segments of equal length, so the value of r does not vary from one part of it to another. The design variations are therefore only in the pairs of angles (φ, θ) at each joint.
39 | 
40 | The antenna has 3 (three) joints, and each angle is represented in whole degrees within the interval [0º, 360º).
41 | 
42 | With the 3 (three) pairs of angles defined, they can be submitted for testing through the URL described below, replacing each ___ with the value of the respective angle within the interval [0, 360).
43 | 
44 | (Note: how to start the server is shown in the next section.)
45 | 
46 | ```
47 | /antenna/simulate?phi1=___&theta1=___&phi2=___&theta2=___&phi3=___&theta3=___
48 | ```
49 | Example request:
50 | 
51 | ```
52 | http://localhost:8080/antenna/simulate?phi1=90&theta1=90&phi2=90&theta2=90&phi3=90&theta3=90
53 | ```
54 | 
55 | If the request is formatted correctly, the server returns the gain achieved by the antenna (first line) as well as the angles used (remaining lines), as in the example below.
56 | 
57 | Example response:
58 | ```
59 | -6.146398388793574
60 | phi1 = 90
61 | theta1 = 90
62 | phi2 = 90
63 | theta2 = 90
64 | phi3 = 90
65 | theta3 = 90
66 | ```
67 | 
68 | As already mentioned, the goal is to obtain the highest gain, that is, the largest value shown on the first line of the antenna-test response.
69 | 
70 | ## Starting the antenna test environment
71 | 
72 | To start the environment you will need Java installed. Then download the file `OPServer.jar` (from this repository), open a Terminal (on Linux or Mac) or the Command Prompt (on Windows), and run the following command:
73 | 
74 | ```
75 | java -jar OPServer.jar
76 | ```
77 | 
78 | That starts the simulation environment, implemented as a Java web server, so that it receives the requests described above, computes the gain achieved by the antenna, and returns the result.
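Putting the pieces together, a minimal hill-climbing client; this is a sketch only, assuming the server is running locally as described above and that Python with the `requests` package is the chosen language (the 10-degree single-angle neighborhood is one arbitrary choice among many):

```python
import random
import requests

BASE = "http://localhost:8080/antenna/simulate"
PARAMS = ["phi1", "theta1", "phi2", "theta2", "phi3", "theta3"]

def gain(angles):
    # The first line of the server's response is the antenna's gain.
    resp = requests.get(BASE, params=dict(zip(PARAMS, angles)))
    return float(resp.text.splitlines()[0])

# Simple hill climbing: tweak one random angle; keep the change if it helps.
current = [random.randrange(360) for _ in PARAMS]
best = gain(current)
for _ in range(500):
    i = random.randrange(len(PARAMS))
    candidate = current[:]
    candidate[i] = (candidate[i] + random.choice([-10, 10])) % 360
    g = gain(candidate)
    if g > best:
        current, best = candidate, g

print(best, current)
```

Plain hill climbing like this gets stuck in local maxima; random restarts, or accepting occasional worse moves as in Simulated Annealing, are the natural next steps the activity suggests.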
79 | 80 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/r,java,linux,macos,python,windows 3 | 4 | ### Java ### 5 | # Compiled class file 6 | *.class 7 | 8 | # Log file 9 | *.log 10 | 11 | # BlueJ files 12 | *.ctxt 13 | 14 | # Mobile Tools for Java (J2ME) 15 | .mtj.tmp/ 16 | 17 | # Package Files # 18 | *.jar 19 | *.war 20 | *.ear 21 | *.zip 22 | *.tar.gz 23 | *.rar 24 | 25 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 26 | hs_err_pid* 27 | 28 | ### Linux ### 29 | *~ 30 | 31 | # temporary files which can be created if a process still has a handle open of a deleted file 32 | .fuse_hidden* 33 | 34 | # KDE directory preferences 35 | .directory 36 | 37 | # Linux trash folder which might appear on any partition or disk 38 | .Trash-* 39 | 40 | # .nfs files are created when an open file is removed but is still being accessed 41 | .nfs* 42 | 43 | ### macOS ### 44 | *.DS_Store 45 | .AppleDouble 46 | .LSOverride 47 | 48 | # Icon must end with two \r 49 | Icon 50 | 51 | # Thumbnails 52 | ._* 53 | 54 | # Files that might appear in the root of a volume 55 | .DocumentRevisions-V100 56 | .fseventsd 57 | .Spotlight-V100 58 | .TemporaryItems 59 | .Trashes 60 | .VolumeIcon.icns 61 | .com.apple.timemachine.donotpresent 62 | 63 | # Directories potentially created on remote AFP share 64 | .AppleDB 65 | .AppleDesktop 66 | Network Trash Folder 67 | Temporary Items 68 | .apdisk 69 | 70 | ### Python ### 71 | # Byte-compiled / optimized / DLL files 72 | __pycache__/ 73 | *.py[cod] 74 | *$py.class 75 | 76 | # C extensions 77 | *.so 78 | 79 | # Distribution / packaging 80 | .Python 81 | build/ 82 | develop-eggs/ 83 | dist/ 84 | downloads/ 85 | eggs/ 86 | .eggs/ 87 | lib/ 88 | lib64/ 89 | parts/ 90 | sdist/ 91 | var/ 92 | wheels/ 93 | *.egg-info/ 94 | .installed.cfg 95 | *.egg 96 | 97 | # PyInstaller 98 | # Usually these files are written by a python script from a template 99 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
100 | *.manifest 101 | *.spec 102 | 103 | # Installer logs 104 | pip-log.txt 105 | pip-delete-this-directory.txt 106 | 107 | # Unit test / coverage reports 108 | htmlcov/ 109 | .tox/ 110 | .coverage 111 | .coverage.* 112 | .cache 113 | .pytest_cache/ 114 | nosetests.xml 115 | coverage.xml 116 | *.cover 117 | .hypothesis/ 118 | 119 | # Translations 120 | *.mo 121 | *.pot 122 | 123 | # Flask stuff: 124 | instance/ 125 | .webassets-cache 126 | 127 | # Scrapy stuff: 128 | .scrapy 129 | 130 | # Sphinx documentation 131 | docs/_build/ 132 | 133 | # PyBuilder 134 | target/ 135 | 136 | # Jupyter Notebook 137 | .ipynb_checkpoints 138 | 139 | # pyenv 140 | .python-version 141 | 142 | # celery beat schedule file 143 | celerybeat-schedule.* 144 | 145 | # SageMath parsed files 146 | *.sage.py 147 | 148 | # Environments 149 | .env 150 | .venv 151 | env/ 152 | venv/ 153 | ENV/ 154 | env.bak/ 155 | venv.bak/ 156 | 157 | # Spyder project settings 158 | .spyderproject 159 | .spyproject 160 | 161 | # Rope project settings 162 | .ropeproject 163 | 164 | # mkdocs documentation 165 | /site 166 | 167 | # mypy 168 | .mypy_cache/ 169 | 170 | ### R ### 171 | # History files 172 | .Rhistory 173 | .Rapp.history 174 | 175 | # Session Data files 176 | .RData 177 | 178 | # Example code in package build process 179 | *-Ex.R 180 | 181 | # Output files from R CMD build 182 | /*.tar.gz 183 | 184 | # Output files from R CMD check 185 | /*.Rcheck/ 186 | 187 | # RStudio files 188 | .Rproj.user/ 189 | 190 | # produced vignettes 191 | vignettes/*.html 192 | vignettes/*.pdf 193 | 194 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 195 | .httr-oauth 196 | 197 | # knitr and R markdown default cache directories 198 | /*_cache/ 199 | /cache/ 200 | 201 | # Temporary files created by R markdown 202 | *.utf8.md 203 | *.knit.md 204 | 205 | ### Windows ### 206 | # Windows thumbnail cache files 207 | Thumbs.db 208 | ehthumbs.db 209 | ehthumbs_vista.db 210 | 211 | # Folder config file 212 | Desktop.ini 213 | 214 | # Recycle Bin used on file shares 215 | $RECYCLE.BIN/ 216 | 217 | # Windows Installer files 218 | *.cab 219 | *.msi 220 | *.msm 221 | *.msp 222 | 223 | # Windows shortcuts 224 | *.lnk 225 | 226 | 227 | # End of https://www.gitignore.io/api/r,java,linux,macos,python,windows 228 | -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/base/doomwrapper.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | import numpy as np 4 | import pygame 5 | 6 | try: 7 | #ty @ gdb & ppaquette 8 | import doom_py 9 | import doom_py.vizdoom as vizdoom 10 | except ImportError: 11 | raise ImportError("Please install doom_py.") 12 | 13 | class DoomWrapper(object): 14 | 15 | 16 | def __init__(self, width, height, cfg_file, scenario_file): 17 | 18 | self.doom_game = doom_py.DoomGame() 19 | self._loader = doom_py.Loader() 20 | 21 | #make most sense to keep cfg and wads together. 
22 | #which is why we ship them all together 23 | self.cfg_file = cfg_file 24 | self.scenario_file = self._loader.get_scenario_path(scenario_file) 25 | 26 | self.freedom_file = self._loader.get_freedoom_path() 27 | self.vizdoom_file = self._loader.get_vizdoom_path() 28 | 29 | self.state = None 30 | self.num_actions = 0 31 | self.action = None 32 | self.NOOP = [0]*40 33 | 34 | self.height = height 35 | self.width = width 36 | self.screen_dim = (width, height) 37 | self.allowed_fps = None 38 | self.rng = None 39 | 40 | self._window = DoomWindow(width, height) 41 | 42 | def _setup(self): 43 | self.doom_game.set_screen_format(vizdoom.ScreenFormat.BGR24) 44 | 45 | #load the cfg 46 | self.doom_game.load_config(self.cfg_file) 47 | 48 | self.doom_game.set_vizdoom_path(self.vizdoom_file) 49 | self.doom_game.set_doom_game_path(self.freedom_file) 50 | self.doom_game.set_doom_scenario_path(self.scenario_file) 51 | self.doom_game.set_window_visible(False) #we use our own window... 52 | 53 | self.doom_game.init() 54 | 55 | self.num_actions = self.doom_game.get_available_buttons_size() 56 | 57 | self.actions = [] 58 | for i in range(self.num_actions): 59 | action = [0]*self.num_actions 60 | action[i] = 1 61 | self.actions.append(action) 62 | 63 | def _setAction(self, action, last_action): 64 | #make the game perform the action 65 | self.action = action 66 | 67 | def _draw_frame(self, draw_screen): 68 | if draw_screen: 69 | self._window.show_frame(self.getScreenRGB()) 70 | 71 | def setRNG(self, rng): 72 | if isinstance(rng, int): 73 | self.rng = rng 74 | self.doom_game.set_seed(rng) 75 | else: 76 | raise ValueError("ViZDoom needs an int passed as rng") 77 | 78 | def getScreenRGB(self): 79 | return self.state.image_buffer.copy() 80 | 81 | def tick(self, fps): 82 | time.sleep(1.0/fps) #sleep a bit here (in seconds) 83 | return fps 84 | 85 | def adjustRewards(self, rewards): 86 | if "tick" in rewards: 87 | self.doom_game.set_living_reward(rewards["tick"]) 88 | 89 | if "loss" in rewards: 90 | self.doom_game.set_death_penalty(rewards["loss"]) 91 | 92 | def getGameState(self): 93 | return self.doom_game.get_state().game_variables 94 | 95 | def getScreenDims(self): 96 | return self.screen_dim 97 | 98 | def getActions(self): 99 | return self.actions 100 | 101 | def init(self): 102 | self.action = None 103 | self.doom_game.new_episode() 104 | self.state = self.doom_game.get_state() 105 | 106 | def reset(self): 107 | self.init() 108 | 109 | def getScore(self): 110 | return self.doom_game.get_total_reward() 111 | 112 | def game_over(self): 113 | return self.doom_game.is_episode_finished() 114 | 115 | def _handle_window_events(self): 116 | for event in pygame.event.get(): 117 | if event.type == pygame.QUIT: 118 | self.doom_game.close() #doom quit 119 | pygame.quit() #close window 120 | sys.exit() #close game 121 | 122 | def step(self, dt): 123 | self._handle_window_events() 124 | 125 | self.state = self.doom_game.get_state() 126 | 127 | if self.action is None: 128 | _ = self.doom_game.make_action(self.NOOP) 129 | else: 130 | _ = self.doom_game.make_action(self.action) 131 | 132 | class DoomWindow(object): 133 | 134 | def __init__(self, width, height): 135 | self.width = width 136 | self.height = height 137 | 138 | pygame.init() 139 | self.window = pygame.display.set_mode( (self.width, self.height), pygame.DOUBLEBUF, 24 ) 140 | pygame.display.set_caption("PLE ViZDoom") 141 | 142 | def show_frame(self, frame): 143 | frame = np.rollaxis(frame, 0, 2) #its HEIGHT, WIDTH, 3 144 | pygame.surfarray.blit_array(self.window, 
frame) 145 | pygame.display.update() 146 | -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/primitives.py: -------------------------------------------------------------------------------- 1 | import pygame 2 | import math 3 | from .utils.vec2d import vec2d 4 | 5 | 6 | class Creep(pygame.sprite.Sprite): 7 | 8 | def __init__(self, 9 | color, 10 | radius, 11 | pos_init, 12 | dir_init, 13 | speed, 14 | reward, 15 | TYPE, 16 | SCREEN_WIDTH, 17 | SCREEN_HEIGHT, 18 | jitter_speed): 19 | 20 | pygame.sprite.Sprite.__init__(self) 21 | 22 | self.SCREEN_WIDTH = SCREEN_WIDTH 23 | self.SCREEN_HEIGHT = SCREEN_HEIGHT 24 | self.TYPE = TYPE 25 | self.jitter_speed = jitter_speed 26 | self.speed = speed 27 | self.reward = reward 28 | self.radius = radius 29 | self.pos = vec2d(pos_init) 30 | 31 | self.direction = vec2d(dir_init) 32 | self.direction.normalize() # normalized 33 | 34 | image = pygame.Surface((radius * 2, radius * 2)) 35 | image.fill((0, 0, 0)) 36 | image.set_colorkey((0, 0, 0)) 37 | 38 | pygame.draw.circle( 39 | image, 40 | color, 41 | (radius, radius), 42 | radius, 43 | 0 44 | ) 45 | 46 | self.image = image.convert() 47 | self.rect = self.image.get_rect() 48 | self.rect.center = pos_init 49 | 50 | def update(self, dt): 51 | 52 | dx = self.direction.x * self.speed * dt 53 | dy = self.direction.y * self.speed * dt 54 | 55 | if self.pos.x + dx > self.SCREEN_WIDTH - self.radius: 56 | self.pos.x = self.SCREEN_WIDTH - self.radius 57 | self.direction.x = -1 * self.direction.x * \ 58 | (1 + 0.5 * self.jitter_speed) # a little jitter 59 | elif self.pos.x + dx <= self.radius: 60 | self.pos.x = self.radius 61 | self.direction.x = -1 * self.direction.x * \ 62 | (1 + 0.5 * self.jitter_speed) # a little jitter 63 | else: 64 | self.pos.x = self.pos.x + dx 65 | 66 | if self.pos.y + dy > self.SCREEN_HEIGHT - self.radius: 67 | self.pos.y = self.SCREEN_HEIGHT - self.radius 68 | self.direction.y = -1 * self.direction.y * \ 69 | (1 + 0.5 * self.jitter_speed) # a little jitter 70 | elif self.pos.y + dy <= self.radius: 71 | self.pos.y = self.radius 72 | self.direction.y = -1 * self.direction.y * \ 73 | (1 + 0.5 * self.jitter_speed) # a little jitter 74 | else: 75 | self.pos.y = self.pos.y + dy 76 | 77 | self.direction.normalize() 78 | 79 | self.rect.center = ((self.pos.x, self.pos.y)) 80 | 81 | 82 | class Wall(pygame.sprite.Sprite): 83 | 84 | def __init__(self, pos, w, h): 85 | pygame.sprite.Sprite.__init__(self) 86 | 87 | self.pos = vec2d(pos) 88 | self.w = w 89 | self.h = h 90 | 91 | image = pygame.Surface([w, h]) 92 | image.fill((10, 10, 10)) 93 | self.image = image.convert() 94 | 95 | self.rect = self.image.get_rect() 96 | self.rect.center = pos 97 | 98 | def draw(self, screen): 99 | pygame.draw.rect( 100 | screen, (10, 10, 10), [ 101 | self.pos.x, self.pos.y, self.w, self.h], 0) 102 | 103 | 104 | class Player(pygame.sprite.Sprite): 105 | 106 | def __init__(self, 107 | radius, 108 | color, 109 | speed, 110 | pos_init, 111 | SCREEN_WIDTH, 112 | SCREEN_HEIGHT): 113 | 114 | pygame.sprite.Sprite.__init__(self) 115 | 116 | self.SCREEN_WIDTH = SCREEN_WIDTH 117 | self.SCREEN_HEIGHT = SCREEN_HEIGHT 118 | 119 | self.pos = vec2d(pos_init) 120 | self.vel = vec2d((0, 0)) 121 | 122 | image = pygame.Surface([radius * 2, radius * 2]) 123 | image.set_colorkey((0, 0, 0)) 124 | 125 | pygame.draw.circle( 126 | image, 127 | color, 128 | (radius, radius), 129 | radius, 130 | 0 131 | ) 132 | 133 | self.image = image.convert() 134 | self.rect = self.image.get_rect() 135 
| self.radius = radius 136 | 137 | def update(self, dx, dy, dt): 138 | self.vel.x += dx 139 | self.vel.y += dy 140 | 141 | new_x = self.pos.x + self.vel.x * dt 142 | new_y = self.pos.y + self.vel.y * dt 143 | 144 | # if its not against a wall we want a total decay of 50 145 | if new_x >= self.SCREEN_WIDTH - self.radius * 2: 146 | self.pos.x = self.SCREEN_WIDTH - self.radius * 2 147 | self.vel.x = 0.0 148 | elif new_x < 0.0: 149 | self.pos.x = 0.0 150 | self.vel.x = 0.0 151 | else: 152 | self.pos.x = new_x 153 | self.vel.x = self.vel.x * 0.975 154 | 155 | if new_y > self.SCREEN_HEIGHT - self.radius * 2: 156 | self.pos.y = self.SCREEN_HEIGHT - self.radius * 2 157 | self.vel.y = 0.0 158 | elif new_y < 0.0: 159 | self.pos.y = 0.0 160 | self.vel.y = 0.0 161 | else: 162 | self.pos.y = new_y 163 | self.vel.y = self.vel.y * 0.975 164 | 165 | self.rect.center = (self.pos.x, self.pos.y) 166 | 167 | def draw(self, screen): 168 | screen.blit(self.image, self.rect.center) 169 | -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/monsterkong/fireball.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Erilyth' 2 | import pygame 3 | import math 4 | import os 5 | from .onBoard import OnBoard 6 | 7 | ''' 8 | This class defines all our fireballs. 9 | A fireball inherits from the OnBoard class since we will use it as an inanimate object on our board. 10 | Each fireball can check for collisions in order to decide when to turn and when they hit a player. 11 | ''' 12 | 13 | 14 | class Fireball(OnBoard): 15 | 16 | def __init__(self, raw_image, position, index, speed, rng, dir): 17 | super(Fireball, self).__init__(raw_image, position) 18 | # Set the fireball direction randomly 19 | self.rng = rng 20 | self.__direction = int(math.floor(self.rng.rand() * 100)) % 2 21 | self.index = index 22 | self.wallsBelow = [] 23 | self.laddersBelow = [] 24 | 25 | self.IMAGES = { 26 | "fireballright": pygame.transform.scale(pygame.image.load(os.path.join(dir, 'assets/fireballright.png')), (20, 20)).convert_alpha(), 27 | "fireballleft": pygame.transform.scale(pygame.image.load(os.path.join(dir, 'assets/fireballleft.png')), (20, 20)).convert_alpha() 28 | } 29 | # The newly spawned fireball is not falling 30 | self.__fall = 0 31 | # The speed of a fireball is set 32 | self.__speed = speed 33 | 34 | # Update the image of a fireball 35 | def updateImage(self, raw_image): 36 | self.image = raw_image 37 | 38 | # Getters and Setters for some private variables 39 | def getSpeed(self): 40 | return self.__speed 41 | 42 | def setSpeed(self, speed): 43 | self.__speed = speed 44 | 45 | def getFall(self): 46 | return self.__fall 47 | 48 | def getDirection(self): 49 | return self.__direction 50 | 51 | # Moves the fireball in the required direction 52 | def continuousUpdate(self, wallGroup, ladderGroup): 53 | 54 | # The fireball is falling 55 | if self.__fall == 1: 56 | # We move the fireball downwards with speed of self.__speed 57 | self.update(self.image, "V", self.__speed) 58 | if self.checkCollision(wallGroup, "V"): 59 | # We have collided with a wall below, so the fireball can stop 60 | # falling 61 | self.__fall = 0 62 | # Set the direction randomly 63 | self.__direction = int(math.floor(self.rng.rand() * 100)) % 2 64 | 65 | else: 66 | 67 | # While we are on the ladder, we use a probability of 4/20 to make 68 | # the fireball start falling 69 | if self.checkCollision(ladderGroup, "V") and len( 70 | 
self.checkCollision(wallGroup, "V")) == 0: 71 | randVal = int(math.floor(self.rng.rand() * 100)) % 20 72 | if randVal < 15: 73 | self.__fall = 0 74 | else: 75 | self.__fall = 1 76 | 77 | # We are at the edge of the floor so the fireball starts falling 78 | if len(self.checkCollision(ladderGroup, "V")) == 0 and len( 79 | self.checkCollision(wallGroup, "V")) == 0: 80 | self.__fall = 1 81 | 82 | # We are moving right, so update the fireball image to the right 83 | if self.__direction == 0: 84 | self.update(self.IMAGES["fireballright"], "H", self.__speed) 85 | # When we hit a wall, we change direction 86 | if self.checkCollision(wallGroup, "H"): 87 | self.__direction = 1 88 | self.update(self.image, "H", -self.__speed) 89 | 90 | # We are moving left, so update the fireball image to the left 91 | else: 92 | self.update(self.IMAGES["fireballleft"], "H", -self.__speed) 93 | # When we hit a wall, we change direction 94 | if self.checkCollision(wallGroup, "H"): 95 | self.__direction = 0 96 | self.update(self.image, "H", self.__speed) 97 | 98 | # Move the fireball in the required direction with the required value and 99 | # also set the image of the fireball 100 | def update(self, raw_image, direction, value): 101 | if direction == "H": 102 | self.setPosition( 103 | (self.getPosition()[0] + value, 104 | self.getPosition()[1])) 105 | self.image = raw_image 106 | if direction == "V": 107 | self.setPosition( 108 | (self.getPosition()[0], 109 | self.getPosition()[1] + value)) 110 | self.rect.center = self.getPosition() 111 | 112 | ''' 113 | We check for collisions in the direction in which we are moving if the parameter direction is "H". 114 | The way we do this is move a little forward in the direction in which we are moving, then check for collisions then move back to the original location 115 | We check for collisions below the fireball if the parameter direction is "V" 116 | We do this by moving down a little, then check for collisions then move back up to the original location 117 | ''' 118 | 119 | def checkCollision(self, colliderGroup, direction): 120 | if direction == "H": 121 | if self.__direction == 0: 122 | self.update(self.image, "H", self.__speed) # Right collision 123 | if self.__direction == 1: 124 | self.update(self.image, "H", -self.__speed) # Left collision 125 | Colliders = pygame.sprite.spritecollide(self, colliderGroup, False) 126 | if self.__direction == 0: 127 | self.update(self.image, "H", -self.__speed) # Right collision 128 | if self.__direction == 1: 129 | self.update(self.image, "H", self.__speed) # Left collision 130 | else: 131 | self.update(self.image, "V", self.__speed) # Bottom collision 132 | Colliders = pygame.sprite.spritecollide(self, colliderGroup, False) 133 | self.update(self.image, "V", -self.__speed) 134 | return Colliders 135 | -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/base/pygamewrapper.py: -------------------------------------------------------------------------------- 1 | import pygame 2 | import numpy as np 3 | from pygame.constants import KEYDOWN, KEYUP, K_F15 4 | 5 | 6 | class PyGameWrapper(object): 7 | """PyGameWrapper class 8 | 9 | ple.games.base.PyGameWrapper(width, height, actions={}) 10 | 11 | This :class:`PyGameWrapper` class sets methods all games require. It should be subclassed when creating new games. 12 | 13 | Parameters 14 | ---------- 15 | width: int 16 | The width of the game screen. 17 | 18 | height: int 19 | The height of the game screen. 
20 | 21 | actions: dict 22 | Contains possible actions that the game responds too. The dict keys are used by the game, while the values are `pygame.constants` referring the keys. 23 | 24 | Possible actions dict: 25 | 26 | >>> from pygame.constants import K_w, K_s 27 | >>> actions = { 28 | >>> "up": K_w, 29 | >>> "down": K_s 30 | >>> } 31 | """ 32 | 33 | def __init__(self, width, height, actions={}): 34 | 35 | # Required fields 36 | self.actions = actions # holds actions 37 | 38 | self.score = 0.0 # required. 39 | self.lives = 0 # required. Can be 0 or -1 if not required. 40 | self.screen = None # must be set to None 41 | self.clock = None # must be set to None 42 | self.height = height 43 | self.width = width 44 | self.screen_dim = (width, height) # width and height 45 | self.allowed_fps = None # fps that the game is allowed to run at. 46 | self.NOOP = K_F15 # the noop key 47 | self.rng = None 48 | 49 | self.rewards = { 50 | "positive": 1.0, 51 | "negative": -1.0, 52 | "tick": 0, 53 | "loss": -5.0, 54 | "win": 5.0 55 | } 56 | 57 | def _setup(self): 58 | """ 59 | Setups up the pygame env, the display and game clock. 60 | """ 61 | pygame.init() 62 | self.screen = pygame.display.set_mode(self.getScreenDims(), 0, 32) 63 | self.clock = pygame.time.Clock() 64 | 65 | def _setAction(self, action, last_action): 66 | """ 67 | Pushes the action to the pygame event queue. 68 | """ 69 | if action is None: 70 | action = self.NOOP 71 | 72 | if last_action is None: 73 | last_action = self.NOOP 74 | 75 | kd = pygame.event.Event(KEYDOWN, {"key": action}) 76 | ku = pygame.event.Event(KEYUP, {"key": last_action}) 77 | 78 | pygame.event.post(kd) 79 | pygame.event.post(ku) 80 | 81 | def _draw_frame(self, draw_screen): 82 | """ 83 | Decides if the screen will be drawn too 84 | """ 85 | 86 | if draw_screen == True: 87 | pygame.display.update() 88 | 89 | def getScreenRGB(self): 90 | """ 91 | Returns the current game screen in RGB format. 92 | 93 | Returns 94 | -------- 95 | numpy uint8 array 96 | Returns a numpy array with the shape (width, height, 3). 97 | 98 | """ 99 | 100 | return pygame.surfarray.array3d( 101 | pygame.display.get_surface()).astype(np.uint8) 102 | 103 | def tick(self, fps): 104 | """ 105 | This sleeps the game to ensure it runs at the desired fps. 106 | """ 107 | return self.clock.tick_busy_loop(fps) 108 | 109 | def adjustRewards(self, rewards): 110 | """ 111 | 112 | Adjusts the rewards the game gives the agent 113 | 114 | Parameters 115 | ---------- 116 | rewards : dict 117 | A dictonary of reward events to float rewards. Only updates if key matches those specificed in the init function. 118 | 119 | """ 120 | for key in rewards.keys(): 121 | if key in self.rewards: 122 | self.rewards[key] = rewards[key] 123 | 124 | def setRNG(self, rng): 125 | """ 126 | Sets the rng for games. 127 | """ 128 | 129 | if self.rng is None: 130 | self.rng = rng 131 | 132 | def getGameState(self): 133 | """ 134 | Gets a non-visual state representation of the game. 135 | 136 | Returns 137 | ------- 138 | dict or None 139 | dict if the game supports it and None otherwise. 140 | 141 | """ 142 | return None 143 | 144 | def getScreenDims(self): 145 | """ 146 | Gets the screen dimensions of the game in tuple form. 147 | 148 | Returns 149 | ------- 150 | tuple of int 151 | Returns tuple as follows (width, height). 152 | 153 | """ 154 | return self.screen_dim 155 | 156 | def getActions(self): 157 | """ 158 | Gets the actions used within the game. 
159 | 
160 |         Returns
161 |         -------
162 |         list of `pygame.constants`
163 | 
164 |         """
165 |         return self.actions.values()
166 | 
167 |     def init(self):
168 |         """
169 |         This is used to initialize the game, such as resetting the score, lives, and player position.
170 | 
171 |         This is game dependent.
172 | 
173 |         """
174 |         raise NotImplementedError("Please override this method")
175 | 
176 |     def reset(self):
177 |         """
178 |         Wraps the init() function; can be set up to reset only certain portions of the game if needed.
179 |         """
180 |         self.init()
181 | 
182 |     def getScore(self):
183 |         """
184 |         Return the current score of the game.
185 | 
186 | 
187 |         Returns
188 |         -------
189 |         int
190 |             The current reward the agent has received since the last init() or reset() call.
191 |         """
192 |         raise NotImplementedError("Please override this method")
193 | 
194 |     def game_over(self):
195 |         """
196 |         Gets the status of the game; returns True if the game has hit a terminal state, False otherwise.
197 | 
198 |         This is game dependent.
199 | 
200 |         Returns
201 |         -------
202 |         bool
203 | 
204 |         """
205 |         raise NotImplementedError("Please override this method")
206 | 
207 |     def step(self, dt):
208 |         """
209 |         This method steps the game forward one step in time equal to the dt parameter. The game does not run unless this method is called.
210 | 
211 |         Parameters
212 |         ----------
213 |         dt : integer
214 |             This is the amount of time elapsed since the last frame in milliseconds.
215 | 
216 |         """
217 |         raise NotImplementedError("Please override this method")
218 | 
--------------------------------------------------------------------------------
/04_ReinforcementLearning/ple/games/monsterkong/monsterPerson.py:
--------------------------------------------------------------------------------
1 | __author__ = 'Erilyth'
2 | import pygame
3 | import os
4 | from .person import Person
5 | 
6 | '''
7 | This class defines all the Monsters present in our game.
8 | Each Monster can only move on the top floor and cannot move vertically.
9 | ''' 10 | 11 | 12 | class MonsterPerson(Person): 13 | 14 | def __init__(self, raw_image, position, rng, dir, width=15, height=15): 15 | super(MonsterPerson, self).__init__(raw_image, position, width, height) 16 | self.__speed = 2 17 | self.rng = rng 18 | self.__direction = int(self.rng.rand() * 100) % 2 19 | self.__cycles = 0 20 | self.__stopDuration = 0 21 | self.IMAGES = { 22 | "monster0": pygame.image.load(os.path.join(dir, 'assets/monster0.png')).convert_alpha(), 23 | "monster1": pygame.image.load(os.path.join(dir, 'assets/monster1.png')).convert_alpha(), 24 | "monster2": pygame.image.load(os.path.join(dir, 'assets/monster2.png')).convert_alpha(), 25 | "monster3": pygame.image.load(os.path.join(dir, 'assets/monster3.png')).convert_alpha(), 26 | "monster01": pygame.image.load(os.path.join(dir, 'assets/monster01.png')).convert_alpha(), 27 | "monster11": pygame.image.load(os.path.join(dir, 'assets/monster11.png')).convert_alpha(), 28 | "monster21": pygame.image.load(os.path.join(dir, 'assets/monster21.png')).convert_alpha(), 29 | "monster31": pygame.image.load(os.path.join(dir, 'assets/monster31.png')).convert_alpha(), 30 | "monsterstill0": pygame.image.load(os.path.join(dir, 'assets/monsterstill0.png')).convert_alpha(), 31 | "monsterstill10": pygame.image.load(os.path.join(dir, 'assets/monsterstill10.png')).convert_alpha(), 32 | "monsterstill1": pygame.image.load(os.path.join(dir, 'assets/monsterstill1.png')).convert_alpha(), 33 | "monsterstill11": pygame.image.load(os.path.join(dir, 'assets/monsterstill11.png')).convert_alpha() 34 | } 35 | 36 | # Getters and Setters 37 | def getSpeed(self): 38 | return self.__speed 39 | 40 | def setSpeed(self): 41 | return self.__speed 42 | 43 | def getStopDuration(self): 44 | return self.__stopDuration 45 | 46 | def setStopDuration(self, stopDuration): 47 | self.__stopDuration = stopDuration 48 | 49 | # Checks for collisions with walls in order to change direction when hit 50 | # by a wall 51 | def checkWall(self, colliderGroup): 52 | if self.__direction == 0: 53 | # Right collision with wall 54 | self.updateWH(self.image, "H", 20, 40, 40) 55 | if self.__direction == 1: 56 | # Left collision with wall 57 | self.updateWH(self.image, "H", -20, 40, 40) 58 | Colliders = pygame.sprite.spritecollide(self, colliderGroup, False) 59 | if self.__direction == 0: 60 | # Right collision with wall 61 | self.updateWH(self.image, "H", -20, 40, 40) 62 | if self.__direction == 1: 63 | # Left collision with wall 64 | self.updateWH(self.image, "H", 20, 40, 40) 65 | return Colliders 66 | 67 | # This is used to animate the monster 68 | def continuousUpdate(self, GroupList, GroupList2): 69 | 70 | # If the stop duration is 0 then monster is currently moving either 71 | # left or right 72 | if self.__stopDuration == 0: 73 | 74 | # Currently moving right 75 | if self.__direction == 0: 76 | self.__cycles += 1 77 | if self.__cycles % 24 < 6: 78 | self.updateWH( 79 | self.IMAGES["monster0"], "H", self.__speed, 45, 45) 80 | elif self.__cycles % 24 < 12: 81 | self.updateWH( 82 | self.IMAGES["monster1"], "H", self.__speed, 45, 45) 83 | elif self.__cycles % 24 < 18: 84 | self.updateWH( 85 | self.IMAGES["monster2"], "H", self.__speed, 45, 45) 86 | else: 87 | self.updateWH( 88 | self.IMAGES["monster3"], "H", self.__speed, 45, 45) 89 | if self.checkWall(GroupList): 90 | self.__direction = 1 91 | self.__cycles = 0 92 | self.updateWH(self.image, "H", -self.__speed, 45, 45) 93 | 94 | # Currently moving left 95 | else: 96 | self.__cycles += 1 97 | if self.__cycles % 24 < 6: 98 | 
self.updateWH( 99 | self.IMAGES["monster01"], "H", -self.__speed, 45, 45) 100 | elif self.__cycles % 24 < 12: 101 | self.updateWH( 102 | self.IMAGES["monster11"], "H", -self.__speed, 45, 45) 103 | elif self.__cycles % 24 < 18: 104 | self.updateWH( 105 | self.IMAGES["monster21"], "H", -self.__speed, 45, 45) 106 | else: 107 | self.updateWH( 108 | self.IMAGES["monster31"], "H", -self.__speed, 45, 45) 109 | if self.checkWall(GroupList): 110 | self.__direction = 0 111 | self.__cycles = 0 112 | self.updateWH(self.image, "H", self.__speed, 45, 45) 113 | 114 | # Donkey Kong is currently not moving, which means he is launching a 115 | # fireball 116 | else: 117 | self.__stopDuration -= 1 118 | if self.__stopDuration == 0: # Once he finishes launching a fireball, we go back to our normal movement animation 119 | self.updateWH(self.image, "V", 12, 50, 50) 120 | if self.__stopDuration >= 10: 121 | if self.__direction == 0: 122 | self.updateWH(self.IMAGES["monsterstill0"], "H", 0, 45, 45) 123 | else: 124 | self.updateWH( 125 | self.IMAGES["monsterstill10"], "H", 0, 45, 45) 126 | elif self.__stopDuration >= 5: 127 | if self.__direction == 0: 128 | self.updateWH(self.IMAGES["monsterstill1"], "H", 0, 45, 45) 129 | else: 130 | self.updateWH( 131 | self.IMAGES["monsterstill11"], "H", 0, 45, 45) 132 | else: 133 | if self.__direction == 0: 134 | self.updateWH(self.IMAGES["monsterstill0"], "H", 0, 45, 45) 135 | else: 136 | self.updateWH( 137 | self.IMAGES["monsterstill10"], "H", 0, 45, 45) 138 | -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/catcher.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pygame 3 | from .utils import percent_round_int 4 | 5 | from ple.games import base 6 | from pygame.constants import K_a, K_d 7 | 8 | 9 | class Paddle(pygame.sprite.Sprite): 10 | 11 | def __init__(self, speed, width, height, SCREEN_WIDTH, SCREEN_HEIGHT): 12 | self.speed = speed 13 | self.width = width 14 | 15 | self.SCREEN_WIDTH = SCREEN_WIDTH 16 | self.vel = 0.0 17 | 18 | pygame.sprite.Sprite.__init__(self) 19 | 20 | image = pygame.Surface((width, height)) 21 | image.fill((0, 0, 0, 0)) 22 | image.set_colorkey((0, 0, 0)) 23 | 24 | pygame.draw.rect( 25 | image, 26 | (255, 255, 255), 27 | (0, 0, width, height), 28 | 0 29 | ) 30 | 31 | self.image = image 32 | self.rect = self.image.get_rect() 33 | self.rect.center = ( 34 | SCREEN_WIDTH / 2 - self.width / 2, 35 | SCREEN_HEIGHT - height - 3) 36 | 37 | def update(self, dx, dt): 38 | self.vel += dx 39 | self.vel *= 0.9 40 | 41 | x, y = self.rect.center 42 | n_x = x + self.vel 43 | 44 | if n_x <= 0: 45 | self.vel = 0.0 46 | n_x = 0 47 | 48 | if n_x + self.width >= self.SCREEN_WIDTH: 49 | self.vel = 0.0 50 | n_x = self.SCREEN_WIDTH - self.width 51 | 52 | self.rect.center = (n_x, y) 53 | 54 | def draw(self, screen): 55 | screen.blit(self.image, self.rect.center) 56 | 57 | 58 | class Fruit(pygame.sprite.Sprite): 59 | 60 | def __init__(self, speed, size, SCREEN_WIDTH, SCREEN_HEIGHT, rng): 61 | self.speed = speed 62 | self.size = size 63 | 64 | self.SCREEN_WIDTH = SCREEN_WIDTH 65 | self.SCREEN_HEIGHT = SCREEN_HEIGHT 66 | 67 | self.rng = rng 68 | 69 | pygame.sprite.Sprite.__init__(self) 70 | 71 | image = pygame.Surface((size, size)) 72 | image.fill((0, 0, 0, 0)) 73 | image.set_colorkey((0, 0, 0)) 74 | 75 | pygame.draw.rect( 76 | image, 77 | (255, 120, 120), 78 | (0, 0, size, size), 79 | 0 80 | ) 81 | 82 | self.image = image 83 | self.rect = 
self.image.get_rect() 84 | self.rect.center = (-30, -30) 85 | 86 | def update(self, dt): 87 | x, y = self.rect.center 88 | n_y = y + self.speed * dt 89 | 90 | self.rect.center = (x, n_y) 91 | 92 | def reset(self): 93 | x = self.rng.choice( 94 | range( 95 | self.size * 96 | 2, 97 | self.SCREEN_WIDTH - 98 | self.size * 99 | 2, 100 | self.size)) 101 | y = self.rng.choice( 102 | range( 103 | self.size, 104 | int(self.SCREEN_HEIGHT / 2), 105 | self.size)) 106 | 107 | self.rect.center = (x, -1 * y) 108 | 109 | def draw(self, screen): 110 | screen.blit(self.image, self.rect.center) 111 | 112 | 113 | class Catcher(base.PyGameWrapper): 114 | """ 115 | Based on `Eder Santana`_'s game idea. 116 | 117 | .. _`Eder Santana`: https://github.com/EderSantana 118 | 119 | Parameters 120 | ---------- 121 | width : int 122 | Screen width. 123 | 124 | height : int 125 | Screen height, recommended to be same dimension as width. 126 | 127 | init_lives : int (default: 3) 128 | The number lives the agent has. 129 | 130 | """ 131 | 132 | def __init__(self, width=64, height=64, init_lives=3): 133 | 134 | actions = { 135 | "left": K_a, 136 | "right": K_d 137 | } 138 | 139 | base.PyGameWrapper.__init__(self, width, height, actions=actions) 140 | 141 | self.fruit_size = percent_round_int(height, 0.06) 142 | self.fruit_fall_speed = 0.00095 * height 143 | 144 | self.player_speed = 0.021 * width 145 | self.paddle_width = percent_round_int(width, 0.2) 146 | self.paddle_height = percent_round_int(height, 0.04) 147 | 148 | self.dx = 0.0 149 | self.init_lives = init_lives 150 | 151 | def _handle_player_events(self): 152 | self.dx = 0.0 153 | for event in pygame.event.get(): 154 | if event.type == pygame.QUIT: 155 | pygame.quit() 156 | sys.exit() 157 | 158 | if event.type == pygame.KEYDOWN: 159 | key = event.key 160 | 161 | if key == self.actions['left']: 162 | self.dx -= self.player_speed 163 | 164 | if key == self.actions['right']: 165 | self.dx += self.player_speed 166 | 167 | def init(self): 168 | self.score = 0 169 | self.lives = self.init_lives 170 | 171 | self.player = Paddle(self.player_speed, self.paddle_width, 172 | self.paddle_height, self.width, self.height) 173 | 174 | self.fruit = Fruit(self.fruit_fall_speed, self.fruit_size, 175 | self.width, self.height, self.rng) 176 | 177 | self.fruit.reset() 178 | 179 | def getGameState(self): 180 | """ 181 | Gets a non-visual state representation of the game. 182 | 183 | Returns 184 | ------- 185 | 186 | dict 187 | * player x position. 188 | * players velocity. 189 | * fruits x position. 190 | * fruits y position. 191 | 192 | See code for structure. 
193 | 194 | """ 195 | state = { 196 | "player_x": self.player.rect.center[0], 197 | "player_vel": self.player.vel, 198 | "fruit_x": self.fruit.rect.center[0], 199 | "fruit_y": self.fruit.rect.center[1] 200 | } 201 | 202 | return state 203 | 204 | def getScore(self): 205 | return self.score 206 | 207 | def game_over(self): 208 | return self.lives == 0 209 | 210 | def step(self, dt): 211 | self.screen.fill((0, 0, 0)) 212 | self._handle_player_events() 213 | 214 | self.score += self.rewards["tick"] 215 | 216 | if self.fruit.rect.center[1] >= self.height: 217 | self.score += self.rewards["negative"] 218 | self.lives -= 1 219 | self.fruit.reset() 220 | 221 | if pygame.sprite.collide_rect(self.player, self.fruit): 222 | self.score += self.rewards["positive"] 223 | self.fruit.reset() 224 | 225 | self.player.update(self.dx, dt) 226 | self.fruit.update(dt) 227 | 228 | if self.lives == 0: 229 | self.score += self.rewards["loss"] 230 | 231 | self.player.draw(self.screen) 232 | self.fruit.draw(self.screen) 233 | 234 | if __name__ == "__main__": 235 | import numpy as np 236 | 237 | pygame.init() 238 | game = Catcher(width=256, height=256) 239 | game.rng = np.random.RandomState(24) 240 | game.screen = pygame.display.set_mode(game.getScreenDims(), 0, 32) 241 | game.clock = pygame.time.Clock() 242 | game.init() 243 | 244 | while True: 245 | dt = game.clock.tick_busy_loop(30) 246 | if game.game_over(): 247 | game.reset() 248 | 249 | game.step(dt) 250 | pygame.display.update() 251 | -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/waterworld.py: -------------------------------------------------------------------------------- 1 | import pygame 2 | import sys 3 | import math 4 | 5 | #import .base 6 | from .base.pygamewrapper import PyGameWrapper 7 | 8 | from .utils.vec2d import vec2d 9 | from .utils import percent_round_int 10 | from pygame.constants import K_w, K_a, K_s, K_d 11 | from .primitives import Player, Creep 12 | 13 | 14 | class WaterWorld(PyGameWrapper): 15 | """ 16 | Based Karpthy's WaterWorld in `REINFORCEjs`_. 17 | 18 | .. _REINFORCEjs: https://github.com/karpathy/reinforcejs 19 | 20 | Parameters 21 | ---------- 22 | width : int 23 | Screen width. 24 | 25 | height : int 26 | Screen height, recommended to be same dimension as width. 27 | 28 | num_creeps : int (default: 3) 29 | The number of creeps on the screen at once. 
30 | """ 31 | 32 | def __init__(self, 33 | width=48, 34 | height=48, 35 | num_creeps=3): 36 | 37 | actions = { 38 | "up": K_w, 39 | "left": K_a, 40 | "right": K_d, 41 | "down": K_s 42 | } 43 | 44 | PyGameWrapper.__init__(self, width, height, actions=actions) 45 | self.BG_COLOR = (255, 255, 255) 46 | self.N_CREEPS = num_creeps 47 | self.CREEP_TYPES = ["GOOD", "BAD"] 48 | self.CREEP_COLORS = [(40, 140, 40), (150, 95, 95)] 49 | radius = percent_round_int(width, 0.047) 50 | self.CREEP_RADII = [radius, radius] 51 | self.CREEP_REWARD = [ 52 | self.rewards["positive"], 53 | self.rewards["negative"]] 54 | self.CREEP_SPEED = 0.25 * width 55 | self.AGENT_COLOR = (60, 60, 140) 56 | self.AGENT_SPEED = 0.25 * width 57 | self.AGENT_RADIUS = radius 58 | self.AGENT_INIT_POS = (self.width / 2, self.height / 2) 59 | 60 | self.creep_counts = { 61 | "GOOD": 0, 62 | "BAD": 0 63 | } 64 | 65 | self.dx = 0 66 | self.dy = 0 67 | self.player = None 68 | self.creeps = None 69 | 70 | def _handle_player_events(self): 71 | self.dx = 0 72 | self.dy = 0 73 | for event in pygame.event.get(): 74 | if event.type == pygame.QUIT: 75 | pygame.quit() 76 | sys.exit() 77 | 78 | if event.type == pygame.KEYDOWN: 79 | key = event.key 80 | 81 | if key == self.actions["left"]: 82 | self.dx -= self.AGENT_SPEED 83 | 84 | if key == self.actions["right"]: 85 | self.dx += self.AGENT_SPEED 86 | 87 | if key == self.actions["up"]: 88 | self.dy -= self.AGENT_SPEED 89 | 90 | if key == self.actions["down"]: 91 | self.dy += self.AGENT_SPEED 92 | 93 | def _add_creep(self): 94 | creep_type = self.rng.choice([0, 1]) 95 | 96 | creep = None 97 | pos = (0, 0) 98 | dist = 0.0 99 | 100 | while dist < 1.5: 101 | radius = self.CREEP_RADII[creep_type] * 1.5 102 | pos = self.rng.uniform(radius, self.height - radius, size=2) 103 | dist = math.sqrt( 104 | (self.player.pos.x - pos[0])**2 + (self.player.pos.y - pos[1])**2) 105 | 106 | creep = Creep( 107 | self.CREEP_COLORS[creep_type], 108 | self.CREEP_RADII[creep_type], 109 | pos, 110 | self.rng.choice([-1, 1], 2), 111 | self.rng.rand() * self.CREEP_SPEED, 112 | self.CREEP_REWARD[creep_type], 113 | self.CREEP_TYPES[creep_type], 114 | self.width, 115 | self.height, 116 | self.rng.rand() 117 | ) 118 | 119 | self.creeps.add(creep) 120 | 121 | self.creep_counts[self.CREEP_TYPES[creep_type]] += 1 122 | 123 | def getGameState(self): 124 | """ 125 | 126 | Returns 127 | ------- 128 | 129 | dict 130 | * player x position. 131 | * player y position. 132 | * player x velocity. 133 | * player y velocity. 
134 | * player distance to each creep 135 | 136 | 137 | """ 138 | 139 | state = { 140 | "player_x": self.player.pos.x, 141 | "player_y": self.player.pos.y, 142 | "player_velocity_x": self.player.vel.x, 143 | "player_velocity_y": self.player.vel.y, 144 | "creep_dist": { 145 | "GOOD": [], 146 | "BAD": [] 147 | }, 148 | "creep_pos": { 149 | "GOOD": [], 150 | "BAD": [] 151 | } 152 | } 153 | 154 | for c in self.creeps: 155 | dist = math.sqrt((self.player.pos.x - c.pos.x) ** 156 | 2 + (self.player.pos.y - c.pos.y)**2) 157 | state["creep_dist"][c.TYPE].append(dist) 158 | state["creep_pos"][c.TYPE].append([c.pos.x, c.pos.y]) 159 | 160 | return state 161 | 162 | def getScore(self): 163 | return self.score 164 | 165 | def game_over(self): 166 | """ 167 | Return bool if the game has 'finished' 168 | """ 169 | return (self.creep_counts['GOOD'] == 0) 170 | 171 | def init(self): 172 | """ 173 | Starts/Resets the game to its inital state 174 | """ 175 | self.creep_counts = {"GOOD": 0, "BAD": 0} 176 | 177 | if self.player is None: 178 | self.player = Player( 179 | self.AGENT_RADIUS, self.AGENT_COLOR, 180 | self.AGENT_SPEED, self.AGENT_INIT_POS, 181 | self.width, self.height 182 | ) 183 | 184 | else: 185 | self.player.pos = vec2d(self.AGENT_INIT_POS) 186 | self.player.vel = vec2d((0.0, 0.0)) 187 | 188 | if self.creeps is None: 189 | self.creeps = pygame.sprite.Group() 190 | else: 191 | self.creeps.empty() 192 | 193 | for i in range(self.N_CREEPS): 194 | self._add_creep() 195 | 196 | self.score = 0 197 | self.ticks = 0 198 | self.lives = -1 199 | 200 | def step(self, dt): 201 | """ 202 | Perform one step of game emulation. 203 | """ 204 | dt /= 1000.0 205 | self.screen.fill(self.BG_COLOR) 206 | 207 | self.score += self.rewards["tick"] 208 | 209 | self._handle_player_events() 210 | self.player.update(self.dx, self.dy, dt) 211 | 212 | hits = pygame.sprite.spritecollide(self.player, self.creeps, True) 213 | for creep in hits: 214 | self.creep_counts[creep.TYPE] -= 1 215 | self.score += creep.reward 216 | self._add_creep() 217 | 218 | if self.creep_counts["GOOD"] == 0: 219 | self.score += self.rewards["win"] 220 | 221 | self.creeps.update(dt) 222 | 223 | self.player.draw(self.screen) 224 | self.creeps.draw(self.screen) 225 | 226 | if __name__ == "__main__": 227 | import numpy as np 228 | 229 | pygame.init() 230 | game = WaterWorld(width=256, height=256, num_creeps=10) 231 | game.screen = pygame.display.set_mode(game.getScreenDims(), 0, 32) 232 | game.clock = pygame.time.Clock() 233 | game.rng = np.random.RandomState(24) 234 | game.init() 235 | 236 | while True: 237 | dt = game.clock.tick_busy_loop(30) 238 | game.step(dt) 239 | pygame.display.update() 240 | -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/puckworld.py: -------------------------------------------------------------------------------- 1 | import pygame 2 | import sys 3 | import math 4 | 5 | #import .base 6 | from .base.pygamewrapper import PyGameWrapper 7 | 8 | from pygame.constants import K_w, K_a, K_s, K_d 9 | from .primitives import Player, Creep 10 | from .utils.vec2d import vec2d 11 | from .utils import percent_round_int 12 | 13 | 14 | class PuckCreep(pygame.sprite.Sprite): 15 | 16 | def __init__(self, pos_init, attr, SCREEN_WIDTH, SCREEN_HEIGHT): 17 | pygame.sprite.Sprite.__init__(self) 18 | 19 | self.pos = vec2d(pos_init) 20 | self.attr = attr 21 | self.SCREEN_WIDTH = SCREEN_WIDTH 22 | self.SCREEN_HEIGHT = SCREEN_HEIGHT 23 | 24 | image = pygame.Surface( 25 | 
(self.attr["radius_outer"] * 2, 26 | self.attr["radius_outer"] * 2)) 27 | image.fill((0, 0, 0, 0)) 28 | image.set_colorkey((0, 0, 0)) 29 | pygame.draw.circle( 30 | image, 31 | self.attr["color_outer"], 32 | (self.attr["radius_outer"], self.attr["radius_outer"]), 33 | self.attr["radius_outer"], 34 | 0 35 | ) 36 | 37 | image.set_alpha(int(255 * 0.75)) 38 | 39 | pygame.draw.circle( 40 | image, 41 | self.attr["color_center"], 42 | (self.attr["radius_outer"], self.attr["radius_outer"]), 43 | self.attr["radius_center"], 44 | 0 45 | ) 46 | 47 | self.image = image 48 | self.rect = self.image.get_rect() 49 | self.rect.center = pos_init 50 | 51 | def update(self, ndx, ndy, dt): 52 | self.pos.x += ndx * self.attr['speed'] * dt 53 | self.pos.y += ndy * self.attr['speed'] * dt 54 | 55 | self.rect.center = (self.pos.x, self.pos.y) 56 | 57 | 58 | class PuckWorld(PyGameWrapper): 59 | """ 60 | Based Karpthy's PuckWorld in `REINFORCEjs`_. 61 | 62 | .. _REINFORCEjs: https://github.com/karpathy/reinforcejs 63 | 64 | Parameters 65 | ---------- 66 | width : int 67 | Screen width. 68 | 69 | height : int 70 | Screen height, recommended to be same dimension as width. 71 | 72 | """ 73 | 74 | def __init__(self, 75 | width=64, 76 | height=64): 77 | 78 | actions = { 79 | "up": K_w, 80 | "left": K_a, 81 | "right": K_d, 82 | "down": K_s 83 | } 84 | 85 | PyGameWrapper.__init__(self, width, height, actions=actions) 86 | 87 | self.CREEP_BAD = { 88 | "radius_center": percent_round_int(width, 0.047), 89 | "radius_outer": percent_round_int(width, 0.265), 90 | "color_center": (110, 45, 45), 91 | "color_outer": (150, 95, 95), 92 | "speed": 0.05 * width 93 | } 94 | 95 | self.CREEP_GOOD = { 96 | "radius": percent_round_int(width, 0.047), 97 | "color": (40, 140, 40) 98 | } 99 | 100 | self.AGENT_COLOR = (60, 60, 140) 101 | self.AGENT_SPEED = 0.2 * width 102 | self.AGENT_RADIUS = percent_round_int(width, 0.047) 103 | self.AGENT_INIT_POS = ( 104 | self.AGENT_RADIUS * 1.5, 105 | self.AGENT_RADIUS * 1.5) 106 | 107 | self.BG_COLOR = (255, 255, 255) 108 | self.dx = 0 109 | self.dy = 0 110 | self.ticks = 0 111 | 112 | def _handle_player_events(self): 113 | self.dx = 0.0 114 | self.dy = 0.0 115 | for event in pygame.event.get(): 116 | if event.type == pygame.QUIT: 117 | pygame.quit() 118 | sys.exit() 119 | 120 | if event.type == pygame.KEYDOWN: 121 | key = event.key 122 | 123 | if key == self.actions["left"]: 124 | self.dx -= self.AGENT_SPEED 125 | 126 | if key == self.actions["right"]: 127 | self.dx += self.AGENT_SPEED 128 | 129 | if key == self.actions["up"]: 130 | self.dy -= self.AGENT_SPEED 131 | 132 | if key == self.actions["down"]: 133 | self.dy += self.AGENT_SPEED 134 | 135 | def getGameState(self): 136 | """ 137 | Gets a non-visual state representation of the game. 138 | 139 | Returns 140 | ------- 141 | 142 | dict 143 | * player x position. 144 | * player y position. 145 | * players x velocity. 146 | * players y velocity. 147 | * good creep x position. 148 | * good creep y position. 149 | * bad creep x position. 150 | * bad creep y position. 151 | 152 | See code for structure. 
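For instance, a returned state might look like this (the values shown are illustrative only):

        .. code-block:: python

            {
                "player_x": 45.2, "player_y": 16.9,
                "player_velocity_x": 0.0, "player_velocity_y": 12.8,
                "good_creep_x": 31.0, "good_creep_y": 48.6,
                "bad_creep_x": 50.1, "bad_creep_y": 52.3
            }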
153 | 154 | """ 155 | state = { 156 | "player_x": self.player.pos.x, 157 | "player_y": self.player.pos.y, 158 | "player_velocity_x": self.player.vel.x, 159 | "player_velocity_y": self.player.vel.y, 160 | "good_creep_x": self.good_creep.pos.x, 161 | "good_creep_y": self.good_creep.pos.y, 162 | "bad_creep_x": self.bad_creep.pos.x, 163 | "bad_creep_y": self.bad_creep.pos.y 164 | } 165 | 166 | return state 167 | 168 | def getScore(self): 169 | return self.score 170 | 171 | def game_over(self): 172 | """ 173 | Return bool if the game has 'finished' 174 | """ 175 | return False 176 | 177 | def _rngCreepPos(self): 178 | r = self.CREEP_GOOD['radius'] 179 | x = self.rng.uniform(r * 3, self.width - r * 2.5) 180 | y = self.rng.uniform(r * 3, self.height - r * 2.5) 181 | return (x, y) 182 | 183 | def init(self): 184 | """ 185 | Starts/Resets the game to its inital state 186 | """ 187 | 188 | self.player = Player( 189 | self.AGENT_RADIUS, 190 | self.AGENT_COLOR, 191 | self.AGENT_SPEED, 192 | self.AGENT_INIT_POS, 193 | self.width, 194 | self.height) 195 | 196 | self.good_creep = Creep( 197 | self.CREEP_GOOD['color'], 198 | self.CREEP_GOOD['radius'], 199 | self._rngCreepPos(), 200 | (1, 1), 201 | 0.0, 202 | 1.0, 203 | "GOOD", 204 | self.width, 205 | self.height, 206 | 0.0 # jitter 207 | ) 208 | 209 | self.bad_creep = PuckCreep( 210 | (self.width, 211 | self.height), 212 | self.CREEP_BAD, 213 | self.screen_dim[0] * 0.75, 214 | self.screen_dim[1] * 0.75) 215 | 216 | self.creeps = pygame.sprite.Group() 217 | self.creeps.add(self.good_creep) 218 | self.creeps.add(self.bad_creep) 219 | 220 | self.score = 0 221 | self.ticks = 0 222 | self.lives = -1 223 | 224 | def step(self, dt): 225 | """ 226 | Perform one step of game emulation. 227 | """ 228 | dt /= 1000.0 229 | self.ticks += 1 230 | self.screen.fill(self.BG_COLOR) 231 | 232 | self.score += self.rewards["tick"] 233 | 234 | self._handle_player_events() 235 | self.player.update(self.dx, self.dy, dt) 236 | 237 | dx = self.player.pos.x - self.good_creep.pos.x 238 | dy = self.player.pos.y - self.good_creep.pos.y 239 | dist_to_good = math.sqrt(dx * dx + dy * dy) 240 | 241 | dx = self.player.pos.x - self.bad_creep.pos.x 242 | dy = self.player.pos.y - self.bad_creep.pos.y 243 | dist_to_bad = math.sqrt(dx * dx + dy * dy) 244 | 245 | reward = -dist_to_good 246 | if dist_to_bad < self.CREEP_BAD['radius_outer']: 247 | reward += 2.0 * \ 248 | (dist_to_bad - self.CREEP_BAD['radius_outer'] 249 | ) / float(self.CREEP_BAD['radius_outer']) 250 | 251 | self.score += reward 252 | 253 | if self.ticks % 500 == 0: 254 | x, y = self._rngCreepPos() 255 | self.good_creep.pos.x = x 256 | self.good_creep.pos.y = y 257 | 258 | ndx = 0.0 if dist_to_bad == 0.0 else dx / dist_to_bad 259 | ndy = 0.0 if dist_to_bad == 0.0 else dy / dist_to_bad 260 | 261 | self.bad_creep.update(ndx, ndy, dt) 262 | self.good_creep.update(dt) 263 | 264 | self.player.draw(self.screen) 265 | self.creeps.draw(self.screen) 266 | 267 | 268 | if __name__ == "__main__": 269 | import numpy as np 270 | 271 | pygame.init() 272 | game = PuckWorld(width=256, height=256) 273 | game.screen = pygame.display.set_mode(game.getScreenDims(), 0, 32) 274 | game.clock = pygame.time.Clock() 275 | game.rng = np.random.RandomState(24) 276 | game.init() 277 | 278 | while True: 279 | dt = game.clock.tick_busy_loop(60) 280 | game.step(dt) 281 | pygame.display.update() 282 | -------------------------------------------------------------------------------- /01_Preprocessing/diabetes_app.csv: 
-------------------------------------------------------------------------------- 1 | Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age 2 | 3,129.0,64.0,29.0,115.0,26.4,0.21899999999999997,28 3 | 3,191.0,68.0,15.0,130.0,30.9,0.299,34 4 | 15,136.0,70.0,32.0,110.0,37.1,0.153,43 5 | 1,109.0,60.0,8.0,182.0,25.4,0.9470000000000001,21 6 | 8,151.0,78.0,32.0,210.0,42.9,0.516,36 7 | 4,123.0,80.0,15.0,176.0,32.0,0.44299999999999995,34 8 | 3,158.0,70.0,30.0,328.0,35.5,0.344,35 9 | 6,134.0,80.0,37.0,370.0,46.2,0.23800000000000002,46 10 | 10,161.0,68.0,23.0,132.0,25.5,0.326,47 11 | 1,87.0,68.0,34.0,77.0,37.6,0.401,24 12 | 2,121.0,70.0,32.0,95.0,39.1,0.8859999999999999,23 13 | 9,145.0,88.0,34.0,165.0,30.3,0.7709999999999999,53 14 | 7,83.0,78.0,26.0,71.0,29.3,0.767,36 15 | 8,196.0,76.0,29.0,280.0,37.5,0.605,57 16 | 0,127.0,80.0,37.0,210.0,36.3,0.804,23 17 | 2,102.0,86.0,36.0,120.0,45.5,0.127,23 18 | 8,126.0,88.0,36.0,108.0,38.5,0.349,49 19 | 3,130.0,78.0,23.0,79.0,28.4,0.32299999999999995,34 20 | 1,71.0,48.0,18.0,76.0,20.4,0.32299999999999995,22 21 | 6,154.0,78.0,41.0,140.0,46.1,0.5710000000000001,27 22 | 2,68.0,62.0,13.0,15.0,20.1,0.257,23 23 | 1,117.0,88.0,24.0,145.0,34.5,0.40299999999999997,40 24 | 2,157.0,74.0,35.0,440.0,39.4,0.134,30 25 | 2,112.0,68.0,22.0,94.0,34.1,0.315,26 26 | 1,131.0,64.0,14.0,415.0,23.7,0.389,21 27 | 0,181.0,88.0,44.0,510.0,43.3,0.222,26 28 | 1,95.0,74.0,21.0,73.0,25.9,0.6729999999999999,36 29 | 4,110.0,76.0,20.0,100.0,28.4,0.11800000000000001,27 30 | 1,115.0,70.0,30.0,96.0,34.6,0.529,32 31 | 4,184.0,78.0,39.0,277.0,37.0,0.264,31 32 | 0,129.0,110.0,46.0,130.0,67.1,0.319,26 33 | 13,152.0,90.0,33.0,29.0,26.8,0.731,43 34 | 7,150.0,66.0,42.0,342.0,34.7,0.718,42 35 | 1,95.0,82.0,25.0,180.0,35.0,0.233,43 36 | 1,97.0,66.0,15.0,140.0,23.2,0.48700000000000004,22 37 | 1,108.0,60.0,46.0,178.0,35.5,0.415,24 38 | 0,177.0,60.0,29.0,478.0,34.6,1.072,21 39 | 10,125.0,70.0,26.0,115.0,31.1,0.205,41 40 | 0,84.0,64.0,22.0,66.0,35.8,0.545,21 41 | 7,195.0,70.0,33.0,145.0,25.1,0.163,55 42 | 5,139.0,80.0,35.0,160.0,31.6,0.361,25 43 | 3,187.0,70.0,22.0,200.0,36.4,0.408,36 44 | 9,120.0,72.0,22.0,56.0,20.8,0.733,48 45 | 1,77.0,56.0,30.0,56.0,33.3,1.251,24 46 | 5,123.0,74.0,40.0,77.0,34.1,0.26899999999999996,28 47 | 1,87.0,78.0,27.0,32.0,34.6,0.10099999999999999,22 48 | 5,144.0,82.0,26.0,285.0,32.0,0.452,58 49 | 1,193.0,50.0,16.0,375.0,25.9,0.655,24 50 | 2,83.0,65.0,28.0,66.0,36.8,0.629,24 51 | 2,106.0,64.0,35.0,119.0,30.5,1.4,34 52 | 3,116.0,74.0,15.0,105.0,26.3,0.107,24 53 | 1,189.0,60.0,23.0,846.0,30.1,0.39799999999999996,59 54 | 2,107.0,74.0,30.0,100.0,33.6,0.40399999999999997,23 55 | 8,100.0,74.0,40.0,215.0,39.4,0.6609999999999999,43 56 | 2,100.0,54.0,28.0,105.0,37.8,0.498,24 57 | 2,101.0,58.0,17.0,265.0,24.2,0.614,23 58 | 1,100.0,74.0,12.0,46.0,19.5,0.149,28 59 | 0,126.0,86.0,27.0,120.0,27.4,0.515,21 60 | 0,180.0,78.0,63.0,14.0,59.4,2.42,25 61 | 1,128.0,88.0,39.0,110.0,36.5,1.057,37 62 | 1,126.0,56.0,29.0,152.0,28.7,0.8009999999999999,21 63 | 4,85.0,58.0,22.0,49.0,27.8,0.306,28 64 | 0,198.0,66.0,32.0,274.0,41.3,0.502,28 65 | 0,91.0,68.0,32.0,210.0,39.9,0.381,25 66 | 2,96.0,68.0,13.0,49.0,21.1,0.647,26 67 | 0,95.0,85.0,25.0,36.0,37.4,0.247,24 68 | 2,146.0,76.0,35.0,194.0,38.2,0.32899999999999996,29 69 | 4,91.0,70.0,32.0,88.0,33.1,0.446,22 70 | 2,93.0,64.0,32.0,160.0,38.0,0.674,23 71 | 1,119.0,86.0,39.0,220.0,45.6,0.8079999999999999,29 72 | 9,152.0,78.0,34.0,171.0,34.2,0.893,33 73 | 3,111.0,90.0,12.0,78.0,28.4,0.495,29 74 | 0,162.0,76.0,56.0,100.0,53.2,0.759,25 75 
| 4,146.0,85.0,27.0,100.0,28.9,0.18899999999999997,27 76 | 1,112.0,80.0,45.0,132.0,34.8,0.217,24 77 | 3,102.0,44.0,20.0,94.0,30.8,0.4,26 78 | 2,144.0,58.0,33.0,135.0,31.6,0.42200000000000004,25 79 | 8,179.0,72.0,42.0,130.0,32.7,0.7190000000000001,36 80 | 2,128.0,78.0,37.0,182.0,43.3,1.224,31 81 | 6,123.0,72.0,45.0,230.0,33.6,0.733,34 82 | 1,136.0,74.0,50.0,204.0,37.4,0.39899999999999997,24 83 | 2,146.0,70.0,38.0,360.0,28.0,0.337,29 84 | 2,122.0,52.0,43.0,158.0,36.2,0.816,28 85 | 1,130.0,70.0,13.0,105.0,25.9,0.47200000000000003,22 86 | 1,80.0,74.0,11.0,60.0,30.0,0.527,22 87 | 2,99.0,60.0,17.0,160.0,36.6,0.45299999999999996,21 88 | 6,134.0,70.0,23.0,130.0,35.4,0.542,29 89 | 1,81.0,72.0,18.0,40.0,26.6,0.28300000000000003,24 90 | 12,140.0,82.0,43.0,325.0,39.2,0.528,58 91 | 3,115.0,66.0,39.0,140.0,38.1,0.15,28 92 | 1,79.0,80.0,25.0,37.0,25.4,0.583,22 93 | 1,90.0,62.0,12.0,43.0,27.2,0.58,24 94 | 1,81.0,74.0,41.0,57.0,46.3,1.0959999999999999,32 95 | 1,100.0,72.0,12.0,70.0,25.3,0.6579999999999999,28 96 | 1,125.0,70.0,24.0,110.0,24.3,0.221,25 97 | 3,174.0,58.0,22.0,194.0,32.9,0.593,36 98 | 0,139.0,62.0,17.0,210.0,22.1,0.207,21 99 | 1,149.0,68.0,29.0,127.0,29.3,0.349,42 100 | 5,105.0,72.0,29.0,325.0,36.9,0.159,28 101 | 0,95.0,64.0,39.0,105.0,44.6,0.366,22 102 | 0,97.0,64.0,36.0,100.0,36.8,0.6,25 103 | 1,181.0,78.0,42.0,293.0,40.0,1.258,22 104 | 2,120.0,76.0,37.0,105.0,39.7,0.215,29 105 | 4,84.0,90.0,23.0,56.0,39.5,0.159,25 106 | 3,163.0,70.0,18.0,105.0,31.6,0.268,28 107 | 1,109.0,38.0,18.0,120.0,23.1,0.40700000000000003,26 108 | 2,106.0,56.0,27.0,165.0,29.0,0.426,22 109 | 0,95.0,80.0,45.0,92.0,36.5,0.33,26 110 | 3,120.0,70.0,30.0,135.0,42.9,0.452,30 111 | 6,98.0,58.0,33.0,190.0,34.0,0.43,43 112 | 2,88.0,74.0,19.0,53.0,29.0,0.22899999999999998,22 113 | 0,114.0,80.0,34.0,285.0,44.2,0.16699999999999998,27 114 | 1,95.0,66.0,13.0,38.0,19.6,0.33399999999999996,25 115 | 9,156.0,86.0,28.0,155.0,34.3,1.189,42 116 | 3,113.0,50.0,10.0,85.0,29.5,0.626,25 117 | 5,155.0,84.0,44.0,545.0,38.7,0.619,34 118 | 7,124.0,70.0,33.0,215.0,25.5,0.161,37 119 | 0,104.0,64.0,37.0,64.0,33.6,0.51,22 120 | 3,80.0,82.0,31.0,70.0,34.2,1.2919999999999998,27 121 | 7,94.0,64.0,25.0,79.0,33.3,0.738,41 122 | 3,78.0,50.0,32.0,88.0,31.0,0.248,26 123 | 2,110.0,74.0,29.0,125.0,32.4,0.698,27 124 | 7,181.0,84.0,21.0,192.0,35.9,0.586,51 125 | 1,90.0,62.0,18.0,59.0,25.1,1.268,25 126 | 5,99.0,54.0,28.0,83.0,34.0,0.499,30 127 | 10,101.0,76.0,48.0,180.0,32.9,0.171,63 128 | 1,82.0,64.0,13.0,95.0,21.2,0.415,23 129 | 2,75.0,64.0,24.0,55.0,29.7,0.37,33 130 | 4,116.0,72.0,12.0,87.0,22.1,0.46299999999999997,37 131 | 0,152.0,82.0,39.0,272.0,41.5,0.27,27 132 | 0,74.0,52.0,10.0,36.0,27.8,0.26899999999999996,22 133 | 9,124.0,70.0,33.0,402.0,35.4,0.282,34 134 | 1,119.0,44.0,47.0,63.0,35.5,0.28,25 135 | 3,84.0,68.0,30.0,106.0,31.9,0.591,25 136 | 8,181.0,68.0,36.0,495.0,30.1,0.615,60 137 | 1,116.0,78.0,29.0,180.0,36.1,0.496,25 138 | 1,140.0,74.0,26.0,180.0,24.1,0.828,23 139 | 3,111.0,58.0,31.0,44.0,29.5,0.43,22 140 | 2,94.0,68.0,18.0,76.0,26.0,0.561,21 141 | 8,124.0,76.0,24.0,600.0,28.7,0.687,52 142 | 11,120.0,80.0,37.0,150.0,42.3,0.785,48 143 | 4,125.0,70.0,18.0,122.0,28.9,1.1440000000000001,45 144 | 5,166.0,72.0,19.0,175.0,25.8,0.5870000000000001,51 145 | 6,144.0,72.0,27.0,228.0,33.9,0.255,40 146 | 2,108.0,52.0,26.0,63.0,32.5,0.318,22 147 | 2,122.0,76.0,27.0,200.0,35.9,0.483,26 148 | 1,109.0,56.0,21.0,135.0,25.2,0.833,23 149 | 1,95.0,60.0,18.0,58.0,23.9,0.26,22 150 | 2,56.0,56.0,28.0,45.0,24.2,0.332,22 151 | 5,108.0,72.0,43.0,75.0,36.1,0.263,33 152 | 
1,119.0,54.0,13.0,50.0,22.3,0.205,24 153 | 1,111.0,62.0,13.0,182.0,24.0,0.138,23 154 | 2,105.0,58.0,40.0,94.0,34.9,0.225,25 155 | 1,164.0,82.0,43.0,67.0,32.8,0.341,50 156 | 1,89.0,66.0,23.0,94.0,28.1,0.16699999999999998,21 157 | 4,144.0,58.0,28.0,140.0,29.5,0.287,37 158 | 10,129.0,76.0,28.0,122.0,35.9,0.28,39 159 | 0,180.0,90.0,26.0,90.0,36.5,0.314,35 160 | 2,142.0,82.0,18.0,64.0,24.7,0.7609999999999999,21 161 | 3,129.0,92.0,49.0,155.0,36.4,0.968,32 162 | 0,104.0,64.0,23.0,116.0,27.8,0.45399999999999996,23 163 | 7,142.0,60.0,33.0,190.0,28.8,0.687,61 164 | 1,109.0,58.0,18.0,116.0,28.5,0.21899999999999997,22 165 | 1,125.0,50.0,40.0,167.0,33.3,0.9620000000000001,28 166 | 4,90.0,88.0,47.0,54.0,37.7,0.36200000000000004,29 167 | 0,117.0,80.0,31.0,53.0,45.2,0.08900000000000001,24 168 | 8,155.0,62.0,26.0,495.0,34.0,0.5429999999999999,46 169 | 4,129.0,60.0,12.0,231.0,27.5,0.527,31 170 | 1,122.0,90.0,51.0,220.0,49.7,0.325,31 171 | 3,103.0,72.0,30.0,152.0,27.6,0.73,27 172 | 9,171.0,110.0,24.0,240.0,45.4,0.721,54 173 | 0,100.0,88.0,60.0,110.0,46.8,0.9620000000000001,31 174 | 2,84.0,50.0,23.0,76.0,30.4,0.968,21 175 | 2,83.0,66.0,23.0,50.0,32.2,0.49700000000000005,22 176 | 1,92.0,62.0,25.0,41.0,19.5,0.48200000000000004,25 177 | 3,99.0,62.0,19.0,74.0,21.8,0.27899999999999997,26 178 | 2,108.0,62.0,32.0,56.0,25.2,0.128,21 179 | 0,134.0,58.0,20.0,291.0,26.4,0.35200000000000004,21 180 | 0,117.0,66.0,31.0,188.0,30.8,0.493,22 181 | 5,187.0,76.0,27.0,207.0,43.6,1.034,53 182 | 8,167.0,106.0,46.0,231.0,37.6,0.165,43 183 | 0,173.0,78.0,32.0,265.0,46.5,1.159,58 184 | 3,83.0,58.0,31.0,18.0,34.3,0.336,25 185 | 1,97.0,64.0,19.0,82.0,18.2,0.299,21 186 | 1,89.0,24.0,19.0,25.0,27.8,0.5589999999999999,21 187 | 4,103.0,60.0,33.0,192.0,24.0,0.966,33 188 | 0,102.0,64.0,46.0,78.0,40.6,0.496,21 189 | 3,100.0,68.0,23.0,81.0,31.6,0.9490000000000001,28 190 | 0,98.0,82.0,15.0,84.0,25.2,0.299,22 191 | 2,141.0,58.0,34.0,128.0,25.4,0.6990000000000001,24 192 | 7,168.0,88.0,42.0,321.0,38.2,0.787,40 193 | 3,180.0,64.0,25.0,70.0,34.0,0.271,26 194 | 3,99.0,54.0,19.0,86.0,25.6,0.154,24 195 | 8,176.0,90.0,34.0,300.0,33.7,0.467,58 196 | 0,84.0,82.0,31.0,125.0,38.2,0.233,23 197 | 3,89.0,74.0,16.0,85.0,30.4,0.551,38 198 | -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/monsterkong/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Batchu Vishal' 2 | import pygame 3 | import sys 4 | from pygame.constants import K_a, K_d, K_SPACE, K_w, K_s, QUIT, KEYDOWN 5 | from .board import Board 6 | #from ..base import base 7 | #from ple.games import base 8 | from ple.games.base.pygamewrapper import PyGameWrapper 9 | import numpy as np 10 | import os 11 | 12 | 13 | class MonsterKong(PyGameWrapper): 14 | 15 | def __init__(self): 16 | """ 17 | Parameters 18 | ---------- 19 | None 20 | 21 | """ 22 | 23 | self.height = 465 24 | self.width = 500 25 | 26 | actions = { 27 | "left": K_a, 28 | "right": K_d, 29 | "jump": K_SPACE, 30 | "up": K_w, 31 | "down": K_s 32 | } 33 | 34 | PyGameWrapper.__init__( 35 | self, self.width, self.height, actions=actions) 36 | 37 | self.rewards = { 38 | "positive": 5, 39 | "win": 50, 40 | "negative": -25, 41 | "tick": 0 42 | } 43 | 44 | self.allowed_fps = 30 45 | 46 | self._dir = os.path.dirname(os.path.abspath(__file__)) 47 | 48 | self.IMAGES = { 49 | "right": pygame.image.load(os.path.join(self._dir, 'assets/right.png')), 50 | "right2": pygame.image.load(os.path.join(self._dir, 'assets/right2.png')), 51 | 
"left": pygame.image.load(os.path.join(self._dir, 'assets/left.png')), 52 | "left2": pygame.image.load(os.path.join(self._dir, 'assets/left2.png')), 53 | "still": pygame.image.load(os.path.join(self._dir, 'assets/still.png')) 54 | } 55 | 56 | def init(self): 57 | # Create a new instance of the Board class 58 | self.newGame = Board( 59 | self.width, 60 | self.height, 61 | self.rewards, 62 | self.rng, 63 | self._dir) 64 | 65 | # Initialize the fireball timer 66 | self.fireballTimer = 0 67 | 68 | # Assign groups from the Board instance that was created 69 | self.playerGroup = self.newGame.playerGroup 70 | self.wallGroup = self.newGame.wallGroup 71 | self.ladderGroup = self.newGame.ladderGroup 72 | 73 | def getScore(self): 74 | return self.newGame.score 75 | 76 | def game_over(self): 77 | return self.newGame.lives <= 0 78 | 79 | def step(self, dt): 80 | self.newGame.score += self.rewards["tick"] 81 | # This is where the actual game is run 82 | # Get the appropriate groups 83 | self.fireballGroup = self.newGame.fireballGroup 84 | self.coinGroup = self.newGame.coinGroup 85 | 86 | # Create fireballs as required, depending on the number of monsters in 87 | # our game at the moment 88 | if self.fireballTimer == 0: 89 | self.newGame.CreateFireball( 90 | self.newGame.Enemies[0].getPosition(), 0) 91 | elif len(self.newGame.Enemies) >= 2 and self.fireballTimer == 23: 92 | self.newGame.CreateFireball( 93 | self.newGame.Enemies[1].getPosition(), 1) 94 | elif len(self.newGame.Enemies) >= 3 and self.fireballTimer == 46: 95 | self.newGame.CreateFireball( 96 | self.newGame.Enemies[2].getPosition(), 2) 97 | self.fireballTimer = (self.fireballTimer + 1) % 70 98 | 99 | # Animate the coin 100 | for coin in self.coinGroup: 101 | coin.animateCoin() 102 | 103 | # To check collisions below, we move the player downwards then check 104 | # and move him back to his original location 105 | self.newGame.Players[0].updateY(2) 106 | self.laddersCollidedBelow = self.newGame.Players[ 107 | 0].checkCollision(self.ladderGroup) 108 | self.wallsCollidedBelow = self.newGame.Players[ 109 | 0].checkCollision(self.wallGroup) 110 | self.newGame.Players[0].updateY(-2) 111 | 112 | # To check for collisions above, we move the player up then check and 113 | # then move him back down 114 | self.newGame.Players[0].updateY(-2) 115 | self.wallsCollidedAbove = self.newGame.Players[ 116 | 0].checkCollision(self.wallGroup) 117 | self.newGame.Players[0].updateY(2) 118 | 119 | # Sets the onLadder state of the player 120 | self.newGame.ladderCheck( 121 | self.laddersCollidedBelow, 122 | self.wallsCollidedBelow, 123 | self.wallsCollidedAbove) 124 | 125 | for event in pygame.event.get(): 126 | # Exit to desktop 127 | if event.type == QUIT: 128 | pygame.quit() 129 | sys.exit() 130 | 131 | if event.type == KEYDOWN: 132 | # Get the ladders collided with the player 133 | self.laddersCollidedExact = self.newGame.Players[ 134 | 0].checkCollision(self.ladderGroup) 135 | if (event.key == self.actions["jump"] and self.newGame.Players[0].onLadder == 0) or ( 136 | event.key == self.actions["up"] and self.laddersCollidedExact): 137 | # Set the player to move up 138 | self.direction = 2 139 | if self.newGame.Players[ 140 | 0].isJumping == 0 and self.wallsCollidedBelow: 141 | # We can make the player jump and set his 142 | # currentJumpSpeed 143 | self.newGame.Players[0].isJumping = 1 144 | self.newGame.Players[0].currentJumpSpeed = 7 145 | 146 | if event.key == self.actions["right"]: 147 | if self.newGame.direction != 4: 148 | self.newGame.direction = 4 149 | 
self.newGame.cycles = -1 # Reset cycles 150 | self.newGame.cycles = (self.newGame.cycles + 1) % 4 151 | if self.newGame.cycles < 2: 152 | # Display the first image for half the cycles 153 | self.newGame.Players[0].updateWH(self.IMAGES["right"], "H", 154 | self.newGame.Players[0].getSpeed(), 15, 15) 155 | else: 156 | # Display the second image for half the cycles 157 | self.newGame.Players[0].updateWH(self.IMAGES["right2"], "H", 158 | self.newGame.Players[0].getSpeed(), 15, 15) 159 | wallsCollidedExact = self.newGame.Players[ 160 | 0].checkCollision(self.wallGroup) 161 | if wallsCollidedExact: 162 | # If we have collided a wall, move the player back to 163 | # where he was in the last state 164 | self.newGame.Players[0].updateWH(self.IMAGES["right"], "H", 165 | -self.newGame.Players[0].getSpeed(), 15, 15) 166 | 167 | if event.key == self.actions["left"]: 168 | if self.newGame.direction != 3: 169 | self.newGame.direction = 3 170 | self.newGame.cycles = -1 # Reset cycles 171 | self.newGame.cycles = (self.newGame.cycles + 1) % 4 172 | if self.newGame.cycles < 2: 173 | # Display the first image for half the cycles 174 | self.newGame.Players[0].updateWH(self.IMAGES["left"], "H", 175 | -self.newGame.Players[0].getSpeed(), 15, 15) 176 | else: 177 | # Display the second image for half the cycles 178 | self.newGame.Players[0].updateWH(self.IMAGES["left2"], "H", 179 | -self.newGame.Players[0].getSpeed(), 15, 15) 180 | wallsCollidedExact = self.newGame.Players[ 181 | 0].checkCollision(self.wallGroup) 182 | if wallsCollidedExact: 183 | # If we have collided a wall, move the player back to 184 | # where he was in the last state 185 | self.newGame.Players[0].updateWH(self.IMAGES["left"], "H", 186 | self.newGame.Players[0].getSpeed(), 15, 15) 187 | 188 | # If we are on a ladder, then we can move up 189 | if event.key == self.actions[ 190 | "up"] and self.newGame.Players[0].onLadder: 191 | self.newGame.Players[0].updateWH(self.IMAGES["still"], "V", 192 | -self.newGame.Players[0].getSpeed() / 2, 15, 15) 193 | if len(self.newGame.Players[0].checkCollision(self.ladderGroup)) == 0 or len( 194 | self.newGame.Players[0].checkCollision(self.wallGroup)) != 0: 195 | self.newGame.Players[0].updateWH(self.IMAGES["still"], "V", 196 | self.newGame.Players[0].getSpeed() / 2, 15, 15) 197 | 198 | # If we are on a ladder, then we can move down 199 | if event.key == self.actions[ 200 | "down"] and self.newGame.Players[0].onLadder: 201 | self.newGame.Players[0].updateWH(self.IMAGES["still"], "V", 202 | self.newGame.Players[0].getSpeed() / 2, 15, 15) 203 | 204 | # Update the player's position and process his jump if he is jumping 205 | self.newGame.Players[0].continuousUpdate( 206 | self.wallGroup, self.ladderGroup) 207 | 208 | ''' 209 | We use cycles to animate the character, when we change direction we also reset the cycles 210 | We also change the direction according to the key pressed 211 | ''' 212 | 213 | # Redraws all our instances onto the screen 214 | self.newGame.redrawScreen(self.screen, self.width, self.height) 215 | 216 | # Update the fireball and check for collisions with player (ie Kill the 217 | # player) 218 | self.newGame.fireballCheck() 219 | 220 | # Collect a coin 221 | coinsCollected = pygame.sprite.spritecollide( 222 | self.newGame.Players[0], self.coinGroup, True) 223 | self.newGame.coinCheck(coinsCollected) 224 | 225 | # Check if you have reached the princess 226 | self.newGame.checkVictory() 227 | 228 | # Update all the monsters 229 | for enemy in self.newGame.Enemies: 230 | 
enemy.continuousUpdate(self.wallGroup, self.ladderGroup) 231 | -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/raycastmaze.py: -------------------------------------------------------------------------------- 1 | 2 | #import .base 3 | from .base.pygamewrapper import PyGameWrapper 4 | import pygame 5 | import numpy as np 6 | import math 7 | from .raycast import RayCastPlayer 8 | from pygame.constants import K_w, K_a, K_d, K_s 9 | 10 | 11 | class RaycastMaze(PyGameWrapper, RayCastPlayer): 12 | """ 13 | Parameters 14 | ---------- 15 | init_pos : tuple of int (default: (1,1)) 16 | The position the player starts on in the grid. The grid is zero indexed. 17 | 18 | resolution : int (default: 1) 19 | This instructs the Raycast engine on how many vertical lines to use when drawing the screen. The number is equal to the width / resolution. 20 | 21 | move_speed : int (default: 20) 22 | How fast the agent moves forwards or backwards. 23 | 24 | turn_speed : int (default: 13) 25 | The speed at which the agent turns left or right. 26 | 27 | map_size : int (default: 10) 28 | The size of the maze that is generated. Must be greater than 5. Can be incremented between game resets to increase difficulty. 29 | 30 | width : int (default: 480) 31 | Screen width. 32 | 33 | height : int (default: 480) 34 | Screen height, recommended to be the same dimension as width. 35 | 36 | init_pos_distance_to_target : int (default: None, i.e. map_size*map_size) 37 | Useful for curriculum learning: slowly move the target away from the initial position to improve learning. 38 | 39 | """ 40 | 41 | def __init__(self, 42 | init_pos=(1, 1), resolution=1, 43 | move_speed=20, turn_speed=13, 44 | map_size=10, height=480, width=480, init_pos_distance_to_target=None): 45 | 46 | assert map_size > 5, "map_size must be greater than 5" 47 | 48 | # do not change 49 | init_dir = (1.0, 0.0) 50 | init_plane = (0.0, 0.66) 51 | 52 | block_types = { 53 | 0: { 54 | "pass_through": True, 55 | "color": None 56 | }, 57 | 1: { 58 | "pass_through": False, 59 | "color": (255, 255, 255) 60 | }, 61 | 2: { 62 | "pass_through": False, 63 | "color": (255, 100, 100) 64 | } 65 | } 66 | actions = { 67 | "forward": K_w, 68 | "left": K_a, 69 | "right": K_d, 70 | "backward": K_s 71 | } 72 | 73 | PyGameWrapper.__init__(self, width, height, actions=actions) 74 | 75 | RayCastPlayer.__init__(self, None, 76 | init_pos, init_dir, width, height, resolution, 77 | move_speed, turn_speed, init_plane, actions, block_types) 78 | 79 | if init_pos_distance_to_target is None: 80 | init_pos_distance_to_target = map_size * map_size 81 | self.init_pos_distance_to_target = max(1, init_pos_distance_to_target) 82 | self.init_pos = np.array([init_pos], dtype=np.float32) 83 | self.init_dir = np.array([init_dir], dtype=np.float32) 84 | self.init_plane = np.array([init_plane], dtype=np.float32) 85 | 86 | self.obj_loc = None 87 | self.map_size = map_size 88 | self.is_game_over = False 89 | 90 | def _make_maze(self, complexity=0.75, density=0.75): 91 | """ 92 | Generates a random maze, adapted from the Wikipedia article on maze generation algorithms.
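A wall border is drawn around the grid, wall "islands" are seeded at random even-numbered coordinates, and each island is grown by repeatedly stepping two cells in a random direction and filling in the skipped cell. The complexity parameter controls how long each island's corridor walk is; density controls how many islands are seeded.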
93 | """ 94 | dim = int(np.floor(self.map_size / 2) * 2 + 1) 95 | shape = (dim, dim) 96 | 97 | complexity = int(complexity * (5 * (shape[0] + shape[1]))) 98 | density = int(density * (shape[0] // 2 * shape[1] // 2)) 99 | 100 | # Build actual maze 101 | Z = np.zeros(shape, dtype=bool) 102 | # Fill borders 103 | Z[0, :] = Z[-1, :] = 1 104 | Z[:, 0] = Z[:, -1] = 1 105 | # Make isles 106 | for i in range(density): 107 | x = self.rng.random_integers(0, shape[1] // 2) * 2 108 | y = self.rng.random_integers(0, shape[0] // 2) * 2 109 | 110 | Z[y, x] = 1 111 | for j in range(complexity): 112 | neighbours = [] 113 | if x > 1: 114 | neighbours.append((y, x - 2)) 115 | if x < shape[1] - 2: 116 | neighbours.append((y, x + 2)) 117 | if y > 1: 118 | neighbours.append((y - 2, x)) 119 | if y < shape[0] - 2: 120 | neighbours.append((y + 2, x)) 121 | if len(neighbours): 122 | y_, x_ = neighbours[ 123 | self.rng.random_integers( 124 | 0, len(neighbours) - 1)] 125 | if Z[y_, x_] == 0: 126 | Z[y_, x_] = 1 127 | Z[y_ + (y - y_) // 2, x_ + (x - x_) // 2] = 1 128 | x, y = x_, y_ 129 | 130 | return Z.astype(int) 131 | 132 | def getGameState(self): 133 | """ 134 | 135 | Returns 136 | ------- 137 | 138 | None 139 | Does not have a non-visual representation of game state. 140 | Would be possible to return the location of the maze end. 141 | 142 | """ 143 | return None 144 | 145 | def getScore(self): 146 | return self.score 147 | 148 | def game_over(self): 149 | return self.is_game_over 150 | 151 | def getFiltredPositions(self, pos_input, pos_list, wall_list): 152 | pos_check = pos_input['pos'] 153 | if self.map_[pos_check[0], pos_check[1]] == 0: 154 | for y, x in [(0, 0), (-1, 0), (1, 0), (0, -1), (0, 1)]: 155 | if self.map_[pos_check[0] + y, pos_check[1] + x] == 0: 156 | # aile 157 | if not any(it for it in pos_list if it['pos'][0] == pos_check[0] + y and it['pos'][1] == pos_check[1] + x): 158 | pos_list.append({ 159 | 'pos': [pos_check[0] + y, pos_check[1] + x], 160 | 'dist': pos_input['dist'] + (0 if (x == 0 and y == 0) else 1), 161 | 'checked': (x == 0 and y == 0) 162 | }) 163 | else: 164 | for it in pos_list: 165 | if it['pos'][0] == pos_check[0] + y and it['pos'][1] == pos_check[1] + x: 166 | it['checked'] = True 167 | break 168 | else: 169 | # wall 170 | if not any(it for it in wall_list if it['pos'][0] == pos_check[0] + y and it['pos'][1] == pos_check[1] + x): 171 | wall_list.append({ 172 | 'pos': [pos_check[0] + y, pos_check[1] + x], 173 | 'dist': pos_input['dist'] + (0 if (x == 0 and y == 0) else 1) 174 | }) 175 | 176 | 177 | def init(self): 178 | self.score = 0 #reset score 179 | self.is_game_over = False 180 | self.pos = np.copy(self.init_pos) 181 | self.dir = np.copy(self.init_dir) 182 | self.plane = np.copy(self.init_plane) 183 | 184 | self.map_ = self._make_maze() 185 | 186 | pos_list = [] 187 | wall_list = [] 188 | check_list = [] 189 | pos_input = { 190 | 'pos': self.pos.astype(np.int)[0], 191 | 'dist': 0, 192 | 'checked': False 193 | } 194 | pos_list.append(pos_input) 195 | check_list.append(pos_input) 196 | while len(check_list): 197 | for pos_each in check_list: 198 | self.getFiltredPositions(pos_each, pos_list, wall_list) 199 | check_list = [it for it in pos_list if not it['checked']] 200 | 201 | 202 | available_positions = [] 203 | for y in range(self.map_size + 1): 204 | for x in range(self.map_size + 1): 205 | # in a wall 206 | if self.map_[y, x] == 1: 207 | # check access to this point 208 | if any(it for it in wall_list if it['dist'] <= self.init_pos_distance_to_target and it['pos'][0] == y 
and it['pos'][1] == x): 209 | available_positions.append([y,x]) 210 | 211 | 212 | self.obj_loc = np.array([available_positions[self.rng.randint(0, high=len(available_positions))]]) 213 | self.map_[self.obj_loc[0][0], self.obj_loc[0][1]] = 2 214 | 215 | if self.angle_to_obj_rad() < 1.5: 216 | # turn away from target at init state 217 | self.dir *= -1.0 218 | self.plane *= -1.0 219 | 220 | def reset(self): 221 | self.init() 222 | 223 | def normalize(self, vector): 224 | norm = math.sqrt(vector[0][0] ** 2 + vector[0][1] ** 2) 225 | vector[0][0] /= norm 226 | vector[0][1] /= norm 227 | return vector 228 | 229 | def step(self, dt): 230 | self.screen.fill((0, 0, 0)) 231 | pygame.draw.rect(self.screen, (92, 92, 92), 232 | (0, self.height / 2, self.width, self.height)) 233 | 234 | if not self.is_game_over: 235 | self.score += self.rewards["tick"] 236 | 237 | self._handle_player_events(dt) 238 | 239 | c, t, b, col = self.draw() 240 | 241 | for i in range(len(c)): 242 | color = (col[i][0], col[i][1], col[i][2]) 243 | p0 = (c[i], t[i]) 244 | p1 = (c[i], b[i]) 245 | 246 | pygame.draw.line(self.screen, color, p0, p1, self.resolution) 247 | 248 | dist = np.sqrt(np.sum((self.pos[0] - (self.obj_loc[0] + 0.5))**2.0)) 249 | # Close to target object and in sight 250 | if dist < 1.1 and self.angle_to_obj_rad() < 0.8: 251 | self.score += self.rewards["win"] 252 | self.is_game_over = True 253 | 254 | def angle_to_obj_rad(self): 255 | dir_to_loc = (self.obj_loc + 0.5) - self.pos 256 | dir_to_loc = self.normalize(dir_to_loc) 257 | dir_norm = self.normalize(np.copy(self.dir)) 258 | angle_rad = np.arccos(np.dot(dir_to_loc[0], dir_norm[0])) 259 | return angle_rad 260 | 261 | if __name__ == "__main__": 262 | import numpy as np 263 | 264 | fps = 60 265 | pygame.init() 266 | 267 | game = RaycastMaze( 268 | height=256, 269 | width=256, 270 | map_size=10 271 | ) 272 | 273 | game.screen = pygame.display.set_mode(game.getScreenDims(), 0, 32) 274 | game.clock = pygame.time.Clock() 275 | game.rng = np.random.RandomState(24) 276 | game.init() 277 | 278 | while True: 279 | dt = game.clock.tick_busy_loop(fps) 280 | 281 | if game.game_over(): 282 | print("Game over!") 283 | print("Resetting!") 284 | game.reset() 285 | 286 | game.step(dt) 287 | 288 | pygame.display.update() 289 | -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/pixelcopter.py: -------------------------------------------------------------------------------- 1 | import math 2 | import sys 3 | 4 | #import .base 5 | from .base.pygamewrapper import PyGameWrapper 6 | 7 | import pygame 8 | from pygame.constants import K_w, K_s 9 | from .utils.vec2d import vec2d 10 | 11 | 12 | class Block(pygame.sprite.Sprite): 13 | 14 | def __init__(self, pos_init, speed, SCREEN_WIDTH, SCREEN_HEIGHT): 15 | pygame.sprite.Sprite.__init__(self) 16 | 17 | self.pos = vec2d(pos_init) 18 | 19 | self.width = int(SCREEN_WIDTH * 0.1) 20 | self.height = int(SCREEN_HEIGHT * 0.2) 21 | self.speed = speed 22 | 23 | self.SCREEN_WIDTH = SCREEN_WIDTH 24 | self.SCREEN_HEIGHT = SCREEN_HEIGHT 25 | 26 | image = pygame.Surface((self.width, self.height)) 27 | image.fill((0, 0, 0, 0)) 28 | image.set_colorkey((0, 0, 0)) 29 | 30 | pygame.draw.rect( 31 | image, 32 | (120, 240, 80), 33 | (0, 0, self.width, self.height), 34 | 0 35 | ) 36 | 37 | self.image = image 38 | self.rect = self.image.get_rect() 39 | self.rect.center = pos_init 40 | 41 | def update(self, dt): 42 | self.pos.x -= self.speed * dt 43 | 44 | self.rect.center = (self.pos.x, 
self.pos.y) 45 | 46 | 47 | class HelicopterPlayer(pygame.sprite.Sprite): 48 | 49 | def __init__(self, speed, SCREEN_WIDTH, SCREEN_HEIGHT): 50 | pygame.sprite.Sprite.__init__(self) 51 | 52 | pos_init = (int(SCREEN_WIDTH * 0.35), SCREEN_HEIGHT / 2) 53 | self.pos = vec2d(pos_init) 54 | self.speed = speed 55 | self.climb_speed = speed * -0.875 # -0.0175 56 | self.fall_speed = speed * 0.09 # 0.0019 57 | self.momentum = 0 58 | 59 | self.width = SCREEN_WIDTH * 0.05 60 | self.height = SCREEN_HEIGHT * 0.05 61 | 62 | image = pygame.Surface((self.width, self.height)) 63 | image.fill((0, 0, 0, 0)) 64 | image.set_colorkey((0, 0, 0)) 65 | 66 | pygame.draw.rect( 67 | image, 68 | (255, 255, 255), 69 | (0, 0, self.width, self.height), 70 | 0 71 | ) 72 | 73 | self.image = image 74 | self.rect = self.image.get_rect() 75 | self.rect.center = pos_init 76 | 77 | def update(self, is_climbing, dt): 78 | self.momentum += (self.climb_speed if is_climbing else self.fall_speed) * dt 79 | self.momentum *= 0.99 80 | self.pos.y += self.momentum 81 | 82 | self.rect.center = (self.pos.x, self.pos.y) 83 | 84 | 85 | class Terrain(pygame.sprite.Sprite): 86 | 87 | def __init__(self, pos_init, speed, SCREEN_WIDTH, SCREEN_HEIGHT): 88 | pygame.sprite.Sprite.__init__(self) 89 | 90 | self.pos = vec2d(pos_init) 91 | self.speed = speed 92 | self.width = int(SCREEN_WIDTH * 0.1) 93 | 94 | image = pygame.Surface((self.width, SCREEN_HEIGHT * 1.5)) 95 | image.fill((0, 0, 0, 0)) 96 | image.set_colorkey((0, 0, 0)) 97 | 98 | color = (120, 240, 80) 99 | 100 | # top rect 101 | pygame.draw.rect( 102 | image, 103 | color, 104 | (0, 0, self.width, SCREEN_HEIGHT * 0.5), 105 | 0 106 | ) 107 | 108 | # bot rect 109 | pygame.draw.rect( 110 | image, 111 | color, 112 | (0, SCREEN_HEIGHT * 1.05, self.width, SCREEN_HEIGHT * 0.5), 113 | 0 114 | ) 115 | 116 | self.image = image 117 | self.rect = self.image.get_rect() 118 | self.rect.center = pos_init 119 | 120 | def update(self, dt): 121 | self.pos.x -= self.speed * dt 122 | self.rect.center = (self.pos.x, self.pos.y) 123 | 124 | 125 | class Pixelcopter(PyGameWrapper): 126 | """ 127 | Parameters 128 | ---------- 129 | width : int 130 | Screen width. 131 | 132 | height : int 133 | Screen height, recommended to be same dimension as width. 134 | 135 | """ 136 | 137 | def __init__(self, width=48, height=48): 138 | actions = { 139 | "up": K_w 140 | } 141 | 142 | PyGameWrapper.__init__(self, width, height, actions=actions) 143 | 144 | self.is_climbing = False 145 | self.speed = 0.0004 * width 146 | 147 | def _handle_player_events(self): 148 | self.is_climbing = False 149 | 150 | for event in pygame.event.get(): 151 | if event.type == pygame.QUIT: 152 | pygame.quit() 153 | sys.exit() 154 | 155 | if event.type == pygame.KEYDOWN: 156 | key = event.key 157 | if key == self.actions['up']: 158 | self.is_climbing = True 159 | 160 | def getGameState(self): 161 | """ 162 | Gets a non-visual state representation of the game. 163 | 164 | Returns 165 | ------- 166 | 167 | dict 168 | * player y position. 169 | * player velocity. 170 | * player distance to floor. 171 | * player distance to ceiling. 172 | * next block x distance to player. 173 | * next blocks top y location, 174 | * next blocks bottom y location. 175 | 176 | See code for structure. 
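For instance, a returned state might look like this (the values shown are illustrative only):

        .. code-block:: python

            {
                "player_y": 24.0, "player_vel": -0.5,
                "player_dist_to_ceil": 9.3, "player_dist_to_floor": 14.7,
                "next_gate_dist_to_player": 11.0,
                "next_gate_block_top": 17.0, "next_gate_block_bottom": 26.6
            }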
177 | 178 | """ 179 | 180 | min_dist = 999 181 | min_block = None 182 | for b in self.block_group: # Groups do not return in order 183 | dist_to = b.pos.x - self.player.pos.x 184 | if dist_to > 0 and dist_to < min_dist: 185 | min_block = b 186 | min_dist = dist_to 187 | 188 | current_terrain = pygame.sprite.spritecollide( 189 | self.player, self.terrain_group, False)[0] 190 | state = { 191 | "player_y": self.player.pos.y, 192 | "player_vel": self.player.momentum, 193 | "player_dist_to_ceil": self.player.pos.y - (current_terrain.pos.y - self.height * 0.25), 194 | "player_dist_to_floor": (current_terrain.pos.y + self.height * 0.25) - self.player.pos.y, 195 | "next_gate_dist_to_player": min_dist, 196 | "next_gate_block_top": min_block.pos.y, 197 | "next_gate_block_bottom": min_block.pos.y + min_block.height 198 | } 199 | 200 | return state 201 | 202 | def getScreenDims(self): 203 | return self.screen_dim 204 | 205 | def getActions(self): 206 | return self.actions.values() 207 | 208 | def getScore(self): 209 | return self.score 210 | 211 | def game_over(self): 212 | return self.lives <= 0.0 213 | 214 | def init(self): 215 | self.score = 0.0 216 | self.lives = 1.0 217 | 218 | self.player = HelicopterPlayer( 219 | self.speed, 220 | self.width, 221 | self.height 222 | ) 223 | 224 | self.player_group = pygame.sprite.Group() 225 | self.player_group.add(self.player) 226 | 227 | self.block_group = pygame.sprite.Group() 228 | self._add_blocks() 229 | 230 | self.terrain_group = pygame.sprite.Group() 231 | self._add_terrain(0, self.width * 4) 232 | 233 | def _add_terrain(self, start, end): 234 | w = int(self.width * 0.1) 235 | # each block takes up 10 units. 236 | steps = range(start + int(w / 2), end + int(w / 2), w) 237 | y_jitter = [] 238 | 239 | freq = 4.5 / self.width + self.rng.uniform(-0.01, 0.01) 240 | for step in steps: 241 | jitter = (self.height * 0.125) * \ 242 | math.sin(freq * step + self.rng.uniform(0.0, 0.5)) 243 | y_jitter.append(jitter) 244 | 245 | y_pos = [int((self.height / 2.0) + y_jit) for y_jit in y_jitter] 246 | 247 | for i in range(0, len(steps)): 248 | self.terrain_group.add(Terrain( 249 | (steps[i], y_pos[i]), 250 | self.speed, 251 | self.width, 252 | self.height 253 | ) 254 | ) 255 | 256 | def _add_blocks(self): 257 | x_pos = self.rng.randint(self.width, int(self.width * 1.5)) 258 | y_pos = self.rng.randint( 259 | int(self.height * 0.25), 260 | int(self.height * 0.75) 261 | ) 262 | self.block_group.add( 263 | Block( 264 | (x_pos, y_pos), 265 | self.speed, 266 | self.width, 267 | self.height 268 | ) 269 | ) 270 | 271 | def reset(self): 272 | self.init() 273 | 274 | def step(self, dt): 275 | 276 | self.screen.fill((0, 0, 0)) 277 | self._handle_player_events() 278 | 279 | self.score += self.rewards["tick"] 280 | 281 | self.player.update(self.is_climbing, dt) 282 | self.block_group.update(dt) 283 | self.terrain_group.update(dt) 284 | 285 | hits = pygame.sprite.spritecollide( 286 | self.player, self.block_group, False) 287 | for creep in hits: 288 | self.lives -= 1 289 | 290 | hits = pygame.sprite.spritecollide( 291 | self.player, self.terrain_group, False) 292 | for t in hits: 293 | if self.player.pos.y - self.player.height <= t.pos.y - self.height * 0.25: 294 | self.lives -= 1 295 | 296 | if self.player.pos.y >= t.pos.y + self.height * 0.25: 297 | self.lives -= 1 298 | 299 | for b in self.block_group: 300 | if b.pos.x <= self.player.pos.x and len(self.block_group) == 1: 301 | self.score += self.rewards["positive"] 302 | self._add_blocks() 303 | 304 | if b.pos.x <= -b.width: 305 
| b.kill() 306 | 307 | for t in self.terrain_group: 308 | if t.pos.x <= -t.width: 309 | self.score += self.rewards["positive"] 310 | t.kill() 311 | 312 | if self.player.pos.y < self.height * 0.125: # its above 313 | self.lives -= 1 314 | 315 | if self.player.pos.y > self.height * 0.875: # its below the lowest possible block 316 | self.lives -= 1 317 | 318 | if len(self.terrain_group) <= ( 319 | 10 + 3): # 10% per terrain, offset of ~2 with 1 extra 320 | self._add_terrain(self.width, self.width * 5) 321 | 322 | if self.lives <= 0.0: 323 | self.score += self.rewards["loss"] 324 | 325 | self.player_group.draw(self.screen) 326 | self.block_group.draw(self.screen) 327 | self.terrain_group.draw(self.screen) 328 | 329 | if __name__ == "__main__": 330 | import numpy as np 331 | 332 | pygame.init() 333 | game = Pixelcopter(width=256, height=256) 334 | game.screen = pygame.display.set_mode(game.getScreenDims(), 0, 32) 335 | game.clock = pygame.time.Clock() 336 | game.rng = np.random.RandomState(24) 337 | game.init() 338 | 339 | while True: 340 | if game.game_over(): 341 | game.reset() 342 | dt = game.clock.tick_busy_loop(30) 343 | game.step(dt) 344 | pygame.display.update() 345 | -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/raycast.py: -------------------------------------------------------------------------------- 1 | import pdb 2 | import time 3 | import os 4 | import sys 5 | 6 | import pygame 7 | import numpy as np 8 | from pygame.constants import K_w, K_a, K_d, K_s 9 | 10 | import copy 11 | 12 | class RayCastPlayer(): 13 | """ 14 | Loosely based on code from Lode's `Computer Graphics Tutorial`_. 15 | 16 | .. _Computer Graphics Tutorial: http://lodev.org/cgtutor/raycasting.html 17 | 18 | Takes input from key presses and traverses a map 19 | """ 20 | 21 | def __init__(self, map_, init_pos, init_dir, 22 | width, height, resolution, move_speed, 23 | turn_speed, plane, actions, block_types): 24 | 25 | self.actions = actions 26 | 27 | self.map_ = map_ 28 | self.width = width 29 | self.height = height 30 | 31 | self.pos = np.array([init_pos], dtype=np.float32) 32 | self.dir = np.array([init_dir], dtype=np.float32) 33 | self.plane = np.array([plane], dtype=np.float32) 34 | 35 | self.resolution = resolution 36 | self.move_speed = move_speed 37 | self.turn_speed = turn_speed 38 | 39 | self.eps = 1e-7 40 | 41 | self.block_types = block_types 42 | 43 | def _handle_player_events(self, dt): 44 | dt = dt / 1000.0 45 | for event in pygame.event.get(): 46 | if event.type == pygame.QUIT: 47 | pygame.quit() 48 | sys.exit() 49 | 50 | if event.type == pygame.KEYDOWN: 51 | key = event.key 52 | 53 | new_location = self.pos 54 | 55 | if key == self.actions["forward"]: 56 | new_location = self.pos + self.dir * self.move_speed * dt 57 | 58 | if key == self.actions["backward"]: 59 | new_location = self.pos - self.dir * self.move_speed * dt 60 | 61 | new_location = new_location.astype(int) 62 | 63 | newX, newY = new_location[0, :] 64 | 65 | if newX < self.map_.shape[0] and newY < self.map_.shape[1]: 66 | new_map = self.map_[newX, newY] 67 | 68 | if self.block_types[new_map]["pass_through"]: 69 | 70 | if key == self.actions["forward"]: 71 | self.pos[0, 0] += self.dir[0, 0] * \ 72 | self.move_speed * dt 73 | self.pos[0, 1] += self.dir[0, 1] * \ 74 | self.move_speed * dt 75 | 76 | if key == self.actions["backward"]: 77 | self.pos[0, 0] -= self.dir[0, 0] * \ 78 | self.move_speed * dt 79 | self.pos[0, 1] -= self.dir[0, 1] * \ 80 | self.move_speed * dt 81 | 
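                # Turning rotates both the view direction and the camera plane
                # by the same angle, using the standard 2D rotation:
                #   x' = x*cos(a) - y*sin(a)
                #   y' = x*sin(a) + y*cos(a)
                # Rotating dir and plane together keeps them perpendicular, so
                # the field of view is preserved while the player spins.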
82 | if key == self.actions["right"]: 83 | X_TURN = np.cos(self.turn_speed * dt) 84 | Y_TURN = np.sin(self.turn_speed * dt) 85 | 86 | _dirX = self.dir[0, 0] * X_TURN - self.dir[0, 1] * Y_TURN 87 | _dirY = self.dir[0, 0] * Y_TURN + self.dir[0, 1] * X_TURN 88 | 89 | _planeX = self.plane[0, 0] * \ 90 | X_TURN - self.plane[0, 1] * Y_TURN 91 | _planeY = self.plane[0, 0] * \ 92 | Y_TURN + self.plane[0, 1] * X_TURN 93 | 94 | self.dir[0, 0] = _dirX 95 | self.dir[0, 1] = _dirY 96 | 97 | self.plane[0, 0] = _planeX 98 | self.plane[0, 1] = _planeY 99 | 100 | if key == self.actions["left"]: 101 | X_INV_TURN = np.cos(-self.turn_speed * dt) 102 | Y_INV_TURN = np.sin(-self.turn_speed * dt) 103 | 104 | _dirX = self.dir[0, 0] * X_INV_TURN - \ 105 | self.dir[0, 1] * Y_INV_TURN 106 | _dirY = self.dir[0, 0] * Y_INV_TURN + \ 107 | self.dir[0, 1] * X_INV_TURN 108 | 109 | _planeX = self.plane[0, 0] * X_INV_TURN - \ 110 | self.plane[0, 1] * Y_INV_TURN 111 | _planeY = self.plane[0, 0] * Y_INV_TURN + \ 112 | self.plane[0, 1] * X_INV_TURN 113 | 114 | self.dir[0, 0] = _dirX 115 | self.dir[0, 1] = _dirY 116 | 117 | self.plane[0, 0] = _planeX 118 | self.plane[0, 1] = _planeY 119 | 120 | 121 | def draw(self): 122 | #N = width/resolution 123 | # N,2 124 | cameraX = np.arange( 125 | 0.0, 126 | self.width, 127 | self.resolution).astype( 128 | np.float32)[ 129 | :, 130 | np.newaxis] 131 | cameraX = 2.0 * cameraX / float(self.width) - 1.0 132 | 133 | # set the rayPos to the players current position 134 | ray_pos = np.tile(self.pos, [cameraX.shape[0], 1]) # N,2 135 | 136 | # ray direction 137 | ray_dir = self.dir + self.plane * cameraX # N,2 138 | 139 | # which box of the map we're in 140 | map_ = ray_pos.astype(int) 141 | 142 | ray_pow = np.power(ray_dir, 2.0) + self.eps 143 | ray_div = ray_pow[:, 0] / (ray_pow[:, 1]) 144 | delta_dist = np.sqrt( 145 | 1.0 + np.array([1.0 / (ray_div), ray_div])).T # N,2 146 | 147 | # N,2 148 | step = np.ones(ray_dir.shape).astype(int) 149 | step[ray_dir[:, 0] < 0, 0] = -1 150 | step[ray_dir[:, 1] < 0, 1] = -1 151 | 152 | # N,2 153 | side_dist = (map_ + 1.0 - ray_pos) * delta_dist 154 | _value = (ray_pos - map_) * delta_dist 155 | 156 | side_dist[ray_dir[:, 0] < 0, 0] = _value[ray_dir[:, 0] < 0, 0] 157 | side_dist[ray_dir[:, 1] < 0, 1] = _value[ray_dir[:, 1] < 0, 1] 158 | 159 | side_dist, delta_dist, map_, side = self._DDA( 160 | side_dist, delta_dist, map_, step) 161 | 162 | perpWallDistX = (map_[:, 0] - ray_pos[:, 0] + (1.0 - step[:, 0]) / 2.0) 163 | perpWallDistX = perpWallDistX / (ray_dir[:, 0] + self.eps) 164 | perpWallDistX = perpWallDistX[:, np.newaxis] 165 | 166 | perpWallDistY = (map_[:, 1] - ray_pos[:, 1] + (1.0 - step[:, 1]) / 2.0) 167 | perpWallDistY = perpWallDistY / (ray_dir[:, 1] + self.eps) 168 | perpWallDistY = perpWallDistY[:, np.newaxis] 169 | 170 | perpWallDist = perpWallDistY 171 | perpWallDist[side == 0] = perpWallDistX[side == 0] 172 | 173 | lineHeights = (self.height / (perpWallDist + self.eps)).astype(int) 174 | 175 | tops = -(lineHeights) / 2.0 + self.height / 2.0 176 | tops[tops < 0] = 0.0 177 | tops = tops.astype(int) 178 | 179 | bottoms = lineHeights / 2.0 + self.height / 2.0 180 | bottoms[bottoms >= self.height] = self.height - 1 181 | bottoms = bottoms.astype(int) 182 | 183 | visible_blocks = self.map_[map_[:, 0], map_[:, 1]] 184 | coloring = np.ones((bottoms.shape[0], 3)) * 255.0 185 | 186 | for k in self.block_types.keys(): 187 | if self.block_types[k] is not None: 188 | c = self.block_types[k]["color"] 189 | sel = visible_blocks == k 190 | coloring[sel] = 
np.tile(c, [bottoms.shape[0], 1])[sel] 191 | 192 | shading = np.abs(perpWallDist * 15) * 1.5 193 | coloring = coloring - shading 194 | coloring = np.clip(coloring, 0, 255) 195 | coloring[(side == 1.0).flatten(), :] *= 0.65 # lighting apparently 196 | 197 | cameraX = np.arange(0, self.width, self.resolution) 198 | returns = [cameraX, tops, bottoms, coloring] 199 | 200 | return [r.astype(int) for r in returns] 201 | 202 | def _DDA(self, side_dist, delta_dist, map_, step): 203 | # tested against for-loop version using line_profiler 204 | # for-loop take about 0.005968s per call 205 | # this version takes 0.000416s per call 206 | hits = np.zeros((map_.shape[0], 1)) 207 | side = np.zeros((map_.shape[0], 1)) 208 | 209 | while np.sum(hits) < side_dist.shape[0]: 210 | # only update values that havent hit a wall. So are 0 still. 211 | 212 | update_mask = np.logical_not(hits).astype(np.bool) 213 | 214 | # 1 => 1, 0 215 | # 0 => 0, 1 216 | mask = (side_dist[:, 0] < side_dist[:, 1])[:, np.newaxis] 217 | 218 | sel = (update_mask & (mask == True)).flatten() 219 | side_dist[sel, 0] += delta_dist[sel, 0] 220 | map_[sel, 0] += step[sel, 0] 221 | side[sel] = np.zeros(side.shape)[sel] 222 | 223 | sel = (update_mask & (mask == False)).flatten() 224 | side_dist[sel, 1] += delta_dist[sel, 1] 225 | map_[sel, 1] += step[sel, 1] 226 | side[sel] = np.ones(side.shape)[sel] 227 | 228 | # once it becomes 1 it never goes back to 0. 229 | hits = np.logical_or( 230 | hits, (self.map_[ 231 | map_[ 232 | :, 0], map_[ 233 | :, 1]] > 0)[ 234 | :, np.newaxis]) 235 | 236 | return side_dist, delta_dist, map_, side 237 | 238 | 239 | def make_map(dim): 240 | map_grid = np.zeros((dim, dim)) 241 | map_grid[0, :] = 1.0 242 | map_grid[:, 0] = 1.0 243 | map_grid[:, -1] = 1.0 244 | map_grid[-1, :] = 1.0 245 | 246 | return map_grid 247 | 248 | 249 | def make_box(grid, p0, p1, fill=0, isFilled=True): 250 | x0, y0 = p0 251 | x1, y1 = p1 252 | 253 | if isFilled: 254 | grid[x0:x1, y0:y1] = fill 255 | else: 256 | grid[x0, y0:y1 + 1] = fill 257 | grid[x1, y0:y1 + 1] = fill 258 | grid[x0:x1, y0] = fill 259 | grid[x0:x1, y1] = fill 260 | 261 | return grid 262 | 263 | if __name__ == "__main__": 264 | map_grid = make_map(15) 265 | 266 | block_types = { 267 | 0: { 268 | "pass_through": True, 269 | "color": None 270 | }, 271 | 272 | 1: { 273 | "pass_through": False, 274 | "color": (255, 255, 255) 275 | }, 276 | 277 | 2: { 278 | "pass_through": False, 279 | "color": (220, 100, 100) 280 | }, 281 | 282 | 3: { 283 | "pass_through": False, 284 | "color": (100, 220, 100) 285 | }, 286 | 287 | 4: { 288 | "pass_through": False, 289 | "color": (100, 100, 220) 290 | } 291 | } 292 | 293 | map_grid = make_box(map_grid, (5, 5), (9, 9), fill=2, isFilled=False) 294 | map_grid = make_box(map_grid, (8, 8), (14, 14), fill=3, isFilled=True) 295 | map_grid = make_box(map_grid, (1, 2), (3, 9), fill=4, isFilled=False) 296 | map_grid = make_box(map_grid, (11, 6), (12, 11), fill=0, isFilled=True) 297 | map_grid = make_box(map_grid, (6, 11), (12, 12), fill=0, isFilled=True) 298 | map_grid = make_box(map_grid, (2, 6), (7, 7), fill=0, isFilled=True) 299 | 300 | map_grid[map_grid > 0] = np.random.randint( 301 | 2, high=5, size=map_grid[map_grid > 0].shape) 302 | 303 | init_dir = (1.0, 0.0) 304 | init_pos = (1, 1) 305 | width = 128 306 | height = 128 307 | resolution = 1 308 | move_speed = 15 309 | turn_speed = 10.5 310 | plane = (0.0, 0.66) 311 | 312 | actions = { 313 | "forward": K_w, 314 | "left": K_a, 315 | "right": K_d, 316 | "backward": K_s 317 | } 318 | 319 | rc = 
RayCastPlayer( 320 | map_grid, 321 | init_pos, 322 | init_dir, 323 | width, 324 | height, 325 | resolution, 326 | move_speed, 327 | turn_speed, 328 | plane, 329 | actions, 330 | block_types 331 | ) 332 | pygame.init() 333 | 334 | screen = pygame.display.set_mode((width, height), 0, 24) 335 | clock = pygame.time.Clock() 336 | 337 | while True: 338 | dt = clock.tick(60) 339 | screen.fill((0, 0, 0)) 340 | 341 | pygame.draw.rect(screen, (92, 92, 92), (0, height / 2, width, height)) 342 | 343 | rc._handle_player_events(dt) 344 | 345 | c, t, b, col = rc.draw() 346 | 347 | for i in range(len(c)): 348 | pygame.draw.line(screen, (col[i][0], col[i][1], col[i][2]), (c[ 349 | i], t[i]), (c[i], b[i]), rc.resolution) 350 | 351 | pygame.display.update() 352 | -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/snake.py: -------------------------------------------------------------------------------- 1 | import pygame 2 | import sys 3 | import math 4 | 5 | #import .base 6 | from .base.pygamewrapper import PyGameWrapper 7 | 8 | from pygame.constants import K_w, K_a, K_s, K_d 9 | from .utils.vec2d import vec2d 10 | from .utils import percent_round_int 11 | 12 | 13 | class Food(pygame.sprite.Sprite): 14 | 15 | def __init__(self, pos_init, width, color, 16 | SCREEN_WIDTH, SCREEN_HEIGHT, rng): 17 | pygame.sprite.Sprite.__init__(self) 18 | 19 | self.pos = vec2d(pos_init) 20 | self.color = color 21 | 22 | self.SCREEN_WIDTH = SCREEN_WIDTH 23 | self.SCREEN_HEIGHT = SCREEN_HEIGHT 24 | self.width = width 25 | self.rng = rng 26 | 27 | image = pygame.Surface((width, width)) 28 | image.fill((0, 0, 0, 0)) 29 | image.set_colorkey((0, 0, 0)) 30 | pygame.draw.rect( 31 | image, 32 | color, 33 | (0, 0, self.width, self.width), 34 | 0 35 | ) 36 | 37 | self.image = image 38 | self.rect = self.image.get_rect() 39 | self.rect.center = pos_init 40 | 41 | def new_position(self, snake): 42 | new_pos = snake.body[0].pos 43 | snake_body = [s.pos for s in snake.body] 44 | 45 | while (new_pos in snake_body): 46 | _x = self.rng.choice(range( 47 | self.width * 2, self.SCREEN_WIDTH - self.width * 2, self.width 48 | )) 49 | 50 | _y = self.rng.choice(range( 51 | self.width * 2, self.SCREEN_HEIGHT - self.width * 2, self.width 52 | )) 53 | 54 | new_pos = vec2d((_x, _y)) 55 | 56 | self.pos = new_pos 57 | self.rect.center = (self.pos.x, self.pos.y) 58 | 59 | def draw(self, screen): 60 | screen.blit(self.image, self.rect.center) 61 | 62 | 63 | class SnakeSegment(pygame.sprite.Sprite): 64 | 65 | def __init__(self, pos_init, width, height, color): 66 | pygame.sprite.Sprite.__init__(self) 67 | 68 | self.pos = vec2d(pos_init) 69 | self.color = color 70 | self.width = width 71 | self.height = height 72 | 73 | image = pygame.Surface((width, height)) 74 | image.fill((0, 0, 0)) 75 | image.set_colorkey((0, 0, 0)) 76 | 77 | pygame.draw.rect( 78 | image, 79 | color, 80 | (0, 0, self.width, self.height), 81 | 0 82 | ) 83 | 84 | self.image = image 85 | # use half the size 86 | self.rect = pygame.Rect(pos_init, (self.width / 2, self.height / 2)) 87 | self.rect.center = pos_init 88 | 89 | def draw(self, screen): 90 | screen.blit(self.image, self.rect.center) 91 | 92 | 93 | # basically just holds onto all of them 94 | class SnakePlayer(): 95 | 96 | def __init__(self, speed, length, pos_init, width, 97 | color, SCREEN_WIDTH, SCREEN_HEIGHT): 98 | self.dir = vec2d((1, 0)) 99 | self.speed = speed 100 | self.pos = vec2d(pos_init) 101 | self.color = color 102 | self.width = width 103 | self.length = 
length 104 | self.body = [] 105 | self.update_head = True 106 | 107 | # build our body up 108 | for i in range(self.length): 109 | self.body.append( 110 | # makes a neat "zapping" in effect 111 | SnakeSegment( 112 | (self.pos.x - (width) * i, self.pos.y), 113 | self.width, 114 | self.width, 115 | tuple([c - 100 for c in self.color] 116 | ) if i == 0 else self.color 117 | ) 118 | ) 119 | # we dont add the first few because it cause never actually hit it 120 | self.body_group = pygame.sprite.Group() 121 | self.head = self.body[0] 122 | 123 | def update(self, dt): 124 | for i in range(self.length - 1, 0, -1): 125 | scale = 0.1 126 | 127 | self.body[i].pos = vec2d(( 128 | ((1.0 - scale) * 129 | self.body[i - 1].pos.x + scale * self.body[i].pos.x), 130 | ((1.0 - scale) * 131 | self.body[i - 1].pos.y + scale * self.body[i].pos.y) 132 | )) 133 | 134 | self.body[i].rect.center = (self.body[i].pos.x, self.body[i].pos.y) 135 | 136 | self.head.pos.x += self.dir.x * self.speed * dt 137 | self.head.pos.y += self.dir.y * self.speed * dt 138 | self.update_hitbox() 139 | 140 | def update_hitbox(self): 141 | # need to make a small rect pointing the direction the snake is 142 | # instead of counting the entire head square as a hit box, since 143 | # the head touchs the body on turns and causes game overs. 144 | 145 | x = self.head.pos.x 146 | y = self.head.pos.y 147 | 148 | if self.dir.x == 0: 149 | w = self.width 150 | h = percent_round_int(self.width, 0.25) 151 | 152 | if self.dir.y == 1: 153 | y += percent_round_int(self.width, 1.0) 154 | 155 | if self.dir.y == -1: 156 | y -= percent_round_int(self.width, 0.25) 157 | 158 | if self.dir.y == 0: 159 | w = percent_round_int(self.width, 0.25) 160 | h = self.width 161 | 162 | if self.dir.x == 1: 163 | x += percent_round_int(self.width, 1.0) 164 | 165 | if self.dir.x == -1: 166 | x -= percent_round_int(self.width, 0.25) 167 | 168 | if self.update_head: 169 | image = pygame.Surface((w, h)) 170 | image.fill((0, 0, 0)) 171 | image.set_colorkey((0, 0, 0)) 172 | 173 | pygame.draw.rect( 174 | image, 175 | (255, 0, 0), 176 | (0, 0, w, h), 177 | 0 178 | ) 179 | 180 | self.head.image = image 181 | self.head.rect = self.head.image.get_rect() 182 | self.update_head = False 183 | 184 | self.head.rect.center = (x, y) 185 | 186 | def grow(self): 187 | self.length += 1 188 | add = 100 if self.length % 2 == 0 else -100 189 | color = (self.color[0] + add, self.color[1], self.color[2] + add) 190 | last = self.body[-1].pos 191 | 192 | self.body.append( 193 | SnakeSegment( 194 | (last.x, last.y), # initially off screen? 195 | self.width, 196 | self.width, 197 | color 198 | ) 199 | ) 200 | if self.length > 3: # we cant actually hit another segment until this point. 201 | self.body_group.add(self.body[-1]) 202 | 203 | def draw(self, screen): 204 | for b in self.body[::-1]: 205 | b.draw(screen) 206 | 207 | 208 | class Snake(PyGameWrapper): 209 | """ 210 | Parameters 211 | ---------- 212 | width : int 213 | Screen width. 214 | 215 | height : int 216 | Screen height, recommended to be same dimension as width. 217 | 218 | init_length : int (default: 3) 219 | The starting number of segments the snake has. Do not set below 3 segments. Has issues with hitbox detection with the body for lower values. 
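A minimal manual-play setup, mirroring the __main__ block at the bottom of this file:

    .. code-block:: python

        import numpy as np
        import pygame

        pygame.init()
        game = Snake(width=128, height=128)
        game.screen = pygame.display.set_mode(game.getScreenDims(), 0, 32)
        game.clock = pygame.time.Clock()
        game.rng = np.random.RandomState(24)
        game.init()

        while True:
            if game.game_over():
                game.init()
            dt = game.clock.tick_busy_loop(30)
            game.step(dt)
            pygame.display.update()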
220 | 221 | """ 222 | 223 | def __init__(self, 224 | width=64, 225 | height=64, 226 | init_length=3): 227 | 228 | actions = { 229 | "up": K_w, 230 | "left": K_a, 231 | "right": K_d, 232 | "down": K_s 233 | } 234 | 235 | PyGameWrapper.__init__(self, width, height, actions=actions) 236 | 237 | self.speed = percent_round_int(width, 0.45) 238 | 239 | self.player_width = percent_round_int(width, 0.05) 240 | self.food_width = percent_round_int(width, 0.09) 241 | self.player_color = (100, 255, 100) 242 | self.food_color = (255, 100, 100) 243 | 244 | self.INIT_POS = (width / 2, height / 2) 245 | self.init_length = init_length 246 | 247 | self.BG_COLOR = (25, 25, 25) 248 | 249 | def _handle_player_events(self): 250 | for event in pygame.event.get(): 251 | if event.type == pygame.QUIT: 252 | pygame.quit() 253 | sys.exit() 254 | 255 | if event.type == pygame.KEYDOWN: 256 | key = event.key 257 | 258 | #left = -1 259 | #right = 1 260 | #up = -1 261 | #down = 1 262 | 263 | if key == self.actions["left"] and self.player.dir.x != 1: 264 | self.player.dir = vec2d((-1, 0)) 265 | 266 | if key == self.actions["right"] and self.player.dir.x != -1: 267 | self.player.dir = vec2d((1, 0)) 268 | 269 | if key == self.actions["up"] and self.player.dir.y != 1: 270 | self.player.dir = vec2d((0, -1)) 271 | 272 | if key == self.actions["down"] and self.player.dir.y != -1: 273 | self.player.dir = vec2d((0, 1)) 274 | 275 | self.player.update_head = True 276 | 277 | def getGameState(self): 278 | """ 279 | 280 | Returns 281 | ------- 282 | 283 | dict 284 | * snake head x position. 285 | * snake head y position. 286 | * food x position. 287 | * food y position. 288 | * distance from head to each snake segment. 289 | 290 | See code for structure. 291 | 292 | """ 293 | 294 | state = { 295 | "snake_head_x": self.player.head.pos.x, 296 | "snake_head_y": self.player.head.pos.y, 297 | "food_x": self.food.pos.x, 298 | "food_y": self.food.pos.y, 299 | "snake_body": [], 300 | "snake_body_pos": [], 301 | } 302 | 303 | for s in self.player.body: 304 | dist = math.sqrt((self.player.head.pos.x - s.pos.x) 305 | ** 2 + (self.player.head.pos.y - s.pos.y)**2) 306 | state["snake_body"].append(dist) 307 | state["snake_body_pos"].append([s.pos.x, s.pos.y]) 308 | 309 | return state 310 | 311 | def getScore(self): 312 | return self.score 313 | 314 | def game_over(self): 315 | return self.lives == -1 316 | 317 | def init(self): 318 | """ 319 | Starts/Resets the game to its initial state 320 | """ 321 | 322 | self.player = SnakePlayer( 323 | self.speed, 324 | self.init_length, 325 | self.INIT_POS, 326 | self.player_width, 327 | self.player_color, 328 | self.width, 329 | self.height 330 | ) 331 | 332 | self.food = Food((0, 0), 333 | self.food_width, 334 | self.food_color, 335 | self.width, 336 | self.height, 337 | self.rng 338 | ) 339 | 340 | self.food.new_position(self.player) 341 | 342 | self.score = 0 343 | self.ticks = 0 344 | self.lives = 1 345 | 346 | def step(self, dt): 347 | """ 348 | Perform one step of game emulation.
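``dt`` is the elapsed time in milliseconds (e.g. the value returned by ``pygame.time.Clock.tick_busy_loop(30)`` in the __main__ block below); it is divided by 1000 internally so speeds are in pixels per second:

.. code-block:: python

    dt = game.clock.tick_busy_loop(30)  # milliseconds since the last tick
    game.step(dt)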
349 | """ 350 | dt /= 1000.0 351 | 352 | self.ticks += 1 353 | self.screen.fill(self.BG_COLOR) 354 | self._handle_player_events() 355 | self.score += self.rewards["tick"] 356 | 357 | hit = pygame.sprite.collide_rect(self.player.head, self.food) 358 | if hit: # it hit 359 | self.score += self.rewards["positive"] 360 | self.player.grow() 361 | self.food.new_position(self.player) 362 | 363 | hits = pygame.sprite.spritecollide( 364 | self.player.head, self.player.body_group, False) 365 | if len(hits) > 0: 366 | self.lives = -1 367 | 368 | x_check = ( 369 | self.player.head.pos.x < 0) or ( 370 | self.player.head.pos.x + 371 | self.player_width / 372 | 2 > self.width) 373 | y_check = ( 374 | self.player.head.pos.y < 0) or ( 375 | self.player.head.pos.y + 376 | self.player_width / 377 | 2 > self.height) 378 | 379 | if x_check or y_check: 380 | self.lives = -1 381 | 382 | if self.lives <= 0.0: 383 | self.score += self.rewards["loss"] 384 | 385 | self.player.update(dt) 386 | 387 | self.player.draw(self.screen) 388 | self.food.draw(self.screen) 389 | 390 | 391 | if __name__ == "__main__": 392 | import numpy as np 393 | 394 | pygame.init() 395 | game = Snake(width=128, height=128) 396 | game.screen = pygame.display.set_mode(game.getScreenDims(), 0, 32) 397 | game.clock = pygame.time.Clock() 398 | game.rng = np.random.RandomState(24) 399 | game.init() 400 | 401 | while True: 402 | if game.game_over(): 403 | game.init() 404 | 405 | dt = game.clock.tick_busy_loop(30) 406 | game.step(dt) 407 | pygame.display.update() 408 | -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/ple.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image # pillow 3 | import sys 4 | 5 | import pygame 6 | from .games.base.pygamewrapper import PyGameWrapper 7 | 8 | class PLE(object): 9 | """ 10 | ple.PLE( 11 | game, fps=30, 12 | frame_skip=1, num_steps=1, 13 | reward_values={}, force_fps=True, 14 | display_screen=False, add_noop_action=True, 15 | state_preprocessor=None, 16 | rng=24 17 | ) 18 | 19 | Main wrapper that interacts with games. 20 | Provides a similar interface to Arcade Learning Environment. 21 | 22 | Parameters 23 | ---------- 24 | game: Class from ple.games.base 25 | The game the PLE environment manipulates and maintains. 26 | 27 | fps: int (default: 30) 28 | The desired frames per second we want to run our game at. 29 | Typical settings are 30 and 60 fps. 30 | 31 | frame_skip: int (default: 1) 32 | The number of times we skip getting observations while 33 | repeating an action. 34 | 35 | num_steps: int (default: 1) 36 | The number of times we repeat an action. 37 | 38 | reward_values: dict 39 | This contains the rewards we wish to give our agent based on 40 | different actions in game. The current defaults are as follows: 41 | 42 | .. code-block:: python 43 | 44 | rewards = { 45 | "positive": 1.0, 46 | "negative": -1.0, 47 | "tick": 0.0, 48 | "loss": -5.0, 49 | "win": 5.0 50 | } 51 | 52 | Tick is given to the agent at each game step. You can selectively 53 | adjust the rewards by passing a dictionary with the key you want to 54 | change. E.g., if we want to adjust the negative reward and the tick 55 | reward we would pass in the following: 56 | 57 | .. code-block:: python 58 | 59 | rewards = { 60 | "negative": -2.0, 61 | "tick": -0.01 62 | } 63 | 64 | Keep in mind that the tick is applied at each frame.
If the game is 65 | running at 60fps the agent will get a reward of 60*tick. 66 | 67 | force_fps: bool (default: True) 68 | If False, PLE delays between game.step() calls so the game 69 | actually runs at the specified fps. If True, PLE passes a fixed 70 | time delta so the game steps by an amount of time consistent 71 | with the specified fps while running as fast as possible. 72 | This is usually set to True as it speeds up training. 73 | 74 | display_screen: bool (default: False) 75 | If we draw updates to the screen. Disabling this speeds up 76 | iteration. This can be toggled to True during testing phases 77 | so you can observe the agent's progress. 78 | 79 | add_noop_action: bool (default: True) 80 | This inserts the NOOP action specified as a valid move the agent 81 | can make. 82 | 83 | state_preprocessor: python function (default: None) 84 | Python function which takes a dict representing game state and 85 | returns a numpy array. 86 | 87 | rng: numpy.random.RandomState, int, array_like or None. (default: 24) 88 | Number generator which is used by PLE and the games. 89 | 90 | """ 91 | 92 | def __init__(self, 93 | game, fps=30, frame_skip=1, num_steps=1, 94 | reward_values={}, force_fps=True, display_screen=False, 95 | add_noop_action=True, state_preprocessor=None, rng=24): 96 | 97 | self.game = game 98 | self.fps = fps 99 | self.frame_skip = frame_skip 100 | self.NOOP = None 101 | self.num_steps = num_steps 102 | self.force_fps = force_fps 103 | self.display_screen = display_screen 104 | self.add_noop_action = add_noop_action 105 | 106 | self.last_action = [] 107 | self.action = [] 108 | self.previous_score = 0 109 | self.frame_count = 0 110 | 111 | # update the scores of games with values we pick 112 | if reward_values: 113 | self.game.adjustRewards(reward_values) 114 | 115 | 116 | if isinstance(self.game, PyGameWrapper): 117 | if isinstance(rng, np.random.RandomState): 118 | self.rng = rng 119 | else: 120 | self.rng = np.random.RandomState(rng) 121 | 122 | # some pygame games preload the images 123 | # to speed resetting and inits up. 124 | pygame.display.set_mode((1, 1), pygame.NOFRAME) 125 | else: 126 | # in order to use doom, install following https://github.com/openai/doom-py 127 | from .games.base.doomwrapper import DoomWrapper 128 | if isinstance(self.game, DoomWrapper): 129 | self.rng = rng 130 | 131 | self.game.setRNG(self.rng) 132 | self.init() 133 | 134 | self.state_preprocessor = state_preprocessor 135 | self.state_dim = None 136 | 137 | if self.state_preprocessor is not None: 138 | self.state_dim = self.game.getGameState() 139 | 140 | if self.state_dim is None: 141 | raise ValueError( 142 | "Asked to return non-visual state on game that does not support it!") 143 | else: 144 | self.state_dim = self.state_preprocessor(self.state_dim).shape 145 | 146 | if game.allowed_fps is not None and self.fps != game.allowed_fps: 147 | raise ValueError("Game requires %dfps, was given %d." % 148 | (game.allowed_fps, self.fps)) 149 | 150 | def _tick(self): 151 | """ 152 | Calculates the elapsed time between frames or ticks. 153 | """ 154 | if self.force_fps: 155 | return 1000.0 / self.fps 156 | else: 157 | return self.game.tick(self.fps) 158 | 159 | def init(self): 160 | """ 161 | Initializes the game. This depends on the game and could include 162 | doing things such as setting up the display, clock etc. 163 | 164 | This method should be explicitly called.
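A typical setup looks like the following (a minimal sketch; any game from ple.games can stand in for Snake):

.. code-block:: python

    from ple import PLE
    from ple.games.snake import Snake

    game = Snake(width=64, height=64)
    p = PLE(game, fps=30, force_fps=True, display_screen=False)
    p.init()

    reward = 0.0
    while not p.game_over():
        action = p.getActionSet()[0]  # a fixed action, purely for illustration
        reward += p.act(action)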
165 | """ 166 | self.game._setup() 167 | self.game.init() #this is the game's setup/init 168 | 169 | def getActionSet(self): 170 | """ 171 | Gets the actions the game supports. Optionally inserts the NOOP 172 | action if PLE has add_noop_action set to True. 173 | 174 | Returns 175 | -------- 176 | 177 | list of pygame.constants 178 | The agent can simply select the index of the action 179 | to perform. 180 | 181 | """ 182 | actions = self.game.actions 183 | 184 | # actions may be a dict mapping action names to key constants; 185 | # flatten it to a list of keys in that case (dict.values() is a 186 | # view on python 3, so it is converted to a list below either way) 187 | if isinstance(actions, dict): 188 | actions = actions.values() 189 | 190 | actions = list(actions) 191 | 192 | #assert isinstance(actions, list), "actions is not a list" 193 | 194 | 195 | if self.add_noop_action: 196 | actions.append(self.NOOP) 197 | 198 | return actions 199 | 200 | def getFrameNumber(self): 201 | """ 202 | Gets the current number of frames the agent has seen 203 | since PLE was initialized. 204 | 205 | Returns 206 | -------- 207 | 208 | int 209 | 210 | """ 211 | 212 | return self.frame_count 213 | 214 | def game_over(self): 215 | """ 216 | Returns True if the game has reached a terminal state and 217 | False otherwise. 218 | 219 | This state is game dependent. 220 | 221 | Returns 222 | ------- 223 | 224 | bool 225 | 226 | """ 227 | 228 | return self.game.game_over() 229 | 230 | def score(self): 231 | """ 232 | Gets the score the agent currently has in game. 233 | 234 | Returns 235 | ------- 236 | 237 | int 238 | 239 | """ 240 | 241 | return self.game.getScore() 242 | 243 | def lives(self): 244 | """ 245 | Gets the number of lives the agent has left. Not all games have 246 | the concept of lives. 247 | 248 | Returns 249 | ------- 250 | 251 | int 252 | 253 | """ 254 | 255 | return self.game.lives 256 | 257 | def reset_game(self): 258 | """ 259 | Performs a reset of the game to a clean initial state. 260 | """ 261 | self.last_action = [] 262 | self.action = [] 263 | self.previous_score = 0.0 264 | self.game.reset() 265 | 266 | def getScreenRGB(self): 267 | """ 268 | Gets the current game screen in RGB format. 269 | 270 | Returns 271 | -------- 272 | numpy uint8 array 273 | Returns a numpy array with the shape (width, height, 3). 274 | 275 | 276 | """ 277 | 278 | return self.game.getScreenRGB() 279 | 280 | def getScreenGrayscale(self): 281 | """ 282 | Gets the current game screen in Grayscale format. Converts from RGB using relative luminance. 283 | 284 | Returns 285 | -------- 286 | numpy uint8 array 287 | Returns a numpy array with the shape (width, height). 288 | 289 | 290 | """ 291 | frame = self.getScreenRGB() 292 | frame = 0.21 * frame[:, :, 0] + 0.72 * \ 293 | frame[:, :, 1] + 0.07 * frame[:, :, 2] 294 | frame = np.round(frame).astype(np.uint8) 295 | 296 | return frame 297 | 298 | def saveScreen(self, filename): 299 | """ 300 | Saves the current screen to a png file. 301 | 302 | Parameters 303 | ---------- 304 | 305 | filename : string 306 | The path with filename to where we want the image saved. 307 | 308 | """ 309 | frame = Image.fromarray(self.getScreenRGB()) 310 | frame.save(filename) 311 | 312 | def getScreenDims(self): 313 | """ 314 | Gets the game's screen dimensions. 315 | 316 | Returns 317 | ------- 318 | 319 | tuple of int 320 | Returns a tuple of the following format (screen_width, screen_height).
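For example, the games' own __main__ blocks use this to size the display:

.. code-block:: python

    screen = pygame.display.set_mode(game.getScreenDims(), 0, 32)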
321 | """ 322 | return self.game.getScreenDims() 323 | 324 | def getGameStateDims(self): 325 | """ 326 | Gets the game's non-visual state dimensions. 327 | 328 | Returns 329 | ------- 330 | 331 | tuple of int or None 332 | Returns a tuple of the state vector's shape or None if the game does not support it. 333 | """ 334 | return self.state_dim 335 | 336 | def getGameState(self): 337 | """ 338 | Gets a non-visual state representation of the game. 339 | 340 | This can include items such as player position, velocity, ball location and velocity etc. 341 | 342 | Returns 343 | ------- 344 | 345 | dict or None 346 | It returns a dict of game information. This greatly depends on the game in question and must be referenced against each game. 347 | If no state is available or supported None will be returned back. 348 | 349 | """ 350 | state = self.game.getGameState() 351 | if state is not None: 352 | if self.state_preprocessor is not None: 353 | return self.state_preprocessor(state) 354 | return state 355 | else: 356 | raise ValueError( 357 | "Was asked to return state vector for game that does not support it!") 358 | 359 | def act(self, action): 360 | """ 361 | Perform an action on the game. We lockstep frames with actions. If act is not called the game will not run. 362 | 363 | Parameters 364 | ---------- 365 | 366 | action : int 367 | The action we wish to perform; it should be an element of the list returned by getActionSet(). 368 | 369 | Returns 370 | ------- 371 | 372 | int 373 | Returns the reward that the agent has accumulated while performing the action. 374 | 375 | """ 376 | return sum(self._oneStepAct(action) for i in range(self.frame_skip)) 377 | 378 | def _draw_frame(self): 379 | """ 380 | Decides if the screen will be drawn to 381 | """ 382 | 383 | self.game._draw_frame(self.display_screen) 384 | 385 | def _oneStepAct(self, action): 386 | """ 387 | Performs an action on the game. Checks if the game is over and whether the provided action is valid based on the allowed action set. 388 | """ 389 | if self.game_over(): 390 | return 0.0 391 | 392 | if action not in self.getActionSet(): 393 | action = self.NOOP 394 | 395 | self._setAction(action) 396 | for i in range(self.num_steps): 397 | time_elapsed = self._tick() 398 | self.game.step(time_elapsed) 399 | self._draw_frame() 400 | 401 | self.frame_count += self.num_steps 402 | 403 | return self._getReward() 404 | 405 | def _setAction(self, action): 406 | """ 407 | Instructs the game to perform an action if it's not a NOOP 408 | """ 409 | 410 | if action is not None: 411 | self.game._setAction(action, self.last_action) 412 | 413 | self.last_action = action 414 | 415 | def _getReward(self): 416 | """ 417 | Returns the reward the agent has gained as the change in score between the previous step and the current one.
418 | """ 419 | reward = self.game.getScore() - self.previous_score 420 | self.previous_score = self.game.getScore() 421 | 422 | return reward 423 | -------------------------------------------------------------------------------- /04_ReinforcementLearning/ple/games/pong.py: -------------------------------------------------------------------------------- 1 | import math 2 | import sys 3 | 4 | import pygame 5 | from pygame.constants import K_w, K_s 6 | from ple.games.utils.vec2d import vec2d 7 | from ple.games.utils import percent_round_int 8 | 9 | #import base 10 | from ple.games.base.pygamewrapper import PyGameWrapper 11 | 12 | class Ball(pygame.sprite.Sprite): 13 | 14 | def __init__(self, radius, speed, rng, 15 | pos_init, SCREEN_WIDTH, SCREEN_HEIGHT): 16 | 17 | pygame.sprite.Sprite.__init__(self) 18 | 19 | self.rng = rng 20 | self.radius = radius 21 | self.speed = speed 22 | self.pos = vec2d(pos_init) 23 | self.pos_before = vec2d(pos_init) 24 | self.vel = vec2d((speed, -1.0 * speed)) 25 | 26 | self.SCREEN_HEIGHT = SCREEN_HEIGHT 27 | self.SCREEN_WIDTH = SCREEN_WIDTH 28 | 29 | image = pygame.Surface((radius * 2, radius * 2)) 30 | image.fill((0, 0, 0, 0)) 31 | image.set_colorkey((0, 0, 0)) 32 | 33 | pygame.draw.circle( 34 | image, 35 | (255, 255, 255), 36 | (radius, radius), 37 | radius, 38 | 0 39 | ) 40 | 41 | self.image = image 42 | self.rect = self.image.get_rect() 43 | self.rect.center = pos_init 44 | 45 | def line_intersection(self, p0_x, p0_y, p1_x, p1_y, p2_x, p2_y, p3_x, p3_y): 46 | 47 | s1_x = p1_x - p0_x 48 | s1_y = p1_y - p0_y 49 | s2_x = p3_x - p2_x 50 | s2_y = p3_y - p2_y 51 | 52 | s = (-s1_y * (p0_x - p2_x) + s1_x * (p0_y - p2_y)) / (-s2_x * s1_y + s1_x * s2_y) 53 | t = (s2_x * (p0_y - p2_y) - s2_y * (p0_x - p2_x)) / (-s2_x * s1_y + s1_x * s2_y) 54 | 55 | return (s >= 0 and s <= 1 and t >= 0 and t <= 1) 56 | 57 | def update(self, agentPlayer, cpuPlayer, dt): 58 | 59 | self.pos.x += self.vel.x * dt 60 | self.pos.y += self.vel.y * dt 61 | 62 | is_pad_hit = False 63 | 64 | if self.pos.x <= agentPlayer.pos.x + agentPlayer.rect_width: 65 | if self.line_intersection(self.pos_before.x, self.pos_before.y, self.pos.x, self.pos.y, agentPlayer.pos.x + agentPlayer.rect_width / 2, agentPlayer.pos.y - agentPlayer.rect_height / 2, agentPlayer.pos.x + agentPlayer.rect_width / 2, agentPlayer.pos.y + agentPlayer.rect_height / 2): 66 | self.pos.x = max(0, self.pos.x) 67 | self.vel.x = -1 * (self.vel.x + self.speed * 0.05) 68 | self.vel.y += agentPlayer.vel.y * 2.0 69 | self.pos.x += self.radius 70 | is_pad_hit = True 71 | 72 | if self.pos.x >= cpuPlayer.pos.x - cpuPlayer.rect_width: 73 | if self.line_intersection(self.pos_before.x, self.pos_before.y, self.pos.x, self.pos.y, cpuPlayer.pos.x - cpuPlayer.rect_width / 2, cpuPlayer.pos.y - cpuPlayer.rect_height / 2, cpuPlayer.pos.x - cpuPlayer.rect_width / 2, cpuPlayer.pos.y + cpuPlayer.rect_height / 2): 74 | self.pos.x = min(self.SCREEN_WIDTH, self.pos.x) 75 | self.vel.x = -1 * (self.vel.x + self.speed * 0.05) 76 | self.vel.y += cpuPlayer.vel.y * 0.006 77 | self.pos.x -= self.radius 78 | is_pad_hit = True 79 | 80 | # Little randomness in order not to get stuck in a static loop 81 | if is_pad_hit: 82 | self.vel.y += self.rng.random_sample() * 0.001 - 0.0005 83 | 84 | if self.pos.y - self.radius <= 0: 85 | self.vel.y *= -0.99 86 | self.pos.y += 1.0 87 | 88 | if self.pos.y + self.radius >= self.SCREEN_HEIGHT: 89 | self.vel.y *= -0.99 90 | self.pos.y -= 1.0 91 | 92 | self.pos_before.x = self.pos.x 93 | self.pos_before.y = self.pos.y 94 | 95 | 
self.rect.center = (self.pos.x, self.pos.y) 96 | 97 | 98 | class Player(pygame.sprite.Sprite): 99 | 100 | def __init__(self, speed, rect_width, rect_height, 101 | pos_init, SCREEN_WIDTH, SCREEN_HEIGHT): 102 | 103 | pygame.sprite.Sprite.__init__(self) 104 | 105 | self.speed = speed 106 | self.pos = vec2d(pos_init) 107 | self.vel = vec2d((0, 0)) 108 | 109 | self.rect_height = rect_height 110 | self.rect_width = rect_width 111 | self.SCREEN_HEIGHT = SCREEN_HEIGHT 112 | self.SCREEN_WIDTH = SCREEN_WIDTH 113 | 114 | image = pygame.Surface((rect_width, rect_height)) 115 | image.fill((0, 0, 0, 0)) 116 | image.set_colorkey((0, 0, 0)) 117 | 118 | pygame.draw.rect( 119 | image, 120 | (255, 255, 255), 121 | (0, 0, rect_width, rect_height), 122 | 0 123 | ) 124 | 125 | self.image = image 126 | self.rect = self.image.get_rect() 127 | self.rect.center = pos_init 128 | 129 | def update(self, dy, dt): 130 | self.vel.y += dy * dt 131 | self.vel.y *= 0.9 132 | 133 | self.pos.y += self.vel.y 134 | 135 | if self.pos.y - self.rect_height / 2 <= 0: 136 | self.pos.y = self.rect_height / 2 137 | self.vel.y = 0.0 138 | 139 | if self.pos.y + self.rect_height / 2 >= self.SCREEN_HEIGHT: 140 | self.pos.y = self.SCREEN_HEIGHT - self.rect_height / 2 141 | self.vel.y = 0.0 142 | 143 | self.rect.center = (self.pos.x, self.pos.y) 144 | 145 | def updateCpu(self, ball, dt): 146 | dy = 0.0 147 | if ball.vel.x >= 0 and ball.pos.x >= self.SCREEN_WIDTH / 2: 148 | dy = self.speed 149 | if self.pos.y > ball.pos.y: 150 | dy = -1.0 * dy 151 | else: 152 | dy = 1.0 * self.speed / 4.0 153 | 154 | if self.pos.y > self.SCREEN_HEIGHT / 2.0: 155 | dy = -1.0 * self.speed / 4.0 156 | 157 | if self.pos.y - self.rect_height / 2 <= 0: 158 | self.pos.y = self.rect_height / 2 159 | self.vel.y = 0.0 160 | 161 | if self.pos.y + self.rect_height / 2 >= self.SCREEN_HEIGHT: 162 | self.pos.y = self.SCREEN_HEIGHT - self.rect_height / 2 163 | self.vel.y = 0.0 164 | 165 | self.pos.y += dy * dt 166 | self.rect.center = (self.pos.x, self.pos.y) 167 | 168 | 169 | class Pong(PyGameWrapper): 170 | """ 171 | Loosely based on code from marti1125's `pong game`_. 172 | 173 | .. _pong game: https://github.com/marti1125/pong/ 174 | 175 | Parameters 176 | ---------- 177 | width : int 178 | Screen width. 179 | 180 | height : int 181 | Screen height, recommended to be the same dimension as width. 182 | 183 | MAX_SCORE : int (default: 11) 184 | The max number of points the agent or cpu needs to score to cause a terminal state. 185 | 186 | cpu_speed_ratio: float (default: 0.6) 187 | Speed of opponent (useful for curriculum learning) 188 | 189 | players_speed_ratio: float (default: 0.4) 190 | Speed of player (useful for curriculum learning) 191 | 192 | ball_speed_ratio: float (default: 0.75) 193 | Speed of ball (useful for curriculum learning) 194 | 195 | """ 196 | 197 | def __init__(self, width=64, height=48, cpu_speed_ratio=0.6, players_speed_ratio=0.4, ball_speed_ratio=0.75, MAX_SCORE=11): 198 | 199 | actions = { 200 | "up": K_w, 201 | "down": K_s 202 | } 203 | 204 | PyGameWrapper.__init__(self, width, height, actions=actions) 205 | 206 | # the %'s come from the original values; we keep the same ratios 207 | # when the resolution increases.
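# percent_round_int(x, p) is assumed here to round p * x to an int
# (that is how it is used throughout ple.games); e.g. with the default
# height=48, percent_round_int(48, 0.15) gives a paddle_height of 7.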
208 | self.ball_radius = percent_round_int(height, 0.03) 209 | 210 | self.cpu_speed_ratio = cpu_speed_ratio 211 | self.ball_speed_ratio = ball_speed_ratio 212 | self.players_speed_ratio = players_speed_ratio 213 | 214 | self.paddle_width = percent_round_int(width, 0.023) 215 | self.paddle_height = percent_round_int(height, 0.15) 216 | self.paddle_dist_to_wall = percent_round_int(width, 0.0625) 217 | self.MAX_SCORE = MAX_SCORE 218 | 219 | self.dy = 0.0 220 | self.score_sum = 0.0 # need to deal with 11 on either side winning 221 | self.score_counts = { 222 | "agent": 0.0, 223 | "cpu": 0.0 224 | } 225 | 226 | def _handle_player_events(self): 227 | self.dy = 0 228 | 229 | if __name__ == "__main__": 230 | # for debugging mode 231 | events = pygame.event.get() 232 | keys = pygame.key.get_pressed() 233 | if keys[self.actions['up']]: 234 | self.dy = -self.agentPlayer.speed 235 | elif keys[self.actions['down']]: 236 | self.dy = self.agentPlayer.speed 237 | # QUIT is an event type, not a key, so check the event queue for it 238 | if any(event.type == pygame.QUIT for event in events): 239 | pygame.quit() 240 | sys.exit() 241 | pygame.event.pump() 242 | else: 243 | # consume events from act 244 | for event in pygame.event.get(): 245 | if event.type == pygame.QUIT: 246 | pygame.quit() 247 | sys.exit() 248 | 249 | if event.type == pygame.KEYDOWN: 250 | key = event.key 251 | if key == self.actions['up']: 252 | self.dy = -self.agentPlayer.speed 253 | 254 | if key == self.actions['down']: 255 | self.dy = self.agentPlayer.speed 256 | 257 | 258 | 259 | def getGameState(self): 260 | """ 261 | Gets a non-visual state representation of the game. 262 | 263 | Returns 264 | ------- 265 | 266 | dict 267 | * player y position. 268 | * player's velocity. 269 | * cpu y position. 270 | * ball x position. 271 | * ball y position. 272 | * ball x velocity. 273 | * ball y velocity. 274 | 275 | See code for structure.
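For example, a freshly initialized 64x48 game would return something like the following (positions in pixels, velocities in pixels per second):

.. code-block:: python

    {
        "player_y": 24.0,
        "player_velocity": 0.0,
        "cpu_y": 24.0,
        "ball_x": 32.0,
        "ball_y": 24.0,
        "ball_velocity_x": 36.0,
        "ball_velocity_y": -36.0
    }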
276 | 277 | """ 278 | state = { 279 | "player_y": self.agentPlayer.pos.y, 280 | "player_velocity": self.agentPlayer.vel.y, 281 | "cpu_y": self.cpuPlayer.pos.y, 282 | "ball_x": self.ball.pos.x, 283 | "ball_y": self.ball.pos.y, 284 | "ball_velocity_x": self.ball.vel.x, 285 | "ball_velocity_y": self.ball.vel.y 286 | } 287 | 288 | return state 289 | 290 | def getScore(self): 291 | return self.score_sum 292 | 293 | def game_over(self): 294 | # pong used 11 as max score 295 | return (self.score_counts['agent'] == self.MAX_SCORE) or ( 296 | self.score_counts['cpu'] == self.MAX_SCORE) 297 | 298 | def init(self): 299 | self.score_counts = { 300 | "agent": 0.0, 301 | "cpu": 0.0 302 | } 303 | 304 | self.score_sum = 0.0 305 | self.ball = Ball( 306 | self.ball_radius, 307 | self.ball_speed_ratio * self.height, 308 | self.rng, 309 | (self.width / 2, self.height / 2), 310 | self.width, 311 | self.height 312 | ) 313 | 314 | self.agentPlayer = Player( 315 | self.players_speed_ratio * self.height, 316 | self.paddle_width, 317 | self.paddle_height, 318 | (self.paddle_dist_to_wall, self.height / 2), 319 | self.width, 320 | self.height) 321 | 322 | self.cpuPlayer = Player( 323 | self.cpu_speed_ratio * self.height, 324 | self.paddle_width, 325 | self.paddle_height, 326 | (self.width - self.paddle_dist_to_wall, self.height / 2), 327 | self.width, 328 | self.height) 329 | 330 | self.players_group = pygame.sprite.Group() 331 | self.players_group.add(self.agentPlayer) 332 | self.players_group.add(self.cpuPlayer) 333 | 334 | self.ball_group = pygame.sprite.Group() 335 | self.ball_group.add(self.ball) 336 | 337 | 338 | def reset(self): 339 | self.init() 340 | # after game over, give the ball a random direction, otherwise it would always be the same 341 | self._reset_ball(1 if self.rng.random_sample() > 0.5 else -1) 342 | 343 | 344 | def _reset_ball(self, direction): 345 | self.ball.pos.x = self.width / 2 # move it to the center 346 | 347 | # we go in the same direction that they lost in, but at the starting vel. 348 | self.ball.vel.x = self.ball.speed * direction 349 | self.ball.vel.y = (self.rng.random_sample() * 350 | self.ball.speed) - self.ball.speed * 0.5 351 | 352 | def step(self, dt): 353 | dt /= 1000.0 354 | self.screen.fill((0, 0, 0)) 355 | 356 | self.agentPlayer.speed = self.players_speed_ratio * self.height 357 | self.cpuPlayer.speed = self.cpu_speed_ratio * self.height 358 | self.ball.speed = self.ball_speed_ratio * self.height 359 | 360 | self._handle_player_events() 361 | 362 | # doesn't make sense to have this, but include it if needed.
363 | self.score_sum += self.rewards["tick"] 364 | 365 | self.ball.update(self.agentPlayer, self.cpuPlayer, dt) 366 | 367 | is_terminal_state = False 368 | 369 | # logic 370 | if self.ball.pos.x <= 0: 371 | self.score_sum += self.rewards["negative"] 372 | self.score_counts["cpu"] += 1.0 373 | self._reset_ball(-1) 374 | is_terminal_state = True 375 | 376 | if self.ball.pos.x >= self.width: 377 | self.score_sum += self.rewards["positive"] 378 | self.score_counts["agent"] += 1.0 379 | self._reset_ball(1) 380 | is_terminal_state = True 381 | 382 | if is_terminal_state: 383 | # winning 384 | if self.score_counts['agent'] == self.MAX_SCORE: 385 | self.score_sum += self.rewards["win"] 386 | 387 | # losing 388 | if self.score_counts['cpu'] == self.MAX_SCORE: 389 | self.score_sum += self.rewards["loss"] 390 | else: 391 | self.agentPlayer.update(self.dy, dt) 392 | self.cpuPlayer.updateCpu(self.ball, dt) 393 | 394 | self.players_group.draw(self.screen) 395 | self.ball_group.draw(self.screen) 396 | 397 | if __name__ == "__main__": 398 | import numpy as np 399 | 400 | pygame.init() 401 | game = Pong(width=256, height=200) 402 | game.screen = pygame.display.set_mode(game.getScreenDims(), 0, 32) 403 | game.clock = pygame.time.Clock() 404 | game.rng = np.random.RandomState(24) 405 | game.init() 406 | 407 | while True: 408 | dt = game.clock.tick_busy_loop(60) 409 | game.step(dt) 410 | pygame.display.update() 411 | --------------------------------------------------------------------------------