├── .gitignore ├── LICENSE ├── README.md ├── feedback └── README.md ├── helpers └── logging_colorer.py ├── papers └── recent_deepmind_papers │ ├── 1-s2.0-S2352154615001151-main.pdf │ ├── 1507.04296.pdf │ ├── 1509.02971v2.pdf │ ├── 1509.06461v3.pdf │ ├── 1509.08731v1.pdf │ ├── 1510.09142v1.pdf │ ├── 1511.06581v3.pdf │ ├── 1512.04860v1.pdf │ ├── 1602.01783v2.pdf │ ├── 1603.00748v1.pdf │ ├── 1605.06676v2.pdf │ ├── 1606.02647v2.pdf │ ├── 1606.05312.pdf │ ├── 1610.00633.pdf │ ├── 4031-monte-carlo-planning-in-large-pomdps.pdf │ └── DQNNaturePaper.pdf ├── pics ├── atari.jpg ├── blindspot.png ├── jan.zikes.png ├── michal.sustr.png └── trophy.jpg ├── preparation ├── README.md ├── ale_example.py ├── gym_example.py ├── space_invaders.bin └── tf_example.py ├── requirements.txt ├── showcase-studies ├── atari-roms │ ├── breakout.bin │ ├── pong.bin │ ├── seaquest.bin │ └── space_invaders.bin ├── dqn-gym │ ├── .directory │ ├── .gitignore │ ├── Makefile │ ├── README.md │ ├── atari_emulator.py │ ├── dqn_agent.py │ ├── environment.py │ ├── experience_memory.py │ ├── experiment.py │ ├── logging_colorer.py │ ├── parallel_dqn_agent.py │ ├── parallel_q_network.py │ ├── q_network.py │ ├── record_stats.py │ ├── run_dqn.py │ ├── states_examples │ │ ├── state_0_0.png │ │ ├── state_0_1.png │ │ ├── state_0_2.png │ │ ├── state_0_3.png │ │ ├── state_12_0.png │ │ ├── state_12_1.png │ │ ├── state_12_2.png │ │ ├── state_12_3.png │ │ ├── state_19_0.png │ │ ├── state_19_1.png │ │ ├── state_19_2.png │ │ ├── state_19_3.png │ │ ├── state_6_0.png │ │ ├── state_6_1.png │ │ ├── state_6_2.png │ │ └── state_6_3.png │ ├── training_results │ │ └── saved_models │ │ │ └── breakout │ │ │ └── dqn │ │ │ └── brick_hunter_example │ │ │ ├── brick_hunter_qsub.ckpt-24 │ │ │ ├── brick_hunter_qsub.ckpt-24.meta │ │ │ └── checkpoint │ └── visuals.py ├── dqn-simple │ ├── .gitignore │ ├── Makefile │ ├── README.md │ ├── atari_environment.py │ ├── dqn.py │ ├── encode_ffmpeg │ ├── logging_colorer.py │ ├── logstats.sh │ ├── play_atari.py │ ├── png.py │ ├── replay.py │ ├── state.py │ └── tf_summaries.py ├── go-NN │ ├── .gitignore │ ├── README.md │ └── engine │ │ ├── Board.py │ │ ├── Book.py │ │ ├── CGOSEngine.py │ │ ├── Checkpoint.py │ │ ├── Engine.py │ │ ├── Eval.py │ │ ├── EvalEngine.py │ │ ├── EvalModels.py │ │ ├── EvalStats.py │ │ ├── EvalTraining.py │ │ ├── Features.py │ │ ├── GTP.py │ │ ├── Game.py │ │ ├── HelperEngine.py │ │ ├── InfluenceEngine.py │ │ ├── InfluenceModels.py │ │ ├── InfluenceTraining.py │ │ ├── KGSEngine.py │ │ ├── Layers.py │ │ ├── MakeEvalData.py │ │ ├── MakeInfluenceData.py │ │ ├── MakeMoveData.py │ │ ├── MirrorEngine.py │ │ ├── MoveModels.py │ │ ├── MoveTraining.py │ │ ├── NPZ.py │ │ ├── Normalization.py │ │ ├── OnlineExampleMaker.py │ │ ├── Policy.py │ │ ├── SGFReader.py │ │ ├── SelfPlay.py │ │ ├── Symmetry.py │ │ ├── TFEngine.py │ │ ├── Train.py │ │ ├── TreeSearch.py │ │ └── games_with_illegal_moves_sorted.txt ├── samuel_checkers │ ├── README.md │ ├── agent.py │ ├── arthur.py │ ├── checkers.py │ ├── game.py │ ├── logfile │ ├── random_agent.py │ └── test.py └── td-gammon │ ├── .gitignore │ ├── Makefile │ ├── README.md │ ├── backgammon │ ├── README.md │ ├── __init__.py │ ├── agents │ │ ├── __init__.py │ │ ├── human_agent.py │ │ ├── random_agent.py │ │ └── td_gammon_agent.py │ └── game.py │ ├── checkpoints │ ├── checkpoint │ ├── checkpoint-178 │ ├── checkpoint-178.meta │ ├── checkpoint-447880 │ ├── checkpoint-447880.meta │ ├── checkpoint-7894 │ └── checkpoint-7894.meta │ ├── main.py │ └── model.py ├── slides ├── CNNs.pdf └── 
tensorflow_intro.pdf ├── tf_examples ├── convnet_mnist.py ├── linear_regression.py └── mlp_mnist.py └── theory ├── MC └── MCTS_(English).svg.png ├── alfa-beta ├── Selection_087.png └── tree │ ├── img001.gif │ ├── img002.gif │ ├── img003.gif │ ├── img004.gif │ ├── img005.gif │ ├── img006.gif │ ├── img007.gif │ ├── img008.gif │ ├── img009.gif │ ├── img010.gif │ ├── img011.gif │ ├── img012.gif │ ├── img013.gif │ ├── img014.gif │ ├── img015.gif │ ├── img016.gif │ ├── img017.gif │ └── img018.gif └── mini-max └── Selection_086.png /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Python template 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | env/ 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *,cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # IPython Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # dotenv 81 | .env 82 | 83 | # virtualenv 84 | venv/ 85 | ENV/ 86 | 87 | # Spyder project settings 88 | .spyderproject 89 | 90 | # Rope project settings 91 | .ropeproject 92 | 93 | .idea/ 94 | *~ 95 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /feedback/README.md: -------------------------------------------------------------------------------- 1 | Please send your feedbacks via pull requests here in a text file. Thanks! 2 | -------------------------------------------------------------------------------- /helpers/logging_colorer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Console coloring of logs 3 | """ 4 | 5 | # thanks stackoverflow :-P https://stackoverflow.com/questions/384076/how-can-i-color-python-logging-output 6 | import logging 7 | 8 | # now we patch Python code to add color support to logging.StreamHandler 9 | import sys 10 | 11 | 12 | def add_coloring_to_emit_windows(fn): 13 | # add methods we need to the class 14 | def _out_handle(self): 15 | import ctypes 16 | return ctypes.windll.kernel32.GetStdHandle(self.STD_OUTPUT_HANDLE) 17 | out_handle = property(_out_handle) 18 | 19 | def _set_color(self, code): 20 | import ctypes 21 | # Constants from the Windows API 22 | self.STD_OUTPUT_HANDLE = -11 23 | hdl = ctypes.windll.kernel32.GetStdHandle(self.STD_OUTPUT_HANDLE) 24 | ctypes.windll.kernel32.SetConsoleTextAttribute(hdl, code) 25 | 26 | setattr(logging.StreamHandler, '_set_color', _set_color) 27 | 28 | def new(*args): 29 | FOREGROUND_BLUE = 0x0001 # text color contains blue. 30 | FOREGROUND_GREEN = 0x0002 # text color contains green. 31 | FOREGROUND_RED = 0x0004 # text color contains red. 32 | FOREGROUND_INTENSITY = 0x0008 # text color is intensified. 33 | FOREGROUND_WHITE = FOREGROUND_BLUE|FOREGROUND_GREEN |FOREGROUND_RED 34 | # winbase.h 35 | STD_INPUT_HANDLE = -10 36 | STD_OUTPUT_HANDLE = -11 37 | STD_ERROR_HANDLE = -12 38 | 39 | # wincon.h 40 | FOREGROUND_BLACK = 0x0000 41 | FOREGROUND_BLUE = 0x0001 42 | FOREGROUND_GREEN = 0x0002 43 | FOREGROUND_CYAN = 0x0003 44 | FOREGROUND_RED = 0x0004 45 | FOREGROUND_MAGENTA = 0x0005 46 | FOREGROUND_YELLOW = 0x0006 47 | FOREGROUND_GREY = 0x0007 48 | FOREGROUND_INTENSITY = 0x0008 # foreground color is intensified. 49 | 50 | BACKGROUND_BLACK = 0x0000 51 | BACKGROUND_BLUE = 0x0010 52 | BACKGROUND_GREEN = 0x0020 53 | BACKGROUND_CYAN = 0x0030 54 | BACKGROUND_RED = 0x0040 55 | BACKGROUND_MAGENTA = 0x0050 56 | BACKGROUND_YELLOW = 0x0060 57 | BACKGROUND_GREY = 0x0070 58 | BACKGROUND_INTENSITY = 0x0080 # background color is intensified. 
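        # Map the record's level to a console color: CRITICAL (>=50) bright red on a
        # yellow background, ERROR (>=40) red, WARNING (>=30) yellow, INFO (>=20) green,
        # DEBUG (>=10) magenta, anything else plain white.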
59 | 60 | levelno = args[1].levelno 61 | if(levelno>=50): 62 | color = BACKGROUND_YELLOW | FOREGROUND_RED | FOREGROUND_INTENSITY | BACKGROUND_INTENSITY 63 | elif(levelno>=40): 64 | color = FOREGROUND_RED | FOREGROUND_INTENSITY 65 | elif(levelno>=30): 66 | color = FOREGROUND_YELLOW | FOREGROUND_INTENSITY 67 | elif(levelno>=20): 68 | color = FOREGROUND_GREEN 69 | elif(levelno>=10): 70 | color = FOREGROUND_MAGENTA 71 | else: 72 | color = FOREGROUND_WHITE 73 | args[0]._set_color(color) 74 | 75 | ret = fn(*args) 76 | args[0]._set_color( FOREGROUND_WHITE ) 77 | #print "after" 78 | return ret 79 | return new 80 | 81 | def add_coloring_to_emit_ansi(fn): 82 | # add methods we need to the class 83 | def new(*args): 84 | levelno = args[1].levelno 85 | if(levelno>=50): 86 | color = '\x1b[31m' # red 87 | elif(levelno>=40): 88 | color = '\x1b[31m' # red 89 | elif(levelno>=30): 90 | color = '\x1b[33m' # yellow 91 | elif(levelno>=20): 92 | color = '\x1b[32m' # green 93 | elif(levelno>=10): 94 | color = '\x1b[35m' # pink 95 | else: 96 | color = '\x1b[0m' # normal 97 | args[1].msg = color + args[1].msg[:200] + '\x1b[0m' # normal 98 | #print "after" 99 | return fn(*args) 100 | return new 101 | 102 | import platform 103 | if platform.system()=='Windows': 104 | # Windows does not support ANSI escapes and we are using API calls to set the console color 105 | logging.StreamHandler.emit = add_coloring_to_emit_windows(logging.StreamHandler.emit) 106 | else: 107 | # all non-Windows platforms are supporting ANSI escapes so we use them 108 | logging.StreamHandler.emit = add_coloring_to_emit_ansi(logging.StreamHandler.emit) 109 | #log = logging.getLogger() 110 | #log.addFilter(log_filter()) 111 | #//hdlr = logging.StreamHandler() 112 | #//hdlr.setFormatter(formatter()) 113 | 114 | def init_logging(verbose=True, log_file='app.log'): 115 | """ 116 | Initialize settings for Python logger 117 | 118 | This allows for logging into console as well as specified log_file. 119 | After you can use in the code just:: 120 | 121 | import logging 122 | logging.info("hello world!") 123 | # will produce 124 | # 2016-07-19 16:13:02,931 [MainThread ][INFO ]: hello world! 
125 | 126 | :param bool verbose: 127 | :param str log_file: 128 | """ 129 | 130 | # let's log the same output to console and to file 131 | log_file = log_file 132 | format = "%(asctime)s [%(threadName)-12.12s][%(levelname)-5.5s]: %(message)s" 133 | 134 | root_logger = logging.getLogger() 135 | logging.basicConfig( 136 | format=format, 137 | level=logging.DEBUG if verbose else logging.WARNING, 138 | stream=sys.stderr) 139 | 140 | log_formatter = logging.Formatter(format) 141 | file_handler = logging.FileHandler(log_file) 142 | file_handler.setFormatter(log_formatter) 143 | root_logger.addHandler(file_handler) 144 | -------------------------------------------------------------------------------- /papers/recent_deepmind_papers/1-s2.0-S2352154615001151-main.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/papers/recent_deepmind_papers/1-s2.0-S2352154615001151-main.pdf -------------------------------------------------------------------------------- /papers/recent_deepmind_papers/1507.04296.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/papers/recent_deepmind_papers/1507.04296.pdf -------------------------------------------------------------------------------- /papers/recent_deepmind_papers/1509.02971v2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/papers/recent_deepmind_papers/1509.02971v2.pdf -------------------------------------------------------------------------------- /papers/recent_deepmind_papers/1509.06461v3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/papers/recent_deepmind_papers/1509.06461v3.pdf -------------------------------------------------------------------------------- /papers/recent_deepmind_papers/1509.08731v1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/papers/recent_deepmind_papers/1509.08731v1.pdf -------------------------------------------------------------------------------- /papers/recent_deepmind_papers/1510.09142v1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/papers/recent_deepmind_papers/1510.09142v1.pdf -------------------------------------------------------------------------------- /papers/recent_deepmind_papers/1511.06581v3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/papers/recent_deepmind_papers/1511.06581v3.pdf -------------------------------------------------------------------------------- /papers/recent_deepmind_papers/1512.04860v1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/papers/recent_deepmind_papers/1512.04860v1.pdf 
-------------------------------------------------------------------------------- /papers/recent_deepmind_papers/1602.01783v2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/papers/recent_deepmind_papers/1602.01783v2.pdf -------------------------------------------------------------------------------- /papers/recent_deepmind_papers/1603.00748v1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/papers/recent_deepmind_papers/1603.00748v1.pdf -------------------------------------------------------------------------------- /papers/recent_deepmind_papers/1605.06676v2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/papers/recent_deepmind_papers/1605.06676v2.pdf -------------------------------------------------------------------------------- /papers/recent_deepmind_papers/1606.02647v2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/papers/recent_deepmind_papers/1606.02647v2.pdf -------------------------------------------------------------------------------- /papers/recent_deepmind_papers/1606.05312.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/papers/recent_deepmind_papers/1606.05312.pdf -------------------------------------------------------------------------------- /papers/recent_deepmind_papers/1610.00633.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/papers/recent_deepmind_papers/1610.00633.pdf -------------------------------------------------------------------------------- /papers/recent_deepmind_papers/4031-monte-carlo-planning-in-large-pomdps.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/papers/recent_deepmind_papers/4031-monte-carlo-planning-in-large-pomdps.pdf -------------------------------------------------------------------------------- /papers/recent_deepmind_papers/DQNNaturePaper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/papers/recent_deepmind_papers/DQNNaturePaper.pdf -------------------------------------------------------------------------------- /pics/atari.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/pics/atari.jpg -------------------------------------------------------------------------------- /pics/blindspot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/pics/blindspot.png 
-------------------------------------------------------------------------------- /pics/jan.zikes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/pics/jan.zikes.png -------------------------------------------------------------------------------- /pics/michal.sustr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/pics/michal.sustr.png -------------------------------------------------------------------------------- /pics/trophy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/pics/trophy.jpg -------------------------------------------------------------------------------- /preparation/README.md: -------------------------------------------------------------------------------- 1 | # Preparation for workshop 2 | 3 | On your personal laptop, please make sure to: 4 | 5 | - create an isolated environment (you can use virtualenv or conda, whichever you prefer): 6 | 7 | $ pip install virtualenv 8 | # put all the files in a location you like, we'll stick with ~/tf_env for simplicity 9 | $ virtualenv -p /usr/bin/python3.5 ~/tf_env 10 | $ source ~/tf_env/bin/activate 11 | 12 | - install the following packages into the virtualenv 13 | 14 | # Put these into a text file requirements.txt and execute in your virtualenv 15 | $ pip install -r requirements.txt 16 | # (there might be some extra packages; it is the list I have on my laptop) 17 | 18 | - [install TensorFlow in the virtualenv](https://www.tensorflow.org/versions/r0.11/get_started/os_setup.html#virtualenv-installation) 19 | - install ALE - the [Arcade Learning Environment](http://www.arcadelearningenvironment.org/) 20 | - Install the necessary dependencies: 21 | 22 | sudo apt-get install libsdl-gfx1.2-dev libsdl-image1.2-dev libsdl1.2-dev cmake 23 | 24 | - Clone and build ALE: 25 | 26 | git clone https://github.com/mgbellemare/Arcade-Learning-Environment.git 27 | cd Arcade-Learning-Environment 28 | cmake -DUSE_SDL=ON -DUSE_RLGLUE=OFF -DBUILD_EXAMPLES=ON . 29 | make -j 4 30 | sudo make install 31 | sudo pip install . 32 | 33 | - install [OpenAI Gym](https://gym.openai.com/docs) 34 | 35 | pip install gym 36 | # note: I had to update the gym source files to swap the if/elif conditions 37 | # for ffmpeg/avconv. I issued a PR to gym but I'm not sure if they 38 | # will merge it. If this fails for you as well, you can update the code 39 | # or clone my repo at https://github.com/michalsustr/gym 40 | # Diff: 41 | # https://github.com/openai/gym/compare/master...michalsustr:master#diff-54b89e317dc6e7d9dfd407344cafd1bf 42 | pip install gym[atari] 43 | 44 | - optionally: [Set up TensorFlow on AWS GPU](https://github.com/gtoubassi/dqn-atari/wiki/Setting-up-TensorFlow-on-AWS-GPU) 45 | 46 | # Test setup 47 | - You can test your ALE setup by launching the script 48 | 49 | $ python ale_example.py ./space_invaders.bin 50 | 51 | - Test TensorFlow (it can take a while to run for the first time) 52 | 53 | $ python tf_example.py 54 | 55 | - Test gym - get your API key at [gym](https://gym.openai.com/) (by signing in with your GitHub account) and update the `gym_example.py` file.
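The only change needed in `gym_example.py` is the `api_key` argument of the final `gym.upload(...)` call at the bottom of the script; replace the placeholder with the key from your gym account page:

    gym.upload(outdir, api_key='YOUR_API_KEY')

Then run: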
57 | 58 | $ python gym_example.py 59 | 60 | You should get a reference link to your evalution board 61 | 62 | 2016-10-02 21:18:26,920 [MainThread ][INFO ]: 63 | **************************************************** 64 | You successfully uploaded your evaluation on CartPole-v0 to 65 | OpenAI Gym! You can find it at: 66 | 67 | https://gym.openai.com/evaluations/eval_8ZzrWOlRICX3ynLBTQ8A 68 | 69 | **************************************************** 70 | 71 | Please send this link to my e-mail address `michal.sustr at you know gmail.com` with title 72 | `[RL_workshop] gym link` so that we know how many people actually read this and prepared themselves :-) 73 | -------------------------------------------------------------------------------- /preparation/ale_example.py: -------------------------------------------------------------------------------- 1 | # python_example.py 2 | # Author: Ben Goodrich 3 | # 4 | # This is a direct port to python of the shared library example from 5 | # ALE provided in doc/examples/sharedLibraryInterfaceExample.cpp 6 | import sys 7 | from random import randrange 8 | from ale_python_interface import ALEInterface 9 | 10 | if len(sys.argv) < 2: 11 | print('Usage: %s rom_file' % sys.argv[0]) 12 | sys.exit() 13 | 14 | ale = ALEInterface() 15 | 16 | # Get & Set the desired settings 17 | ale.setInt(b'random_seed', 123) 18 | 19 | # Set USE_SDL to true to display the screen. ALE must be compilied 20 | # with SDL enabled for this to work. On OSX, pygame init is used to 21 | # proxy-call SDL_main. 22 | USE_SDL = False 23 | if USE_SDL: 24 | if sys.platform == 'darwin': 25 | import pygame 26 | pygame.init() 27 | ale.setBool(b'sound', False) # Sound doesn't work on OSX 28 | elif sys.platform.startswith('linux'): 29 | ale.setBool(b'sound', True) 30 | ale.setBool(b'display_screen', True) 31 | 32 | # Load the ROM file 33 | rom_file = str.encode(sys.argv[1]) 34 | ale.loadROM(rom_file) 35 | 36 | # Get the list of legal actions 37 | legal_actions = ale.getLegalActionSet() 38 | 39 | # Play 10 episodes 40 | for episode in range(10): 41 | total_reward = 0 42 | while not ale.game_over(): 43 | a = legal_actions[randrange(len(legal_actions))] 44 | # Apply an action and get the resulting reward 45 | reward = ale.act(a); 46 | total_reward += reward 47 | print('Episode %d ended with score: %d' % (episode, total_reward)) 48 | ale.reset_game() 49 | -------------------------------------------------------------------------------- /preparation/gym_example.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import os 4 | import sys 5 | 6 | from os.path import dirname, realpath 7 | sys.path.append(dirname(dirname(realpath(__file__)))) 8 | import helpers.logging_colorer as logging_colorer 9 | logging_colorer.init_logging() 10 | 11 | import gym 12 | 13 | # The world's simplest agent! 14 | class RandomAgent(object): 15 | def __init__(self, action_space): 16 | self.action_space = action_space 17 | 18 | def act(self, observation, reward, done): 19 | return self.action_space.sample() 20 | 21 | if __name__ == '__main__': 22 | parser = argparse.ArgumentParser(description=None) 23 | parser.add_argument('env_id', nargs='?', default='CartPole-v0', help='Select the environment to run') 24 | args = parser.parse_args() 25 | 26 | # Call `undo_logger_setup` if you want to undo Gym's logger setup 27 | # and configure things manually. (The default should be fine most 28 | # of the time.) 
29 | gym.undo_logger_setup() 30 | logging_colorer.init_logging() 31 | 32 | env = gym.make(args.env_id) 33 | 34 | # You provide the directory to write to (can be an existing 35 | # directory, including one with existing data -- all monitor files 36 | # will be namespaced). You can also dump to a tempdir if you'd 37 | # like: tempfile.mkdtemp(). 38 | outdir = '/tmp/random-agent-results' 39 | env.monitor.start(outdir, force=True, seed=0) 40 | 41 | # This declaration must go *after* the monitor call, since the 42 | # monitor's seeding creates a new action_space instance with the 43 | # appropriate pseudorandom number generator. 44 | agent = RandomAgent(env.action_space) 45 | 46 | episode_count = 100 47 | max_steps = 200 48 | reward = 0 49 | done = False 50 | 51 | for i in range(episode_count): 52 | ob = env.reset() 53 | 54 | for j in range(max_steps): 55 | action = agent.act(ob, reward, done) 56 | ob, reward, done, _ = env.step(action) 57 | if done: 58 | break 59 | # Note there's no env.render() here. But the environment still can open window and 60 | # render if asked by env.monitor: it calls env.render('rgb_array') to record video. 61 | # Video is not recorded every episode, see capped_cubic_video_schedule for details. 62 | 63 | # Dump result info to disk 64 | env.monitor.close() 65 | 66 | # Upload to the scoreboard. We could also do this from another 67 | # process if we wanted. 68 | logging.info("Successfully ran RandomAgent. Now trying to upload results to the scoreboard. If it breaks, you can always just try re-uploading the same results.") 69 | gym.upload(outdir, api_key='YOUR_API_KEY') 70 | -------------------------------------------------------------------------------- /preparation/space_invaders.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/preparation/space_invaders.bin -------------------------------------------------------------------------------- /preparation/tf_example.py: -------------------------------------------------------------------------------- 1 | ''' 2 | HelloWorld example using TensorFlow library. 3 | Author: Aymeric Damien 4 | Project: https://github.com/aymericdamien/TensorFlow-Examples/ 5 | ''' 6 | 7 | from __future__ import print_function 8 | 9 | import tensorflow as tf 10 | 11 | # Simple hello world using TensorFlow 12 | 13 | # Create a Constant op 14 | # The op is added as a node to the default graph. 15 | # 16 | # The value returned by the constructor represents the output 17 | # of the Constant op. 
18 | hello = tf.constant('Hello, TensorFlow!') 19 | 20 | # Start tf session 21 | sess = tf.Session() 22 | 23 | # Run the op 24 | print(sess.run(hello)) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # manually installed later: 2 | #ale-python-interface==0.0.1 3 | #tensorflow==0.9.0 4 | alabaster 5 | arch 6 | argcomplete 7 | Babel 8 | backports.shutil-get-terminal-size 9 | blosc 10 | CommonMark 11 | cycler==0.10.0 12 | dask==0.11.0 13 | decorator==4.0.10 14 | descartes 15 | docutils==0.12 16 | ete3==3.0.0b35 17 | filterpy==0.1.3 18 | humanize==0.5.1 19 | imagesize==0.7.1 20 | ipykernel 21 | ipympl 22 | ipython 23 | ipython-genutils 24 | ipywidgets 25 | Jinja2==2.8 26 | jsonschema==2.5.1 27 | jupyter 28 | jupyter-client 29 | jupyter-console 30 | jupyter-contrib-core 31 | jupyter-core 32 | jupyter-nbextensions-configurator 33 | Markdown 34 | MarkupSafe==0.23 35 | matplotlib 36 | mistune 37 | mpld3==0.2 38 | mpmath==0.19 39 | nbconvert 40 | nbformat 41 | networkx==1.11 42 | notebook 43 | numpy 44 | pandas 45 | patsy 46 | pexpect==4.0.1 47 | pickleshare 48 | Pillow==3.3.1 49 | prompt-toolkit 50 | protobuf 51 | ptyprocess==0.5.1 52 | Pygments 53 | pyparsing==2.1.4 54 | python-dateutil 55 | pytz 56 | PyYAML 57 | pyzmq 58 | qtconsole 59 | recommonmark 60 | requests 61 | scikit-image 62 | scikit-learn 63 | scipy 64 | seaborn==0.7.1 65 | simplegeneric 66 | six==1.10.0 67 | snowballstemmer==1.2.1 68 | Sphinx 69 | sphinx-rtd-theme==0.1.9 70 | statsmodels 71 | sympy==1.0 72 | terminado==0.6 73 | toolz==0.8.0 74 | tornado==4.3 75 | tqdm==4.7.6 76 | traitlets 77 | wcwidth 78 | widgetsnbextension==1.2.6 79 | -------------------------------------------------------------------------------- /showcase-studies/atari-roms/breakout.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/atari-roms/breakout.bin -------------------------------------------------------------------------------- /showcase-studies/atari-roms/pong.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/atari-roms/pong.bin -------------------------------------------------------------------------------- /showcase-studies/atari-roms/seaquest.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/atari-roms/seaquest.bin -------------------------------------------------------------------------------- /showcase-studies/atari-roms/space_invaders.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/atari-roms/space_invaders.bin -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/.directory: -------------------------------------------------------------------------------- 1 | [Dolphin] 2 | PreviewsShown=true 3 | Timestamp=2016,10,2,13,57,49 4 | Version=3 5 | ViewMode=1 6 | 7 | [Settings] 8 | HiddenFilesShown=true 9 | 
-------------------------------------------------------------------------------- /showcase-studies/dqn-gym/.gitignore: -------------------------------------------------------------------------------- 1 | training_results -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/Makefile: -------------------------------------------------------------------------------- 1 | train: 2 | # use default params 3 | python3 ./run_dqn.py breakout dqn brick_hunter 4 | 5 | quick_train: 6 | python ./run_dqn.py breakout dqn quick_train --memory_capacity 100000 --training_length=1000 --random_exploration_length=400 --test_games=3 --test_frequency=250 7 | quick_watch: 8 | python ./run_dqn.py breakout dqn quick_train --watch --test_games=3 -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/README.md: -------------------------------------------------------------------------------- 1 | Originally from https://github.com/Jabberwockyll/deep_rl_ale 2 | 3 | # deep_rl_ale 4 | This repo contains an implementation of [this paper](http://home.uchicago.edu/~arij/journalclub/papers/2015_Mnih_et_al.pdf) in TensorFlow. It also contains the option to use the [double DQN](http://arxiv.org/pdf/1509.06461v3.pdf) loss function, as well as a parallel version that acts and learns simultaneously to speed up training. 5 | 6 | [Watch it play Pong, Breakout, Space Invaders, and Seaquest here](https://youtu.be/gQ9FsAGb148) 7 | 8 | The code is still a little messy in some places and will be cleaned up in the future, but there will probably not be any significant updates or changes until mid-May. 9 | 10 | ## Dependencies/Requirements 11 | 12 | 1. An nVidia GPU with GDDR5 memory to train in a reasonable amount of time 13 | 2. [Python 3](https://www.python.org/) 14 | 3. [The Arcade Learning Environment](https://github.com/mgbellemare/Arcade-Learning-Environment) for the emulator framework. 15 | 4. [Tensorflow](https://www.tensorflow.org/) for GPU numerical computations and symbolic differentiation. 16 | 5. Linux/OSX, because Tensorflow doesn't support Windows. 17 | 6. [Matplotlib](http://matplotlib.org/) and [Seaborn](https://stanford.edu/~mwaskom/software/seaborn/) for visualizations. 18 | 7. [OpenCV](http://opencv.org/) for image scaling. Might switch to SciPy since OpenCV was a pain for me to install. 19 | 8. Any dependencies of the above software, of course, like NumPy. 20 | 21 | ## How to run 22 | 23 | From the top directory of the repo (the directory with the Python files): 24 | ### Training 25 | `$ python3 ./run_dqn.py <game> <agent_type> <save_name>` 26 | 27 | For example: 28 | 29 | `$ python3 ./run_dqn.py breakout dqn brick_hunter` 30 | 31 | ### Watching 32 | `$ python3 ./run_dqn.py <game> <agent_type> <save_name> --watch` 33 | Where \<save_name\> is the \<save_name\> used during training. If you used any non-default settings, make sure to use the same ones when watching as well. 34 | 35 | ## Running Notes 36 | 37 | You can change many hyperparameters/settings by entering optional arguments. 38 | To get a list of arguments: 39 | 40 | `$ python3 ./run_dqn.py --h` 41 | 42 | By default, ROM files are expected to be in a folder titled 'roms' in the parent directory of the repo. You can pass a different directory as an argument or change the default in run_dqn.py. 43 | 44 | Statistics and saved models are saved in the parent directory of the repo as well. 45 | 46 | The default settings are very similar to those used in the DeepMind Nature paper. There are only a few small differences of which I am aware.
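For readers curious about the double-DQN option mentioned above, the only conceptual change from vanilla DQN is how the bootstrap target is built: the online network chooses the next action and the target network evaluates it, which reduces the overestimation bias analyzed in the linked paper. Below is a minimal NumPy sketch of the two targets (illustrative function and argument names, not this repo's actual implementation; gamma=0.99 is a common default, not necessarily the value used here):

    import numpy as np

    def dqn_targets(rewards, next_q_target, terminals, gamma=0.99):
        # Vanilla DQN bootstraps from the target network's own best action.
        return rewards + gamma * (1.0 - terminals) * next_q_target.max(axis=1)

    def double_dqn_targets(rewards, next_q_online, next_q_target, terminals, gamma=0.99):
        # Double DQN: the online network picks the action, the target network scores it.
        best = next_q_online.argmax(axis=1)
        return rewards + gamma * (1.0 - terminals) * next_q_target[np.arange(len(best)), best]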
47 | 48 | A full training run takes between 3 and 4 days on my nVidia GTX 970, depending on whether or not the parallel option is used. Parallel training speeds up training by ~30%, but I'm still testing how different things impact speed. 49 | -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/atari_emulator.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Class for ale instances to generate experiences and test agents. 3 | Uses DeepMind's preproessing/initialization methods 4 | ''' 5 | import logging 6 | import random 7 | import sys 8 | 9 | from scipy import ndimage 10 | import numpy as np 11 | 12 | from ale_python_interface import ALEInterface 13 | 14 | 15 | class AtariEmulator: 16 | def __init__(self, args): 17 | ''' Initialize Atari environment ''' 18 | 19 | # Parameters 20 | self.buffer_length = args.buffer_length 21 | self.screen_dims = args.screen_dims 22 | self.frame_skip = args.frame_skip 23 | self.blend_method = args.blend_method 24 | self.reward_processing = args.reward_processing 25 | self.max_start_wait = args.max_start_wait 26 | self.history_length = args.history_length 27 | self.start_frames_needed = self.buffer_length - 1 + ( 28 | (args.history_length - 1) * self.frame_skip) 29 | 30 | # Initialize ALE instance 31 | self.ale = ALEInterface() 32 | self.ale.setFloat(b'repeat_action_probability', 0.0) 33 | if args.watch: 34 | self.ale.setBool(b'sound', True) 35 | self.ale.setBool(b'display_screen', True) 36 | self.ale.loadROM(str.encode(args.rom_path + '/' + args.game + '.bin')) 37 | 38 | self.buffer = np.empty((self.buffer_length, 210, 160)) 39 | self.current = 0 40 | self.action_set = self.ale.getMinimalActionSet() 41 | self.lives = self.ale.lives() 42 | 43 | self.reset() 44 | 45 | def get_possible_actions(self): 46 | ''' Return list of possible actions for game ''' 47 | return self.action_set 48 | 49 | def get_screen(self): 50 | ''' Add screen to frame buffer ''' 51 | self.buffer[self.current] = np.squeeze(self.ale.getScreenGrayscale()) 52 | self.current = (self.current + 1) % self.buffer_length 53 | 54 | def reset(self): 55 | self.ale.reset_game() 56 | self.lives = self.ale.lives() 57 | 58 | if self.max_start_wait < 0: 59 | logging.error("ERROR: max start wait decreased beyond 0") 60 | sys.exit() 61 | elif self.max_start_wait <= self.start_frames_needed: 62 | wait = 0 63 | else: 64 | wait = random.randint(0, 65 | self.max_start_wait - self.start_frames_needed) 66 | for _ in range(wait): 67 | self.ale.act(self.action_set[0]) 68 | 69 | # Fill frame buffer 70 | self.get_screen() 71 | for _ in range(self.buffer_length - 1): 72 | self.ale.act(self.action_set[0]) 73 | self.get_screen() 74 | # get initial_states 75 | state = [(self.preprocess(), 0, 0, False)] 76 | for step in range(self.history_length - 1): 77 | state.append(self.run_step(0)) 78 | 79 | # make sure agent hasn't died yet 80 | if self.isTerminal(): 81 | logging.info( 82 | "Agent lost during start wait. 
Decreasing max_start_wait by 1") 83 | self.max_start_wait -= 1 84 | return self.reset() 85 | 86 | return state 87 | 88 | def run_step(self, action): 89 | ''' Apply action to game and return next screen and reward ''' 90 | 91 | raw_reward = 0 92 | for step in range(self.frame_skip): 93 | raw_reward += self.ale.act(self.action_set[action]) 94 | self.get_screen() 95 | 96 | reward = None 97 | if self.reward_processing == 'clip': 98 | reward = np.clip(raw_reward, -1, 1) 99 | else: 100 | reward = raw_reward 101 | 102 | terminal = self.isTerminal() 103 | self.lives = self.ale.lives() 104 | 105 | return (self.preprocess(), action, reward, terminal, raw_reward) 106 | 107 | def preprocess(self): 108 | ''' Preprocess frame for agent ''' 109 | 110 | img = None 111 | 112 | if self.blend_method == "max": 113 | img = np.amax(self.buffer, axis=0) 114 | 115 | # no idea where these numbers come from... 116 | img = ndimage.zoom(img, (0.4, 0.525)) 117 | return img 118 | 119 | def isTerminal(self): 120 | return (self.isGameOver() or (self.lives > self.ale.lives())) 121 | 122 | def isGameOver(self): 123 | return self.ale.game_over() 124 | -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/dqn_agent.py: -------------------------------------------------------------------------------- 1 | import experiment 2 | from visuals import Visuals 3 | 4 | 5 | import random 6 | import logging 7 | import numpy as np 8 | from tqdm import tqdm 9 | 10 | 11 | class DQNAgent(): 12 | def __init__(self, args, q_network, 13 | train_emulator, test_emulator, 14 | experience_memory, num_actions, 15 | train_stats, test_stats): 16 | 17 | self.network = q_network 18 | self.train_emulator = train_emulator 19 | self.test_emulator = test_emulator 20 | 21 | self.memory = experience_memory 22 | self.train_stats = train_stats 23 | self.test_stats = test_stats 24 | 25 | self.num_actions = num_actions 26 | self.history_length = args.history_length 27 | 28 | self.training_frequency = args.training_frequency 29 | self.random_exploration_length = args.random_exploration_length 30 | self.training_length = args.training_length 31 | self.initial_exploration_rate = args.initial_exploration_rate 32 | self.final_exploration_rate = args.final_exploration_rate 33 | self.final_exploration_frame = args.final_exploration_frame 34 | self.test_exploration_rate = args.test_exploration_rate 35 | self.recording_frequency = args.recording_frequency 36 | self.test_frequency = args.test_frequency 37 | 38 | self.exploration_rate = self.initial_exploration_rate 39 | self.total_steps = 0 40 | 41 | self.args = args 42 | 43 | self.test_state = [] 44 | 45 | logging.info("DQN Agent Initialized") 46 | 47 | def choose_action(self): 48 | if random.random() >= self.exploration_rate: 49 | state = self.memory.get_current_state() 50 | q_values = self.network.inference(state) 51 | self.train_stats.add_q_values(q_values) 52 | return np.argmax(q_values) 53 | else: 54 | return random.randrange(self.num_actions) 55 | 56 | def checkGameOver(self): 57 | if self.train_emulator.isGameOver(): 58 | initial_state = self.train_emulator.reset() 59 | for experience in initial_state: 60 | self.memory.add(experience[0], experience[1], experience[2], 61 | experience[3]) 62 | self.train_stats.add_game() 63 | 64 | def run_random_exploration(self): 65 | for step in tqdm(range(self.random_exploration_length)): 66 | state, action, reward, terminal, raw_reward = self.train_emulator.run_step( 67 | random.randrange(self.num_actions)) 68 | 
self.train_stats.add_reward(raw_reward) 69 | self.memory.add(state, action, reward, terminal) 70 | self.checkGameOver() 71 | self.total_steps += 1 72 | if (self.total_steps % self.recording_frequency == 0): 73 | self.train_stats.record(self.total_steps) 74 | 75 | def run_training(self): 76 | # show pbars only if not evaluating agent 77 | pbar = tqdm() 78 | for step in range(self.training_length): 79 | # test agent 80 | if step % self.test_frequency == 0: 81 | pbar.close() 82 | experiment.evaluate_agent(self.args, self, self.test_emulator, self.test_stats) 83 | self.save_model(step) 84 | logging.info("Training... (%d/%d) " % (step, self.training_length)) 85 | pbar = tqdm(total=min(self.test_frequency, self.training_length), unit="step") 86 | pbar.update(1) 87 | 88 | # play step 89 | state, action, reward, terminal, raw_reward = self.train_emulator.run_step( 90 | self.choose_action()) 91 | self.train_stats.add_reward(raw_reward) 92 | self.memory.add(state, action, reward, terminal) 93 | self.checkGameOver() 94 | 95 | # training 96 | if self.total_steps % self.training_frequency == 0: 97 | states, actions, rewards, next_states, terminals = self.memory.get_batch() 98 | loss = self.network.train(states, actions, rewards, next_states, 99 | terminals) 100 | self.train_stats.add_loss(loss) 101 | 102 | self.total_steps += 1 103 | 104 | if self.total_steps < self.final_exploration_frame: 105 | self.exploration_rate -= ( 106 | self.exploration_rate - self.final_exploration_rate) / ( 107 | self.final_exploration_frame - self.total_steps) 108 | 109 | if self.total_steps % self.recording_frequency == 0: 110 | self.train_stats.record(self.total_steps) 111 | self.network.record_params(self.total_steps) 112 | 113 | pbar.close() 114 | 115 | def test_step(self, observation): 116 | if len(self.test_state) < self.history_length: 117 | self.test_state.append(observation) 118 | 119 | # choose action 120 | q_values = None 121 | action = None 122 | if random.random() >= self.test_exploration_rate: 123 | state = np.expand_dims(np.transpose(self.test_state, [1, 2, 0]), 124 | axis=0) 125 | q_values = self.network.inference(state) 126 | action = np.argmax(q_values) 127 | else: 128 | action = random.randrange(self.num_actions) 129 | 130 | self.test_state.pop(0) 131 | return [action, q_values] 132 | 133 | def save_model(self, step): 134 | self.network.save_model(step) 135 | 136 | def run_experiment(self): 137 | logging.info("Running random exploration") 138 | self.run_random_exploration() 139 | 140 | self.train_emulator.reset() 141 | self.run_training() 142 | -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/experience_memory.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ExperienceMemory is a class for experience replay. 3 | It stores experience samples and samples minibatches for training. 4 | ''' 5 | 6 | import random 7 | 8 | import numpy as np 9 | 10 | 11 | class ExperienceMemory: 12 | def __init__(self, args, num_actions): 13 | ''' Initialize emtpy experience dataset. 
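        Observations, actions, rewards and terminal flags are kept in
        pre-allocated circular buffers of length memory_capacity;
        self.current is the next write position and self.size grows
        until the buffer is full, after which the oldest entries are
        overwritten.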
''' 14 | 15 | # params 16 | self.capacity = args.memory_capacity 17 | self.history_length = args.history_length 18 | self.batch_size = args.batch_size 19 | self.num_actions = num_actions 20 | self.screen_dims = args.screen_dims 21 | 22 | # initialize dataset 23 | self.observations = np.empty( 24 | (self.capacity, self.screen_dims[0], self.screen_dims[1]), 25 | dtype=np.uint8) 26 | self.actions = np.empty(self.capacity, dtype=np.uint8) 27 | self.rewards = np.empty(self.capacity, dtype=np.integer) 28 | self.terminals = np.empty(self.capacity, dtype=np.bool) 29 | 30 | self.size = 0 31 | self.current = 0 32 | 33 | def add(self, obs, act, reward, terminal): 34 | ''' Add experience to dataset. 35 | 36 | Args: 37 | obs: single observation frame 38 | act: action taken 39 | reward: reward 40 | terminal: is this a terminal state? 41 | ''' 42 | 43 | self.observations[self.current] = obs 44 | self.actions[self.current] = act 45 | self.rewards[self.current] = reward 46 | self.terminals[self.current] = terminal 47 | 48 | self.current = (self.current + 1) % self.capacity 49 | if self.size == self.capacity - 1: 50 | self.size = self.capacity 51 | else: 52 | self.size = max(self.size, self.current) 53 | 54 | def get_state(self, indices): 55 | ''' Return the observation sequence that ends at index 56 | 57 | Args: 58 | indices: list of last observations in sequences 59 | ''' 60 | state = np.empty(( 61 | len(indices), self.screen_dims[0], self.screen_dims[1], 62 | self.history_length)) 63 | count = 0 64 | 65 | for index in indices: 66 | frame_slice = np.arange(index - self.history_length + 1, 67 | (index + 1)) 68 | state[count] = np.transpose( 69 | np.take(self.observations, frame_slice, axis=0), [1, 2, 0]) 70 | count += 1 71 | return state 72 | 73 | def get_current_state(self): 74 | ''' Return most recent observation sequence ''' 75 | 76 | return self.get_state([(self.current - 1) % self.capacity]) 77 | 78 | def get_batch(self): 79 | ''' Sample minibatch of experiences for training ''' 80 | 81 | samples = [] # indices of the end of each sample 82 | 83 | while len(samples) < self.batch_size: 84 | 85 | if self.size < self.capacity: # make this better 86 | index = random.randrange(self.history_length, self.current) 87 | else: 88 | # make sure state from index doesn't overlap with current's gap 89 | index = (self.current + random.randrange(self.history_length, 90 | self.size - 1)) % self.capacity 91 | # make sure no terminal observations are in the first state 92 | if self.terminals[(index - self.history_length):index].any(): 93 | continue 94 | else: 95 | samples.append(index) 96 | # endwhile 97 | samples = np.asarray(samples) 98 | 99 | # create batch 100 | o1 = self.get_state((samples - 1) % self.capacity) 101 | a = np.eye(self.num_actions)[ 102 | self.actions[samples]] # convert actions to one-hot matrix 103 | r = self.rewards[samples] 104 | o2 = self.get_state(samples) 105 | t = self.terminals[samples].astype(int) 106 | return [o1, a, r, o2, t] 107 | -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/experiment.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from tqdm import tqdm 3 | 4 | from visuals import Visuals 5 | 6 | 7 | def evaluate_agent(args, agent, test_emulator, test_stats): 8 | logging.info("Evaluating agent performace in test emulator") 9 | step = 0 10 | total_reward = 0.0 11 | reset = test_emulator.reset() 12 | agent.test_state = list(next(zip(*reset))) 13 | screen = 
test_emulator.preprocess() 14 | visuals = None 15 | if args.watch: 16 | visuals = Visuals(test_emulator.get_possible_actions()) 17 | 18 | # either play as many steps as possible or as many games 19 | for _ in tqdm(range(args.test_games), unit="game"): 20 | while not test_emulator.isGameOver() and step < args.test_steps: 21 | action, q_values = agent.test_step(screen) 22 | screen, action, reward, terminal, raw_reward = test_emulator.run_step(action) 23 | total_reward += raw_reward 24 | 25 | # record stats 26 | if not (test_stats is None): 27 | test_stats.add_reward(raw_reward) 28 | if not (q_values is None): 29 | test_stats.add_q_values(q_values) 30 | # endif 31 | # endif 32 | 33 | # update visuals 34 | if args.watch and (not (q_values is None)): 35 | visuals.update(q_values) 36 | 37 | step += 1 38 | # endwhile 39 | if not (test_stats is None): 40 | test_stats.add_game() 41 | reset = test_emulator.reset() 42 | agent.test_state = list(next(zip(*reset))) 43 | 44 | return total_reward / args.test_games 45 | -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/logging_colorer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Console coloring of logs 3 | """ 4 | 5 | # thanks stackoverflow :-P https://stackoverflow.com/questions/384076/how-can-i-color-python-logging-output 6 | import logging 7 | 8 | # now we patch Python code to add color support to logging.StreamHandler 9 | import sys 10 | 11 | 12 | def add_coloring_to_emit_windows(fn): 13 | # add methods we need to the class 14 | def _out_handle(self): 15 | import ctypes 16 | return ctypes.windll.kernel32.GetStdHandle(self.STD_OUTPUT_HANDLE) 17 | 18 | out_handle = property(_out_handle) 19 | 20 | def _set_color(self, code): 21 | import ctypes 22 | # Constants from the Windows API 23 | self.STD_OUTPUT_HANDLE = -11 24 | hdl = ctypes.windll.kernel32.GetStdHandle(self.STD_OUTPUT_HANDLE) 25 | ctypes.windll.kernel32.SetConsoleTextAttribute(hdl, code) 26 | 27 | setattr(logging.StreamHandler, '_set_color', _set_color) 28 | 29 | def new(*args): 30 | FOREGROUND_BLUE = 0x0001 # text color contains blue. 31 | FOREGROUND_GREEN = 0x0002 # text color contains green. 32 | FOREGROUND_RED = 0x0004 # text color contains red. 33 | FOREGROUND_INTENSITY = 0x0008 # text color is intensified. 34 | FOREGROUND_WHITE = FOREGROUND_BLUE | FOREGROUND_GREEN | FOREGROUND_RED 35 | # winbase.h 36 | STD_INPUT_HANDLE = -10 37 | STD_OUTPUT_HANDLE = -11 38 | STD_ERROR_HANDLE = -12 39 | 40 | # wincon.h 41 | FOREGROUND_BLACK = 0x0000 42 | FOREGROUND_BLUE = 0x0001 43 | FOREGROUND_GREEN = 0x0002 44 | FOREGROUND_CYAN = 0x0003 45 | FOREGROUND_RED = 0x0004 46 | FOREGROUND_MAGENTA = 0x0005 47 | FOREGROUND_YELLOW = 0x0006 48 | FOREGROUND_GREY = 0x0007 49 | FOREGROUND_INTENSITY = 0x0008 # foreground color is intensified. 50 | 51 | BACKGROUND_BLACK = 0x0000 52 | BACKGROUND_BLUE = 0x0010 53 | BACKGROUND_GREEN = 0x0020 54 | BACKGROUND_CYAN = 0x0030 55 | BACKGROUND_RED = 0x0040 56 | BACKGROUND_MAGENTA = 0x0050 57 | BACKGROUND_YELLOW = 0x0060 58 | BACKGROUND_GREY = 0x0070 59 | BACKGROUND_INTENSITY = 0x0080 # background color is intensified. 
60 | 61 | levelno = args[1].levelno 62 | if (levelno >= 50): 63 | color = BACKGROUND_YELLOW | FOREGROUND_RED | FOREGROUND_INTENSITY | BACKGROUND_INTENSITY 64 | elif (levelno >= 40): 65 | color = FOREGROUND_RED | FOREGROUND_INTENSITY 66 | elif (levelno >= 30): 67 | color = FOREGROUND_YELLOW | FOREGROUND_INTENSITY 68 | elif (levelno >= 20): 69 | color = FOREGROUND_GREEN 70 | elif (levelno >= 10): 71 | color = FOREGROUND_MAGENTA 72 | else: 73 | color = FOREGROUND_WHITE 74 | args[0]._set_color(color) 75 | 76 | ret = fn(*args) 77 | args[0]._set_color(FOREGROUND_WHITE) 78 | # print "after" 79 | return ret 80 | 81 | return new 82 | 83 | 84 | def add_coloring_to_emit_ansi(fn): 85 | # add methods we need to the class 86 | def new(*args): 87 | levelno = args[1].levelno 88 | if (levelno >= 50): 89 | color = '\x1b[31m' # red 90 | elif (levelno >= 40): 91 | color = '\x1b[31m' # red 92 | elif (levelno >= 30): 93 | color = '\x1b[33m' # yellow 94 | elif (levelno >= 20): 95 | color = '\x1b[32m' # green 96 | elif (levelno >= 10): 97 | color = '\x1b[35m' # pink 98 | else: 99 | color = '\x1b[0m' # normal 100 | args[1].msg = color + args[1].msg[:200] + '\x1b[0m' # normal 101 | # print "after" 102 | return fn(*args) 103 | 104 | return new 105 | 106 | 107 | import platform 108 | 109 | if platform.system() == 'Windows': 110 | # Windows does not support ANSI escapes and we are using API calls to set the console color 111 | logging.StreamHandler.emit = add_coloring_to_emit_windows( 112 | logging.StreamHandler.emit) 113 | else: 114 | # all non-Windows platforms are supporting ANSI escapes so we use them 115 | logging.StreamHandler.emit = add_coloring_to_emit_ansi( 116 | logging.StreamHandler.emit) 117 | # log = logging.getLogger() 118 | # log.addFilter(log_filter()) 119 | # //hdlr = logging.StreamHandler() 120 | # //hdlr.setFormatter(formatter()) 121 | 122 | 123 | def init_logging(verbose=True, log_file='app.log'): 124 | """ 125 | Initialize settings for Python logger 126 | 127 | This allows for logging into console as well as specified log_file. 128 | After you can use in the code just:: 129 | 130 | import logging 131 | logging.info("hello world!") 132 | # will produce 133 | # 2016-07-19 16:13:02,931 [MainThread ][INFO ]: hello world! 
134 | 135 | :param bool verbose: 136 | :param str log_file: 137 | """ 138 | 139 | # let's log the same output to console and to file 140 | log_file = log_file 141 | format = "%(asctime)s [%(threadName)-12.12s][%(levelname)-5.5s]: %(message)s" 142 | 143 | root_logger = logging.getLogger() 144 | logging.basicConfig( 145 | format=format, 146 | level=logging.DEBUG if verbose else logging.WARNING, 147 | stream=sys.stderr) 148 | 149 | log_formatter = logging.Formatter(format) 150 | file_handler = logging.FileHandler(log_file) 151 | file_handler.setFormatter(log_formatter) 152 | root_logger.addHandler(file_handler) 153 | -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/parallel_dqn_agent.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import random 3 | import threading 4 | 5 | import numpy as np 6 | 7 | 8 | class ParallelDQNAgent(): 9 | def __init__(self, args, q_network, emulator, experience_memory, 10 | num_actions, train_stats): 11 | 12 | self.network = q_network 13 | self.emulator = emulator 14 | self.memory = experience_memory 15 | self.train_stats = train_stats 16 | 17 | self.num_actions = num_actions 18 | self.history_length = args.history_length 19 | self.training_frequency = args.training_frequency 20 | self.random_exploration_length = args.random_exploration_length 21 | self.initial_exploration_rate = args.initial_exploration_rate 22 | self.final_exploration_rate = args.final_exploration_rate 23 | self.final_exploration_frame = args.final_exploration_frame 24 | self.test_exploration_rate = args.test_exploration_rate 25 | self.recording_frequency = args.recording_frequency 26 | 27 | self.exploration_rate = self.initial_exploration_rate 28 | self.total_steps = 0 29 | self.train_steps = 0 30 | self.current_act_steps = 0 31 | self.current_train_steps = 0 32 | 33 | self.test_state = [] 34 | self.epoch_over = False 35 | 36 | def choose_action(self): 37 | 38 | if random.random() >= self.exploration_rate: 39 | state = self.memory.get_current_state() 40 | q_values = self.network.inference(state) 41 | self.train_stats.add_q_values(q_values) 42 | return np.argmax(q_values) 43 | else: 44 | return random.randrange(self.num_actions) 45 | 46 | def checkGameOver(self): 47 | if self.emulator.isGameOver(): 48 | initial_state = self.emulator.reset() 49 | for experience in initial_state: 50 | self.memory.add(experience[0], experience[1], experience[2], 51 | experience[3]) 52 | self.train_stats.add_game() 53 | 54 | def run_random_exploration(self): 55 | 56 | for step in range(self.random_exploration_length): 57 | 58 | state, action, reward, terminal, raw_reward = self.emulator.run_step( 59 | random.randrange(self.num_actions)) 60 | self.train_stats.add_reward(raw_reward) 61 | self.memory.add(state, action, reward, terminal) 62 | self.checkGameOver() 63 | self.total_steps += 1 64 | self.current_act_steps += 1 65 | if (self.total_steps % self.recording_frequency == 0): 66 | self.train_stats.record(self.total_steps) 67 | 68 | def train(self, steps): 69 | 70 | for step in range(steps): 71 | states, actions, rewards, next_states, terminals = self.memory.get_batch() 72 | loss = self.network.train(states, actions, rewards, next_states, 73 | terminals) 74 | self.train_stats.add_loss(loss) 75 | self.train_steps += 1 76 | self.current_train_steps += 1 77 | 78 | if self.train_steps < ( 79 | self.final_exploration_frame / self.training_frequency): 80 | self.exploration_rate -= ( 81 | 
self.exploration_rate - self.final_exploration_rate) / ( 82 | ( 83 | self.final_exploration_frame / self.training_frequency) - self.train_steps) 84 | 85 | if (( 86 | self.train_steps * self.training_frequency) % self.recording_frequency == 0) and not ( 87 | step == steps - 1): 88 | self.train_stats.record(self.random_exploration_length + ( 89 | self.train_steps * self.training_frequency)) 90 | self.network.record_params(self.random_exploration_length + ( 91 | self.train_steps * self.training_frequency)) 92 | 93 | self.epoch_over = True 94 | 95 | def run_epoch(self, steps, epoch): 96 | 97 | self.epoch_over = False 98 | threading.Thread(target=self.train, 99 | args=(int(steps / self.training_frequency),)).start() 100 | 101 | while not self.epoch_over: 102 | state, action, reward, terminal, raw_reward = self.emulator.run_step( 103 | self.choose_action()) 104 | self.memory.add(state, action, reward, terminal) 105 | self.train_stats.add_reward(raw_reward) 106 | self.checkGameOver() 107 | 108 | self.total_steps += 1 109 | self.current_act_steps += 1 110 | 111 | logging.info("act_steps: {0}".format(self.current_act_steps)) 112 | logging.info("learn_steps: {0}".format(self.current_train_steps)) 113 | self.train_stats.record(self.random_exploration_length + ( 114 | self.train_steps * self.training_frequency)) 115 | self.network.record_params(self.random_exploration_length + ( 116 | self.train_steps * self.training_frequency)) 117 | self.network.save_model(epoch) 118 | self.current_act_steps = 0 119 | self.current_train_steps = 0 120 | 121 | def test_step(self, observation): 122 | 123 | if len(self.test_state) < self.history_length: 124 | self.test_state.append(observation) 125 | 126 | # choose action 127 | q_values = None 128 | action = None 129 | if random.random() >= self.test_exploration_rate: 130 | state = np.expand_dims(np.transpose(self.test_state, [1, 2, 0]), 131 | axis=0) 132 | q_values = self.network.gpu_inference(state) 133 | action = np.argmax(q_values) 134 | else: 135 | action = random.randrange(self.num_actions) 136 | 137 | self.test_state.pop(0) 138 | return [action, q_values] 139 | 140 | def save_model(self, epoch): 141 | self.network.save_model(epoch) 142 | -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/record_stats.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import tensorflow as tf 4 | import numpy as np 5 | 6 | 7 | class RecordStats: 8 | def __init__(self, args, test): 9 | 10 | self.test = test 11 | self.reward = 0 12 | self.step_count = 0 13 | self.loss = 0.0 14 | self.loss_count = 0 15 | self.games = 0 16 | self.q_values = 0.0 17 | self.q_count = 0 18 | self.current_score = 0 19 | self.max_score = -1000000000 20 | self.min_score = 1000000000 21 | self.recording_frequency = args.recording_frequency 22 | 23 | with tf.device('/cpu:0'): 24 | self.spg = tf.placeholder(tf.float32, shape=[], 25 | name="score_per_game") 26 | self.mean_q = tf.placeholder(tf.float32, shape=[]) 27 | self.total_gp = tf.placeholder(tf.float32, shape=[]) 28 | self.max_r = tf.placeholder(tf.float32, shape=[]) 29 | self.min_r = tf.placeholder(tf.float32, shape=[]) 30 | self.time = tf.placeholder(tf.float32, shape=[]) 31 | 32 | self.spg_summ = tf.scalar_summary('score_per_game', self.spg) 33 | self.q_summ = tf.scalar_summary('q_values', self.mean_q) 34 | self.gp_summ = tf.scalar_summary('steps_per_game', self.total_gp) 35 | self.max_summ = tf.scalar_summary('maximum_score', self.max_r) 36 
| self.min_summ = tf.scalar_summary('minimum_score', self.min_r) 37 | self.time_summ = tf.scalar_summary('steps_per_second', self.time) 38 | 39 | if not test: 40 | self.mean_l = tf.placeholder(tf.float32, shape=[], name='loss') 41 | self.l_summ = tf.scalar_summary('loss', self.mean_l) 42 | self.summary_op = tf.merge_summary( 43 | [self.spg_summ, self.q_summ, self.gp_summ, self.l_summ, 44 | self.max_summ, self.min_summ, self.time_summ]) 45 | self.path = ( 46 | args.save_path + '/records/' + args.game + '/' + args.agent_type + '/' + args.agent_name + '/train') 47 | else: 48 | self.summary_op = tf.merge_summary( 49 | [self.spg_summ, self.q_summ, self.gp_summ, self.max_summ, 50 | self.min_summ, self.time_summ]) 51 | self.path = ( 52 | args.save_path + '/records/' + args.game + '/' + args.agent_type + '/' + args.agent_name + '/test') 53 | 54 | # self.summary_op = tf.merge_all_summaries() 55 | self.sess = tf.Session() 56 | self.summary_writer = tf.train.SummaryWriter(self.path) 57 | self.start_time = time.time() 58 | 59 | def record(self, epoch): 60 | 61 | seconds = time.time() - self.start_time 62 | 63 | avg_loss = 0 64 | if self.loss_count != 0: 65 | avg_loss = self.loss / self.loss_count 66 | # print("average loss: {0}".format(avg_loss)) 67 | 68 | mean_q_values = 0 69 | if self.q_count > 0: 70 | mean_q_values = self.q_values / self.q_count 71 | # print("average q_values: {0}".format(mean_q_values)) 72 | 73 | score_per_game = 0.0 74 | steps_per_game = 0 75 | 76 | if self.games == 0: 77 | score_per_game = self.reward 78 | steps_per_game = self.step_count 79 | else: 80 | score_per_game = self.reward / self.games 81 | steps_per_game = self.step_count / self.games 82 | 83 | score_per_game = float(score_per_game) 84 | 85 | if not self.test: 86 | step_per_sec = self.recording_frequency / seconds 87 | summary_str = self.sess.run(self.summary_op, 88 | feed_dict={self.spg: score_per_game, 89 | self.mean_l: avg_loss, 90 | self.mean_q: mean_q_values, 91 | self.total_gp: steps_per_game, 92 | self.max_r: self.max_score, 93 | self.min_r: self.min_score, 94 | self.time: step_per_sec}) 95 | self.summary_writer.add_summary(summary_str, global_step=epoch) 96 | else: 97 | step_per_sec = self.step_count / seconds 98 | summary_str = self.sess.run(self.summary_op, 99 | feed_dict={self.spg: score_per_game, 100 | self.mean_q: mean_q_values, 101 | self.total_gp: steps_per_game, 102 | self.max_r: self.max_score, 103 | self.min_r: self.min_score, 104 | self.time: step_per_sec}) 105 | self.summary_writer.add_summary(summary_str, global_step=epoch) 106 | current_score = 0 107 | 108 | self.reward = 0 109 | self.step_count = 0 110 | self.loss = 0 111 | self.loss_count = 0 112 | self.games = 0 113 | self.q_values = 0 114 | self.q_count = 0 115 | self.max_score = -1000000000 116 | self.min_score = 1000000000 117 | 118 | def add_reward(self, r): 119 | self.reward += r 120 | self.current_score += r 121 | 122 | if self.step_count == 0: 123 | self.start_time = time.time() 124 | 125 | self.step_count += 1 126 | 127 | def add_loss(self, l): 128 | self.loss += l 129 | self.loss_count += 1 130 | 131 | def add_game(self): 132 | self.games += 1 133 | 134 | if self.current_score > self.max_score: 135 | self.max_score = self.current_score 136 | if self.current_score < self.min_score: 137 | self.min_score = self.current_score 138 | 139 | self.current_score = 0 140 | 141 | def add_q_values(self, q_vals): 142 | mean_q = np.mean(q_vals) 143 | self.q_values += mean_q 144 | self.q_count += 1 145 | 
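146 | # Usage sketch (illustrative only; these calls are made by the DQN agent, and the 147 | # names `reward`, `loss`, `game_over` and `total_steps` below are placeholders for 148 | # values supplied by the surrounding training loop): 149 | # 150 | #     stats = RecordStats(args, test=False) 151 | #     stats.add_reward(reward)      # raw (unclipped) reward for the current step 152 | #     stats.add_loss(loss)          # loss of the latest training batch 153 | #     if game_over: 154 | #         stats.add_game()          # updates max/min score, resets the per-game score 155 | #     stats.record(total_steps)     # writes one summary point and resets the accumulators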
-------------------------------------------------------------------------------- /showcase-studies/dqn-gym/states_examples/state_0_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/states_examples/state_0_0.png -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/states_examples/state_0_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/states_examples/state_0_1.png -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/states_examples/state_0_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/states_examples/state_0_2.png -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/states_examples/state_0_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/states_examples/state_0_3.png -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/states_examples/state_12_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/states_examples/state_12_0.png -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/states_examples/state_12_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/states_examples/state_12_1.png -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/states_examples/state_12_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/states_examples/state_12_2.png -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/states_examples/state_12_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/states_examples/state_12_3.png -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/states_examples/state_19_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/states_examples/state_19_0.png -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/states_examples/state_19_1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/states_examples/state_19_1.png -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/states_examples/state_19_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/states_examples/state_19_2.png -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/states_examples/state_19_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/states_examples/state_19_3.png -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/states_examples/state_6_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/states_examples/state_6_0.png -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/states_examples/state_6_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/states_examples/state_6_1.png -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/states_examples/state_6_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/states_examples/state_6_2.png -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/states_examples/state_6_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/states_examples/state_6_3.png -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/training_results/saved_models/breakout/dqn/brick_hunter_example/brick_hunter_qsub.ckpt-24: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/training_results/saved_models/breakout/dqn/brick_hunter_example/brick_hunter_qsub.ckpt-24 -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/training_results/saved_models/breakout/dqn/brick_hunter_example/brick_hunter_qsub.ckpt-24.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/training_results/saved_models/breakout/dqn/brick_hunter_example/brick_hunter_qsub.ckpt-24.meta -------------------------------------------------------------------------------- 
/showcase-studies/dqn-gym/training_results/saved_models/breakout/dqn/brick_hunter_example/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "brick_hunter_qsub.ckpt-24" 2 | all_model_checkpoint_paths: "brick_hunter_qsub.ckpt-24" 3 | -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/visuals.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib 3 | 4 | matplotlib.use('TKAgg') 5 | from matplotlib import pyplot as plt 6 | import seaborn as sns 7 | 8 | 9 | class Visuals: 10 | def __init__(self, actions): 11 | 12 | all_action_names = ['no-op', 'fire', 'up', 'right', 'left', 'down', 13 | 'up_right', 'up_left', 'down-right', 'down-left', 14 | 'up-fire', 'right-fire', 'left-fire', 'down-fire', 15 | 'up-right-fire', 'up-left-fire', 'down-right-fire', 16 | 'down-left-fire'] 17 | 18 | action_names = [all_action_names[i] for i in actions] 19 | self.num_actions = len(actions) 20 | self.max_q = 1 21 | self.min_q = 0 22 | # self.max_avg_q = 1 23 | 24 | xlocations = np.linspace(0.5, self.num_actions - 0.5, 25 | num=self.num_actions) 26 | xlocations = np.append(xlocations, self.num_actions + 0.05) 27 | if self.num_actions > 7: 28 | self.fig = plt.figure(figsize=(self.num_actions * 1.1, 6.0)) 29 | else: 30 | self.fig = plt.figure() 31 | self.bars = plt.bar(np.arange(self.num_actions), 32 | np.zeros(self.num_actions), 0.9) 33 | plt.xticks(xlocations, action_names + ['']) 34 | plt.ylabel('Expected Future Reward') 35 | plt.xlabel('Action') 36 | plt.title("State-Action Values") 37 | color_palette = sns.color_palette(n_colors=self.num_actions) 38 | for bar, color in zip(self.bars, color_palette): 39 | bar.set_color(color) 40 | self.fig.show() 41 | 42 | def update(self, q_values): 43 | 44 | for bar, q_value in zip(self.bars, q_values): 45 | bar.set_height(q_value) 46 | step_max = np.amax(q_values) 47 | step_min = np.amin(q_values) 48 | if step_max > self.max_q: 49 | self.max_q = step_max 50 | plt.gca().set_ylim([self.min_q, self.max_q]) 51 | if step_min < self.min_q: 52 | self.min_q = step_min 53 | plt.gca().set_ylim([self.min_q, self.max_q]) 54 | 55 | self.fig.canvas.draw() 56 | -------------------------------------------------------------------------------- /showcase-studies/dqn-simple/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | out-* 3 | training-progression 4 | __pycache__ -------------------------------------------------------------------------------- /showcase-studies/dqn-simple/Makefile: -------------------------------------------------------------------------------- 1 | train: 2 | python ./play_atari.py ./../atari-roms/space_invaders.bin | tee train.log -------------------------------------------------------------------------------- /showcase-studies/dqn-simple/README.md: -------------------------------------------------------------------------------- 1 | Originally from https://github.com/gtoubassi/dqn-atari 2 | 3 | # DQN Atari 4 | 5 | [![Click to play video](https://img.youtube.com/vi/DqzSrEuA2Jw/1.jpg)](https://www.youtube.com/watch?v=DqzSrEuA2Jw) 6 | 7 | This repo represents my attempt to reproduce the DeepMind Atari playing agent described in the recent [Nature paper](http://home.uchicago.edu/~arij/journalclub/papers/2015_Mnih_et_al.pdf). 
8 | 9 | While the DeepMind implementation is built in [lua with torch7](https://github.com/kuz/DeepMind-Atari-Deep-Q-Learner), this implementation uses [TensorFlow](http://tensorflow.org). Like DeepMind, it also depends on the [Arcade Learning Environment](http://www.arcadelearningenvironment.org/) (technically I believe DeepMind uses their [Xitari](https://github.com/deepmind/xitari) fork of ALE). 10 | 11 | ### Results 12 | 13 | I have been focused on attempting to match DeepMind's performance on Space Invaders, which in their publication is 1976+/-800, though I do not know exactly how they compute those results. For my results I compute average/stdev over the final 20 evals of the training regime. I did a run with the DeepMind code ([results here](https://docs.google.com/spreadsheets/d/1IKfiD9wQVXtx8q9RJk52x8HtTnqsbeJqa1ioS_bh-k8/edit?usp=sharing)) and by this measure saw results of 1428+/-189. My current results are far short at 1139+/-138 (random agent scores ~150). Thus far I have not found anyone who has reproduced the DeepMind results using the approach described in the Nature paper. If you've done it, particularly with TensorFlow, let me know! 14 | 15 | I have also tried Breakout and got a score of 284+/-78, but that was an older version with the wrong target network update frequency. (DeepMind reported 400+/-30 using their eval method.) 16 | 17 | I have also experimented with compressing experience replay to allow a larger capacity than 1M. Both Breakout and Space Invaders show ~10% improvement with 4M and 3M respectively. 18 | 19 | A publicly viewable Google spreadsheet has [results](https://docs.google.com/spreadsheets/d/1RZM2qhKQaXaud4S2ILsRVukmiPCjM-xtJTuPRpb96HY/edit#gid=2001383367) for various experiments I have run. 20 | 21 | ### Running 22 | 23 | 1. Get Python and TensorFlow running, preferably on a GPU (see notes on [AWS setup](https://github.com/gtoubassi/dqn-atari/wiki/Setting-up-TensorFlow-on-AWS-GPU)). 24 | 2. Install the Arcade Learning Environment (see [wiki](https://github.com/gtoubassi/dqn-atari/wiki/Installing-ALE)). 25 | 3. Install dqn-atari specific dependencies, currently just ``sudo pip install blosc``. 26 | 4. Download a game ROM and name it properly, like space_invaders.bin (all lower case, ending in .bin -- the names must match for ALE). 27 | 5. Get the repo: 28 | 29 | git clone https://github.com/gtoubassi/dqn-atari.git 30 | 31 | 6. Run it! The default parameters attempt to mimic the Nature paper configuration: 32 | 33 | cd dqn-atari 34 | python ./play_atari.py ~/space_invaders.bin | tee train.log 35 | 36 | 7.
Periodically check progress 37 | 38 | ./logstats.sh train.log 39 | 40 | ### References 41 | 42 | The following were very helpful: 43 | 44 | * [Overview of Deep Q Learning](http://www.nervanasys.com/demystifying-deep-reinforcement-learning/) 45 | * David Silver's [Introduction to Reinforcement Learning](https://www.youtube.com/watch?v=2pWv7GOvuf0&list=PL5X3mDkKaJrL42i_jhE4N-p6E2Ol62Ofa) 46 | * [deep_rl_ale](https://github.com/Jabberwockyll/deep_rl_ale) 47 | * [Flabbybird agent using TensorFlow](https://github.com/yenchenlin1994/DeepLearningFlappyBird) 48 | * [Space Invaders using Theano](http://maciejjaskowski.github.io/2016/03/09/space-invaders.html) 49 | * [Deep Q Learning Google Group](https://groups.google.com/forum/#!forum/deep-q-learning) 50 | -------------------------------------------------------------------------------- /showcase-studies/dqn-simple/atari_environment.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import numpy as np 3 | import os 4 | import random 5 | from state import State 6 | from ale_python_interface import ALEInterface 7 | 8 | # Terminology in this class: 9 | # Episode: the span of one game life 10 | # Game: an ALE game (e.g. in space invaders == 3 Episodes or 3 Lives) 11 | # Frame: An ALE frame (e.g. 60 fps) 12 | # Step: An Environment step (e.g. covers 4 frames) 13 | # 14 | class AtariEnvironment: 15 | 16 | def __init__(self, args, outputDir): 17 | 18 | self.outputDir = outputDir 19 | self.screenCaptureFrequency = args.screen_capture_freq 20 | 21 | self.ale = ALEInterface() 22 | self.ale.setInt(b'random_seed', 123456) 23 | random.seed(123456) 24 | # Fix https://groups.google.com/forum/#!topic/deep-q-learning/p4FAIaabwlo 25 | self.ale.setFloat(b'repeat_action_probability', 0.0) 26 | 27 | # Load the ROM file 28 | self.ale.loadROM(args.rom.encode('UTF-8')) 29 | 30 | self.actionSet = self.ale.getMinimalActionSet() 31 | self.gameNumber = 0 32 | self.stepNumber = 0 33 | self.resetGame() 34 | 35 | def getNumActions(self): 36 | return len(self.actionSet) 37 | 38 | def getState(self): 39 | return self.state 40 | 41 | def getGameNumber(self): 42 | return self.gameNumber 43 | 44 | def getFrameNumber(self): 45 | return self.ale.getFrameNumber() 46 | 47 | def getEpisodeFrameNumber(self): 48 | return self.ale.getEpisodeFrameNumber() 49 | 50 | def getEpisodeStepNumber(self): 51 | return self.episodeStepNumber 52 | 53 | def getStepNumber(self): 54 | return self.stepNumber 55 | 56 | def getGameScore(self): 57 | return self.gameScore 58 | 59 | def isGameOver(self): 60 | return self.ale.game_over() 61 | 62 | def step(self, action): 63 | previousLives = self.ale.lives() 64 | reward = 0 65 | isTerminal = 0 66 | self.stepNumber += 1 67 | self.episodeStepNumber += 1 68 | 69 | for i in range(4): 70 | prevScreenRGB = self.ale.getScreenRGB() 71 | reward += self.ale.act(self.actionSet[action]) 72 | screenRGB = self.ale.getScreenRGB() 73 | 74 | # Detect end of episode, I don't think I'm handling this right in terms 75 | # of the overall game loop (??) 
76 | if self.ale.lives() < previousLives or self.ale.game_over(): 77 | isTerminal = 1 78 | break 79 | 80 | if self.gameNumber % self.screenCaptureFrequency == 0: 81 | dir = self.outputDir + '/screen_cap/game-%06d' % (self.gameNumber) 82 | if not os.path.isdir(dir): 83 | logging.info("Capturing screen this iteration into "+self.outputDir + '/screen_cap/') 84 | os.makedirs(dir) 85 | self.ale.saveScreenPNG((dir + '/frame-%06d.png' % (self.getEpisodeFrameNumber())).encode('UTF-8')) 86 | 87 | 88 | maxedScreen = np.maximum(screenRGB, prevScreenRGB) 89 | self.state = self.state.stateByAddingScreen(maxedScreen, self.ale.getFrameNumber()) 90 | self.gameScore += reward 91 | return reward, self.state, isTerminal 92 | 93 | def resetGame(self): 94 | if self.ale.game_over(): 95 | self.gameNumber += 1 96 | self.ale.reset_game() 97 | self.state = State().stateByAddingScreen(self.ale.getScreenRGB(), self.ale.getFrameNumber()) 98 | self.gameScore = 0 99 | self.episodeStepNumber = 0 # environment steps vs ALE frames. Will probably be 4*frame number 100 | -------------------------------------------------------------------------------- /showcase-studies/dqn-simple/encode_ffmpeg: -------------------------------------------------------------------------------- 1 | ffmpeg -r 60 -f image2 -i frame-%06d.png -vcodec libx264 -crf 25 -pix_fmt yuv420p video.mp4 -------------------------------------------------------------------------------- /showcase-studies/dqn-simple/logging_colorer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Console coloring of logs 3 | """ 4 | 5 | # thanks stackoverflow :-P https://stackoverflow.com/questions/384076/how-can-i-color-python-logging-output 6 | import logging 7 | 8 | # now we patch Python code to add color support to logging.StreamHandler 9 | import sys 10 | 11 | 12 | def add_coloring_to_emit_windows(fn): 13 | # add methods we need to the class 14 | def _out_handle(self): 15 | import ctypes 16 | return ctypes.windll.kernel32.GetStdHandle(self.STD_OUTPUT_HANDLE) 17 | out_handle = property(_out_handle) 18 | 19 | def _set_color(self, code): 20 | import ctypes 21 | # Constants from the Windows API 22 | self.STD_OUTPUT_HANDLE = -11 23 | hdl = ctypes.windll.kernel32.GetStdHandle(self.STD_OUTPUT_HANDLE) 24 | ctypes.windll.kernel32.SetConsoleTextAttribute(hdl, code) 25 | 26 | setattr(logging.StreamHandler, '_set_color', _set_color) 27 | 28 | def new(*args): 29 | FOREGROUND_BLUE = 0x0001 # text color contains blue. 30 | FOREGROUND_GREEN = 0x0002 # text color contains green. 31 | FOREGROUND_RED = 0x0004 # text color contains red. 32 | FOREGROUND_INTENSITY = 0x0008 # text color is intensified. 33 | FOREGROUND_WHITE = FOREGROUND_BLUE|FOREGROUND_GREEN |FOREGROUND_RED 34 | # winbase.h 35 | STD_INPUT_HANDLE = -10 36 | STD_OUTPUT_HANDLE = -11 37 | STD_ERROR_HANDLE = -12 38 | 39 | # wincon.h 40 | FOREGROUND_BLACK = 0x0000 41 | FOREGROUND_BLUE = 0x0001 42 | FOREGROUND_GREEN = 0x0002 43 | FOREGROUND_CYAN = 0x0003 44 | FOREGROUND_RED = 0x0004 45 | FOREGROUND_MAGENTA = 0x0005 46 | FOREGROUND_YELLOW = 0x0006 47 | FOREGROUND_GREY = 0x0007 48 | FOREGROUND_INTENSITY = 0x0008 # foreground color is intensified. 49 | 50 | BACKGROUND_BLACK = 0x0000 51 | BACKGROUND_BLUE = 0x0010 52 | BACKGROUND_GREEN = 0x0020 53 | BACKGROUND_CYAN = 0x0030 54 | BACKGROUND_RED = 0x0040 55 | BACKGROUND_MAGENTA = 0x0050 56 | BACKGROUND_YELLOW = 0x0060 57 | BACKGROUND_GREY = 0x0070 58 | BACKGROUND_INTENSITY = 0x0080 # background color is intensified. 
59 | 60 | levelno = args[1].levelno 61 | if(levelno>=50): 62 | color = BACKGROUND_YELLOW | FOREGROUND_RED | FOREGROUND_INTENSITY | BACKGROUND_INTENSITY 63 | elif(levelno>=40): 64 | color = FOREGROUND_RED | FOREGROUND_INTENSITY 65 | elif(levelno>=30): 66 | color = FOREGROUND_YELLOW | FOREGROUND_INTENSITY 67 | elif(levelno>=20): 68 | color = FOREGROUND_GREEN 69 | elif(levelno>=10): 70 | color = FOREGROUND_MAGENTA 71 | else: 72 | color = FOREGROUND_WHITE 73 | args[0]._set_color(color) 74 | 75 | ret = fn(*args) 76 | args[0]._set_color( FOREGROUND_WHITE ) 77 | #print "after" 78 | return ret 79 | return new 80 | 81 | def add_coloring_to_emit_ansi(fn): 82 | # add methods we need to the class 83 | def new(*args): 84 | levelno = args[1].levelno 85 | if(levelno>=50): 86 | color = '\x1b[31m' # red 87 | elif(levelno>=40): 88 | color = '\x1b[31m' # red 89 | elif(levelno>=30): 90 | color = '\x1b[33m' # yellow 91 | elif(levelno>=20): 92 | color = '\x1b[32m' # green 93 | elif(levelno>=10): 94 | color = '\x1b[35m' # pink 95 | else: 96 | color = '\x1b[0m' # normal 97 | args[1].msg = color + args[1].msg[:200] + '\x1b[0m' # normal 98 | #print "after" 99 | return fn(*args) 100 | return new 101 | 102 | import platform 103 | if platform.system()=='Windows': 104 | # Windows does not support ANSI escapes and we are using API calls to set the console color 105 | logging.StreamHandler.emit = add_coloring_to_emit_windows(logging.StreamHandler.emit) 106 | else: 107 | # all non-Windows platforms are supporting ANSI escapes so we use them 108 | logging.StreamHandler.emit = add_coloring_to_emit_ansi(logging.StreamHandler.emit) 109 | #log = logging.getLogger() 110 | #log.addFilter(log_filter()) 111 | #//hdlr = logging.StreamHandler() 112 | #//hdlr.setFormatter(formatter()) 113 | 114 | def init_logging(verbose=True, log_file='app.log'): 115 | """ 116 | Initialize settings for Python logger 117 | 118 | This allows for logging into console as well as specified log_file. 119 | After you can use in the code just:: 120 | 121 | import logging 122 | logging.info("hello world!") 123 | # will produce 124 | # 2016-07-19 16:13:02,931 [MainThread ][INFO ]: hello world! 
125 | 126 | :param bool verbose: 127 | :param str log_file: 128 | """ 129 | 130 | # let's log the same output to console and to file 131 | log_file = log_file 132 | format = "%(asctime)s [%(threadName)-12.12s][%(levelname)-5.5s]: %(message)s" 133 | 134 | root_logger = logging.getLogger() 135 | logging.basicConfig( 136 | format=format, 137 | level=logging.DEBUG if verbose else logging.WARNING, 138 | stream=sys.stderr) 139 | 140 | log_formatter = logging.Formatter(format) 141 | file_handler = logging.FileHandler(log_file) 142 | file_handler.setFormatter(log_formatter) 143 | root_logger.addHandler(file_handler) -------------------------------------------------------------------------------- /showcase-studies/dqn-simple/logstats.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo -n "Number of episodes: " 4 | grep Episode $1 | awk '{print $2}' | tail -1 5 | 6 | echo -n "Number of frames: " 7 | grep Episode $1 | awk '{print $7}' | tr -d '(' | awk '{SUM=SUM+$1;COUNT=COUNT+1} END{printf "%'"'"'d\n", SUM}' 8 | 9 | echo -n "Average score first 50: " 10 | grep Episode $1 | head -50 | awk '{print $6}' | awk '{SUM=SUM+$1;COUNT=COUNT+1} END{print SUM/COUNT}' 11 | 12 | echo -n "Average score last 50: " 13 | grep Episode $1 | tail -50 | awk '{print $6}' | awk '{SUM=SUM+$1;COUNT=COUNT+1} END{print SUM/COUNT}' 14 | 15 | echo -n "Average score first 100: " 16 | grep Episode $1 | head -100 | awk '{print $6}' | awk '{SUM=SUM+$1;COUNT=COUNT+1} END{print SUM/COUNT}' 17 | 18 | echo -n "Average score last 100: " 19 | grep Episode $1 | tail -100 | awk '{print $6}' | awk '{SUM=SUM+$1;COUNT=COUNT+1} END{print SUM/COUNT}' 20 | 21 | echo -n "Average score first 250: " 22 | grep Episode $1 | head -250 | awk '{print $6}' | awk '{SUM=SUM+$1;COUNT=COUNT+1} END{print SUM/COUNT}' 23 | 24 | echo -n "Average score last 250: " 25 | grep Episode $1 | tail -250 | awk '{print $6}' | awk '{SUM=SUM+$1;COUNT=COUNT+1} END{print SUM/COUNT}' 26 | 27 | echo -n "Average score first 500: " 28 | grep Episode $1 | head -500 | awk '{print $6}' | awk '{SUM=SUM+$1;COUNT=COUNT+1} END{print SUM/COUNT}' 29 | 30 | echo -n "Average score last 500: " 31 | grep Episode $1 | tail -500 | awk '{print $6}' | awk '{SUM=SUM+$1;COUNT=COUNT+1} END{print SUM/COUNT}' 32 | 33 | echo -n "Average score first 1000: " 34 | grep Episode $1 | head -1000 | awk '{print $6}' | awk '{SUM=SUM+$1;COUNT=COUNT+1} END{print SUM/COUNT}' 35 | 36 | echo -n "Average score last 1000: " 37 | grep Episode $1 | tail -1000 | awk '{print $6}' | awk '{SUM=SUM+$1;COUNT=COUNT+1} END{print SUM/COUNT}' 38 | 39 | echo -n "Average score for ALL: " 40 | grep Episode $1 | awk '{print $6}' | awk '{SUM=SUM+$1;COUNT=COUNT+1} END{print SUM/COUNT}' 41 | 42 | echo -n "Best scores ever: " 43 | grep Episode $1 | awk '{print $6}' | sort -n | tail -5 | sort -rn | tr '\n' ',' | sed 's#,$#\n#g' | sed 's#,#, #g' 44 | 45 | echo -n "Recent eval runs: " 46 | grep Average.eval $1 | tail -5 | awk '{print $NF}' | tr '\n' ',' | sed 's#,$#\n#g' | sed 's#,#, #g' 47 | 48 | echo -n "Average of last 20 evals: " 49 | grep Average.eval $1 | tail -20 | awk '{print $NF}' | awk '{SUM=SUM+$1;COUNT=COUNT+1} END{print SUM/COUNT}' 50 | 51 | -------------------------------------------------------------------------------- /showcase-studies/dqn-simple/play_atari.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | import logging 4 | import sys 5 | import numpy as np 6 | import os 7 | import random 8 | 
from tqdm import tqdm 9 | import replay 10 | import time 11 | import argparse 12 | import dqn 13 | from atari_environment import AtariEnvironment 14 | from state import State 15 | import logging_colorer 16 | 17 | logging_colorer.init_logging() 18 | 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument("--train-epoch-steps", type=int, default=250000, help="how many steps (=4 frames) to run during a training epoch (approx -- will finish current game)") 21 | parser.add_argument("--eval-epoch-steps", type=int, default=125000, help="how many steps (=4 frames) to run during an eval epoch (approx -- will finish current game)") 22 | parser.add_argument("--replay-capacity", type=int, default=1000000, help="how many states to store for future training") 23 | parser.add_argument("--prioritized-replay", action='store_true', help="Prioritize interesting states when training (e.g. terminal or non-zero rewards)") 24 | parser.add_argument("--compress-replay", action='store_true', help="if set, replay memory will be compressed with blosc, allowing much larger replay capacity") 25 | parser.add_argument("--normalize-weights", action='store_true', default=True, help="if set, weights/biases are normalized like torch, with std scaled by fan-in to the node") 26 | parser.add_argument("--screen-capture-freq", type=int, default=50, help="record screens for a game this often") 27 | parser.add_argument("--save-model-freq", type=int, default=5000, help="save the model once per this many training steps") 28 | parser.add_argument("--observation-steps", type=int, default=50000, help="train only after this many steps (=4 frames)") 29 | parser.add_argument("--learning-rate", type=float, default=0.00025, help="learning rate (step size for optimization algo)") 30 | parser.add_argument("--target-model-update-freq", type=int, default=10000, help="how often (in steps) to update the target model. Note the Nature paper says this is in 'number of parameter updates' but their code says steps.
see tinyurl.com/hokp4y8") 31 | parser.add_argument("--model", help="tensorflow model checkpoint file to initialize from") 32 | parser.add_argument("rom", help="rom file to run") 33 | args = parser.parse_args() 34 | 35 | print('Arguments: %s' % (args)) 36 | 37 | game_name = os.path.splitext(os.path.split(args.rom)[1])[0] 38 | baseOutputDir = 'out-'+ game_name + '-' + time.strftime("%Y-%m-%d-%H-%M-%S") 39 | os.makedirs(baseOutputDir) 40 | logging.info("Training game "+game_name) 41 | logging.info("Storing training into "+baseOutputDir) 42 | 43 | State.setup(args) 44 | 45 | environment = AtariEnvironment(args, baseOutputDir) 46 | 47 | dqn_network = dqn.DeepQNetwork(environment.getNumActions(), baseOutputDir, args) 48 | 49 | replayMemory = replay.ReplayMemory(args) 50 | 51 | def runEpoch(minEpochSteps, evalWithEpsilon=None): 52 | logging.info('Running epoch with min epoch steps: %d' % minEpochSteps) 53 | stepStart = environment.getStepNumber() 54 | isTraining = True if evalWithEpsilon is None else False 55 | startGameNumber = environment.getGameNumber() 56 | epochTotalScore = 0 57 | 58 | pbar = tqdm(total=minEpochSteps) 59 | while environment.getStepNumber() - stepStart < minEpochSteps: 60 | startTime = lastLogTime = time.time() 61 | stateReward = 0 62 | state = None 63 | 64 | while not environment.isGameOver(): 65 | # Choose next action 66 | if evalWithEpsilon is None: 67 | epsilon = max(.1, 1.0 - 0.9 * environment.getStepNumber() / 1e6) 68 | else: 69 | epsilon = evalWithEpsilon 70 | 71 | if state is None or random.random() > (1 - epsilon): 72 | action = random.randrange(environment.getNumActions()) 73 | else: 74 | screens = np.reshape(state.getScreens(), (1, 84, 84, 4)) 75 | action = dqn_network.inference(screens) 76 | 77 | # Make the move 78 | oldState = state 79 | reward, state, isTerminal = environment.step(action) 80 | pbar.update(4) 81 | 82 | # Record experience in replay memory and train 83 | if isTraining and oldState is not None: 84 | clippedReward = min(1, max(-1, reward)) 85 | replayMemory.addSample(replay.Sample(oldState, action, clippedReward, state, isTerminal)) 86 | 87 | if environment.getStepNumber() > args.observation_steps and environment.getEpisodeStepNumber() % 4 == 0: 88 | batch = replayMemory.drawBatch(32) 89 | dqn_network.train(batch, environment.getStepNumber()) 90 | 91 | if time.time() - lastLogTime > 60: 92 | print((' ...frame %d' % environment.getEpisodeFrameNumber())) 93 | lastLogTime = time.time() 94 | 95 | if isTerminal: 96 | state = None 97 | 98 | episodeTime = time.time() - startTime 99 | logging.info(('%s %d ended with score: %d (%d frames in %fs for %d fps)' % 100 | ('Episode' if isTraining else 'Eval', environment.getGameNumber(), environment.getGameScore(), 101 | environment.getEpisodeFrameNumber(), episodeTime, environment.getEpisodeFrameNumber() / episodeTime))) 102 | 103 | epochTotalScore += environment.getGameScore() 104 | environment.resetGame() 105 | 106 | pbar.close() 107 | 108 | # return the average score 109 | return epochTotalScore / (environment.getGameNumber() - startGameNumber) 110 | 111 | 112 | try: 113 | while True: 114 | aveScore = runEpoch(args.train_epoch_steps) #train 115 | print(('Average training score: %d' % (aveScore))) 116 | aveScore = runEpoch(args.eval_epoch_steps, evalWithEpsilon=.05) #eval 117 | print(('Average eval score: %d' % (aveScore))) 118 | except KeyboardInterrupt or TypeError: 119 | dqn_network.save_snapshot(environment.getStepNumber()) -------------------------------------------------------------------------------- 
/showcase-studies/dqn-simple/replay.py: -------------------------------------------------------------------------------- 1 | import bisect 2 | import math 3 | import random 4 | 5 | class Sample: 6 | 7 | def __init__(self, state1, action, reward, state2, terminal): 8 | self.state1 = state1 9 | self.action = action 10 | self.reward = reward 11 | self.state2 = state2 12 | self.terminal = terminal 13 | self.weight = 1 14 | self.cumulativeWeight = 1 15 | 16 | def isInteresting(self): 17 | return self.terminal or self.reward != 0 18 | 19 | def __cmp__(self, obj): 20 | return self.cumulativeWeight - obj.cumulativeWeight 21 | 22 | 23 | class ReplayMemory: 24 | 25 | def __init__(self, args): 26 | self.samples = [] 27 | self.maxSamples = args.replay_capacity 28 | self.prioritizedReplay = args.prioritized_replay 29 | self.numInterestingSamples = 0 30 | self.batchesDrawn = 0 31 | 32 | def numSamples(self): 33 | return len(self.samples) 34 | 35 | def addSample(self, sample): 36 | self.samples.append(sample) 37 | self._updateWeightsForNewlyAddedSample() 38 | self._truncateListIfNecessary() 39 | 40 | def _updateWeightsForNewlyAddedSample(self): 41 | if len(self.samples) > 1: 42 | self.samples[-1].cumulativeWeight = self.samples[-1].weight + self.samples[-2].cumulativeWeight 43 | 44 | if self.samples[-1].isInteresting(): 45 | self.numInterestingSamples += 1 46 | 47 | # Boost the neighboring samples. How many samples? Roughly the number of samples 48 | # that are "uninteresting". Meaning if interesting samples occur 3% of the time, then boost 33 49 | uninterestingSampleRange = max(1, len(self.samples) / max(1, self.numInterestingSamples)) 50 | for i in range(int(uninterestingSampleRange), 0, -1): 51 | index = len(self.samples) - i 52 | if index < 1: 53 | break 54 | # This is an exponential that ranges from 3.0 to 1.01 over the domain of [0, uninterestingSampleRange] 55 | # So the interesting sample gets a 3x boost, and the one furthest away gets a 1% boost 56 | boost = 1.0 + 3.0/(math.exp(i/(uninterestingSampleRange/6.0))) 57 | self.samples[index].weight *= boost 58 | self.samples[index].cumulativeWeight = self.samples[index].weight + self.samples[index - 1].cumulativeWeight 59 | 60 | def _truncateListIfNecessary(self): 61 | # premature optimization alert :-), don't truncate on each 62 | # added sample since (I assume) it requires a memcopy of the list (probably 8mb) 63 | if len(self.samples) > self.maxSamples * 1.05: 64 | truncatedWeight = 0 65 | # Before truncating the list, correct self.numInterestingSamples, and prepare 66 | # for correcting the cumulativeWeights of the remaining samples 67 | for i in range(self.maxSamples, len(self.samples)): 68 | truncatedWeight += self.samples[i].weight 69 | if self.samples[i].isInteresting(): 70 | self.numInterestingSamples -= 1 71 | 72 | # Truncate the list 73 | self.samples = self.samples[(len(self.samples) - self.maxSamples):] 74 | 75 | # Correct cumulativeWeights 76 | for sample in self.samples: 77 | sample.cumulativeWeight -= truncatedWeight 78 | 79 | def drawBatch(self, batchSize): 80 | if batchSize > len(self.samples): 81 | raise IndexError('Too few samples (%d) to draw a batch of %d' % (len(self.samples), batchSize)) 82 | 83 | self.batchesDrawn += 1 84 | 85 | if self.prioritizedReplay: 86 | return self._drawPrioritizedBatch(batchSize) 87 | else: 88 | return random.sample(self.samples, batchSize) 89 | 90 | # The Nature paper doesn't do this but they mention the idea.
91 | # This particular approach and the weighting I am using is a total 92 | # uninformed fabrication on my part. There is probably a more 93 | # principled way to do this 94 | def _drawPrioritizedBatch(self, batchSize): 95 | batch = [] 96 | probe = Sample(None, 0, 0, None, False) 97 | while len(batch) < batchSize: 98 | probe.cumulativeWeight = random.uniform(0, self.samples[-1].cumulativeWeight) 99 | index = bisect.bisect_right(self.samples, probe, 0, len(self.samples) - 1) 100 | sample = self.samples[index] 101 | sample.weight = max(1, .8 * sample.weight) 102 | if sample not in batch: 103 | batch.append(sample) 104 | 105 | if self.batchesDrawn % 100 == 0: 106 | cumulative = 0 107 | for sample in self.samples: 108 | cumulative += sample.weight 109 | sample.cumulativeWeight = cumulative 110 | return batch 111 | 112 | def _printBatchWeight(self, batch): 113 | batchWeight = 0 114 | for i in range(0, len(batch)): 115 | batchWeight += batch[i].weight 116 | print(('batch weight: %f' % batchWeight)) 117 | -------------------------------------------------------------------------------- /showcase-studies/dqn-simple/state.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.ndimage as ndimage 3 | import blosc 4 | import png 5 | 6 | class State: 7 | 8 | useCompression = False 9 | 10 | @staticmethod 11 | def setup(args): 12 | State.useCompression = args.compress_replay 13 | 14 | def stateByAddingScreen(self, screen, frameNumber): 15 | screen = np.dot(screen, np.array([.299, .587, .114])).astype(np.uint8) 16 | screen = ndimage.zoom(screen, (0.4, 0.525)) 17 | screen.resize((84, 84, 1)) 18 | #self.saveScreenAsPNG('screen', screen, frameNumber) 19 | 20 | if State.useCompression: 21 | screen = blosc.compress(np.reshape(screen, 84 * 84).tobytes(), typesize=1) 22 | 23 | newState = State() 24 | if hasattr(self, 'screens'): 25 | newState.screens = self.screens[:3] 26 | newState.screens.insert(0, screen) 27 | else: 28 | newState.screens = [screen, screen, screen, screen] 29 | return newState 30 | 31 | def getScreens(self): 32 | if State.useCompression: 33 | s = [] 34 | for i in range(4): 35 | s.append(np.reshape(np.fromstring(blosc.decompress(self.screens[i]), dtype=np.uint8), (84, 84, 1))) 36 | else: 37 | s = self.screens 38 | return np.concatenate(s, axis=2) 39 | 40 | def saveScreenAsPNG(self, basefilename, screen, frameNumber): 41 | pngfile = open(basefilename + ('-%08d.png' % frameNumber), 'wb') 42 | pngWriter = png.Writer(screen.shape[1], screen.shape[0], greyscale=True) 43 | pngWriter.write(pngfile, screen) 44 | pngfile.close() 45 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.sgf 3 | *.tar.bz2 4 | *.npz 5 | *.swp 6 | *.bin 7 | data/ 8 | play/ 9 | work/ 10 | cgos/ 11 | kgsGtp/ 12 | 13 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/CGOSEngine.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | from Engine import * 4 | from HelperEngine import HelperEngine 5 | 6 | # forwards commands to both a main engine 7 | # and a helper engine. When picking a move, 8 | # we first ask the helper engine. If it passes, 9 | # we pass. 
Otherwise we ask the main engine 10 | class CGOSEngine(BaseEngine): 11 | def __init__(self, engine): 12 | self.engine = engine 13 | self.helper= HelperEngine() 14 | 15 | # subclasses must override this 16 | def name(self): 17 | return self.engine.name() 18 | 19 | # subclasses must override this 20 | def version(self): 21 | return self.engine.version() 22 | 23 | def set_board_size(self, N): 24 | return self.engine.set_board_size(N) and \ 25 | self.helper.set_board_size(N) 26 | 27 | def clear_board(self): 28 | self.engine.clear_board() 29 | self.helper.clear_board() 30 | self.cleanup_mode = False 31 | 32 | def set_komi(self, komi): 33 | self.engine.set_komi(komi) 34 | self.helper.set_komi(komi) 35 | 36 | def player_passed(self, color): 37 | self.engine.player_passed(color) 38 | self.helper.player_passed(color) 39 | 40 | def stone_played(self, x, y, color): 41 | self.engine.stone_played(x, y, color) 42 | self.helper.stone_played(x, y, color) 43 | 44 | def generate_move(self, color, cleanup=False): 45 | # enter cleanup mode if helper passes. 46 | # if it resigns, resign. 47 | if not self.cleanup_mode: 48 | self.helper.set_level(5) 49 | move = self.helper.generate_move(color, cleanup=False) 50 | if move.is_pass(): 51 | print "CGOSEngine: helper passed! Entering cleanup mode." 52 | self.cleanup_mode = True 53 | elif move.is_resign(): 54 | print "CGOSEngine: helper resigned! Resigning." 55 | return Move.Resign 56 | else: # helper didn't pass or resign 57 | self.helper.undo() # helper must support this 58 | 59 | # in cleanup mode, moves are made by helper_cleanup 60 | if self.cleanup_mode: 61 | print "CGOSEngine: In cleanup mode: using helper to generate move." 62 | self.helper.set_level(10) 63 | move = self.helper.generate_move(color, cleanup=True) 64 | self.engine.move_was_played(move) 65 | return move 66 | 67 | # otherwise, moves are made by the main engine 68 | print "CGOSEngine: Generating move using main engine." 
69 | move = self.engine.generate_move(color) 70 | if move.is_play(): 71 | self.helper.stone_played(move.x, move.y, color) 72 | elif move.is_pass(): 73 | self.helper.player_passed(color) 74 | return move 75 | 76 | def undo(self): 77 | self.engine.undo() 78 | self.helper.undo() 79 | 80 | def quit(self): 81 | self.engine.quit() 82 | self.helper.quit() 83 | 84 | def supports_final_status_list(self): 85 | return True 86 | 87 | def final_status_list(self, status): 88 | return self.helper.final_status_list(status) 89 | 90 | def final_score(self): 91 | return self.helper.final_score() 92 | 93 | 94 | if __name__ == '__main__': 95 | import GTP 96 | fclient = GTP.redirect_all_output("log_engine.txt") 97 | 98 | import MoveModels 99 | from TFEngine import TFEngine 100 | from Book import PositionRecord 101 | from Book import MoveRecord 102 | 103 | engine = CGOSEngine(TFEngine("conv12posdepELU", MoveModels.Conv12PosDepELU(N=19, Nfeat=21))) 104 | 105 | gtp = GTP.GTP(engine, fclient) 106 | gtp.loop() 107 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/Checkpoint.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | def restore_from_checkpoint(sess, saver, ckpt_dir): 4 | print "Trying to restore from checkpoint in dir", ckpt_dir 5 | ckpt = tf.train.get_checkpoint_state(ckpt_dir) 6 | if ckpt and ckpt.model_checkpoint_path: 7 | print "Checkpoint file is ", ckpt.model_checkpoint_path 8 | saver.restore(sess, ckpt.model_checkpoint_path) 9 | global_step = int(ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]) 10 | print "Restored from checkpoint %s" % global_step 11 | return global_step 12 | else: 13 | print "No checkpoint file found" 14 | assert False 15 | 16 | def optionally_restore_from_checkpoint(sess, saver, train_dir): 17 | while True: 18 | response = raw_input("Restore from checkpoint [y/n]? ").lower() 19 | if response == 'y': 20 | return restore_from_checkpoint(sess, saver, train_dir) 21 | if response == 'n': 22 | return 0 23 | 24 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/Engine.py: -------------------------------------------------------------------------------- 1 | from Board import Board 2 | from GTP import Move 3 | import copy 4 | 5 | class BaseEngine(object): 6 | def __init__(self): 7 | self.board = None 8 | self.opponent_passed = False 9 | self.state_stack = [] 10 | 11 | def push_state(self): 12 | self.state_stack.append(copy.deepcopy(self.board)) 13 | 14 | def pop_state(self): 15 | self.board = self.state_stack.pop() 16 | self.opponent_passed = False 17 | 18 | def undo(self): 19 | if len(self.state_stack) > 0: 20 | self.pop_state() 21 | print "BaseEngine: after undo, board is" 22 | self.board.show() 23 | else: 24 | print "BaseEngine: undo called, but state_stack is empty. Board is" 25 | self.board.show() 26 | 27 | # subclasses must override this 28 | def name(self): 29 | assert False 30 | 31 | # subclasses must override this 32 | def version(self): 33 | assert False 34 | 35 | # subclasses may override to only accept 36 | # certain board sizes. They should call this 37 | # base method. 
38 | def set_board_size(self, N): 39 | self.board = Board(N) 40 | return True 41 | 42 | def clear_board(self): 43 | self.board.clear() 44 | self.state_stack = [] 45 | self.opponent_passed = False 46 | 47 | def set_komi(self, komi): 48 | self.komi = float(komi) 49 | 50 | def player_passed(self, color): 51 | self.push_state() 52 | self.board.play_pass() 53 | self.opponent_passed = True 54 | 55 | def stone_played(self, x, y, color): 56 | self.push_state() 57 | self.board.play_stone(x, y, color) 58 | self.opponent_passed = False 59 | self.board.show() 60 | 61 | def move_was_played(self, move): 62 | if move.is_play(): 63 | self.stone_played(move.x, move.y, self.board.color_to_play) 64 | elif move.is_pass(): 65 | self.player_passed(self.board.color_to_play) 66 | 67 | # subclasses must override this 68 | def pick_move(self, color): 69 | assert False 70 | 71 | def generate_move(self, color, cleanup=False): 72 | move = self.pick_move(color) 73 | self.push_state() 74 | if move.is_play(): 75 | self.board.play_stone(move.x, move.y, color) 76 | self.board.show() 77 | return move 78 | 79 | def quit(self): 80 | pass 81 | 82 | def supports_final_status_list(self): 83 | return False 84 | 85 | 86 | class IdiotEngine(BaseEngine): 87 | def __init__(self): 88 | super(IdiotEngine,self).__init__() 89 | 90 | def name(self): 91 | return "IdiotEngine" 92 | 93 | def version(self): 94 | return "1.0" 95 | 96 | def pick_move(self, color): 97 | for x in xrange(self.board.N): 98 | for y in xrange(self.board.N): 99 | if self.board.play_is_legal(x, y, color): 100 | return Move(x,y) 101 | return Move.Pass() 102 | 103 | 104 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/Eval.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import os.path 4 | import Features 5 | import Normalization 6 | 7 | 8 | def restore_from_checkpoint(sess, saver, ckpt_dir): 9 | print "Trying to restore from checkpoint in dir", ckpt_dir 10 | ckpt = tf.train.get_checkpoint_state(ckpt_dir) 11 | if ckpt and ckpt.model_checkpoint_path: 12 | print "Checkpoint file is ", ckpt.model_checkpoint_path 13 | saver.restore(sess, ckpt.model_checkpoint_path) 14 | global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1] 15 | print "Restored from checkpoint %s" % global_step 16 | else: 17 | print "No checkpoint file found" 18 | assert False 19 | 20 | class TFEval: 21 | def __init__(self, model): 22 | self.model = model 23 | 24 | # build the graph 25 | with tf.Graph().as_default(): 26 | with tf.device('/cpu:0'): 27 | self.feature_planes = tf.placeholder(tf.float32, shape=[None, self.model.N, self.model.N, self.model.Nfeat], name='feature_planes') 28 | self.score_op = model.inference(self.feature_planes, self.model.N, self.model.Nfeat) 29 | saver = tf.train.Saver(tf.trainable_variables()) 30 | init = tf.initialize_all_variables() 31 | self.sess = tf.Session(config=tf.ConfigProto(log_device_placement=False)) 32 | self.sess.run(init) 33 | checkpoint_dir = os.path.join(model.train_dir, 'checkpoints') 34 | restore_from_checkpoint(self.sess, saver, checkpoint_dir) 35 | 36 | def evaluate(self, board): 37 | board_feature_planes = Features.make_feature_planes_stones_4liberties_4history_ko_4captures(board, board.color_to_play).astype(np.float32) 38 | Normalization.apply_featurewise_normalization_C(board_feature_planes) 39 | feed_dict = {self.feature_planes: 
board_feature_planes.reshape(1,self.model.N,self.model.N,self.model.Nfeat)} 40 | score = np.asscalar(self.sess.run(self.score_op, feed_dict)) 41 | return score 42 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/EvalEngine.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import tensorflow as tf 3 | import numpy as np 4 | import os 5 | from Engine import * 6 | from Board import * 7 | import Features 8 | import Normalization 9 | import Symmetry 10 | import Checkpoint 11 | 12 | def average_probs_over_symmetries(probs): 13 | assert probs.size == 8 14 | return probs.mean() 15 | 16 | class EvalEngine(BaseEngine): 17 | def name(self): 18 | return "EvalEngine" 19 | 20 | def version(self): 21 | return "1.0" 22 | 23 | def __init__(self, model): 24 | BaseEngine.__init__(self) 25 | self.model = model 26 | with tf.Graph().as_default(): 27 | with tf.device('/cpu:0'): 28 | self.feature_planes = tf.placeholder(tf.float32, shape=[None, self.model.N, self.model.N, self.model.Nfeat], name='feature_planes') 29 | self.probs_op = model.inference(self.feature_planes, self.model.N, self.model.Nfeat) 30 | saver = tf.train.Saver(tf.trainable_variables()) 31 | init = tf.initialize_all_variables() 32 | self.sess = tf.Session(config=tf.ConfigProto(log_device_placement=False)) 33 | self.sess.run(init) 34 | checkpoint_dir = os.path.join(model.train_dir, 'checkpoints') 35 | Checkpoint.restore_from_checkpoint(self.sess, saver, checkpoint_dir) 36 | 37 | def get_position_eval(self): 38 | #assert self.model.Nfeat == 21 39 | #board_feature_planes = Features.make_feature_planes_stones_4liberties_4history_ko_4captures(self.board, self.board.color_to_play).astype(np.float32) 40 | #Normalization.apply_featurewise_normalization_C(board_feature_planes) 41 | assert self.model.Nfeat == 22 42 | board_feature_planes = Features.make_feature_planes_stones_4liberties_4history_ko_4captures_komi(self.board, self.board.color_to_play, self.komi).astype(np.float32) 43 | Normalization.apply_featurewise_normalization_D(board_feature_planes) 44 | feature_batch = Symmetry.make_symmetry_batch(board_feature_planes) 45 | feed_dict = {self.feature_planes: feature_batch} 46 | probs_batch = self.sess.run(self.probs_op, feed_dict) 47 | prob = average_probs_over_symmetries(probs_batch) 48 | if self.board.color_to_play == Color.White: 49 | prob *= -1 50 | return prob 51 | 52 | def pick_move(self, color): 53 | for i in xrange(10000): 54 | x = np.random.randint(0, self.board.N-1) 55 | y = np.random.randint(0, self.board.N-1) 56 | if self.board.play_is_legal(x, y, color): 57 | return Move(x,y) 58 | return Move.Pass 59 | 60 | 61 | if __name__ == '__main__': 62 | import GTP 63 | fclient = GTP.redirect_all_output("log_engine.txt") 64 | 65 | import EvalModels 66 | 67 | engine = EvalEngine(EvalModels.Conv11PosDepFC1ELU(N=19, Nfeat=22)) 68 | 69 | gtp = GTP.GTP(engine, fclient) 70 | gtp.loop() 71 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/EvalModels.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from Layers import * 3 | 4 | class Conv5PosDepFC1ELU: 5 | def __init__(self, N, Nfeat): 6 | self.train_dir = "/home/greg/coding/ML/go/NN/work/train_dirs/eval_conv5posdepfc1ELU_N%d_fe%d" % (N, Nfeat) 7 | self.N = N 8 | self.Nfeat = Nfeat 9 | def inference(self, feature_planes, N, Nfeat): 10 | NK = 64 11 | NKfirst 
= 64 12 | Nfc = 256 13 | conv1 = ELU_conv_pos_dep_bias(feature_planes, 5, Nfeat, NKfirst, N, 'conv1') 14 | conv2 = ELU_conv_pos_dep_bias(conv1, 3, NKfirst, NK, N, 'conv2') 15 | conv3 = ELU_conv_pos_dep_bias(conv2, 3, NK, NK, N, 'conv3') 16 | conv4 = ELU_conv_pos_dep_bias(conv3, 3, NK, NK, N, 'conv4') 17 | conv5 = ELU_conv_pos_dep_bias(conv4, 3, NK, NK, N, 'conv5') 18 | conv6 = ELU_conv_pos_dep_bias(conv5, 3, NK, NK, N, 'conv6') 19 | conv7 = ELU_conv_pos_dep_bias(conv6, 3, NK, NK, N, 'conv7') 20 | conv8 = ELU_conv_pos_dep_bias(conv7, 3, NK, NK, N, 'conv8') 21 | conv9 = ELU_conv_pos_dep_bias(conv8, 3, NK, NK, N, 'conv9') 22 | conv10 = ELU_conv_pos_dep_bias(conv9, 3, NK, NK, N, 'conv10') 23 | conv11 = ELU_conv_pos_dep_bias(conv10, 3, NK, NK, N, 'conv11') 24 | conv11_flat = tf.reshape(conv11, [-1, NK*N*N]) 25 | fc = ELU_fully_connected_layer(conv11_flat, NK*N*N, Nfc) 26 | score = tf.tanh(linear_layer(fc, Nfc, 1)) 27 | return score 28 | 29 | class Conv11PosDepFC1ELU: 30 | def __init__(self, N, Nfeat): 31 | self.train_dir = "/home/greg/coding/ML/go/NN/work/train_dirs/eval_conv11posdepfc1ELU_N%d_fe%d" % (N, Nfeat) 32 | self.N = N 33 | self.Nfeat = Nfeat 34 | def inference(self, feature_planes, N, Nfeat): 35 | NK = 256 36 | NKfirst = 256 37 | Nfc = 256 38 | conv1 = ELU_conv_pos_dep_bias(feature_planes, 5, Nfeat, NKfirst, N, 'conv1') 39 | conv2 = ELU_conv_pos_dep_bias(conv1, 3, NKfirst, NK, N, 'conv2') 40 | conv3 = ELU_conv_pos_dep_bias(conv2, 3, NK, NK, N, 'conv3') 41 | conv4 = ELU_conv_pos_dep_bias(conv3, 3, NK, NK, N, 'conv4') 42 | conv5 = ELU_conv_pos_dep_bias(conv4, 3, NK, NK, N, 'conv5') 43 | conv6 = ELU_conv_pos_dep_bias(conv5, 3, NK, NK, N, 'conv6') 44 | conv7 = ELU_conv_pos_dep_bias(conv6, 3, NK, NK, N, 'conv7') 45 | conv8 = ELU_conv_pos_dep_bias(conv7, 3, NK, NK, N, 'conv8') 46 | conv9 = ELU_conv_pos_dep_bias(conv8, 3, NK, NK, N, 'conv9') 47 | conv10 = ELU_conv_pos_dep_bias(conv9, 3, NK, NK, N, 'conv10') 48 | conv11 = ELU_conv_pos_dep_bias(conv10, 3, NK, NK, N, 'conv11') 49 | conv11_flat = tf.reshape(conv11, [-1, NK*N*N]) 50 | fc = ELU_fully_connected_layer(conv11_flat, NK*N*N, Nfc) 51 | score = tf.tanh(linear_layer(fc, Nfc, 1)) 52 | return score 53 | 54 | class Linear: 55 | def __init__(self, N, Nfeat): 56 | self.train_dir = "/home/greg/coding/ML/go/NN/work/train_dirs/linear_N%d_fe%d" % (N, Nfeat) 57 | self.N = N 58 | self.Nfeat = Nfeat 59 | def inference(self, feature_planes, N, Nfeat): 60 | features_flat = tf.reshape(feature_planes, [-1, N*N*Nfeat]) 61 | weights = tf.Variable(tf.constant(0.0, shape=[N*N*Nfeat, 1]), name='weights') 62 | #weights = tf.constant(0.0, shape=[N*N*Nfeat, 1]) 63 | bias = tf.Variable(tf.constant(0.0, shape=[1])) 64 | out = tf.matmul(features_flat, weights) + bias 65 | #out = tf.matmul(features_flat, weights) 66 | score = tf.tanh(out) 67 | return score 68 | 69 | class Zero: 70 | def __init__(self, N, Nfeat): 71 | self.train_dir = "/home/greg/coding/ML/go/NN/work/train_dirs/zero_N%d_fe%d" % (N, Nfeat) 72 | def inference(self, feature_planes, N, Nfeat): 73 | dummy = tf.Variable(tf.constant(0.0, dtype=tf.float32), name='dummy') 74 | return dummy * tf.constant(0.0, dtype=tf.float32, shape=[128]) 75 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/EvalStats.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | 4 | 5 | def do_game(sgf, correct, tries): 6 | reader = SGFReader(sgf) 7 | 8 | if reader.komi == None: 9 | print "skiping %s b/c there's 
no komi given" % sgf 10 | return 11 | komi = float(reader.komi) 12 | if not komi_allowed(komi): 13 | print "skipping %s b/c of non-allowed komi \"%s\"" % (sgf, reader.komi) 14 | 15 | if reader.result == None: 16 | print "skipping %s because there's no result given" % sgf 17 | return 18 | elif "B+" in reader.result: 19 | winner = Color.Black 20 | elif "W+" in reader.result: 21 | winner = Color.White 22 | else: 23 | print "skipping %s because I can't figure out the winner from \"%s\"" % (sgf, reader.result) 24 | return 25 | 26 | turn_num = 0 27 | while True: 28 | feature_planes = feature_maker(reader.board, reader.next_play_color(), komi) 29 | final_score = +1 if reader.next_play_color() == winner else -1 30 | final_score_arr = np.array([final_score], dtype=np.int8) 31 | 32 | writer.push_example((feature_planes, final_score_arr)) 33 | if reader.has_more(): 34 | reader.play_next_move() 35 | else: 36 | break 37 | 38 | def do_stats_on_sgfs(sgfs): 39 | for sgf in sgfs: 40 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/EvalTraining.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import time 4 | import multiprocessing 5 | import random 6 | import Symmetry 7 | import NPZ 8 | import gc 9 | 10 | def apply_random_symmetries(many_feature_planes): 11 | N = many_feature_planes.shape[1] 12 | for i in range(many_feature_planes.shape[0]): 13 | s = random.randint(0, 7) 14 | #Symmetry.apply_symmetry_planes(many_feature_planes[i,:,:,:], s) 15 | Symmetry.apply_symmetry_features_example(many_feature_planes, i, s) 16 | 17 | 18 | """ 19 | def build_feed_dict(loader, apply_normalization, feature_planes, final_scores): 20 | a = time.time() 21 | batch = loader.next_minibatch(('feature_planes', 'final_scores')) 22 | b = time.time() 23 | loaded_feature_planes = batch['feature_planes'].astype(np.float32) 24 | loaded_scores = batch['final_scores'].astype(np.float32) # BIT ME HARD. 25 | c = time.time() 26 | 27 | loaded_scores = np.ravel(loaded_scores) # flatten to 1D 28 | d = time.time() 29 | 30 | apply_normalization(loaded_feature_planes) 31 | e = time.time() 32 | 33 | #print "WARNING: NOT APPLYING SYMMETRIES!!!!!!!!!!!!!!!!" 34 | apply_random_symmetries(loaded_feature_planes) 35 | f = time.time() 36 | 37 | print "b-a = %f, c-b = %f, d-c = %f, e-d = %f, f-e = %f" % ((b-a,c-b,d-c,e-d,f-e)) 38 | 39 | #N = loaded_feature_planes.shape[1] 40 | 41 | #print "loaded_feature_planes =" 42 | #print loaded_feature_planes 43 | #print "loaded_scores =" 44 | #print loaded_scores 45 | 46 | return { feature_planes: loaded_feature_planes, 47 | final_scores: loaded_scores } 48 | """ 49 | 50 | def build_feed_dict_strings(loader, apply_normalization): 51 | a = time.time() 52 | batch = loader.next_minibatch(('feature_planes', 'final_scores')) 53 | b = time.time() 54 | loaded_feature_planes = batch['feature_planes'].astype(np.float32) 55 | loaded_scores = batch['final_scores'].astype(np.float32) # BIT ME HARD. 
56 | loaded_scores = np.ravel(loaded_scores) # flatten to 1D 57 | c = time.time() 58 | apply_normalization(loaded_feature_planes) 59 | d = time.time() 60 | apply_random_symmetries(loaded_feature_planes) 61 | e = time.time() 62 | print "b-a=%f, c-b=%f, d-c=%f, e-d=%f" % (b-a, c-b, d-c, e-d) 63 | return { 'feature_planes': loaded_feature_planes, 64 | 'final_scores': loaded_scores } 65 | 66 | def dict_strings_to_ops(feed_dict_strings, feature_planes_ph, final_scores_ph): 67 | return { feature_planes_ph: feed_dict_strings['feature_planes'], 68 | final_scores_ph: feed_dict_strings['final_scores'] } 69 | 70 | def build_feed_dict(loader, apply_normalization, feature_planes_ph, final_scores_ph): 71 | return dict_strings_to_ops(build_feed_dict_strings(loader, apply_normalization), feature_planes_ph, final_scores_ph) 72 | 73 | def async_worker(q, npz_dir, minibatch_size, apply_normalization): 74 | print "Hello from EvalTraining async_worker process!!!" 75 | gc.set_debug(gc.DEBUG_STATS) 76 | loader = NPZ.RandomizingLoader(npz_dir, minibatch_size) 77 | names = ('feature_planes', 'final_scores') 78 | while True: 79 | feed_dict_strings = build_feed_dict_strings(loader, apply_normalization) 80 | q.put(feed_dict_strings, block=True) # will block if queue is full 81 | 82 | class AsyncRandomBatchQueue: 83 | def __init__(self, feature_planes_ph, final_scores_ph, npz_dir, minibatch_size, apply_normalization): 84 | self.q = multiprocessing.Queue(maxsize=5) 85 | self.process = multiprocessing.Process(target=async_worker, args=(self.q, npz_dir, minibatch_size, apply_normalization)) 86 | self.process.daemon = True 87 | self.process.start() 88 | self.feature_planes_ph = feature_planes_ph 89 | self.final_scores_ph = final_scores_ph 90 | def next_feed_dict(self): 91 | feed_dict_strings = self.q.get(block=True, timeout=30) 92 | feed_dict = dict_strings_to_ops(feed_dict_strings, self.feature_planes_ph, self.final_scores_ph) 93 | return feed_dict 94 | 95 | 96 | def loss_func(score_op): 97 | final_scores = tf.placeholder(tf.float32, shape=[None]) 98 | 99 | squared_errors = tf.square(tf.reshape(score_op, [-1]) - final_scores) 100 | #mean_sq_err = tf.reduce_mean(squared_errors, name='mean_sq_err') 101 | cross_entropy_ish_loss = tf.reduce_mean(-tf.log(tf.constant(1.0) - tf.constant(0.5) * tf.abs(tf.reshape(score_op, [-1]) - final_scores), name='cross-entropy-ish-loss')) 102 | 103 | correct_prediction = tf.equal(tf.sign(tf.reshape(score_op, [-1])), tf.sign(final_scores)) 104 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy') 105 | #return final_scores, mean_sq_err, accuracy, squared_errors 106 | return final_scores, cross_entropy_ish_loss, accuracy 107 | 108 | 109 | 110 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/Game.py: -------------------------------------------------------------------------------- 1 | from Board import * 2 | 3 | def str_from_vertex(x, y): 4 | return chr(ord('a')+x) + chr(ord('a'))+y 5 | 6 | class Game: 7 | def __init__(self, N, komi, rules): 8 | self.moves = [] 9 | self.N = N 10 | self.komi = komi 11 | self.rules = rules 12 | self.result = None 13 | 14 | def add_move(self, move): 15 | self.moves.append(move) 16 | 17 | def set_result(self, move): 18 | self.result = result 19 | 20 | def write_sgf(self, filename): 21 | assert self.result != None 22 | with open(filename, 'w') as f: 23 | f.write("(;GM[1]FF[4]") 24 | f.write("RU[%s]SZ[%d]KM[%0.2f]\n" % self.rules, self.N, self.komi) 25 | f.write("RE[%s]\n" 
% self.result) 26 | color = Color.Black 27 | for move in moves: 28 | if move.is_resign(): continue 29 | color_str = "B" if color == Color.Black else "W" 30 | vert_str = "" if move.is_pass() else str_from_vertex(move.x, move.y) 31 | f.write(";%s[%s]\n" % (color_str, vert_str)) 32 | color = flipped_color[color] 33 | f.write(")\n") 34 | 35 | 36 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/HelperEngine.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import subprocess 3 | from GTP import * 4 | from Board import * 5 | 6 | # Using gnugo to determine when to pass and to play cleanup moves 7 | 8 | class HelperEngine: 9 | def __init__(self, level=10): 10 | command = ["gnugo", "--mode", "gtp", "--level", str(level), "--chinese-rules", "--positional-superko"] 11 | self.proc = subprocess.Popen(command, bufsize=1, stdin=subprocess.PIPE, stdout=subprocess.PIPE) # bufsize=1 is line buffered 12 | 13 | def send_command(self, command): 14 | print "HelperEngine: sending command \"%s\"" % command 15 | self.proc.stdin.write(command) 16 | self.proc.stdin.write('\n') 17 | 18 | response = "" 19 | while True: 20 | line = self.proc.stdout.readline() 21 | if line.startswith('='): 22 | response += line[2:] 23 | elif line.startswith('?'): 24 | print "HelperEngine: error response! line is \"%s\"" % line 25 | response += line[2:] 26 | elif len(line.strip()) == 0: 27 | # blank line ends response 28 | break 29 | else: 30 | response += line 31 | response = response.strip() 32 | print "HelperEngine: got response \"%s\"" % response 33 | return response 34 | 35 | def set_board_size(self, N): 36 | self.send_command("boardsize %d" % N) 37 | return True # could parse helper response 38 | 39 | def clear_board(self): 40 | self.send_command("clear_board") 41 | 42 | def set_komi(self, komi): 43 | self.send_command("komi %.2f" % komi) 44 | 45 | def player_passed(self, color): 46 | self.send_command("play %s pass" % color_names[color]) 47 | 48 | def stone_played(self, x, y, color): 49 | self.send_command("play %s %s" % (color_names[color], str_from_coords(x, y))) 50 | 51 | def set_level(self, level): 52 | self.send_command("level %d" % level) 53 | 54 | def generate_move(self, color, cleanup=False): 55 | cmd = "kgs-genmove_cleanup" if cleanup else "genmove" 56 | response = self.send_command("%s %s" % (cmd, color_names[color])) 57 | if 'pass' in response.lower(): 58 | return Move.Pass 59 | elif 'resign' in response.lower(): 60 | return Move.Resign 61 | else: 62 | x, y= coords_from_str(response) 63 | return Move(x, y) 64 | 65 | def undo(self): 66 | self.send_command('undo') 67 | 68 | def quit(self): 69 | pass 70 | 71 | def final_status_list(self, status): 72 | return self.send_command("final_status_list %s" % status) 73 | 74 | def final_score(self): 75 | return self.send_command("final_score") 76 | 77 | 78 | if __name__ == '__main__': 79 | helper = HelperEngine() 80 | 81 | helper.set_board_size(19) 82 | helper.clear_board() 83 | helper.set_komi(6.5) 84 | helper.stone_played(5, 5, Color.Black) 85 | move = helper.generate_move(Color.White) 86 | print "move =", move 87 | helper.undo() 88 | move = helper.pick_move(Color.White) 89 | print "move =", move 90 | 91 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/InfluenceEngine.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import 
numpy as np 3 | import os 4 | from Engine import * 5 | from Board import * 6 | import Features 7 | import Symmetry 8 | import Checkpoint 9 | 10 | class InfluenceEngine(BaseEngine): 11 | def name(self): 12 | return "InfluenceEngine" 13 | 14 | def version(self): 15 | return "1.0" 16 | 17 | def __init__(self, model): 18 | BaseEngine.__init__(self) 19 | self.model = model 20 | with tf.Graph().as_default(): 21 | with tf.device('/cpu:0'): 22 | self.feature_planes = tf.placeholder(tf.float32, shape=[None, self.model.N, self.model.N, self.model.Nfeat], name='feature_planes') 23 | self.logits = model.inference(self.feature_planes, self.model.N, self.model.Nfeat) 24 | saver = tf.train.Saver(tf.trainable_variables()) 25 | init = tf.initialize_all_variables() 26 | self.sess = tf.Session(config=tf.ConfigProto(log_device_placement=False)) 27 | self.sess.run(init) 28 | checkpoint_dir = os.path.join(model.train_dir, 'checkpoints') 29 | Checkpoint.restore_from_checkpoint(self.sess, saver, checkpoint_dir) 30 | 31 | def make_influence_map(self): 32 | if self.model.Nfeat == 15: 33 | board_feature_planes = Features.make_feature_planes_stones_3liberties_4history_ko(self.board, self.board.color_to_play) 34 | assert False, "for some reason I commented out the normalization???" 35 | #Normalization.apply_featurewise_normalization_B(board_feature_planes) 36 | else: 37 | assert False 38 | feature_batch = make_symmetry_batch(board_feature_planes) 39 | feed_dict = {self.feature_planes: feature_batch} 40 | logit_batch = self.sess.run(self.logits, feed_dict) 41 | move_logits = Symmetry.average_plane_over_symmetries(logit_batch, self.model.N) 42 | move_logits = move_logits.reshape((self.model.N, self.model.N)) 43 | influence_map = np.tanh(move_logits) 44 | if self.board.color_to_play == Color.White: 45 | influence_map *= -1 46 | #influence_map = -1 * np.ones((self.model.N, self.model.N), dtype=np.float32) 47 | return influence_map 48 | 49 | 50 | def pick_move(self, color): 51 | for i in xrange(10000): 52 | x = np.random.randint(0, self.board.N-1) 53 | y = np.random.randint(0, self.board.N-1) 54 | if self.board.play_is_legal(x, y, color): 55 | return Move(x,y) 56 | return Move.Pass() 57 | 58 | 59 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/InfluenceModels.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from Layers import * 3 | 4 | class Conv4PosDep: 5 | def __init__(self, N, Nfeat): 6 | self.train_dir = "/home/greg/coding/ML/go/NN/work/train_dirs/influence_conv4posdep_N%d_fe%d" % (N, Nfeat) 7 | self.N = N 8 | self.Nfeat = Nfeat 9 | def inference(self, feature_planes, N, Nfeat): 10 | NK = 16 11 | NKfirst = 16 12 | conv1 = ELU_conv_pos_dep_bias(feature_planes, 5, Nfeat, NKfirst, N, 'conv1') 13 | conv2 = ELU_conv_pos_dep_bias(conv1, 3, NKfirst, NK, N, 'conv2') 14 | conv3 = ELU_conv_pos_dep_bias(conv2, 3, NK, NK, N, 'conv3') 15 | conv4 = conv_pos_dep_bias(conv3, 3, NK, 1, N, 'conv4') 16 | logits = tf.reshape(conv4, [-1, N*N]) 17 | return logits # use with sigmoid and sigmoid_cross_entropy_with_logits 18 | 19 | 20 | class Conv12PosDepELU: 21 | def __init__(self, N, Nfeat): 22 | self.train_dir = "/home/greg/coding/ML/go/NN/work/train_dirs/influence_conv12posdep_N%d_fe%d" % (N, Nfeat) 23 | self.N = N 24 | self.Nfeat = Nfeat 25 | def inference(self, feature_planes, N, Nfeat): 26 | NK = 192 27 | NKfirst = 192 28 | conv1 = ELU_conv_pos_dep_bias(feature_planes, 5, Nfeat, NKfirst, N, 'conv1') 29 | 
conv2 = ELU_conv_pos_dep_bias(conv1, 3, NKfirst, NK, N, 'conv2') 30 | conv3 = ELU_conv_pos_dep_bias(conv2, 3, NK, NK, N, 'conv3') 31 | conv4 = ELU_conv_pos_dep_bias(conv3, 3, NK, NK, N, 'conv4') 32 | conv5 = ELU_conv_pos_dep_bias(conv4, 3, NK, NK, N, 'conv5') 33 | conv6 = ELU_conv_pos_dep_bias(conv5, 3, NK, NK, N, 'conv6') 34 | conv7 = ELU_conv_pos_dep_bias(conv6, 3, NK, NK, N, 'conv7') 35 | conv8 = ELU_conv_pos_dep_bias(conv7, 3, NK, NK, N, 'conv8') 36 | conv9 = ELU_conv_pos_dep_bias(conv8, 3, NK, NK, N, 'conv9') 37 | conv10 = ELU_conv_pos_dep_bias(conv9, 3, NK, NK, N, 'conv10') 38 | conv11 = ELU_conv_pos_dep_bias(conv10, 3, NK, NK, N, 'conv11') 39 | conv12 = conv_pos_dep_bias(conv11, 3, NK, 1, N, 'conv12') 40 | logits = tf.reshape(conv12, [-1, N*N]) 41 | return logits 42 | 43 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/InfluenceTraining.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import random 4 | import Symmetry 5 | 6 | def apply_random_symmetries(many_feature_planes, many_final_maps): 7 | for i in range(many_feature_planes.shape[0]): 8 | s = random.randint(0, 7) 9 | Symmetry.apply_symmetry_planes(many_feature_planes[i,:,:,:], s) 10 | Symmetry.apply_symmetry_plane(many_final_maps[i,:,:], s) 11 | 12 | def build_feed_dict(loader, apply_normalization, feature_planes, final_maps): 13 | batch = loader.next_minibatch(('feature_planes', 'final_maps')) 14 | loaded_feature_planes = batch['feature_planes'].astype(np.float32) 15 | loaded_final_maps = batch['final_maps'].astype(np.float32) 16 | 17 | apply_normalization(loaded_feature_planes) 18 | 19 | apply_random_symmetries(loaded_feature_planes, loaded_final_maps) 20 | 21 | minibatch_size = loaded_feature_planes.shape[0] 22 | N = loaded_feature_planes.shape[1] 23 | return { feature_planes: loaded_feature_planes, 24 | final_maps: loaded_final_maps.reshape((minibatch_size, N*N)) } 25 | 26 | def loss_func(logits): 27 | final_maps = tf.placeholder(tf.float32, shape=[None, 361]) 28 | 29 | # final maps are originally -1 to 1. rescale them to 0 to 1 probabilities: 30 | final_prob_maps = final_maps * tf.constant(0.5) + tf.constant(0.5) 31 | cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, targets=final_prob_maps) 32 | cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy_mean') 33 | 34 | correct_prediction = tf.equal(tf.sign(logits), tf.sign(final_maps)) 35 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 36 | return final_maps, cross_entropy_mean, accuracy 37 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/KGSEngine.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | from Engine import * 3 | from HelperEngine import HelperEngine 4 | 5 | # forwards commands to both a main engine 6 | # and a helper engine. When picking a move, 7 | # we first ask the helper engine. If it passes, 8 | # we pass. 
Otherwise we ask the main engine 9 | class KGSEngine(BaseEngine): 10 | def __init__(self, engine): 11 | self.engine = engine 12 | self.helper = HelperEngine() 13 | 14 | # subclasses must override this 15 | def name(self): 16 | return self.engine.name() 17 | 18 | # subclasses must override this 19 | def version(self): 20 | return self.engine.version() 21 | 22 | def set_board_size(self, N): 23 | return self.engine.set_board_size(N) and \ 24 | self.helper.set_board_size(N) 25 | 26 | def clear_board(self): 27 | self.engine.clear_board() 28 | self.helper.clear_board() 29 | 30 | def set_komi(self, komi): 31 | self.engine.set_komi(komi) 32 | self.helper.set_komi(komi) 33 | 34 | def player_passed(self, color): 35 | self.engine.player_passed(color) 36 | self.helper.player_passed(color) 37 | 38 | def stone_played(self, x, y, color): 39 | self.engine.stone_played(x, y, color) 40 | self.helper.stone_played(x, y, color) 41 | 42 | def generate_move(self, color, cleanup=False): 43 | pass_checking_level = 5 44 | cleanup_level = 10 45 | self.helper.set_level(cleanup_level if cleanup else pass_checking_level) 46 | 47 | move = self.helper.generate_move(color, cleanup) 48 | if move.is_resign(): 49 | return Move.Resign 50 | elif move.is_pass() or cleanup: 51 | self.engine.move_was_played(move) 52 | return move 53 | else: 54 | self.helper.undo() 55 | 56 | move = self.engine.generate_move(color) 57 | if move.is_play(): 58 | self.helper.stone_played(move.x, move.y, color) 59 | elif move.is_pass(): 60 | self.helper.player_passed(color) 61 | return move 62 | 63 | def undo(self): 64 | self.engine.undo() 65 | self.helper.undo() 66 | 67 | def quit(self): 68 | self.engine.quit() 69 | self.helper.quit() 70 | 71 | def supports_final_status_list(self): 72 | return True 73 | 74 | def final_status_list(self, status): 75 | return self.helper.final_status_list(status) 76 | 77 | def get_last_move_probs(self): 78 | return self.engine.get_last_move_probs() 79 | 80 | def toggle_kibitz_mode(self): 81 | return self.engine.toggle_kibitz_mode() 82 | 83 | if __name__ == '__main__': 84 | import GTP 85 | fclient = GTP.redirect_all_output("log_engine.txt") 86 | 87 | from GTP import GTP 88 | from TFEngine import TFEngine 89 | import MoveModels 90 | from Book import PositionRecord 91 | from Book import MoveRecord 92 | 93 | engine = KGSEngine(TFEngine("conv12posdepELU", MoveModels.Conv12PosDepELU(N=19, Nfeat=21))) 94 | 95 | gtp = GTP(engine, fclient) 96 | gtp.loop() 97 | 98 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/Layers.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import math 3 | 4 | def conv(inputs, diameter, Nin, Nout, name): 5 | fan_in = diameter * diameter * Nin 6 | #stddev = math.sqrt(2.0 / fan_in) 7 | print "WARNING: USING DIFFERENT STDDEV FOR CONV!" 
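    # The kernel is drawn from a truncated normal whose stddev scales with the
    # fan-in of the filter; the line below uses sqrt(1/fan_in) instead of the
    # He-initialization value sqrt(2/fan_in) commented out above, which is what
    # the warning print refers to.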
8 | stddev = math.sqrt(1.0 / fan_in) 9 | kernel = tf.Variable(tf.truncated_normal([diameter, diameter, Nin, Nout], stddev=stddev), name=name+'_kernel') 10 | return tf.nn.conv2d(inputs, kernel, [1, 1, 1, 1], padding='SAME') 11 | 12 | def conv_uniform_bias(inputs, diameter, Nin, Nout, name): 13 | bias = tf.Variable(tf.constant(0.0, shape=[Nout]), name=name+'_bias') 14 | return conv(inputs, diameter, Nin, Nout, name) + bias 15 | 16 | def conv_pos_dep_bias(inputs, diameter, Nin, Nout, N, name): 17 | bias = tf.Variable(tf.constant(0.0, shape=[N, N, Nout]), name=name+'_bias') 18 | return conv(inputs, diameter, Nin, Nout, name) + bias 19 | 20 | def ReLU_conv_uniform_bias(inputs, diameter, Nin, Nout, name): 21 | return tf.nn.relu(conv_uniform_bias(inputs, diameter, Nin, Nout, name)) 22 | 23 | def ReLU_conv_pos_dep_bias(inputs, diameter, Nin, Nout, N, name): 24 | return tf.nn.relu(conv_pos_dep_bias(inputs, diameter, Nin, Nout, N, name)) 25 | 26 | def ELU_conv_pos_dep_bias(inputs, diameter, Nin, Nout, N, name): 27 | return tf.nn.elu(conv_pos_dep_bias(inputs, diameter, Nin, Nout, N, name)) 28 | 29 | def linear_layer(inputs, Nin, Nout): 30 | #stddev = math.sqrt(2.0 / Nin) 31 | print "WARNING: USING DIFFERENT STDDEV FOR LINEAR!" 32 | stddev = math.sqrt(1.0 / Nin) 33 | print "linear layer using stddev =", stddev 34 | weights = tf.Variable(tf.truncated_normal([Nin, Nout], stddev=0.1)) 35 | bias = tf.Variable(tf.constant(0.0, shape=[Nout])) 36 | out = tf.matmul(inputs, weights) + bias 37 | return out 38 | 39 | def ReLU_fully_connected_layer(inputs, Nin, Nout): 40 | return tf.nn.relu(linear_layer(inputs, Nin, Nout)) 41 | 42 | def ELU_fully_connected_layer(inputs, Nin, Nout): 43 | return tf.nn.elu(linear_layer(inputs, Nin, Nout)) 44 | 45 | 46 | def preReLU_conv_pos_dep_bias(inputs, diameter, Nin, Nout, N, name): 47 | return conv_pos_dep_bias(tf.nn.relu(inputs), diameter, Nin, Nout, N, name) 48 | 49 | def preELU_conv_pos_dep_bias(inputs, diameter, Nin, Nout, N, name): 50 | return conv_pos_dep_bias(tf.nn.elu(inputs), diameter, Nin, Nout, N, name) 51 | 52 | def residual_block_preReLU_2convs_pos_dep_bias(inputs, diameter, Nfeat, N, name): 53 | conv1 = preReLU_conv_pos_dep_bias(inputs, diameter, Nfeat, Nfeat, N, name + '_1') 54 | conv2 = preReLU_conv_pos_dep_bias(conv1, diameter, Nfeat, Nfeat, N, name + '_2') 55 | return inputs + conv2 56 | 57 | def residual_block_preELU_2convs_pos_dep_bias(inputs, diameter, Nfeat, N, name): 58 | conv1 = preELU_conv_pos_dep_bias(inputs, diameter, Nfeat, Nfeat, N, name + '_1') 59 | conv2 = preELU_conv_pos_dep_bias(conv1, diameter, Nfeat, Nfeat, N, name + '_2') 60 | return inputs + conv2 61 | 62 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/MakeEvalData.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import numpy as np 3 | import os 4 | import os.path 5 | import random 6 | from Board import * 7 | from SGFReader import SGFReader 8 | import Features 9 | import NPZ 10 | 11 | 12 | def write_game_data(sgf, writer, feature_maker, rank_allowed, komi_allowed): 13 | reader = SGFReader(sgf) 14 | 15 | if not rank_allowed(reader.black_rank) or not rank_allowed(reader.white_rank): 16 | print "skipping %s b/c of disallowed rank. 
ranks are %s, %s" % (sgf, reader.black_rank, reader.white_rank) 17 | return 18 | 19 | if reader.komi == None: 20 | print "skiping %s b/c there's no komi given" % sgf 21 | return 22 | komi = float(reader.komi) 23 | if not komi_allowed(komi): 24 | print "skipping %s b/c of non-allowed komi \"%s\"" % (sgf, reader.komi) 25 | 26 | if reader.result == None: 27 | print "skipping %s because there's no result given" % sgf 28 | return 29 | elif "B+" in reader.result: 30 | winner = Color.Black 31 | elif "W+" in reader.result: 32 | winner = Color.White 33 | else: 34 | print "skipping %s because I can't figure out the winner from \"%s\"" % (sgf, reader.result) 35 | return 36 | 37 | while True: 38 | feature_planes = feature_maker(reader.board, reader.next_play_color(), komi) 39 | final_score = +1 if reader.next_play_color() == winner else -1 40 | final_score_arr = np.array([final_score], dtype=np.int8) 41 | 42 | writer.push_example((feature_planes, final_score_arr)) 43 | if reader.has_more(): 44 | reader.play_next_move() 45 | else: 46 | break 47 | 48 | def make_KGS_eval_data(): 49 | N = 19 50 | Nfeat = 22 51 | feature_maker = Features.make_feature_planes_stones_4liberties_4history_ko_4captures_komi 52 | 53 | #for set_name in ['train', 'val', 'test']: 54 | print "WARNING: ONLY DOING VAL AND TEST SETS!" 55 | for set_name in ['val', 'test']: 56 | games_dir = "/home/greg/coding/ML/go/NN/data/KGS/SGFs/%s" % set_name 57 | out_dir = "/home/greg/coding/ML/go/NN/data/KGS/eval_examples/stones_4lib_4hist_ko_4cap_komi_Nf22/%s" % set_name 58 | 59 | writer = NPZ.RandomizingWriter(out_dir=out_dir, 60 | names=['feature_planes', 'final_scores'], 61 | shapes=[(N,N,Nfeat), (1,)], 62 | dtypes=[np.int8, np.int8], 63 | Nperfile=128, buffer_len=50000) 64 | 65 | rank_allowed = lambda rank: True 66 | 67 | komi_allowed = lambda komi: komi in [0.5, 5.5, 6.5, 7.5] 68 | 69 | sgfs = [] 70 | for sub_dir in os.listdir(games_dir): 71 | for fn in os.listdir(os.path.join(games_dir, sub_dir)): 72 | sgfs.append(os.path.join(games_dir, sub_dir, fn)) 73 | random.shuffle(sgfs) 74 | 75 | num_games = 0 76 | for sgf in sgfs: 77 | #print "making eval data from %s" % sgf 78 | write_game_data(sgf, writer, feature_maker, rank_allowed, komi_allowed) 79 | num_games += 1 80 | if num_games % 100 == 0: print "Finished %d games of %d" % (num_games, len(sgfs)) 81 | 82 | writer.drain() 83 | 84 | 85 | def komi_test(): 86 | games_dir = "/home/greg/coding/ML/go/NN/data/KGS/SGFs/train" 87 | sgfs = [] 88 | for sub_dir in os.listdir(games_dir): 89 | for fn in os.listdir(os.path.join(games_dir, sub_dir)): 90 | sgfs.append(os.path.join(games_dir, sub_dir, fn)) 91 | random.shuffle(sgfs) 92 | counts = {} 93 | num_games = 0 94 | for sgf in sgfs: 95 | reader = SGFReader(sgf) 96 | print "komi =", reader.komi 97 | if reader.komi in counts: 98 | counts[reader.komi] += 1 99 | else: 100 | counts[reader.komi] = 1 101 | num_games += 1 102 | if num_games % 100 == 0: 103 | print "counts:", counts 104 | 105 | 106 | 107 | if __name__ == '__main__': 108 | make_KGS_eval_data() 109 | #komi_test() 110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/MakeInfluenceData.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import numpy as np 3 | import os 4 | import os.path 5 | import random 6 | from Board import * 7 | from SGFReader import SGFReader 8 | #from MakeMoveData import show_plane 9 | import Features 10 | import NPZ 11 | 12 | def 
find_vertices_reached_by_color(board, color): 13 | visited = np.zeros((board.N, board.N), dtype=np.bool_) 14 | reached = np.zeros((board.N, board.N), dtype=np.int8) 15 | 16 | for x in xrange(board.N): 17 | for y in xrange(board.N): 18 | if not visited[x,y] and board[x,y] == color: 19 | q = [(x,y)] 20 | visited[x,y] = True 21 | reached[x,y] = 1 22 | while q: 23 | vert = q.pop() 24 | for adj in board.adj_vertices(vert): 25 | if not visited[adj] and (board[adj] == color or board[adj] == Color.Empty): 26 | q.append(adj) 27 | visited[adj] = True 28 | reached[adj] = True 29 | return reached 30 | 31 | 32 | 33 | 34 | def get_final_territory_map(sgf): 35 | reader = SGFReader(sgf) 36 | while reader.has_more(): 37 | reader.play_next_move() 38 | 39 | #reader.board.show() 40 | 41 | reached_by_black = find_vertices_reached_by_color(reader.board, Color.Black) 42 | reached_by_white = find_vertices_reached_by_color(reader.board, Color.White) 43 | 44 | #print "reached_by_black:" 45 | #show_plane(reached_by_black) 46 | #print "reached_by_white:" 47 | #show_plane(reached_by_white) 48 | 49 | territory_map = reached_by_black - reached_by_white 50 | #print "territory_map:\n", territory_map 51 | return territory_map 52 | 53 | 54 | def write_game_data(sgf, sgf_aftermath, writer, feature_maker, rank_allowed): 55 | final_map_black_positive = get_final_territory_map(sgf_aftermath) 56 | final_map_white_positive = (-1) * final_map_black_positive 57 | reader = SGFReader(sgf) 58 | 59 | if not rank_allowed(reader.black_rank) or not rank_allowed(reader.white_rank): 60 | #print "skipping game b/c of disallowed rank. ranks are %s, %s" % (reader.black_rank, reader.white_rank) 61 | return 62 | 63 | while True: 64 | feature_planes = feature_maker(reader.board, reader.next_play_color()) 65 | final_map = final_map_black_positive if reader.next_play_color() == Color.Black else final_map_white_positive 66 | writer.push_example((feature_planes, final_map)) 67 | if reader.has_more(): 68 | reader.play_next_move() 69 | else: 70 | break 71 | 72 | def make_KGS_influence_data(): 73 | N = 19 74 | Nfeat = 15 75 | feature_maker = Features.make_feature_planes_stones_3liberties_4history_ko 76 | 77 | for set_name in ['train', 'val', 'test']: 78 | games_dir = "/home/greg/coding/ML/go/NN/data/KGS/influence/games/played_out/%s" % set_name 79 | aftermath_dir = "/home/greg/coding/ML/go/NN/data/KGS/influence/games/playouts" 80 | out_dir = "/home/greg/coding/ML/go/NN/data/KGS/influence/examples/stones_3lib_4hist_ko_Nf15/%s" % set_name 81 | 82 | writer = NPZ.RandomizingWriter(out_dir=out_dir, 83 | names=['feature_planes', 'final_maps'], 84 | shapes=[(N,N,Nfeat), (N,N)], 85 | dtypes=[np.int8, np.int8], 86 | Nperfile=128, buffer_len=50000) 87 | 88 | rank_allowed = lambda rank: rank in ['1d', '2d', '3d', '4d', '5d', '6d', '7d', '8d', '9d', '10d', 89 | '1p', '2p', '3p', '4p', '5p', '6p', '7p', '8p', '9p', '10p'] 90 | 91 | game_fns = os.listdir(games_dir) 92 | random.shuffle(game_fns) 93 | num_games = 0 94 | for fn in game_fns: 95 | print "making influence data from %s" % fn 96 | sgf = os.path.join(games_dir, fn) 97 | sgf_aftermath = os.path.join(aftermath_dir, 'played_out_' + fn) 98 | assert os.path.isfile(sgf_aftermath) 99 | 100 | write_game_data(sgf, sgf_aftermath, writer, feature_maker, rank_allowed) 101 | 102 | num_games += 1 103 | if num_games % 100 == 0: print "Finished %d games of %d" % (num_games, len(game_fns)) 104 | 105 | writer.drain() 106 | 107 | 108 | 109 | if __name__ == '__main__': 110 | 
#get_final_territory_map("/home/greg/coding/ML/go/NN/data/KGS/influence/games/playouts/played_out_2001-05-01-2.sgf") 111 | #make_KGS_influence_data() 112 | import cProfile 113 | cProfile.run('make_KGS_influence_data()', sort='cumtime') 114 | 115 | 116 | 117 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/MakeMoveData.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import numpy as np 3 | import sys 4 | import os 5 | import os.path 6 | import time 7 | import random 8 | from SGFReader import SGFReader 9 | from Board import * 10 | import Features 11 | import NPZ 12 | 13 | def make_move_arr(x, y): 14 | return np.array([x,y], dtype=np.int8) 15 | 16 | def show_plane(array): 17 | assert len(array.shape) == 2 18 | N = array.shape[0] 19 | print "=" * N 20 | for y in xrange(N): 21 | for x in xrange(N): 22 | sys.stdout.write('1' if array[x,y]==1 else '0') 23 | sys.stdout.write('\n') 24 | print "=" * array.shape[1] 25 | 26 | def show_all_planes(array): 27 | assert len(array.shape) == 3 28 | for i in xrange(array.shape[2]): 29 | print "PLANE %d:" % i 30 | show_plane(array[:,:,i]) 31 | 32 | def show_feature_planes_and_move(feature_planes, move): 33 | print "FEATURE PLANES:" 34 | show_all_planes(feature_planes) 35 | print "MOVE:" 36 | print move 37 | 38 | def show_batch(all_feature_planes, all_moves): 39 | batch_size = all_feature_planes.shape[0] 40 | print "MINIBATCH OF SIZE", batch_size 41 | for i in xrange(batch_size): 42 | print "EXAMPLE", i 43 | show_feature_planes_and_move(all_feature_planes[i,:,:,:], all_moves[i,:]) 44 | 45 | def test_feature_planes(): 46 | board = Board(5) 47 | moves = [(0,0), (1,1), (2,2), (3,3), (4,4)] 48 | play_color = Color.Black 49 | for x,y in moves: 50 | board.show() 51 | feature_planes = make_feature_planes(board, play_color) 52 | move_arr = make_move_arr(x, y) 53 | show_feature_planes_and_move(feature_planes, move_arr) 54 | print 55 | board.play_stone(x, y, play_color) 56 | play_color = flipped_color[play_color] 57 | 58 | def write_game_data(sgf, writer, feature_maker, rank_allowed): 59 | reader = SGFReader(sgf) 60 | 61 | color_is_good = { Color.Black: rank_allowed(reader.black_rank), 62 | Color.White: rank_allowed(reader.white_rank) } 63 | if (not color_is_good[Color.White]) and (not color_is_good[Color.Black]): 64 | print "skipping game b/c of disallowed rank. ranks are B=%s, W=%s" % (reader.black_rank, reader.white_rank) 65 | return 66 | elif not color_is_good[Color.White]: 67 | print "ignoring white because of rank. ranks are B=%s, W=%s" % (reader.black_rank, reader.white_rank) 68 | elif not color_is_good[Color.Black]: 69 | print "ignoring black because of rank. ranks are B=%s, W=%s" % (reader.black_rank, reader.white_rank) 70 | 71 | try: 72 | while reader.has_more(): 73 | vertex, color = reader.peek_next_move() 74 | if vertex and color_is_good[color]: # if not pass, and if player is good enough 75 | x, y = vertex 76 | if reader.board.play_is_legal(x, y, color): 77 | feature_planes = feature_maker(reader.board, color) 78 | move_arr = make_move_arr(x, y) 79 | writer.push_example((feature_planes, move_arr)) 80 | else: 81 | raise IllegalMoveException("playing a %s stone at (%d,%d) is illegal" % (color_names[color], x, y)) 82 | reader.play_next_move() 83 | except IllegalMoveException, e: 84 | print "Aborting b/c of illegal move!" 
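        # An illegal move here means the SGF record and the Board logic disagree,
        # so the run is aborted (exit below) instead of silently writing bad
        # training examples.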
85 | print "sgf causing exception is %s" % sgf 86 | print e 87 | exit(-1) 88 | 89 | def make_move_prediction_data(sgf_list, N, Nfeat, out_dir, feature_maker, rank_allowed): 90 | sgf_list = list(sgf_list) # make local copy to permute 91 | random.shuffle(sgf_list) 92 | 93 | writer = NPZ.RandomizingWriter(out_dir=out_dir, 94 | names=['feature_planes', 'moves'], 95 | shapes=[(N,N,Nfeat), (2,)], 96 | dtypes=[np.int8, np.int8], 97 | Nperfile=128, buffer_len=50000) 98 | 99 | num_games = 0 100 | for sgf in sgf_list: 101 | print "processing %s" % sgf 102 | write_game_data(sgf, writer, feature_maker, rank_allowed) 103 | num_games += 1 104 | if num_games % 100 == 0: print "num_games =", num_games 105 | writer.drain() 106 | 107 | def make_KGS_move_data(): 108 | N = 19 109 | Nfeat = 21 110 | feature_maker = Features.make_feature_planes_stones_4liberties_4history_ko_4captures 111 | rank_allowed = lambda rank: rank in ['6d', '7d', '8d', '9d', '10d', '11d', 112 | '1p', '2p', '3p', '4p', '5p', '6p', '7p', '8p', '9p', '10p', '11p'] 113 | 114 | for set_name in ['train', 'val', 'test']: 115 | base_dir = "/home/greg/coding/ML/go/NN/data/KGS/SGFs/%s" % set_name 116 | sgfs = [os.path.join(base_dir, sub_dir, fn) for sub_dir in os.listdir(base_dir) for fn in os.listdir(os.path.join(base_dir, sub_dir))] 117 | out_dir = "/home/greg/coding/ML/go/NN/data/KGS/move_examples/6dan_stones_4lib_4hist_ko_4cap_Nf21/%s" % set_name 118 | 119 | make_move_prediction_data(sgfs, N, Nfeat, out_dir, feature_maker, rank_allowed) 120 | 121 | def make_GoGoD_move_data(): 122 | N = 19 123 | Nfeat = 21 124 | feature_maker = Features.make_feature_planes_stones_4liberties_4history_ko_4captures 125 | rank_allowed = lambda rank: rank in [ '1d', '2d', '3d', '4d', '5d', '6d', '7d', '8d', '9d', '10d', '11d' ] 126 | 127 | for set_name in ['train', 'val', 'test']: 128 | with open('/home/greg/coding/ML/go/NN/data/GoGoD/bad_sgfs/bad_sgfs.txt', 'r') as f: 129 | excluded_sgfs = [line.strip() for line in f.readlines()] 130 | print "excluded_sgfs =\n", excluded_sgfs 131 | base_dir = "/home/greg/coding/ML/go/NN/data/GoGoD/sets/%s" % set_name 132 | sgfs = [os.path.join(base_dir, sub_dir, fn) for sub_dir in os.listdir(base_dir) for fn in os.listdir(os.path.join(base_dir, sub_dir)) if fn not in excluded_sgfs] 133 | out_dir = "/home/greg/coding/ML/go/NN/data/GoGoD/move_examples/stones_4lib_4hist_ko_4cap_Nf21/%s" % set_name 134 | make_move_prediction_data(sgfs, N, Nfeat, out_dir, feature_maker, rank_allowed) 135 | 136 | 137 | 138 | if __name__ == "__main__": 139 | #test_feature_planes() 140 | #test_minibatch_read_write() 141 | #test_TrainingDataWrite() 142 | #run_PlaneTester() 143 | 144 | #make_KGS_move_data() 145 | make_GoGoD_move_data() 146 | #make_CGOS9x9_training_data() 147 | 148 | #import cProfile 149 | #cProfile.run('make_KGS_training_data()', sort='cumtime') 150 | 151 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/MirrorEngine.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import numpy as np 3 | from Engine import * 4 | from Board import * 5 | 6 | class MirrorEngine(BaseEngine): 7 | def __init__(self): 8 | super(BaseEngine,self).__init__() 9 | self.last_opponent_play = None 10 | 11 | def name(self): 12 | return "MirrorEngine" 13 | 14 | def version(self): 15 | return "1.0" 16 | 17 | def stone_played(self, x, y, color): 18 | super(MirrorEngine, self).stone_played(x, y, color) 19 | self.last_opponent_play = (x,y) 20 | 21 | def 
pick_move(self, color): 22 | if not self.opponent_passed and self.last_opponent_play: 23 | mirror_x = self.board.N - self.last_opponent_play[0] - 1 24 | mirror_y = self.board.N - self.last_opponent_play[1] - 1 25 | if self.board.play_is_legal(mirror_x, mirror_y, color): 26 | return (mirror_x, mirror_y) 27 | 28 | enemy_stones = (self.board.vertices == flipped_color[color]) 29 | our_stones = (self.board.vertices == color) 30 | rot_enemy_stones = np.rot90(enemy_stones, 2) 31 | 32 | play_vertices = np.logical_and(rot_enemy_stones, np.logical_not(our_stones)) 33 | play_vertices = np.logical_and(play_vertices, np.logical_not(enemy_stones)) 34 | 35 | for x in xrange(self.board.N): 36 | for y in xrange(self.board.N): 37 | if play_vertices[x,y] and self.board.play_is_legal(x, y, color): 38 | return (x,y) 39 | 40 | center = (self.board.N/2, self.board.N/2) 41 | if self.board[center] == Color.Empty and self.board.play_is_legal(center[0], center[1], color): 42 | return center 43 | 44 | return None 45 | 46 | 47 | if __name__ == '__main__': 48 | import sys 49 | import os 50 | from GTP import GTP 51 | 52 | # Redirect stuff that would normally go to stdout 53 | # and stderr to a file. 54 | fclient = sys.stdout 55 | logfile = "log_mirror.txt" 56 | sys.stdout = sys.stderr = open(logfile, 'w', 0) # 0 = unbuffered 57 | 58 | engine = MirrorEngine() 59 | gtp = GTP(engine, fclient) 60 | gtp.loop() 61 | 62 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/MoveTraining.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import random 4 | import Symmetry 5 | 6 | def apply_random_symmetries(many_feature_planes, many_move_arrs): 7 | N = many_feature_planes.shape[1] 8 | for i in range(many_feature_planes.shape[0]): 9 | s = random.randint(0, 7) 10 | Symmetry.apply_symmetry_planes(many_feature_planes[i,:,:,:], s) 11 | Symmetry.apply_symmetry_vertex(many_move_arrs[i,:], N, s) 12 | 13 | 14 | def build_feed_dict(loader, apply_normalization, feature_planes, move_indices): 15 | batch = loader.next_minibatch(('feature_planes', 'moves') ) 16 | loaded_feature_planes = batch['feature_planes'].astype(np.float32) 17 | loaded_move_arrs = batch['moves'].astype(np.int32) # BIT ME HARD. 
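    # 'moves' are stored as int8 (x, y) pairs (see make_move_arr in
    # MakeMoveData.py); they are cast to int32 here and flattened below into
    # single board indices N*x + y, the form expected by the sparse softmax
    # cross-entropy loss in loss_func.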
18 | 19 | apply_normalization(loaded_feature_planes) 20 | 21 | apply_random_symmetries(loaded_feature_planes, loaded_move_arrs) 22 | 23 | N = loaded_feature_planes.shape[1] 24 | loaded_move_indices = N * loaded_move_arrs[:,0] + loaded_move_arrs[:,1] 25 | 26 | return { feature_planes: loaded_feature_planes.astype(np.float32), 27 | move_indices: loaded_move_indices } 28 | 29 | def loss_func(logits): 30 | move_indices = tf.placeholder(tf.int64, shape=[None]) 31 | 32 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, move_indices) 33 | cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy_mean') 34 | 35 | correct_prediction = tf.equal(tf.argmax(logits,1), move_indices) 36 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 37 | return move_indices, cross_entropy_mean, accuracy 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/OnlineExampleMaker.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class QueueWriter: 4 | def __init__(self, batch_queue, names, shapes, dtypes, minibatch_size, buffer_len): 5 | assert buffer_len >= Nperfile 6 | assert len(names) == len(shapes) == len(dtypes) 7 | self.batch_queue = batch_queue 8 | self.names = names 9 | self.shapes = shapes 10 | self.dtypes = dtypes 11 | self.minibatch_size = minibatch_size 12 | self.buffer_len = buffer_len 13 | self.examples = [] 14 | 15 | def push_example(self, example): 16 | assert len(example) == len(self.names) 17 | for i in xrange(len(example)): 18 | assert example[i].dtype == self.dtypes[i] 19 | self.examples.append(example) 20 | if len(self.examples) >= self.buffer_len: 21 | self.write_minibatch_to_queue() 22 | 23 | def write_minibatch_to_queue(self): 24 | assert len(self.examples) >= self.minibatch_size 25 | 26 | # put minibatch_size random examples at the end of the list 27 | for i in xrange(self.minibatch_size): 28 | a = len(self.examples) - i - 1 29 | if a > 0: 30 | b = random.randint(0, a-1) 31 | self.examples[a], self.examples[b] = self.examples[b], self.examples[a] 32 | 33 | # pop minibatch_size examples off the end of the list 34 | # put each component into a separate numpy batch array 35 | save_dict = {} 36 | for c,name in enumerate(self.names): 37 | batch_shape = (self.minibatch_size,) + self.shapes[c] 38 | batch = np.empty(batch_shape, dtype=self.dtypes[c]) 39 | for i in xrange(self.Nperfile): 40 | batch[i,:] = self.examples[-1-i][c] 41 | save_dict[name] = batch 42 | 43 | del self.examples[-self.Nperfile:] 44 | 45 | self.batch_queue.put(batch, block=True) 46 | 47 | 48 | 49 | def make_game_data_eval(sgf, writer, feature_maker, apply_normalization, rank_allowed, komi_allowed): 50 | reader = SGFReader(sgf) 51 | 52 | if not rank_allowed(reader.black_rank) or not rank_allowed(reader.white_rank): 53 | print "skipping %s b/c of disallowed rank. 
ranks are %s, %s" % (sgf, reader.black_rank, reader.white_rank) 54 | return 55 | 56 | if reader.komi == None: 57 | print "skiping %s b/c there's no komi given" % sgf 58 | return 59 | komi = float(reader.komi) 60 | if not komi_allowed(komi): 61 | print "skipping %s b/c of non-allowed komi \"%s\"" % (sgf, reader.komi) 62 | 63 | if reader.result == None: 64 | print "skipping %s because there's no result given" % sgf 65 | return 66 | elif "B+" in reader.result: 67 | winner = Color.Black 68 | elif "W+" in reader.result: 69 | winner = Color.White 70 | else: 71 | print "skipping %s because I can't figure out the winner from \"%s\"" % (sgf, reader.result) 72 | return 73 | 74 | while True: 75 | feature_planes = feature_maker(reader.board, reader.next_play_color(), komi) 76 | final_score = +1 if reader.next_play_color() == winner else -1 77 | final_score_arr = np.array([final_score], dtype=np.int8) 78 | 79 | feature_planes_normalized = feature_plane.astype(np.float32) 80 | apply_normalization(feature_planes_normalized) 81 | 82 | assert False, "need to add random symmetries and maybe other stuff" 83 | 84 | writer.push_example((feature_planes_normalized, final_score_arr)) 85 | if reader.has_more(): 86 | reader.play_next_move() 87 | else: 88 | break 89 | 90 | 91 | def async_worker_eval(self, batch_queue, sgfs, make_game_data): 92 | writer = QueueWriter(batch_queue=batch_queue, 93 | names=['feature_planes', 'final_scores'], 94 | shapes=[(N,N,Nfeat), (1,)], 95 | dtypes=[np.int8, np.int8], 96 | minibatch_size=128, buffer_len=50000) 97 | while True: 98 | random.shuffle(sgfs) 99 | for sgf in sgfs: 100 | make_game_data(sgf, writer) 101 | 102 | 103 | 104 | 105 | class OnlineExampleQueue: 106 | def __init__(self, sgfs, make_example): 107 | base_dir = '/home/greg/coding/ML/go/NN/data/4dKGS/SGFs/train' 108 | sgfs = [] 109 | for sub_dir in os.listdir(base_dir): 110 | for fn in os.listdir(os.path.join(base_dir, sub_dir)): 111 | sgfs.append(os.path.join(base_dir, sub_dir, fn)) 112 | 113 | self.q = multiprocessing.Queue(maxsize=5) 114 | 115 | make_game_data = functools.partial(make_game_data_eval( 116 | feature_maker=Features.make_feature_planes_stones_4liberties_4history_ko_4captures_komi, 117 | apply_normalization=Normalization.apply_featurewise_normalization_D, 118 | rank_allowed=lambda rank: rank in ['1d', '2d', '3d', '4d', '5d', '6d', '7d', '8d', '9d', '10d'], 119 | komi_allowed=lambda komi: komi in [0.5, 5.5, 6.5, 7.5]) 120 | 121 | self.process = multiprocessing.Process(target=async_worker_eval, args=(self.q, sgfs, make_game_data)) 122 | self.process.daemon = True 123 | self.process.start() 124 | 125 | def next_feed_dict(self, feature_planes_ph, final_scores_ph): 126 | feed_dict_strings = self.q.get(block=True, timeout=5) 127 | return { feature_planes_ph: feed_dict_strings['feature_planes'], 128 | final_scores_ph: feed_dict_strings['final_scores'] } 129 | 130 | 131 | 132 | 133 | 134 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/Policy.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import os.path 4 | import Checkpoint 5 | import Features 6 | import Normalization 7 | 8 | class AllPolicy: 9 | def suggest_moves(self, board): 10 | moves = [] 11 | for x in xrange(board.N): 12 | for y in xrange(board.N): 13 | if board.play_is_legal(x, y, board.color_to_play): 14 | moves.append((x,y)) 15 | return moves 16 | 17 | def softmax(E, temp): 18 | #print "E =\n", E 19 
| expE = np.exp(temp * (E - max(E))) # subtract max to avoid overflow 20 | return expE / np.sum(expE) 21 | 22 | class TFPolicy: 23 | def __init__(self, model, threshold_prob, softmax_temp): 24 | self.model = model 25 | self.threshold_prob = threshold_prob 26 | self.softmax_temp = softmax_temp 27 | 28 | # build the graph 29 | with tf.Graph().as_default(): 30 | with tf.device('/cpu:0'): 31 | self.feature_planes = tf.placeholder(tf.float32, shape=[None, self.model.N, self.model.N, self.model.Nfeat], name='feature_planes') 32 | self.logits = model.inference(self.feature_planes, self.model.N, self.model.Nfeat) 33 | saver = tf.train.Saver(tf.trainable_variables()) 34 | init = tf.initialize_all_variables() 35 | self.sess = tf.Session(config=tf.ConfigProto(log_device_placement=False)) 36 | self.sess.run(init) 37 | checkpoint_dir = os.path.join(model.train_dir, 'checkpoints') 38 | Checkpoint.restore_from_checkpoint(self.sess, saver, checkpoint_dir) 39 | 40 | def suggest_moves(self, board): 41 | board_feature_planes = Features.make_feature_planes_stones_4liberties_4history_ko_4captures(board, board.color_to_play).astype(np.float32) 42 | Normalization.apply_featurewise_normalization_C(board_feature_planes) 43 | feed_dict = {self.feature_planes: board_feature_planes.reshape(1,self.model.N,self.model.N,self.model.Nfeat)} 44 | move_logits = self.sess.run(self.logits, feed_dict).ravel() # ravel flattens to 1D 45 | # zero out illegal moves 46 | for x in xrange(self.model.N): 47 | for y in xrange(self.model.N): 48 | ind = self.model.N * x + y 49 | if not board.play_is_legal(x, y, board.color_to_play): 50 | move_logits[ind] = -1e99 51 | move_probs = softmax(move_logits, self.softmax_temp) 52 | sum_probs = np.sum(move_probs) 53 | if sum_probs == 0: return [] # no legal moves 54 | move_probs /= sum_probs # re-normalize probabilities 55 | 56 | good_moves = [] 57 | cum_prob = 0.0 58 | while cum_prob < self.threshold_prob: 59 | ind = np.argmax(move_probs) 60 | x,y = ind / self.model.N, ind % self.model.N 61 | good_moves.append((x,y)) 62 | prob = move_probs[ind] 63 | cum_prob += prob 64 | move_probs[ind] = 0 65 | 66 | return good_moves 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/SGFReader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | from Board import * 4 | 5 | READING_NAME = 1 6 | READING_DATA = 2 7 | 8 | separators = set(['(', ')', ' ', '\n', '\r', '\t', ';']) 9 | 10 | properties_taking_lists = set(['AB', # add black stone (handicap) 11 | 'AW', # add white stone (handicap) 12 | ]) 13 | 14 | def parse_property_name(file_data, ptr): 15 | while file_data[ptr] in separators: 16 | ptr += 1 17 | if ptr >= len(file_data): return (None, ptr) 18 | name = '' 19 | while file_data[ptr] != '[': 20 | name += file_data[ptr] 21 | ptr += 1 22 | return (name, ptr) 23 | 24 | def parse_property_data(file_data, ptr): 25 | while file_data[ptr].isspace(): 26 | ptr += 1 27 | if file_data[ptr] != '[': 28 | return (None, ptr) 29 | ptr += 1 30 | data = '' 31 | while file_data[ptr] != ']': 32 | data += file_data[ptr] 33 | ptr += 1 34 | ptr += 1 35 | return (data, ptr) 36 | 37 | def parse_property_data_list(file_data, ptr): 38 | data_list = [] 39 | while True: 40 | (data, ptr) = parse_property_data(file_data, ptr) 41 | if data == None: 42 | return (data_list, ptr) 43 | else: 44 | data_list.append(data) 45 | 46 | def parse_vertex(s): 47 | if len(s) == 0: 48 | return None # pass 49 | if 
s == "tt": # GoGoD sometimes uses this to indicate a pass 50 | return None # We are sacrificing >19x19 support here 51 | x = ord(s[0]) - ord('a') 52 | y = ord(s[1]) - ord('a') 53 | return (x,y) 54 | 55 | class SGFParser: 56 | def __init__(self, filename): 57 | with open(filename, 'r') as f: 58 | self.file_data = f.read() 59 | self.ptr = 0 60 | 61 | def __iter__(self): 62 | return self 63 | 64 | def next(self): 65 | (property_name, self.ptr) = parse_property_name(self.file_data, self.ptr) 66 | if property_name == None: 67 | raise StopIteration 68 | elif property_name in properties_taking_lists: 69 | (property_data, self.ptr) = parse_property_data_list(self.file_data, self.ptr) 70 | else: 71 | (property_data, self.ptr) = parse_property_data(self.file_data, self.ptr) 72 | return (property_name, property_data) 73 | 74 | 75 | def test_SGFParser(): 76 | #sgf = "../data/KGS/SGFs/KGS2001/2000-10-10-1.sgf" 77 | sgf = "/home/greg/coding/ML/go/NN/data/GoGoD/modern_games/2007/2007-08-21g.sgf" 78 | parser = SGFParser(sgf) 79 | for property_name, property_data in parser: 80 | print "\"%s\" = \"%s\"" % (property_name, property_data) 81 | 82 | 83 | class SGFReader: 84 | def __init__(self, filename): 85 | self.filename = filename 86 | parser = SGFParser(filename) 87 | self.initial_stones = [] 88 | self.moves = [] 89 | self.black_rank = None 90 | self.white_rank = None 91 | self.result = None 92 | self.board = None 93 | self.komi = None 94 | for property_name, property_data in parser: 95 | if property_name == "SZ": # board size 96 | self.board = Board(int(property_data)) 97 | elif property_name == "AB": # black initial stones 98 | for vertex_str in property_data: 99 | self.initial_stones.append((parse_vertex(vertex_str), Color.Black)) 100 | elif property_name == "AW": # white initial stones 101 | for vertex_str in property_data: 102 | self.initial_stones.append((parse_vertex(vertex_str), Color.White)) 103 | elif property_name == "B": # black plays 104 | self.moves.append((parse_vertex(property_data), Color.Black)) 105 | elif property_name == "W": # white plays 106 | self.moves.append((parse_vertex(property_data), Color.White)) 107 | elif property_name == "BR": # black rank 108 | self.black_rank = property_data 109 | elif property_name == "WR": # white rank 110 | self.white_rank = property_data 111 | elif property_name == "RE": # result 112 | self.result = property_data 113 | elif property_name == "KM": # komi 114 | self.komi = property_data 115 | 116 | if not self.board: 117 | self.board = Board(19) # assume 19x19 if we didn't see a size 118 | 119 | for (x,y), color in self.initial_stones: 120 | self.board.play_stone(x, y, color) 121 | 122 | self.moves_played = 0 123 | 124 | def has_more(self): 125 | return self.moves_played < len(self.moves) 126 | 127 | def peek_next_move(self): 128 | return self.moves[self.moves_played] 129 | 130 | def play_next_move(self): 131 | move = self.moves[self.moves_played] 132 | self.moves_played += 1 133 | vertex, color = move 134 | if vertex: 135 | x,y = vertex 136 | self.board.play_stone(x, y, color) 137 | else: 138 | self.board.play_pass() 139 | return move 140 | 141 | def next_play_color(self): 142 | if self.has_more(): 143 | return self.moves[self.moves_played][1] 144 | elif self.moves: 145 | return flipped_color[self.moves[-1][1]] 146 | elif self.initial_stones: 147 | return flipped_color[self.initial_stones[-1][1]] 148 | else: 149 | return Color.Black 150 | 151 | 152 | def test_SGFReader(): 153 | #sgf = 
"/home/greg/coding/ML/go/NN/data/KGS/SGFs/kgs-19-2008-02-new/2008-02-09-18.sgf" 154 | sgf = "/home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1995/1995-07-01c.sgf" 155 | reader = SGFReader(sgf) 156 | 157 | print "initial position:" 158 | reader.board.show() 159 | 160 | while reader.has_more(): 161 | print "before move, next play color is", color_names[reader.next_play_color()] 162 | vertex, color = reader.play_next_move() 163 | print "after move", vertex, "by", color_names[color], "board is" 164 | reader.board.show() 165 | print "after move, next play color is", color_names[reader.next_play_color()] 166 | 167 | print "Game over." 168 | 169 | if __name__ == "__main__": 170 | #test_SGFParser() 171 | test_SGFReader() 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/SelfPlay.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Self play games as used by DeepMind to train AlphaGo's value network. Play a 4 | # policy against itself, but insert single random move somewhere in the game. 5 | # Use the position immediately after the random move together with the final 6 | # game result as a single training example for the value network. 7 | 8 | def run_self_play_game_with_random_move(engine, N=19, komi=7.5): 9 | board = Board(N) 10 | 11 | engine.clear_board() 12 | engine.set_board_size(N) 13 | engine.set_komi(komi) 14 | 15 | random_policy = RandomPolicy() 16 | 17 | example_feature_planes = None 18 | example_color_to_play = None 19 | 20 | random_move_num = np.randint(0, 450) 21 | print "random_move_num = ", random_move_num 22 | move_num = 0 23 | consecutive_passes = 0 24 | result = None 25 | while consecutive_passes < 2: 26 | if move_num == random_move_num: 27 | move = random_policy.pick_move(board) 28 | board.play_move(move) 29 | engine.move_was_played(move) 30 | example_color_to_play = board.color_to_play 31 | print "chose random move (%d,%d) for %s on move #%d" % (move.x, move.y, color_names[example_color_to_play], move_num) 32 | example_feature_planes = Features.make_feature_planes_stones_3liberties_4history_ko(board, example_color_to_play) 33 | else: 34 | move = engine.generate_move(board) 35 | if move.is_resign(): 36 | result = "B+Resign" if board.color_to_play == Color.Black else "W+Resign" 37 | break 38 | elif move.is_pass(): 39 | consecutive_passes += 1 40 | else: 41 | consecutive_passes = 0 42 | board.play_move(move) 43 | move_num += 1 44 | 45 | if result == None: 46 | result = engine.final_score() 47 | 48 | print "self play game finished. result is", result 49 | 50 | if example_feature_planes != None: 51 | winner = Color.Black if "B+" in result else Color.White 52 | example_outcome = +1 if winner == example_color_to_play else -1 53 | print "produced example with example_outcome = %d" % example_outcome 54 | return (example_feature_planes, example_outcome) 55 | else: 56 | print "game didn't go long enough: no example produced." 
57 | return None 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/Symmetry.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | # in place, hopefully 5 | def apply_symmetry_features_example(many_planes, i, s): 6 | assert len(many_planes.shape) == 4 7 | if (s & 1) != 0: # flip x 8 | many_planes[i,:,:,:] = many_planes[i,::-1,:,:] 9 | if (s & 2) != 0: # flip y 10 | many_planes[i,:,:,:] = many_planes[i,:,::-1,:] 11 | if (s & 4) != 0: # swap x and y 12 | many_planes[i,:,:,:] = many_planes[i,:,:,:].transpose(1, 0, 2) 13 | 14 | 15 | def apply_symmetry_planes(planes, s): 16 | assert len(planes.shape) == 3 17 | if (s & 1) != 0: # flip x 18 | np.copyto(planes, planes[::-1,:,:]) 19 | if (s & 2) != 0: # flip y 20 | np.copyto(planes, planes[:,::-1,:]) 21 | if (s & 4) != 0: # swap x and y 22 | np.copyto(planes, np.transpose(planes[:,:,:], (1,0,2))) 23 | 24 | def apply_symmetry_plane(plane, s): 25 | assert len(plane.shape) == 2 26 | if (s & 1) != 0: # flip x 27 | np.copyto(plane, plane[::-1,:]) 28 | if (s & 2) != 0: # flip y 29 | np.copyto(plane, plane[:,::-1]) 30 | if (s & 4) != 0: # swap x and y 31 | np.copyto(plane, np.transpose(plane[:,:], (1,0))) 32 | 33 | def invert_symmetry_plane(plane, s): 34 | assert len(plane.shape) == 2 35 | # note reverse order of 4,2,1 36 | if (s & 4) != 0: # swap x and y 37 | np.copyto(plane, np.transpose(plane[:,:], (1,0))) 38 | if (s & 2) != 0: # flip y 39 | np.copyto(plane, plane[:,::-1]) 40 | if (s & 1) != 0: # flip x 41 | np.copyto(plane, plane[::-1,:]) 42 | 43 | def apply_symmetry_vertex(vertex, N, s): 44 | assert vertex.size == 2 45 | if (s & 1) != 0: # flip x 46 | vertex[0] = N - vertex[0] - 1 47 | if (s & 2) != 0: # flip y 48 | vertex[1] = N - vertex[1] - 1 49 | if (s & 4) != 0: # swap x and y 50 | np.copyto(vertex, vertex[::-1]) 51 | assert 0 <= vertex[0] < N 52 | assert 0 <= vertex[1] < N 53 | 54 | def get_symmetry_vertex_tuple(vertex, N, s): 55 | x,y = vertex 56 | if (s & 1) != 0: # flip x 57 | x = N - x - 1 58 | if (s & 2) != 0: # flip y 59 | y = N - y - 1 60 | if (s & 4) != 0: # swap x and y 61 | x,y = y,x 62 | assert 0 <= x < N 63 | assert 0 <= y < N 64 | return (x,y) 65 | 66 | def get_inverse_symmetry_vertex_tuple(vertex, N, s): 67 | x,y = vertex 68 | # note reverse order of 4,2,1 69 | if (s & 4) != 0: # swap x and y 70 | x,y = y,x 71 | if (s & 2) != 0: # flip y 72 | y = N - y - 1 73 | if (s & 1) != 0: # flip x 74 | x = N - x - 1 75 | assert 0 <= x < N 76 | assert 0 <= y < N 77 | return (x,y) 78 | 79 | def make_symmetry_batch(features): 80 | assert len(features.shape) == 3 81 | N = features.shape[0] 82 | Nfeat = features.shape[2] 83 | feature_batch = np.empty((8, N, N, Nfeat), dtype=features.dtype) 84 | for s in xrange(8): 85 | feature_batch[s,:,:,:] = features 86 | apply_symmetry_planes(feature_batch[s,:,:,:], s) 87 | return feature_batch 88 | 89 | def average_plane_over_symmetries(planes, N): 90 | assert planes.shape == (8, N*N) 91 | planes = planes.reshape((8, N, N)) 92 | for s in xrange(8): 93 | invert_symmetry_plane(planes[s,:,:], s) 94 | mean_plane = planes.mean(axis=0) 95 | mean_plane = mean_plane.reshape((N*N,)) 96 | return mean_plane 97 | 98 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/TFEngine.py: -------------------------------------------------------------------------------- 1 | import 
tensorflow as tf 2 | import numpy as np 3 | import random 4 | import os 5 | from Engine import * 6 | import Book 7 | import Features 8 | import Normalization 9 | import Symmetry 10 | import Checkpoint 11 | from GTP import Move, true_stderr 12 | from Board import * 13 | 14 | def softmax(E, temp): 15 | #print "E =\n", E 16 | expE = np.exp(temp * (E - max(E))) # subtract max to avoid overflow 17 | return expE / np.sum(expE) 18 | 19 | def sample_from(probs): 20 | cumsum = np.cumsum(probs) 21 | r = random.random() 22 | for i in xrange(len(probs)): 23 | if r <= cumsum[i]: 24 | return i 25 | assert False, "problem with sample_from" 26 | 27 | 28 | class TFEngine(BaseEngine): 29 | def __init__(self, eng_name, model): 30 | super(TFEngine,self).__init__() 31 | self.eng_name = eng_name 32 | self.model = model 33 | self.book = Book.load_GoGoD_book() 34 | 35 | self.last_move_probs = np.zeros((self.model.N, self.model.N,)) 36 | self.kibitz_mode = False 37 | 38 | # build the graph 39 | with tf.Graph().as_default(): 40 | with tf.device('/cpu:0'): 41 | self.feature_planes = tf.placeholder(tf.float32, shape=[None, self.model.N, self.model.N, self.model.Nfeat], name='feature_planes') 42 | self.logits = model.inference(self.feature_planes, self.model.N, self.model.Nfeat) 43 | saver = tf.train.Saver(tf.trainable_variables()) 44 | init = tf.initialize_all_variables() 45 | self.sess = tf.Session(config=tf.ConfigProto(log_device_placement=False)) 46 | self.sess.run(init) 47 | checkpoint_dir = os.path.join(model.train_dir, 'checkpoints') 48 | Checkpoint.restore_from_checkpoint(self.sess, saver, checkpoint_dir) 49 | 50 | 51 | def name(self): 52 | return self.eng_name 53 | 54 | def version(self): 55 | return "1.0" 56 | 57 | def set_board_size(self, N): 58 | if N != self.model.N: 59 | return False 60 | return BaseEngine.set_board_size(self, N) 61 | 62 | def pick_book_move(self, color): 63 | if self.book: 64 | book_move = Book.get_book_move(self.board, self.book) 65 | if book_move: 66 | print "playing book move", book_move 67 | return Move(book_move[0], book_move[1]) 68 | print "no book move" 69 | else: 70 | print "no book" 71 | return None 72 | 73 | def pick_model_move(self, color): 74 | if self.model.Nfeat == 15: 75 | board_feature_planes = Features.make_feature_planes_stones_3liberties_4history_ko(self.board, color) 76 | Normalization.apply_featurewise_normalization_B(board_feature_planes) 77 | elif self.model.Nfeat == 21: 78 | board_feature_planes = Features.make_feature_planes_stones_4liberties_4history_ko_4captures(self.board, color).astype(np.float32) 79 | Normalization.apply_featurewise_normalization_C(board_feature_planes) 80 | else: 81 | assert False 82 | feature_batch = Symmetry.make_symmetry_batch(board_feature_planes) 83 | 84 | feed_dict = {self.feature_planes: feature_batch} 85 | 86 | logit_batch = self.sess.run(self.logits, feed_dict) 87 | move_logits = Symmetry.average_plane_over_symmetries(logit_batch, self.model.N) 88 | softmax_temp = 1.0 89 | move_probs = softmax(move_logits, softmax_temp) 90 | 91 | # zero out illegal moves 92 | for x in xrange(self.model.N): 93 | for y in xrange(self.model.N): 94 | ind = self.model.N * x + y 95 | if not self.board.play_is_legal(x, y, color): 96 | move_probs[ind] = 0 97 | sum_probs = np.sum(move_probs) 98 | if sum_probs == 0: return Move.Pass() # no legal moves, pass 99 | move_probs /= sum_probs # re-normalize probabilities 100 | 101 | pick_best = True 102 | if pick_best: 103 | move_ind = np.argmax(move_probs) 104 | else: 105 | move_ind = sample_from(move_probs) 106 
| move_x = move_ind // self.model.N 107 | move_y = move_ind % self.model.N 108 | 109 | self.last_move_probs = move_probs.reshape((self.board.N, self.board.N)) 110 | 111 | return Move(move_x, move_y) 112 | 113 | def pick_move(self, color): 114 | book_move = self.pick_book_move(color) 115 | if book_move: 116 | if self.kibitz_mode: # in kibitz mode compute model probabilities anyway 117 | self.pick_model_move(color) # ignore the model move 118 | return book_move 119 | return self.pick_model_move(color) 120 | 121 | def get_last_move_probs(self): 122 | return self.last_move_probs 123 | 124 | def stone_played(self, x, y, color): 125 | # if we are in kibitz mode, we want to compute model probabilities for ALL turns 126 | if self.kibitz_mode: 127 | self.pick_model_move(color) 128 | true_stderr.write("probability of played move %s (%d, %d) was %.2f%%\n" % (color_names[color], x, y, 100*self.last_move_probs[x,y])) 129 | 130 | BaseEngine.stone_played(self, x, y, color) 131 | 132 | def toggle_kibitz_mode(self): 133 | self.kibitz_mode = not self.kibitz_mode 134 | return self.kibitz_mode 135 | 136 | 137 | 138 | 139 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/TreeSearch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import copy 3 | from Engine import BaseEngine 4 | from GTP import Move 5 | 6 | # TODO: should the policy network influence the evaluation? 7 | # We could modify the score by the policy probability, possibly in a depth-dependent way. 8 | 9 | def get_board_after_move(board, move): 10 | ret = copy.deepcopy(board) 11 | ret.play_stone(move[0], move[1], board.color_to_play) 12 | return ret 13 | 14 | def minimax_eval(board, policy, value, depth): 15 | if depth == 0: 16 | score = value.evaluate(board) 17 | print " "*(3-depth), "leaf node, score =", score 18 | return score 19 | 20 | moves = policy.suggest_moves(board) 21 | assert len(moves) > 0 22 | best_score = -99 23 | for move in moves: 24 | next_board = get_board_after_move(board, move) 25 | print " "*(3-depth), "trying move", move 26 | score = -1 * minimax_eval(next_board, policy, value, depth-1) 27 | print " "*(3-depth), "move", move, "has score", score 28 | if score > best_score: 29 | best_score = score 30 | return best_score 31 | 32 | def choose_move_minimax(board, policy, value, depth): 33 | assert depth > 0 34 | 35 | moves = policy.suggest_moves(board) 36 | best_score = -99 37 | best_move = None 38 | for move in moves: 39 | next_board = get_board_after_move(board, move) 40 | print "minimax root node: trying (%d,%d)..." % (move[0], move[1]) 41 | score = -1 * minimax_eval(next_board, policy, value, depth-1) 42 | print "minimax root node: (%d,%d) gives score %f" % (move[0], move[1], score) 43 | if score > best_score: 44 | best_score, best_move = score, move 45 | return best_move 46 | 47 | 48 | # Return the value of the position if it's between lower and upper. 49 | # If it's <= lower, return lower; if it's >= upper, return upper.
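# (This is a negamax-style, fail-hard alpha-beta search: scores are always from the
# point of view of the side to move, so each recursive call negates the child's score
# and searches it with the negated, swapped window (-upper, -lower). A score >= upper
# "fails high" and is clamped to upper, since the opponent would avoid this line.
# choose_move_alphabeta below starts from the window (-1, +1), which presumably
# matches the range of scores returned by value.evaluate().)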
50 | def alphabeta_eval(board, policy, value, lower, upper, depth): 51 | if depth == 0: 52 | score = value.evaluate(board) 53 | print " "*(3-depth), "leaf node, score =", score 54 | return score 55 | 56 | moves = policy.suggest_moves(board) 57 | assert len(moves) > 0 58 | for move in moves: 59 | next_board = get_board_after_move(board, move) 60 | print " "*(3-depth), "trying move", move 61 | score = -1 * alphabeta_eval(next_board, policy, value, -upper, -lower, depth-1) 62 | print " "*(3-depth), "move", move, "has score", score 63 | if score >= upper: 64 | print " "*(3-depth), "fail high!" 65 | return upper 66 | if score > lower: 67 | lower = score 68 | return lower 69 | 70 | def choose_move_alphabeta(board, policy, value, depth): 71 | assert depth > 0 72 | 73 | moves = policy.suggest_moves(board) 74 | lower = -1 75 | upper = +1 76 | best_move = None 77 | for move in moves: 78 | next_board = get_board_after_move(board, move) 79 | print "alpha-beta root node: trying (%d,%d)..." % (move[0], move[1]) 80 | score = -1 * alphabeta_eval(next_board, policy, value, -upper, -lower, depth-1) 81 | print "alpha-beta root node: (%d,%d) gives score %f" % (move[0], move[1], score) 82 | if score > lower: 83 | lower, best_move = score, move 84 | return best_move 85 | 86 | 87 | 88 | class TreeSearchEngine(BaseEngine): 89 | def __init__(self, policy, value): 90 | self.policy = policy 91 | self.value = value 92 | def name(self): 93 | return "TreeSearch" 94 | def version(self): 95 | return "1.0" 96 | def pick_move(self, color): 97 | x,y = choose_move_alphabeta(self.board, self.policy, self.value, depth=3) 98 | return Move(x,y) 99 | def get_position_eval(self): 100 | return self.value.evaluate(self.board) 101 | 102 | if __name__ == '__main__': 103 | import GTP 104 | fclient = GTP.redirect_all_output("log_engine.txt") 105 | 106 | import Policy 107 | import MoveModels 108 | import Eval 109 | import EvalModels 110 | 111 | #policy = Policy.AllPolicy() 112 | policy = Policy.TFPolicy(model=MoveModels.Conv12PosDepELU(N=19, Nfeat=21), threshold_prob=0.8, softmax_temp=1.0) 113 | value = Eval.TFEval(EvalModels.Conv11PosDepFC1ELU(N=19, Nfeat=21)) 114 | 115 | engine = TreeSearchEngine(policy, value) 116 | 117 | gtp = GTP.GTP(engine, fclient) 118 | gtp.loop() 119 | 120 | 121 | 122 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/games_with_illegal_moves_sorted.txt: -------------------------------------------------------------------------------- 1 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1800-49/1802-01-22a.sgf 2 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1970-75/1971-06-05a.sgf 3 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1976-79/1976-09-08a.sgf 4 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1980/1980-10-08a.sgf 5 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1982/1982-09-29b.sgf 6 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1983/1983-08-09b.sgf 7 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1985/1985-11-07a.sgf 8 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1988/1988-01-12b.sgf 9 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1988/1988-10-27a.sgf 10 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1991/1991-07-10b.sgf 11 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1992/1992-06-11a.sgf 12 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1992/1992-12-00c.sgf 13 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1993/1993-06-08b.sgf 14 | 
/home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1993/1993-06-10d.sgf 15 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1994/1994-06-30f.sgf 16 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1995/1995-03-12g.sgf 17 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1995/1995-05-15c.sgf 18 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1995/1995-06-08j.sgf 19 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1996/1996-07-31b.sgf 20 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1997/1997-03-18o.sgf 21 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1997/1997-12-04a.sgf 22 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1998/1998-05-11b.sgf 23 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1999/1999-04-10h.sgf 24 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/2001/2001-04-05aa.sgf 25 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/2001/2001-04-23a.sgf 26 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/2001/2001-05-10aa.sgf 27 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/2002/2002-04-25q.sgf 28 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/2004/2004-05-10f.sgf 29 | -------------------------------------------------------------------------------- /showcase-studies/samuel_checkers/README.md: -------------------------------------------------------------------------------- 1 | Checkers AI: a look at Arthur Samuel's ideas 2 | ===================================== 3 | 4 | Usage 5 | --- 6 | Download project. Navigate to directory. Do `python game.py`, and type in `arthur` when prompted for agent module. 7 | 8 | (Note: to adjust how long the computer player "thinks" about its next move, you can vary the default depth parameter of the look ahead search. Go into `arthur.py` and change `depth=x` parameter of the function `move_function`.) 9 | 10 | Files 11 | --- 12 | 13 | 14 | `checkers.py` 15 | 16 | This file contains the definition of the CheckerBoard class. Its methods include new game 17 | initialization, ASCII printed output, and getting legal moves from a given state. 18 | 19 | `agent.py` 20 | 21 | This file contains the implementation of the AI CheckersAgent class. All that is required 22 | of a CheckersAgent is that it have a function move_function() that takes in a game state and 23 | returns a legal move. 24 | 25 | `arthur.py` 26 | 27 | This file contains the implementation of an agent that is inspired by Arthur Samuel's 28 | historic machine learning checkers program. 29 | 30 | `game.py` 31 | 32 | This file contains the harness for running an actual game of checkers. 33 | 34 | > Written with [StackEdit](https://stackedit.io/). 35 | -------------------------------------------------------------------------------- /showcase-studies/samuel_checkers/agent.py: -------------------------------------------------------------------------------- 1 | # Andrew Edwards -- almostimplemented.com 2 | # ======================================= 3 | # A checkers agent class. 
4 | # 5 | # Last updated: July 21, 2014 6 | 7 | 8 | class CheckersAgent: 9 | def __init__(self, move_function): 10 | self.move_function = move_function 11 | 12 | def make_move(self, board): 13 | return self.move_function(board) 14 | -------------------------------------------------------------------------------- /showcase-studies/samuel_checkers/random_agent.py: -------------------------------------------------------------------------------- 1 | # Andrew Edwards -- almostimplemented.com 2 | # ======================================= 3 | # A checkers agent that picks a random move 4 | # 5 | # Last updated: July 21, 2014 6 | import random 7 | 8 | def move_function(board): 9 | return random.choice(board.get_moves()) 10 | -------------------------------------------------------------------------------- /showcase-studies/samuel_checkers/test.py: -------------------------------------------------------------------------------- 1 | import checkers 2 | import agent 3 | import arthur 4 | import random_agent 5 | 6 | BLACK, WHITE = 0, 1 7 | 8 | f = open('logfile', 'w') 9 | 10 | for i in range(100): 11 | print "game: " + str(i) 12 | B = checkers.CheckerBoard() 13 | cpu_1 = agent.CheckersAgent(lambda board: arthur.move_function(board, 4)) 14 | cpu_2 = agent.CheckersAgent(lambda board: arthur.move_function(board, 6)) 15 | current_player = B.active 16 | turn = 1 17 | while not B.is_over(): 18 | f.write(str(B)) 19 | if turn % 100 == 0: 20 | print "# of turns: " + str(turn) 21 | B.make_move(cpu_1.make_move(B)) 22 | if B.active == current_player: 23 | continue 24 | current_player = B.active 25 | turn += 1 26 | while not B.is_over() and B.active == current_player: 27 | B.make_move(cpu_2.make_move(B)) 28 | current_player = B.active 29 | if B.active == WHITE: 30 | print "Congrats Black, you win!" 31 | else: 32 | print "Congrats White, you win!" 33 | print "Game took %i turns" % turn 34 | -------------------------------------------------------------------------------- /showcase-studies/td-gammon/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .env/ 3 | logs/ 4 | summaries/ 5 | models/ 6 | *.pyc 7 | -------------------------------------------------------------------------------- /showcase-studies/td-gammon/Makefile: -------------------------------------------------------------------------------- 1 | train: 2 | python main.py 3 | 4 | play: 5 | python main.py --play --restore -------------------------------------------------------------------------------- /showcase-studies/td-gammon/README.md: -------------------------------------------------------------------------------- 1 | # TD-Gammon 2 | 3 | Originally from https://github.com/fomorians/td-gammon 4 | 5 | Implementation of [TD-Gammon](http://www.bkgm.com/articles/tesauro/tdl.html) in TensorFlow. 6 | 7 | Before DeepMind tackled playing Atari games or built AlphaGo there was TD-Gammon, the first algorithm to reach an expert level of play in backgammon. Gerald Tesauro published his paper in 1992 describing TD-Gammon as a neural network trained with reinforcement learning. It is referenced in both Atari and AlphaGo research papers and helped set the groundwork for many of the advancements made in the last few years. 8 | 9 | The code features [eligibility traces](https://webdocs.cs.ualberta.ca/~sutton/book/ebook/node87.html#fig:GDTDl) on the gradients which are an elegant way to assign credit to actions made in the past. 10 | 11 | ## Training 12 | 13 | 1. 
[Install TensorFlow](https://www.tensorflow.org/versions/r0.7/get_started/os_setup.html#pip-installation). 14 | 2. Clone the repo: `git clone https://github.com/fomorians/td-gammon.git && cd td-gammon` 15 | 3. Run training: `python main.py` 16 | 17 | ## Play 18 | 19 | To play against a trained model: `python main.py --play --restore` 20 | 21 | ## Things to try 22 | 23 | - Compare with and without eligibility traces by replacing the trace with the unmodified gradient. 24 | - Try different activation functions on the hidden layer. 25 | - Expand the board representation. Currently it uses the "compact" representation from the paper. A full board representation should remove some ambiguity between board states. 26 | - Increase the number of turns the agent will look at before making a move. The paper used a 2-ply and 3-ply search while this implementation only uses 1-ply. 27 | -------------------------------------------------------------------------------- /showcase-studies/td-gammon/backgammon/README.md: -------------------------------------------------------------------------------- 1 | Backgammon 2 | === 3 | 4 | This is a fork of https://github.com/awni/backgammon. 5 | -------------------------------------------------------------------------------- /showcase-studies/td-gammon/backgammon/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /showcase-studies/td-gammon/backgammon/agents/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/td-gammon/backgammon/agents/__init__.py -------------------------------------------------------------------------------- /showcase-studies/td-gammon/backgammon/agents/human_agent.py: -------------------------------------------------------------------------------- 1 | from ..game import Game 2 | 3 | class HumanAgent(object): 4 | def __init__(self, player): 5 | self.player = player 6 | self.name = 'Human' 7 | 8 | def get_action(self, moves, game=None): 9 | if not moves: 10 | input("No moves for you...(hit enter)") 11 | return None 12 | 13 | while True: 14 | while True: 15 | mv1 = input('Please enter a move "," ("%s" for on the board, "%s" for off the board): ' % (Game.ON, Game.OFF)) 16 | mv1 = self.get_formatted_move(mv1) 17 | if not mv1: 18 | print('Bad format enter e.g. "3,4"') 19 | else: 20 | break 21 | 22 | while True: 23 | mv2 = input('Please enter a second move (enter to skip): ') 24 | if mv2 == '': 25 | mv2 = None 26 | break 27 | mv2 = self.get_formatted_move(mv2) 28 | if not mv2: 29 | print('Bad format enter e.g. 
"3,4"') 30 | else: 31 | break 32 | 33 | if mv2: 34 | move = (mv1, mv2) 35 | else: 36 | move = (mv1,) 37 | 38 | if move in moves: 39 | return move 40 | elif move[::-1] in moves: 41 | move = move[::-1] 42 | return move 43 | else: 44 | print("You can't play that move") 45 | 46 | return None 47 | 48 | def get_formatted_move(self, move): 49 | try: 50 | start, end = move.split(",") 51 | if start == Game.ON: 52 | return (start, int(end)) 53 | if end == Game.OFF: 54 | return (int(start), end) 55 | return (int(start), int(end)) 56 | except: 57 | return False 58 | -------------------------------------------------------------------------------- /showcase-studies/td-gammon/backgammon/agents/random_agent.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | class RandomAgent(object): 4 | 5 | def __init__(self, player): 6 | self.player = player 7 | self.name = 'Random' 8 | 9 | def get_action(self, moves, game=None): 10 | return random.choice(list(moves)) if moves else None 11 | -------------------------------------------------------------------------------- /showcase-studies/td-gammon/backgammon/agents/td_gammon_agent.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class TDAgent(object): 4 | 5 | def __init__(self, player, model): 6 | self.player = player 7 | self.model = model 8 | self.name = 'TD-Gammon' 9 | 10 | def get_action(self, actions, game): 11 | """ 12 | Return best action according to self.evaluationFunction, 13 | with no lookahead. 14 | """ 15 | v_best = 0 16 | a_best = None 17 | 18 | for a in actions: 19 | ateList = game.take_action(a, self.player) 20 | features = game.extract_features(game.opponent(self.player)) 21 | v = self.model.get_output(features) 22 | v = 1. 
- v if self.player == game.players[0] else v 23 | if v > v_best: 24 | v_best = v 25 | a_best = a 26 | game.undo_action(a, self.player, ateList) 27 | 28 | return a_best 29 | -------------------------------------------------------------------------------- /showcase-studies/td-gammon/checkpoints/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "checkpoint-178" 2 | all_model_checkpoint_paths: "checkpoint-178" 3 | -------------------------------------------------------------------------------- /showcase-studies/td-gammon/checkpoints/checkpoint-178: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/td-gammon/checkpoints/checkpoint-178 -------------------------------------------------------------------------------- /showcase-studies/td-gammon/checkpoints/checkpoint-178.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/td-gammon/checkpoints/checkpoint-178.meta -------------------------------------------------------------------------------- /showcase-studies/td-gammon/checkpoints/checkpoint-447880: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/td-gammon/checkpoints/checkpoint-447880 -------------------------------------------------------------------------------- /showcase-studies/td-gammon/checkpoints/checkpoint-447880.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/td-gammon/checkpoints/checkpoint-447880.meta -------------------------------------------------------------------------------- /showcase-studies/td-gammon/checkpoints/checkpoint-7894: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/td-gammon/checkpoints/checkpoint-7894 -------------------------------------------------------------------------------- /showcase-studies/td-gammon/checkpoints/checkpoint-7894.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/td-gammon/checkpoints/checkpoint-7894.meta -------------------------------------------------------------------------------- /showcase-studies/td-gammon/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tensorflow as tf 3 | 4 | from model import Model 5 | 6 | flags = tf.app.flags 7 | FLAGS = flags.FLAGS 8 | 9 | flags.DEFINE_boolean('test', False, 'If true, test against a random strategy.') 10 | flags.DEFINE_boolean('play', False, 'If true, play against a trained TD-Gammon strategy.') 11 | flags.DEFINE_boolean('restore', False, 'If true, restore a checkpoint before training.') 12 | 13 | model_path = os.environ.get('MODEL_PATH', 'models/') 14 | summary_path = os.environ.get('SUMMARY_PATH', 'summaries/') 15 | checkpoint_path = os.environ.get('CHECKPOINT_PATH', 
'checkpoints/') 16 | 17 | if not os.path.exists(model_path): 18 | os.makedirs(model_path) 19 | 20 | if not os.path.exists(checkpoint_path): 21 | os.makedirs(checkpoint_path) 22 | 23 | if not os.path.exists(summary_path): 24 | os.makedirs(summary_path) 25 | 26 | if __name__ == '__main__': 27 | graph = tf.Graph() 28 | sess = tf.Session(graph=graph) 29 | with sess.as_default(), graph.as_default(): 30 | model = Model(sess, model_path, summary_path, checkpoint_path, restore=FLAGS.restore) 31 | if FLAGS.test: 32 | model.test(episodes=1000) 33 | elif FLAGS.play: 34 | model.play() 35 | else: 36 | model.train() 37 | -------------------------------------------------------------------------------- /slides/CNNs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/slides/CNNs.pdf -------------------------------------------------------------------------------- /slides/tensorflow_intro.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/slides/tensorflow_intro.pdf -------------------------------------------------------------------------------- /tf_examples/convnet_mnist.py: -------------------------------------------------------------------------------- 1 | ''' 2 | A Convolutional Network implementation example using TensorFlow library. 3 | This example is using the MNIST database of handwritten digits 4 | (http://yann.lecun.com/exdb/mnist/) 5 | Author: Aymeric Damien 6 | Project: https://github.com/aymericdamien/TensorFlow-Examples/ 7 | ''' 8 | 9 | from __future__ import print_function 10 | 11 | import tensorflow as tf 12 | 13 | # Import MNIST data 14 | from tensorflow.examples.tutorials.mnist import input_data 15 | mnist = input_data.read_data_sets("/tmp/data/", one_hot=True) 16 | 17 | # Parameters 18 | learning_rate = 0.001 19 | training_iters = 200000 20 | batch_size = 128 21 | display_step = 10 22 | 23 | # Network Parameters 24 | n_input = 784 # MNIST data input (img shape: 28*28) 25 | n_classes = 10 # MNIST total classes (0-9 digits) 26 | dropout = 0.75 # Dropout, probability to keep units 27 | 28 | # tf Graph input 29 | x = tf.placeholder(tf.float32, [None, n_input]) 30 | y = tf.placeholder(tf.float32, [None, n_classes]) 31 | keep_prob = tf.placeholder(tf.float32) #dropout (keep probability) 32 | 33 | 34 | # Create some wrappers for simplicity 35 | def conv2d(x, W, b, strides=1): 36 | # Conv2D wrapper, with bias and relu activation 37 | x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME') 38 | x = tf.nn.bias_add(x, b) 39 | return tf.nn.relu(x) 40 | 41 | 42 | def maxpool2d(x, k=2): 43 | # MaxPool2D wrapper 44 | return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], 45 | padding='SAME') 46 | 47 | 48 | # Create model 49 | def conv_net(x, weights, biases, dropout): 50 | # Reshape input picture 51 | x = tf.reshape(x, shape=[-1, 28, 28, 1]) 52 | 53 | # Convolution Layer 54 | conv1 = conv2d(x, weights['wc1'], biases['bc1']) 55 | # Max Pooling (down-sampling) 56 | conv1 = maxpool2d(conv1, k=2) 57 | 58 | # Convolution Layer 59 | conv2 = conv2d(conv1, weights['wc2'], biases['bc2']) 60 | # Max Pooling (down-sampling) 61 | conv2 = maxpool2d(conv2, k=2) 62 | 63 | # Fully connected layer 64 | # Reshape conv2 output to fit fully connected layer input 65 | fc1 = tf.reshape(conv2, [-1, 
weights['wd1'].get_shape().as_list()[0]]) 66 | fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1']) 67 | fc1 = tf.nn.relu(fc1) 68 | # Apply Dropout 69 | fc1 = tf.nn.dropout(fc1, dropout) 70 | 71 | # Output, class prediction 72 | out = tf.add(tf.matmul(fc1, weights['out']), biases['out']) 73 | return out 74 | 75 | # Store layers weight & bias 76 | weights = { 77 | # 5x5 conv, 1 input, 32 outputs 78 | 'wc1': tf.Variable(tf.random_normal([5, 5, 1, 32])), 79 | # 5x5 conv, 32 inputs, 64 outputs 80 | 'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])), 81 | # fully connected, 7*7*64 inputs, 1024 outputs 82 | 'wd1': tf.Variable(tf.random_normal([7*7*64, 1024])), 83 | # 1024 inputs, 10 outputs (class prediction) 84 | 'out': tf.Variable(tf.random_normal([1024, n_classes])) 85 | } 86 | 87 | biases = { 88 | 'bc1': tf.Variable(tf.random_normal([32])), 89 | 'bc2': tf.Variable(tf.random_normal([64])), 90 | 'bd1': tf.Variable(tf.random_normal([1024])), 91 | 'out': tf.Variable(tf.random_normal([n_classes])) 92 | } 93 | 94 | # Construct model 95 | pred = conv_net(x, weights, biases, keep_prob) 96 | 97 | # Define loss and optimizer 98 | cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y)) 99 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) 100 | 101 | # Evaluate model 102 | correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1)) 103 | accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32)) 104 | 105 | # Initializing the variables 106 | init = tf.initialize_all_variables() 107 | 108 | # Launch the graph 109 | with tf.Session() as sess: 110 | sess.run(init) 111 | step = 1 112 | # Keep training until reach max iterations 113 | while step * batch_size < training_iters: 114 | batch_x, batch_y = mnist.train.next_batch(batch_size) 115 | # Run optimization op (backprop) 116 | sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, 117 | keep_prob: dropout}) 118 | if step % display_step == 0: 119 | # Calculate batch loss and accuracy 120 | loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x, 121 | y: batch_y, 122 | keep_prob: 1.}) 123 | print("Iter " + str(step*batch_size) + ", Minibatch Loss= " + \ 124 | "{:.6f}".format(loss) + ", Training Accuracy= " + \ 125 | "{:.5f}".format(acc)) 126 | step += 1 127 | print("Optimization Finished!") 128 | 129 | # Calculate accuracy for 256 mnist test images 130 | print("Testing Accuracy:", \ 131 | sess.run(accuracy, feed_dict={x: mnist.test.images[:256], 132 | y: mnist.test.labels[:256], 133 | keep_prob: 1.})) -------------------------------------------------------------------------------- /tf_examples/linear_regression.py: -------------------------------------------------------------------------------- 1 | ''' 2 | A linear regression learning algorithm example using TensorFlow library. 
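Fits a line y = W*x + b to a small 1-D dataset by minimizing the mean squared error with plain gradient descent, then plots the fitted line against the training and test data.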
3 | Author: Aymeric Damien 4 | Project: https://github.com/aymericdamien/TensorFlow-Examples/ 5 | ''' 6 | 7 | from __future__ import print_function 8 | 9 | import tensorflow as tf 10 | import numpy 11 | import matplotlib.pyplot as plt 12 | rng = numpy.random 13 | 14 | # Parameters 15 | learning_rate = 0.01 16 | training_epochs = 1000 17 | display_step = 50 18 | 19 | # Training Data 20 | train_X = numpy.asarray([3.3,4.4,5.5,6.71,6.93,4.168,9.779,6.182,7.59,2.167, 21 | 7.042,10.791,5.313,7.997,5.654,9.27,3.1]) 22 | train_Y = numpy.asarray([1.7,2.76,2.09,3.19,1.694,1.573,3.366,2.596,2.53,1.221, 23 | 2.827,3.465,1.65,2.904,2.42,2.94,1.3]) 24 | n_samples = train_X.shape[0] 25 | 26 | # tf Graph Input 27 | X = tf.placeholder("float") 28 | Y = tf.placeholder("float") 29 | 30 | # Set model weights 31 | W = tf.Variable(rng.randn(), name="weight") 32 | b = tf.Variable(rng.randn(), name="bias") 33 | 34 | # Construct a linear model 35 | pred = tf.add(tf.mul(X, W), b) 36 | 37 | # Mean squared error 38 | cost = tf.reduce_sum(tf.pow(pred-Y, 2))/(2*n_samples) 39 | # Gradient descent 40 | optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost) 41 | 42 | # Initializing the variables 43 | init = tf.initialize_all_variables() 44 | 45 | # Launch the graph 46 | with tf.Session() as sess: 47 | sess.run(init) 48 | 49 | # Fit all training data 50 | for epoch in range(training_epochs): 51 | for (x, y) in zip(train_X, train_Y): 52 | sess.run(optimizer, feed_dict={X: x, Y: y}) 53 | 54 | # Display logs per epoch step 55 | if (epoch+1) % display_step == 0: 56 | c = sess.run(cost, feed_dict={X: train_X, Y:train_Y}) 57 | print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(c), \ 58 | "W=", sess.run(W), "b=", sess.run(b)) 59 | 60 | print("Optimization Finished!") 61 | training_cost = sess.run(cost, feed_dict={X: train_X, Y: train_Y}) 62 | print("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n') 63 | 64 | # Graphic display 65 | plt.plot(train_X, train_Y, 'ro', label='Original data') 66 | plt.plot(train_X, sess.run(W) * train_X + sess.run(b), label='Fitted line') 67 | plt.legend() 68 | plt.show() 69 | 70 | # Testing example, as requested (Issue #2) 71 | test_X = numpy.asarray([6.83, 4.668, 8.9, 7.91, 5.7, 8.7, 3.1, 2.1]) 72 | test_Y = numpy.asarray([1.84, 2.273, 3.2, 2.831, 2.92, 3.24, 1.35, 1.03]) 73 | 74 | print("Testing... (Mean square loss Comparison)") 75 | testing_cost = sess.run( 76 | tf.reduce_sum(tf.pow(pred - Y, 2)) / (2 * test_X.shape[0]), 77 | feed_dict={X: test_X, Y: test_Y}) # same function as cost above 78 | print("Testing cost=", testing_cost) 79 | print("Absolute mean square loss difference:", abs( 80 | training_cost - testing_cost)) 81 | 82 | plt.plot(test_X, test_Y, 'bo', label='Testing data') 83 | plt.plot(train_X, sess.run(W) * train_X + sess.run(b), label='Fitted line') 84 | plt.legend() 85 | plt.show() -------------------------------------------------------------------------------- /tf_examples/mlp_mnist.py: -------------------------------------------------------------------------------- 1 | ''' 2 | A Multilayer Perceptron implementation example using TensorFlow library. 
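The model is a fully connected network with two hidden layers of 256 ReLU units each and a linear output layer, trained with a softmax cross-entropy loss and the Adam optimizer.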
3 | This example is using the MNIST database of handwritten digits 4 | (http://yann.lecun.com/exdb/mnist/) 5 | Author: Aymeric Damien 6 | Project: https://github.com/aymericdamien/TensorFlow-Examples/ 7 | ''' 8 | 9 | from __future__ import print_function 10 | 11 | # Import MNIST data 12 | from tensorflow.examples.tutorials.mnist import input_data 13 | mnist = input_data.read_data_sets("/tmp/data/", one_hot=True) 14 | 15 | import tensorflow as tf 16 | 17 | # Parameters 18 | learning_rate = 0.001 19 | training_epochs = 15 20 | batch_size = 100 21 | display_step = 1 22 | 23 | # Network Parameters 24 | n_hidden_1 = 256 # 1st layer number of features 25 | n_hidden_2 = 256 # 2nd layer number of features 26 | n_input = 784 # MNIST data input (img shape: 28*28) 27 | n_classes = 10 # MNIST total classes (0-9 digits) 28 | 29 | # tf Graph input 30 | x = tf.placeholder("float", [None, n_input]) 31 | y = tf.placeholder("float", [None, n_classes]) 32 | 33 | 34 | # Create model 35 | def multilayer_perceptron(x, weights, biases): 36 | # Hidden layer with RELU activation 37 | layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1']) 38 | layer_1 = tf.nn.relu(layer_1) 39 | # Hidden layer with RELU activation 40 | layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2']) 41 | layer_2 = tf.nn.relu(layer_2) 42 | # Output layer with linear activation 43 | out_layer = tf.matmul(layer_2, weights['out']) + biases['out'] 44 | return out_layer 45 | 46 | # Store layers weight & bias 47 | weights = { 48 | 'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])), 49 | 'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])), 50 | 'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes])) 51 | } 52 | biases = { 53 | 'b1': tf.Variable(tf.random_normal([n_hidden_1])), 54 | 'b2': tf.Variable(tf.random_normal([n_hidden_2])), 55 | 'out': tf.Variable(tf.random_normal([n_classes])) 56 | } 57 | 58 | # Construct model 59 | pred = multilayer_perceptron(x, weights, biases) 60 | 61 | # Define loss and optimizer 62 | cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y)) 63 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) 64 | 65 | # Initializing the variables 66 | init = tf.initialize_all_variables() 67 | 68 | # Launch the graph 69 | with tf.Session() as sess: 70 | sess.run(init) 71 | 72 | # Training cycle 73 | for epoch in range(training_epochs): 74 | avg_cost = 0. 
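        # avg_cost accumulates c / total_batch over the epoch's minibatches, so after the inner loop it equals the epoch's average minibatch cross-entropy loss.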
75 | total_batch = int(mnist.train.num_examples/batch_size) 76 | # Loop over all batches 77 | for i in range(total_batch): 78 | batch_x, batch_y = mnist.train.next_batch(batch_size) 79 | # Run optimization op (backprop) and cost op (to get loss value) 80 | _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, 81 | y: batch_y}) 82 | # Compute average loss 83 | avg_cost += c / total_batch 84 | # Display logs per epoch step 85 | if epoch % display_step == 0: 86 | print("Epoch:", '%04d' % (epoch+1), "cost=", \ 87 | "{:.9f}".format(avg_cost)) 88 | print("Optimization Finished!") 89 | 90 | # Test model 91 | correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1)) 92 | # Calculate accuracy 93 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float")) 94 | print("Accuracy:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels})) -------------------------------------------------------------------------------- /theory/MC/MCTS_(English).svg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/MC/MCTS_(English).svg.png -------------------------------------------------------------------------------- /theory/alfa-beta/Selection_087.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/Selection_087.png -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img001.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img001.gif -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img002.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img002.gif -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img003.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img003.gif -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img004.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img004.gif -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img005.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img005.gif -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img006.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img006.gif 
-------------------------------------------------------------------------------- /theory/alfa-beta/tree/img007.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img007.gif -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img008.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img008.gif -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img009.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img009.gif -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img010.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img010.gif -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img011.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img011.gif -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img012.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img012.gif -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img013.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img013.gif -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img014.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img014.gif -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img015.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img015.gif -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img016.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img016.gif -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img017.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img017.gif -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img018.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img018.gif -------------------------------------------------------------------------------- /theory/mini-max/Selection_086.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/mini-max/Selection_086.png --------------------------------------------------------------------------------