├── .gitignore ├── LICENSE ├── README.md ├── feedback └── README.md ├── helpers └── logging_colorer.py ├── papers └── recent_deepmind_papers │ ├── 1-s2.0-S2352154615001151-main.pdf │ ├── 1507.04296.pdf │ ├── 1509.02971v2.pdf │ ├── 1509.06461v3.pdf │ ├── 1509.08731v1.pdf │ ├── 1510.09142v1.pdf │ ├── 1511.06581v3.pdf │ ├── 1512.04860v1.pdf │ ├── 1602.01783v2.pdf │ ├── 1603.00748v1.pdf │ ├── 1605.06676v2.pdf │ ├── 1606.02647v2.pdf │ ├── 1606.05312.pdf │ ├── 1610.00633.pdf │ ├── 4031-monte-carlo-planning-in-large-pomdps.pdf │ └── DQNNaturePaper.pdf ├── pics ├── atari.jpg ├── blindspot.png ├── jan.zikes.png ├── michal.sustr.png └── trophy.jpg ├── preparation ├── README.md ├── ale_example.py ├── gym_example.py ├── space_invaders.bin └── tf_example.py ├── requirements.txt ├── showcase-studies ├── atari-roms │ ├── breakout.bin │ ├── pong.bin │ ├── seaquest.bin │ └── space_invaders.bin ├── dqn-gym │ ├── .directory │ ├── .gitignore │ ├── Makefile │ ├── README.md │ ├── atari_emulator.py │ ├── dqn_agent.py │ ├── environment.py │ ├── experience_memory.py │ ├── experiment.py │ ├── logging_colorer.py │ ├── parallel_dqn_agent.py │ ├── parallel_q_network.py │ ├── q_network.py │ ├── record_stats.py │ ├── run_dqn.py │ ├── states_examples │ │ ├── state_0_0.png │ │ ├── state_0_1.png │ │ ├── state_0_2.png │ │ ├── state_0_3.png │ │ ├── state_12_0.png │ │ ├── state_12_1.png │ │ ├── state_12_2.png │ │ ├── state_12_3.png │ │ ├── state_19_0.png │ │ ├── state_19_1.png │ │ ├── state_19_2.png │ │ ├── state_19_3.png │ │ ├── state_6_0.png │ │ ├── state_6_1.png │ │ ├── state_6_2.png │ │ └── state_6_3.png │ ├── training_results │ │ └── saved_models │ │ │ └── breakout │ │ │ └── dqn │ │ │ └── brick_hunter_example │ │ │ ├── brick_hunter_qsub.ckpt-24 │ │ │ ├── brick_hunter_qsub.ckpt-24.meta │ │ │ └── checkpoint │ └── visuals.py ├── dqn-simple │ ├── .gitignore │ ├── Makefile │ ├── README.md │ ├── atari_environment.py │ ├── dqn.py │ ├── encode_ffmpeg │ ├── logging_colorer.py │ ├── logstats.sh │ ├── play_atari.py │ ├── png.py │ ├── replay.py │ ├── state.py │ └── tf_summaries.py ├── go-NN │ ├── .gitignore │ ├── README.md │ └── engine │ │ ├── Board.py │ │ ├── Book.py │ │ ├── CGOSEngine.py │ │ ├── Checkpoint.py │ │ ├── Engine.py │ │ ├── Eval.py │ │ ├── EvalEngine.py │ │ ├── EvalModels.py │ │ ├── EvalStats.py │ │ ├── EvalTraining.py │ │ ├── Features.py │ │ ├── GTP.py │ │ ├── Game.py │ │ ├── HelperEngine.py │ │ ├── InfluenceEngine.py │ │ ├── InfluenceModels.py │ │ ├── InfluenceTraining.py │ │ ├── KGSEngine.py │ │ ├── Layers.py │ │ ├── MakeEvalData.py │ │ ├── MakeInfluenceData.py │ │ ├── MakeMoveData.py │ │ ├── MirrorEngine.py │ │ ├── MoveModels.py │ │ ├── MoveTraining.py │ │ ├── NPZ.py │ │ ├── Normalization.py │ │ ├── OnlineExampleMaker.py │ │ ├── Policy.py │ │ ├── SGFReader.py │ │ ├── SelfPlay.py │ │ ├── Symmetry.py │ │ ├── TFEngine.py │ │ ├── Train.py │ │ ├── TreeSearch.py │ │ └── games_with_illegal_moves_sorted.txt ├── samuel_checkers │ ├── README.md │ ├── agent.py │ ├── arthur.py │ ├── checkers.py │ ├── game.py │ ├── logfile │ ├── random_agent.py │ └── test.py └── td-gammon │ ├── .gitignore │ ├── Makefile │ ├── README.md │ ├── backgammon │ ├── README.md │ ├── __init__.py │ ├── agents │ │ ├── __init__.py │ │ ├── human_agent.py │ │ ├── random_agent.py │ │ └── td_gammon_agent.py │ └── game.py │ ├── checkpoints │ ├── checkpoint │ ├── checkpoint-178 │ ├── checkpoint-178.meta │ ├── checkpoint-447880 │ ├── checkpoint-447880.meta │ ├── checkpoint-7894 │ └── checkpoint-7894.meta │ ├── main.py │ └── model.py ├── slides ├── CNNs.pdf └── 
tensorflow_intro.pdf ├── tf_examples ├── convnet_mnist.py ├── linear_regression.py └── mlp_mnist.py └── theory ├── MC └── MCTS_(English).svg.png ├── alfa-beta ├── Selection_087.png └── tree │ ├── img001.gif │ ├── img002.gif │ ├── img003.gif │ ├── img004.gif │ ├── img005.gif │ ├── img006.gif │ ├── img007.gif │ ├── img008.gif │ ├── img009.gif │ ├── img010.gif │ ├── img011.gif │ ├── img012.gif │ ├── img013.gif │ ├── img014.gif │ ├── img015.gif │ ├── img016.gif │ ├── img017.gif │ └── img018.gif └── mini-max └── Selection_086.png /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Python template 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | env/ 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *,cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # IPython Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # dotenv 81 | .env 82 | 83 | # virtualenv 84 | venv/ 85 | ENV/ 86 | 87 | # Spyder project settings 88 | .spyderproject 89 | 90 | # Rope project settings 91 | .ropeproject 92 | 93 | .idea/ 94 | *~ 95 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /feedback/README.md: -------------------------------------------------------------------------------- 1 | Please send your feedbacks via pull requests here in a text file. Thanks! 2 | -------------------------------------------------------------------------------- /helpers/logging_colorer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Console coloring of logs 3 | """ 4 | 5 | # thanks stackoverflow :-P https://stackoverflow.com/questions/384076/how-can-i-color-python-logging-output 6 | import logging 7 | 8 | # now we patch Python code to add color support to logging.StreamHandler 9 | import sys 10 | 11 | 12 | def add_coloring_to_emit_windows(fn): 13 | # add methods we need to the class 14 | def _out_handle(self): 15 | import ctypes 16 | return ctypes.windll.kernel32.GetStdHandle(self.STD_OUTPUT_HANDLE) 17 | out_handle = property(_out_handle) 18 | 19 | def _set_color(self, code): 20 | import ctypes 21 | # Constants from the Windows API 22 | self.STD_OUTPUT_HANDLE = -11 23 | hdl = ctypes.windll.kernel32.GetStdHandle(self.STD_OUTPUT_HANDLE) 24 | ctypes.windll.kernel32.SetConsoleTextAttribute(hdl, code) 25 | 26 | setattr(logging.StreamHandler, '_set_color', _set_color) 27 | 28 | def new(*args): 29 | FOREGROUND_BLUE = 0x0001 # text color contains blue. 30 | FOREGROUND_GREEN = 0x0002 # text color contains green. 31 | FOREGROUND_RED = 0x0004 # text color contains red. 32 | FOREGROUND_INTENSITY = 0x0008 # text color is intensified. 33 | FOREGROUND_WHITE = FOREGROUND_BLUE|FOREGROUND_GREEN |FOREGROUND_RED 34 | # winbase.h 35 | STD_INPUT_HANDLE = -10 36 | STD_OUTPUT_HANDLE = -11 37 | STD_ERROR_HANDLE = -12 38 | 39 | # wincon.h 40 | FOREGROUND_BLACK = 0x0000 41 | FOREGROUND_BLUE = 0x0001 42 | FOREGROUND_GREEN = 0x0002 43 | FOREGROUND_CYAN = 0x0003 44 | FOREGROUND_RED = 0x0004 45 | FOREGROUND_MAGENTA = 0x0005 46 | FOREGROUND_YELLOW = 0x0006 47 | FOREGROUND_GREY = 0x0007 48 | FOREGROUND_INTENSITY = 0x0008 # foreground color is intensified. 49 | 50 | BACKGROUND_BLACK = 0x0000 51 | BACKGROUND_BLUE = 0x0010 52 | BACKGROUND_GREEN = 0x0020 53 | BACKGROUND_CYAN = 0x0030 54 | BACKGROUND_RED = 0x0040 55 | BACKGROUND_MAGENTA = 0x0050 56 | BACKGROUND_YELLOW = 0x0060 57 | BACKGROUND_GREY = 0x0070 58 | BACKGROUND_INTENSITY = 0x0080 # background color is intensified. 
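        # Map the record's level to a console color: CRITICAL (>=50) bright red on a
        # yellow background, ERROR (>=40) red, WARNING (>=30) yellow, INFO (>=20) green,
        # DEBUG (>=10) magenta, anything else plain white.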
59 | 60 | levelno = args[1].levelno 61 | if(levelno>=50): 62 | color = BACKGROUND_YELLOW | FOREGROUND_RED | FOREGROUND_INTENSITY | BACKGROUND_INTENSITY 63 | elif(levelno>=40): 64 | color = FOREGROUND_RED | FOREGROUND_INTENSITY 65 | elif(levelno>=30): 66 | color = FOREGROUND_YELLOW | FOREGROUND_INTENSITY 67 | elif(levelno>=20): 68 | color = FOREGROUND_GREEN 69 | elif(levelno>=10): 70 | color = FOREGROUND_MAGENTA 71 | else: 72 | color = FOREGROUND_WHITE 73 | args[0]._set_color(color) 74 | 75 | ret = fn(*args) 76 | args[0]._set_color( FOREGROUND_WHITE ) 77 | #print "after" 78 | return ret 79 | return new 80 | 81 | def add_coloring_to_emit_ansi(fn): 82 | # add methods we need to the class 83 | def new(*args): 84 | levelno = args[1].levelno 85 | if(levelno>=50): 86 | color = '\x1b[31m' # red 87 | elif(levelno>=40): 88 | color = '\x1b[31m' # red 89 | elif(levelno>=30): 90 | color = '\x1b[33m' # yellow 91 | elif(levelno>=20): 92 | color = '\x1b[32m' # green 93 | elif(levelno>=10): 94 | color = '\x1b[35m' # pink 95 | else: 96 | color = '\x1b[0m' # normal 97 | args[1].msg = color + args[1].msg[:200] + '\x1b[0m' # normal 98 | #print "after" 99 | return fn(*args) 100 | return new 101 | 102 | import platform 103 | if platform.system()=='Windows': 104 | # Windows does not support ANSI escapes and we are using API calls to set the console color 105 | logging.StreamHandler.emit = add_coloring_to_emit_windows(logging.StreamHandler.emit) 106 | else: 107 | # all non-Windows platforms are supporting ANSI escapes so we use them 108 | logging.StreamHandler.emit = add_coloring_to_emit_ansi(logging.StreamHandler.emit) 109 | #log = logging.getLogger() 110 | #log.addFilter(log_filter()) 111 | #//hdlr = logging.StreamHandler() 112 | #//hdlr.setFormatter(formatter()) 113 | 114 | def init_logging(verbose=True, log_file='app.log'): 115 | """ 116 | Initialize settings for Python logger 117 | 118 | This allows for logging into console as well as specified log_file. 119 | After you can use in the code just:: 120 | 121 | import logging 122 | logging.info("hello world!") 123 | # will produce 124 | # 2016-07-19 16:13:02,931 [MainThread ][INFO ]: hello world! 
125 | 126 | :param bool verbose: 127 | :param str log_file: 128 | """ 129 | 130 | # let's log the same output to console and to file 131 | log_file = log_file 132 | format = "%(asctime)s [%(threadName)-12.12s][%(levelname)-5.5s]: %(message)s" 133 | 134 | root_logger = logging.getLogger() 135 | logging.basicConfig( 136 | format=format, 137 | level=logging.DEBUG if verbose else logging.WARNING, 138 | stream=sys.stderr) 139 | 140 | log_formatter = logging.Formatter(format) 141 | file_handler = logging.FileHandler(log_file) 142 | file_handler.setFormatter(log_formatter) 143 | root_logger.addHandler(file_handler) 144 | -------------------------------------------------------------------------------- /papers/recent_deepmind_papers/1-s2.0-S2352154615001151-main.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/papers/recent_deepmind_papers/1-s2.0-S2352154615001151-main.pdf -------------------------------------------------------------------------------- /papers/recent_deepmind_papers/1507.04296.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/papers/recent_deepmind_papers/1507.04296.pdf -------------------------------------------------------------------------------- /papers/recent_deepmind_papers/1509.02971v2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/papers/recent_deepmind_papers/1509.02971v2.pdf -------------------------------------------------------------------------------- /papers/recent_deepmind_papers/1509.06461v3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/papers/recent_deepmind_papers/1509.06461v3.pdf -------------------------------------------------------------------------------- /papers/recent_deepmind_papers/1509.08731v1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/papers/recent_deepmind_papers/1509.08731v1.pdf -------------------------------------------------------------------------------- /papers/recent_deepmind_papers/1510.09142v1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/papers/recent_deepmind_papers/1510.09142v1.pdf -------------------------------------------------------------------------------- /papers/recent_deepmind_papers/1511.06581v3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/papers/recent_deepmind_papers/1511.06581v3.pdf -------------------------------------------------------------------------------- /papers/recent_deepmind_papers/1512.04860v1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/papers/recent_deepmind_papers/1512.04860v1.pdf 
-------------------------------------------------------------------------------- /papers/recent_deepmind_papers/1602.01783v2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/papers/recent_deepmind_papers/1602.01783v2.pdf -------------------------------------------------------------------------------- /papers/recent_deepmind_papers/1603.00748v1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/papers/recent_deepmind_papers/1603.00748v1.pdf -------------------------------------------------------------------------------- /papers/recent_deepmind_papers/1605.06676v2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/papers/recent_deepmind_papers/1605.06676v2.pdf -------------------------------------------------------------------------------- /papers/recent_deepmind_papers/1606.02647v2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/papers/recent_deepmind_papers/1606.02647v2.pdf -------------------------------------------------------------------------------- /papers/recent_deepmind_papers/1606.05312.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/papers/recent_deepmind_papers/1606.05312.pdf -------------------------------------------------------------------------------- /papers/recent_deepmind_papers/1610.00633.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/papers/recent_deepmind_papers/1610.00633.pdf -------------------------------------------------------------------------------- /papers/recent_deepmind_papers/4031-monte-carlo-planning-in-large-pomdps.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/papers/recent_deepmind_papers/4031-monte-carlo-planning-in-large-pomdps.pdf -------------------------------------------------------------------------------- /papers/recent_deepmind_papers/DQNNaturePaper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/papers/recent_deepmind_papers/DQNNaturePaper.pdf -------------------------------------------------------------------------------- /pics/atari.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/pics/atari.jpg -------------------------------------------------------------------------------- /pics/blindspot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/pics/blindspot.png 
-------------------------------------------------------------------------------- /pics/jan.zikes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/pics/jan.zikes.png -------------------------------------------------------------------------------- /pics/michal.sustr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/pics/michal.sustr.png -------------------------------------------------------------------------------- /pics/trophy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/pics/trophy.jpg -------------------------------------------------------------------------------- /preparation/README.md: -------------------------------------------------------------------------------- 1 | # Preparation for workshop 2 | 3 | On your personal laptop, please make sure to: 4 | 5 | - create an isolated environment (you can use virtualenv or conda, whichever you prefer): 6 | 7 | $ pip install virtualenv 8 | # put all the files in a location you like, we'll stick with ~/tf_env for simplicity 9 | $ virtualenv -p /usr/bin/python3.5 ~/tf_env 10 | $ source ~/tf_env/bin/activate 11 | 12 | - install the following packages into the virtualenv 13 | 14 | # Put these into a text file requirements.txt and execute in your virtualenv 15 | $ pip install -r requirements.txt 16 | # (there might be some extra packages; it is the list I have on my laptop) 17 | 18 | - [install TensorFlow in the virtualenv](https://www.tensorflow.org/versions/r0.11/get_started/os_setup.html#virtualenv-installation) 19 | - install ALE - the [Arcade Learning Environment](http://www.arcadelearningenvironment.org/) 20 | - Install the necessary dependencies: 21 | 22 | sudo apt-get install libsdl-gfx1.2-dev libsdl-image1.2-dev libsdl1.2-dev cmake 23 | 24 | - Clone and build ALE: 25 | 26 | git clone https://github.com/mgbellemare/Arcade-Learning-Environment.git 27 | cd Arcade-Learning-Environment 28 | cmake -DUSE_SDL=ON -DUSE_RLGLUE=OFF -DBUILD_EXAMPLES=ON . 29 | make -j 4 30 | sudo make install 31 | sudo pip install . 32 | 33 | - install [OpenAI Gym](https://gym.openai.com/docs) 34 | 35 | pip install gym 36 | # note: I had to update the gym source files to swap the if/elif conditions 37 | # for ffmpeg/avconv. I issued a PR to gym but I'm not sure if they 38 | # will merge it. If this fails for you as well, you can update the code 39 | # or clone my repo at https://github.com/michalsustr/gym 40 | # Diff: 41 | # https://github.com/openai/gym/compare/master...michalsustr:master#diff-54b89e317dc6e7d9dfd407344cafd1bf 42 | pip install gym[atari] 43 | 44 | - optionally: [Set up TensorFlow on AWS GPU](https://github.com/gtoubassi/dqn-atari/wiki/Setting-up-TensorFlow-on-AWS-GPU) 45 | 46 | # Test setup 47 | - You can test your ALE setup by launching the script 48 | 49 | $ python ale_example.py ./space_invaders.bin 50 | 51 | - Test TensorFlow (it can take a while to run for the first time) 52 | 53 | $ python tf_example.py 54 | 55 | - Test gym - get your API key at [gym](https://gym.openai.com/) (by signing in with your GitHub account) and update the `gym_example.py` file.
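The only change needed in `gym_example.py` is the `api_key` argument of the final `gym.upload(...)` call at the bottom of the script; replace the placeholder with the key from your gym account page:

    gym.upload(outdir, api_key='YOUR_API_KEY')

Then run: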
57 | 58 | $ python gym_example.py 59 | 60 | You should get a reference link to your evalution board 61 | 62 | 2016-10-02 21:18:26,920 [MainThread ][INFO ]: 63 | **************************************************** 64 | You successfully uploaded your evaluation on CartPole-v0 to 65 | OpenAI Gym! You can find it at: 66 | 67 | https://gym.openai.com/evaluations/eval_8ZzrWOlRICX3ynLBTQ8A 68 | 69 | **************************************************** 70 | 71 | Please send this link to my e-mail address `michal.sustr at you know gmail.com` with title 72 | `[RL_workshop] gym link` so that we know how many people actually read this and prepared themselves :-) 73 | -------------------------------------------------------------------------------- /preparation/ale_example.py: -------------------------------------------------------------------------------- 1 | # python_example.py 2 | # Author: Ben Goodrich 3 | # 4 | # This is a direct port to python of the shared library example from 5 | # ALE provided in doc/examples/sharedLibraryInterfaceExample.cpp 6 | import sys 7 | from random import randrange 8 | from ale_python_interface import ALEInterface 9 | 10 | if len(sys.argv) < 2: 11 | print('Usage: %s rom_file' % sys.argv[0]) 12 | sys.exit() 13 | 14 | ale = ALEInterface() 15 | 16 | # Get & Set the desired settings 17 | ale.setInt(b'random_seed', 123) 18 | 19 | # Set USE_SDL to true to display the screen. ALE must be compilied 20 | # with SDL enabled for this to work. On OSX, pygame init is used to 21 | # proxy-call SDL_main. 22 | USE_SDL = False 23 | if USE_SDL: 24 | if sys.platform == 'darwin': 25 | import pygame 26 | pygame.init() 27 | ale.setBool(b'sound', False) # Sound doesn't work on OSX 28 | elif sys.platform.startswith('linux'): 29 | ale.setBool(b'sound', True) 30 | ale.setBool(b'display_screen', True) 31 | 32 | # Load the ROM file 33 | rom_file = str.encode(sys.argv[1]) 34 | ale.loadROM(rom_file) 35 | 36 | # Get the list of legal actions 37 | legal_actions = ale.getLegalActionSet() 38 | 39 | # Play 10 episodes 40 | for episode in range(10): 41 | total_reward = 0 42 | while not ale.game_over(): 43 | a = legal_actions[randrange(len(legal_actions))] 44 | # Apply an action and get the resulting reward 45 | reward = ale.act(a); 46 | total_reward += reward 47 | print('Episode %d ended with score: %d' % (episode, total_reward)) 48 | ale.reset_game() 49 | -------------------------------------------------------------------------------- /preparation/gym_example.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import os 4 | import sys 5 | 6 | from os.path import dirname, realpath 7 | sys.path.append(dirname(dirname(realpath(__file__)))) 8 | import helpers.logging_colorer as logging_colorer 9 | logging_colorer.init_logging() 10 | 11 | import gym 12 | 13 | # The world's simplest agent! 14 | class RandomAgent(object): 15 | def __init__(self, action_space): 16 | self.action_space = action_space 17 | 18 | def act(self, observation, reward, done): 19 | return self.action_space.sample() 20 | 21 | if __name__ == '__main__': 22 | parser = argparse.ArgumentParser(description=None) 23 | parser.add_argument('env_id', nargs='?', default='CartPole-v0', help='Select the environment to run') 24 | args = parser.parse_args() 25 | 26 | # Call `undo_logger_setup` if you want to undo Gym's logger setup 27 | # and configure things manually. (The default should be fine most 28 | # of the time.) 
29 | gym.undo_logger_setup() 30 | logging_colorer.init_logging() 31 | 32 | env = gym.make(args.env_id) 33 | 34 | # You provide the directory to write to (can be an existing 35 | # directory, including one with existing data -- all monitor files 36 | # will be namespaced). You can also dump to a tempdir if you'd 37 | # like: tempfile.mkdtemp(). 38 | outdir = '/tmp/random-agent-results' 39 | env.monitor.start(outdir, force=True, seed=0) 40 | 41 | # This declaration must go *after* the monitor call, since the 42 | # monitor's seeding creates a new action_space instance with the 43 | # appropriate pseudorandom number generator. 44 | agent = RandomAgent(env.action_space) 45 | 46 | episode_count = 100 47 | max_steps = 200 48 | reward = 0 49 | done = False 50 | 51 | for i in range(episode_count): 52 | ob = env.reset() 53 | 54 | for j in range(max_steps): 55 | action = agent.act(ob, reward, done) 56 | ob, reward, done, _ = env.step(action) 57 | if done: 58 | break 59 | # Note there's no env.render() here. But the environment still can open window and 60 | # render if asked by env.monitor: it calls env.render('rgb_array') to record video. 61 | # Video is not recorded every episode, see capped_cubic_video_schedule for details. 62 | 63 | # Dump result info to disk 64 | env.monitor.close() 65 | 66 | # Upload to the scoreboard. We could also do this from another 67 | # process if we wanted. 68 | logging.info("Successfully ran RandomAgent. Now trying to upload results to the scoreboard. If it breaks, you can always just try re-uploading the same results.") 69 | gym.upload(outdir, api_key='YOUR_API_KEY') 70 | -------------------------------------------------------------------------------- /preparation/space_invaders.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/preparation/space_invaders.bin -------------------------------------------------------------------------------- /preparation/tf_example.py: -------------------------------------------------------------------------------- 1 | ''' 2 | HelloWorld example using TensorFlow library. 3 | Author: Aymeric Damien 4 | Project: https://github.com/aymericdamien/TensorFlow-Examples/ 5 | ''' 6 | 7 | from __future__ import print_function 8 | 9 | import tensorflow as tf 10 | 11 | # Simple hello world using TensorFlow 12 | 13 | # Create a Constant op 14 | # The op is added as a node to the default graph. 15 | # 16 | # The value returned by the constructor represents the output 17 | # of the Constant op. 
18 | hello = tf.constant('Hello, TensorFlow!') 19 | 20 | # Start tf session 21 | sess = tf.Session() 22 | 23 | # Run the op 24 | print(sess.run(hello)) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # manually installed later: 2 | #ale-python-interface==0.0.1 3 | #tensorflow==0.9.0 4 | alabaster 5 | arch 6 | argcomplete 7 | Babel 8 | backports.shutil-get-terminal-size 9 | blosc 10 | CommonMark 11 | cycler==0.10.0 12 | dask==0.11.0 13 | decorator==4.0.10 14 | descartes 15 | docutils==0.12 16 | ete3==3.0.0b35 17 | filterpy==0.1.3 18 | humanize==0.5.1 19 | imagesize==0.7.1 20 | ipykernel 21 | ipympl 22 | ipython 23 | ipython-genutils 24 | ipywidgets 25 | Jinja2==2.8 26 | jsonschema==2.5.1 27 | jupyter 28 | jupyter-client 29 | jupyter-console 30 | jupyter-contrib-core 31 | jupyter-core 32 | jupyter-nbextensions-configurator 33 | Markdown 34 | MarkupSafe==0.23 35 | matplotlib 36 | mistune 37 | mpld3==0.2 38 | mpmath==0.19 39 | nbconvert 40 | nbformat 41 | networkx==1.11 42 | notebook 43 | numpy 44 | pandas 45 | patsy 46 | pexpect==4.0.1 47 | pickleshare 48 | Pillow==3.3.1 49 | prompt-toolkit 50 | protobuf 51 | ptyprocess==0.5.1 52 | Pygments 53 | pyparsing==2.1.4 54 | python-dateutil 55 | pytz 56 | PyYAML 57 | pyzmq 58 | qtconsole 59 | recommonmark 60 | requests 61 | scikit-image 62 | scikit-learn 63 | scipy 64 | seaborn==0.7.1 65 | simplegeneric 66 | six==1.10.0 67 | snowballstemmer==1.2.1 68 | Sphinx 69 | sphinx-rtd-theme==0.1.9 70 | statsmodels 71 | sympy==1.0 72 | terminado==0.6 73 | toolz==0.8.0 74 | tornado==4.3 75 | tqdm==4.7.6 76 | traitlets 77 | wcwidth 78 | widgetsnbextension==1.2.6 79 | -------------------------------------------------------------------------------- /showcase-studies/atari-roms/breakout.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/atari-roms/breakout.bin -------------------------------------------------------------------------------- /showcase-studies/atari-roms/pong.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/atari-roms/pong.bin -------------------------------------------------------------------------------- /showcase-studies/atari-roms/seaquest.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/atari-roms/seaquest.bin -------------------------------------------------------------------------------- /showcase-studies/atari-roms/space_invaders.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/atari-roms/space_invaders.bin -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/.directory: -------------------------------------------------------------------------------- 1 | [Dolphin] 2 | PreviewsShown=true 3 | Timestamp=2016,10,2,13,57,49 4 | Version=3 5 | ViewMode=1 6 | 7 | [Settings] 8 | HiddenFilesShown=true 9 | 
-------------------------------------------------------------------------------- /showcase-studies/dqn-gym/.gitignore: -------------------------------------------------------------------------------- 1 | training_results -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/Makefile: -------------------------------------------------------------------------------- 1 | train: 2 | # use default params 3 | python3 ./run_dqn.py breakout dqn brick_hunter 4 | 5 | quick_train: 6 | python ./run_dqn.py breakout dqn quick_train --memory_capacity 100000 --training_length=1000 --random_exploration_length=400 --test_games=3 --test_frequency=250 7 | quick_watch: 8 | python ./run_dqn.py breakout dqn quick_train --watch --test_games=3 -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/README.md: -------------------------------------------------------------------------------- 1 | Originally from https://github.com/Jabberwockyll/deep_rl_ale 2 | 3 | # deep_rl_ale 4 | This repo contains an implementation of [this paper](http://home.uchicago.edu/~arij/journalclub/papers/2015_Mnih_et_al.pdf) in TensorFlow. It also contains the option to use the [double DQN](http://arxiv.org/pdf/1509.06461v3.pdf) loss function, as well as a parallel version that acts and learns simultaneously to speed up training. 5 | 6 | [Watch it play Pong, Breakout, Space Invaders, and Seaquest here](https://youtu.be/gQ9FsAGb148) 7 | 8 | The code is still a little messy in some places and will be cleaned up in the future, but there will probably not be any significant updates or changes until mid-May. 9 | 10 | ## Dependencies/Requirements 11 | 12 | 1. An nVidia GPU with GDDR5 memory to train in a reasonable amount of time 13 | 2. [Python 3](https://www.python.org/) 14 | 3. [The Arcade Learning Environment](https://github.com/mgbellemare/Arcade-Learning-Environment) for the emulator framework. 15 | 4. [Tensorflow](https://www.tensorflow.org/) for GPU numerical computations and symbolic differentiation. 16 | 5. Linux/OSX, because Tensorflow doesn't support Windows. 17 | 6. [Matplotlib](http://matplotlib.org/) and [Seaborn](https://stanford.edu/~mwaskom/software/seaborn/) for visualizations. 18 | 7. [OpenCV](http://opencv.org/) for image scaling. Might switch to SciPy since OpenCV was a pain for me to install. 19 | 8. Any dependencies of the above software, of course, like NumPy. 20 | 21 | ## How to run 22 | 23 | From the top directory of the repo (the directory with the Python files): 24 | ### Training 25 | `$ python3 ./run_dqn.py <game> <agent_type> <save_name>` 26 | 27 | For example: 28 | 29 | `$ python3 ./run_dqn.py breakout dqn brick_hunter` 30 | 31 | ### Watching 32 | `$ python3 ./run_dqn.py <game> <agent_type> <save_name> --watch` 33 | Where \<save_name\> is the \<save_name\> used during training. If you used any non-default settings, make sure to use the same ones when watching as well. 34 | 35 | ## Running Notes 36 | 37 | You can change many hyperparameters/settings by entering optional arguments. 38 | To get a list of arguments: 39 | 40 | `$ python3 ./run_dqn.py --h` 41 | 42 | By default, ROM files are expected to be in a folder titled 'roms' in the parent directory of the repo. You can pass a different directory as an argument or change the default in run_dqn.py. 43 | 44 | Statistics and saved models are saved in the parent directory of the repo as well. 45 | 46 | The default settings are very similar to those used in the DeepMind Nature paper. There are only a few small differences of which I am aware.
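For readers curious about the double-DQN option mentioned above, the only conceptual change from vanilla DQN is how the bootstrap target is built: the online network chooses the next action and the target network evaluates it, which reduces the overestimation bias analyzed in the linked paper. Below is a minimal NumPy sketch of the two targets (illustrative function and argument names, not this repo's actual implementation; gamma=0.99 is a common default, not necessarily the value used here):

    import numpy as np

    def dqn_targets(rewards, next_q_target, terminals, gamma=0.99):
        # Vanilla DQN bootstraps from the target network's own best action.
        return rewards + gamma * (1.0 - terminals) * next_q_target.max(axis=1)

    def double_dqn_targets(rewards, next_q_online, next_q_target, terminals, gamma=0.99):
        # Double DQN: the online network picks the action, the target network scores it.
        best = next_q_online.argmax(axis=1)
        return rewards + gamma * (1.0 - terminals) * next_q_target[np.arange(len(best)), best]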
47 | 48 | A full training run takes between 3 and 4 days on my nVidia GTX 970, depending on whether or not the parallel option is used. Parallel training speeds up training by ~30%, but I'm still testing how different things impact speed. 49 | -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/atari_emulator.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Class for ale instances to generate experiences and test agents. 3 | Uses DeepMind's preproessing/initialization methods 4 | ''' 5 | import logging 6 | import random 7 | import sys 8 | 9 | from scipy import ndimage 10 | import numpy as np 11 | 12 | from ale_python_interface import ALEInterface 13 | 14 | 15 | class AtariEmulator: 16 | def __init__(self, args): 17 | ''' Initialize Atari environment ''' 18 | 19 | # Parameters 20 | self.buffer_length = args.buffer_length 21 | self.screen_dims = args.screen_dims 22 | self.frame_skip = args.frame_skip 23 | self.blend_method = args.blend_method 24 | self.reward_processing = args.reward_processing 25 | self.max_start_wait = args.max_start_wait 26 | self.history_length = args.history_length 27 | self.start_frames_needed = self.buffer_length - 1 + ( 28 | (args.history_length - 1) * self.frame_skip) 29 | 30 | # Initialize ALE instance 31 | self.ale = ALEInterface() 32 | self.ale.setFloat(b'repeat_action_probability', 0.0) 33 | if args.watch: 34 | self.ale.setBool(b'sound', True) 35 | self.ale.setBool(b'display_screen', True) 36 | self.ale.loadROM(str.encode(args.rom_path + '/' + args.game + '.bin')) 37 | 38 | self.buffer = np.empty((self.buffer_length, 210, 160)) 39 | self.current = 0 40 | self.action_set = self.ale.getMinimalActionSet() 41 | self.lives = self.ale.lives() 42 | 43 | self.reset() 44 | 45 | def get_possible_actions(self): 46 | ''' Return list of possible actions for game ''' 47 | return self.action_set 48 | 49 | def get_screen(self): 50 | ''' Add screen to frame buffer ''' 51 | self.buffer[self.current] = np.squeeze(self.ale.getScreenGrayscale()) 52 | self.current = (self.current + 1) % self.buffer_length 53 | 54 | def reset(self): 55 | self.ale.reset_game() 56 | self.lives = self.ale.lives() 57 | 58 | if self.max_start_wait < 0: 59 | logging.error("ERROR: max start wait decreased beyond 0") 60 | sys.exit() 61 | elif self.max_start_wait <= self.start_frames_needed: 62 | wait = 0 63 | else: 64 | wait = random.randint(0, 65 | self.max_start_wait - self.start_frames_needed) 66 | for _ in range(wait): 67 | self.ale.act(self.action_set[0]) 68 | 69 | # Fill frame buffer 70 | self.get_screen() 71 | for _ in range(self.buffer_length - 1): 72 | self.ale.act(self.action_set[0]) 73 | self.get_screen() 74 | # get initial_states 75 | state = [(self.preprocess(), 0, 0, False)] 76 | for step in range(self.history_length - 1): 77 | state.append(self.run_step(0)) 78 | 79 | # make sure agent hasn't died yet 80 | if self.isTerminal(): 81 | logging.info( 82 | "Agent lost during start wait. 
Decreasing max_start_wait by 1") 83 | self.max_start_wait -= 1 84 | return self.reset() 85 | 86 | return state 87 | 88 | def run_step(self, action): 89 | ''' Apply action to game and return next screen and reward ''' 90 | 91 | raw_reward = 0 92 | for step in range(self.frame_skip): 93 | raw_reward += self.ale.act(self.action_set[action]) 94 | self.get_screen() 95 | 96 | reward = None 97 | if self.reward_processing == 'clip': 98 | reward = np.clip(raw_reward, -1, 1) 99 | else: 100 | reward = raw_reward 101 | 102 | terminal = self.isTerminal() 103 | self.lives = self.ale.lives() 104 | 105 | return (self.preprocess(), action, reward, terminal, raw_reward) 106 | 107 | def preprocess(self): 108 | ''' Preprocess frame for agent ''' 109 | 110 | img = None 111 | 112 | if self.blend_method == "max": 113 | img = np.amax(self.buffer, axis=0) 114 | 115 | # no idea where these numbers come from... 116 | img = ndimage.zoom(img, (0.4, 0.525)) 117 | return img 118 | 119 | def isTerminal(self): 120 | return (self.isGameOver() or (self.lives > self.ale.lives())) 121 | 122 | def isGameOver(self): 123 | return self.ale.game_over() 124 | -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/dqn_agent.py: -------------------------------------------------------------------------------- 1 | import experiment 2 | from visuals import Visuals 3 | 4 | 5 | import random 6 | import logging 7 | import numpy as np 8 | from tqdm import tqdm 9 | 10 | 11 | class DQNAgent(): 12 | def __init__(self, args, q_network, 13 | train_emulator, test_emulator, 14 | experience_memory, num_actions, 15 | train_stats, test_stats): 16 | 17 | self.network = q_network 18 | self.train_emulator = train_emulator 19 | self.test_emulator = test_emulator 20 | 21 | self.memory = experience_memory 22 | self.train_stats = train_stats 23 | self.test_stats = test_stats 24 | 25 | self.num_actions = num_actions 26 | self.history_length = args.history_length 27 | 28 | self.training_frequency = args.training_frequency 29 | self.random_exploration_length = args.random_exploration_length 30 | self.training_length = args.training_length 31 | self.initial_exploration_rate = args.initial_exploration_rate 32 | self.final_exploration_rate = args.final_exploration_rate 33 | self.final_exploration_frame = args.final_exploration_frame 34 | self.test_exploration_rate = args.test_exploration_rate 35 | self.recording_frequency = args.recording_frequency 36 | self.test_frequency = args.test_frequency 37 | 38 | self.exploration_rate = self.initial_exploration_rate 39 | self.total_steps = 0 40 | 41 | self.args = args 42 | 43 | self.test_state = [] 44 | 45 | logging.info("DQN Agent Initialized") 46 | 47 | def choose_action(self): 48 | if random.random() >= self.exploration_rate: 49 | state = self.memory.get_current_state() 50 | q_values = self.network.inference(state) 51 | self.train_stats.add_q_values(q_values) 52 | return np.argmax(q_values) 53 | else: 54 | return random.randrange(self.num_actions) 55 | 56 | def checkGameOver(self): 57 | if self.train_emulator.isGameOver(): 58 | initial_state = self.train_emulator.reset() 59 | for experience in initial_state: 60 | self.memory.add(experience[0], experience[1], experience[2], 61 | experience[3]) 62 | self.train_stats.add_game() 63 | 64 | def run_random_exploration(self): 65 | for step in tqdm(range(self.random_exploration_length)): 66 | state, action, reward, terminal, raw_reward = self.train_emulator.run_step( 67 | random.randrange(self.num_actions)) 68 | 
self.train_stats.add_reward(raw_reward) 69 | self.memory.add(state, action, reward, terminal) 70 | self.checkGameOver() 71 | self.total_steps += 1 72 | if (self.total_steps % self.recording_frequency == 0): 73 | self.train_stats.record(self.total_steps) 74 | 75 | def run_training(self): 76 | # show pbars only if not evaluating agent 77 | pbar = tqdm() 78 | for step in range(self.training_length): 79 | # test agent 80 | if step % self.test_frequency == 0: 81 | pbar.close() 82 | experiment.evaluate_agent(self.args, self, self.test_emulator, self.test_stats) 83 | self.save_model(step) 84 | logging.info("Training... (%d/%d) " % (step, self.training_length)) 85 | pbar = tqdm(total=min(self.test_frequency, self.training_length), unit="step") 86 | pbar.update(1) 87 | 88 | # play step 89 | state, action, reward, terminal, raw_reward = self.train_emulator.run_step( 90 | self.choose_action()) 91 | self.train_stats.add_reward(raw_reward) 92 | self.memory.add(state, action, reward, terminal) 93 | self.checkGameOver() 94 | 95 | # training 96 | if self.total_steps % self.training_frequency == 0: 97 | states, actions, rewards, next_states, terminals = self.memory.get_batch() 98 | loss = self.network.train(states, actions, rewards, next_states, 99 | terminals) 100 | self.train_stats.add_loss(loss) 101 | 102 | self.total_steps += 1 103 | 104 | if self.total_steps < self.final_exploration_frame: 105 | self.exploration_rate -= ( 106 | self.exploration_rate - self.final_exploration_rate) / ( 107 | self.final_exploration_frame - self.total_steps) 108 | 109 | if self.total_steps % self.recording_frequency == 0: 110 | self.train_stats.record(self.total_steps) 111 | self.network.record_params(self.total_steps) 112 | 113 | pbar.close() 114 | 115 | def test_step(self, observation): 116 | if len(self.test_state) < self.history_length: 117 | self.test_state.append(observation) 118 | 119 | # choose action 120 | q_values = None 121 | action = None 122 | if random.random() >= self.test_exploration_rate: 123 | state = np.expand_dims(np.transpose(self.test_state, [1, 2, 0]), 124 | axis=0) 125 | q_values = self.network.inference(state) 126 | action = np.argmax(q_values) 127 | else: 128 | action = random.randrange(self.num_actions) 129 | 130 | self.test_state.pop(0) 131 | return [action, q_values] 132 | 133 | def save_model(self, step): 134 | self.network.save_model(step) 135 | 136 | def run_experiment(self): 137 | logging.info("Running random exploration") 138 | self.run_random_exploration() 139 | 140 | self.train_emulator.reset() 141 | self.run_training() 142 | -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/experience_memory.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ExperienceMemory is a class for experience replay. 3 | It stores experience samples and samples minibatches for training. 4 | ''' 5 | 6 | import random 7 | 8 | import numpy as np 9 | 10 | 11 | class ExperienceMemory: 12 | def __init__(self, args, num_actions): 13 | ''' Initialize emtpy experience dataset. 
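        Observations, actions, rewards and terminal flags are kept in
        pre-allocated circular buffers of length memory_capacity;
        self.current is the next write position and self.size grows
        until the buffer is full, after which the oldest entries are
        overwritten.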
''' 14 | 15 | # params 16 | self.capacity = args.memory_capacity 17 | self.history_length = args.history_length 18 | self.batch_size = args.batch_size 19 | self.num_actions = num_actions 20 | self.screen_dims = args.screen_dims 21 | 22 | # initialize dataset 23 | self.observations = np.empty( 24 | (self.capacity, self.screen_dims[0], self.screen_dims[1]), 25 | dtype=np.uint8) 26 | self.actions = np.empty(self.capacity, dtype=np.uint8) 27 | self.rewards = np.empty(self.capacity, dtype=np.integer) 28 | self.terminals = np.empty(self.capacity, dtype=np.bool) 29 | 30 | self.size = 0 31 | self.current = 0 32 | 33 | def add(self, obs, act, reward, terminal): 34 | ''' Add experience to dataset. 35 | 36 | Args: 37 | obs: single observation frame 38 | act: action taken 39 | reward: reward 40 | terminal: is this a terminal state? 41 | ''' 42 | 43 | self.observations[self.current] = obs 44 | self.actions[self.current] = act 45 | self.rewards[self.current] = reward 46 | self.terminals[self.current] = terminal 47 | 48 | self.current = (self.current + 1) % self.capacity 49 | if self.size == self.capacity - 1: 50 | self.size = self.capacity 51 | else: 52 | self.size = max(self.size, self.current) 53 | 54 | def get_state(self, indices): 55 | ''' Return the observation sequence that ends at index 56 | 57 | Args: 58 | indices: list of last observations in sequences 59 | ''' 60 | state = np.empty(( 61 | len(indices), self.screen_dims[0], self.screen_dims[1], 62 | self.history_length)) 63 | count = 0 64 | 65 | for index in indices: 66 | frame_slice = np.arange(index - self.history_length + 1, 67 | (index + 1)) 68 | state[count] = np.transpose( 69 | np.take(self.observations, frame_slice, axis=0), [1, 2, 0]) 70 | count += 1 71 | return state 72 | 73 | def get_current_state(self): 74 | ''' Return most recent observation sequence ''' 75 | 76 | return self.get_state([(self.current - 1) % self.capacity]) 77 | 78 | def get_batch(self): 79 | ''' Sample minibatch of experiences for training ''' 80 | 81 | samples = [] # indices of the end of each sample 82 | 83 | while len(samples) < self.batch_size: 84 | 85 | if self.size < self.capacity: # make this better 86 | index = random.randrange(self.history_length, self.current) 87 | else: 88 | # make sure state from index doesn't overlap with current's gap 89 | index = (self.current + random.randrange(self.history_length, 90 | self.size - 1)) % self.capacity 91 | # make sure no terminal observations are in the first state 92 | if self.terminals[(index - self.history_length):index].any(): 93 | continue 94 | else: 95 | samples.append(index) 96 | # endwhile 97 | samples = np.asarray(samples) 98 | 99 | # create batch 100 | o1 = self.get_state((samples - 1) % self.capacity) 101 | a = np.eye(self.num_actions)[ 102 | self.actions[samples]] # convert actions to one-hot matrix 103 | r = self.rewards[samples] 104 | o2 = self.get_state(samples) 105 | t = self.terminals[samples].astype(int) 106 | return [o1, a, r, o2, t] 107 | -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/experiment.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from tqdm import tqdm 3 | 4 | from visuals import Visuals 5 | 6 | 7 | def evaluate_agent(args, agent, test_emulator, test_stats): 8 | logging.info("Evaluating agent performace in test emulator") 9 | step = 0 10 | total_reward = 0.0 11 | reset = test_emulator.reset() 12 | agent.test_state = list(next(zip(*reset))) 13 | screen = 
test_emulator.preprocess() 14 | visuals = None 15 | if args.watch: 16 | visuals = Visuals(test_emulator.get_possible_actions()) 17 | 18 | # either play as many steps as possible or as many games 19 | for _ in tqdm(range(args.test_games), unit="game"): 20 | while not test_emulator.isGameOver() and step < args.test_steps: 21 | action, q_values = agent.test_step(screen) 22 | screen, action, reward, terminal, raw_reward = test_emulator.run_step(action) 23 | total_reward += raw_reward 24 | 25 | # record stats 26 | if not (test_stats is None): 27 | test_stats.add_reward(raw_reward) 28 | if not (q_values is None): 29 | test_stats.add_q_values(q_values) 30 | # endif 31 | # endif 32 | 33 | # update visuals 34 | if args.watch and (not (q_values is None)): 35 | visuals.update(q_values) 36 | 37 | step += 1 38 | # endwhile 39 | if not (test_stats is None): 40 | test_stats.add_game() 41 | reset = test_emulator.reset() 42 | agent.test_state = list(next(zip(*reset))) 43 | 44 | return total_reward / args.test_games 45 | -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/logging_colorer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Console coloring of logs 3 | """ 4 | 5 | # thanks stackoverflow :-P https://stackoverflow.com/questions/384076/how-can-i-color-python-logging-output 6 | import logging 7 | 8 | # now we patch Python code to add color support to logging.StreamHandler 9 | import sys 10 | 11 | 12 | def add_coloring_to_emit_windows(fn): 13 | # add methods we need to the class 14 | def _out_handle(self): 15 | import ctypes 16 | return ctypes.windll.kernel32.GetStdHandle(self.STD_OUTPUT_HANDLE) 17 | 18 | out_handle = property(_out_handle) 19 | 20 | def _set_color(self, code): 21 | import ctypes 22 | # Constants from the Windows API 23 | self.STD_OUTPUT_HANDLE = -11 24 | hdl = ctypes.windll.kernel32.GetStdHandle(self.STD_OUTPUT_HANDLE) 25 | ctypes.windll.kernel32.SetConsoleTextAttribute(hdl, code) 26 | 27 | setattr(logging.StreamHandler, '_set_color', _set_color) 28 | 29 | def new(*args): 30 | FOREGROUND_BLUE = 0x0001 # text color contains blue. 31 | FOREGROUND_GREEN = 0x0002 # text color contains green. 32 | FOREGROUND_RED = 0x0004 # text color contains red. 33 | FOREGROUND_INTENSITY = 0x0008 # text color is intensified. 34 | FOREGROUND_WHITE = FOREGROUND_BLUE | FOREGROUND_GREEN | FOREGROUND_RED 35 | # winbase.h 36 | STD_INPUT_HANDLE = -10 37 | STD_OUTPUT_HANDLE = -11 38 | STD_ERROR_HANDLE = -12 39 | 40 | # wincon.h 41 | FOREGROUND_BLACK = 0x0000 42 | FOREGROUND_BLUE = 0x0001 43 | FOREGROUND_GREEN = 0x0002 44 | FOREGROUND_CYAN = 0x0003 45 | FOREGROUND_RED = 0x0004 46 | FOREGROUND_MAGENTA = 0x0005 47 | FOREGROUND_YELLOW = 0x0006 48 | FOREGROUND_GREY = 0x0007 49 | FOREGROUND_INTENSITY = 0x0008 # foreground color is intensified. 50 | 51 | BACKGROUND_BLACK = 0x0000 52 | BACKGROUND_BLUE = 0x0010 53 | BACKGROUND_GREEN = 0x0020 54 | BACKGROUND_CYAN = 0x0030 55 | BACKGROUND_RED = 0x0040 56 | BACKGROUND_MAGENTA = 0x0050 57 | BACKGROUND_YELLOW = 0x0060 58 | BACKGROUND_GREY = 0x0070 59 | BACKGROUND_INTENSITY = 0x0080 # background color is intensified. 
60 | 61 | levelno = args[1].levelno 62 | if (levelno >= 50): 63 | color = BACKGROUND_YELLOW | FOREGROUND_RED | FOREGROUND_INTENSITY | BACKGROUND_INTENSITY 64 | elif (levelno >= 40): 65 | color = FOREGROUND_RED | FOREGROUND_INTENSITY 66 | elif (levelno >= 30): 67 | color = FOREGROUND_YELLOW | FOREGROUND_INTENSITY 68 | elif (levelno >= 20): 69 | color = FOREGROUND_GREEN 70 | elif (levelno >= 10): 71 | color = FOREGROUND_MAGENTA 72 | else: 73 | color = FOREGROUND_WHITE 74 | args[0]._set_color(color) 75 | 76 | ret = fn(*args) 77 | args[0]._set_color(FOREGROUND_WHITE) 78 | # print "after" 79 | return ret 80 | 81 | return new 82 | 83 | 84 | def add_coloring_to_emit_ansi(fn): 85 | # add methods we need to the class 86 | def new(*args): 87 | levelno = args[1].levelno 88 | if (levelno >= 50): 89 | color = '\x1b[31m' # red 90 | elif (levelno >= 40): 91 | color = '\x1b[31m' # red 92 | elif (levelno >= 30): 93 | color = '\x1b[33m' # yellow 94 | elif (levelno >= 20): 95 | color = '\x1b[32m' # green 96 | elif (levelno >= 10): 97 | color = '\x1b[35m' # pink 98 | else: 99 | color = '\x1b[0m' # normal 100 | args[1].msg = color + args[1].msg[:200] + '\x1b[0m' # normal 101 | # print "after" 102 | return fn(*args) 103 | 104 | return new 105 | 106 | 107 | import platform 108 | 109 | if platform.system() == 'Windows': 110 | # Windows does not support ANSI escapes and we are using API calls to set the console color 111 | logging.StreamHandler.emit = add_coloring_to_emit_windows( 112 | logging.StreamHandler.emit) 113 | else: 114 | # all non-Windows platforms are supporting ANSI escapes so we use them 115 | logging.StreamHandler.emit = add_coloring_to_emit_ansi( 116 | logging.StreamHandler.emit) 117 | # log = logging.getLogger() 118 | # log.addFilter(log_filter()) 119 | # //hdlr = logging.StreamHandler() 120 | # //hdlr.setFormatter(formatter()) 121 | 122 | 123 | def init_logging(verbose=True, log_file='app.log'): 124 | """ 125 | Initialize settings for Python logger 126 | 127 | This allows for logging into console as well as specified log_file. 128 | After you can use in the code just:: 129 | 130 | import logging 131 | logging.info("hello world!") 132 | # will produce 133 | # 2016-07-19 16:13:02,931 [MainThread ][INFO ]: hello world! 
134 | 135 | :param bool verbose: 136 | :param str log_file: 137 | """ 138 | 139 | # let's log the same output to console and to file 140 | log_file = log_file 141 | format = "%(asctime)s [%(threadName)-12.12s][%(levelname)-5.5s]: %(message)s" 142 | 143 | root_logger = logging.getLogger() 144 | logging.basicConfig( 145 | format=format, 146 | level=logging.DEBUG if verbose else logging.WARNING, 147 | stream=sys.stderr) 148 | 149 | log_formatter = logging.Formatter(format) 150 | file_handler = logging.FileHandler(log_file) 151 | file_handler.setFormatter(log_formatter) 152 | root_logger.addHandler(file_handler) 153 | -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/parallel_dqn_agent.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import random 3 | import threading 4 | 5 | import numpy as np 6 | 7 | 8 | class ParallelDQNAgent(): 9 | def __init__(self, args, q_network, emulator, experience_memory, 10 | num_actions, train_stats): 11 | 12 | self.network = q_network 13 | self.emulator = emulator 14 | self.memory = experience_memory 15 | self.train_stats = train_stats 16 | 17 | self.num_actions = num_actions 18 | self.history_length = args.history_length 19 | self.training_frequency = args.training_frequency 20 | self.random_exploration_length = args.random_exploration_length 21 | self.initial_exploration_rate = args.initial_exploration_rate 22 | self.final_exploration_rate = args.final_exploration_rate 23 | self.final_exploration_frame = args.final_exploration_frame 24 | self.test_exploration_rate = args.test_exploration_rate 25 | self.recording_frequency = args.recording_frequency 26 | 27 | self.exploration_rate = self.initial_exploration_rate 28 | self.total_steps = 0 29 | self.train_steps = 0 30 | self.current_act_steps = 0 31 | self.current_train_steps = 0 32 | 33 | self.test_state = [] 34 | self.epoch_over = False 35 | 36 | def choose_action(self): 37 | 38 | if random.random() >= self.exploration_rate: 39 | state = self.memory.get_current_state() 40 | q_values = self.network.inference(state) 41 | self.train_stats.add_q_values(q_values) 42 | return np.argmax(q_values) 43 | else: 44 | return random.randrange(self.num_actions) 45 | 46 | def checkGameOver(self): 47 | if self.emulator.isGameOver(): 48 | initial_state = self.emulator.reset() 49 | for experience in initial_state: 50 | self.memory.add(experience[0], experience[1], experience[2], 51 | experience[3]) 52 | self.train_stats.add_game() 53 | 54 | def run_random_exploration(self): 55 | 56 | for step in range(self.random_exploration_length): 57 | 58 | state, action, reward, terminal, raw_reward = self.emulator.run_step( 59 | random.randrange(self.num_actions)) 60 | self.train_stats.add_reward(raw_reward) 61 | self.memory.add(state, action, reward, terminal) 62 | self.checkGameOver() 63 | self.total_steps += 1 64 | self.current_act_steps += 1 65 | if (self.total_steps % self.recording_frequency == 0): 66 | self.train_stats.record(self.total_steps) 67 | 68 | def train(self, steps): 69 | 70 | for step in range(steps): 71 | states, actions, rewards, next_states, terminals = self.memory.get_batch() 72 | loss = self.network.train(states, actions, rewards, next_states, 73 | terminals) 74 | self.train_stats.add_loss(loss) 75 | self.train_steps += 1 76 | self.current_train_steps += 1 77 | 78 | if self.train_steps < ( 79 | self.final_exploration_frame / self.training_frequency): 80 | self.exploration_rate -= ( 81 | 
self.exploration_rate - self.final_exploration_rate) / ( 82 | ( 83 | self.final_exploration_frame / self.training_frequency) - self.train_steps) 84 | 85 | if (( 86 | self.train_steps * self.training_frequency) % self.recording_frequency == 0) and not ( 87 | step == steps - 1): 88 | self.train_stats.record(self.random_exploration_length + ( 89 | self.train_steps * self.training_frequency)) 90 | self.network.record_params(self.random_exploration_length + ( 91 | self.train_steps * self.training_frequency)) 92 | 93 | self.epoch_over = True 94 | 95 | def run_epoch(self, steps, epoch): 96 | 97 | self.epoch_over = False 98 | threading.Thread(target=self.train, 99 | args=(int(steps / self.training_frequency),)).start() 100 | 101 | while not self.epoch_over: 102 | state, action, reward, terminal, raw_reward = self.emulator.run_step( 103 | self.choose_action()) 104 | self.memory.add(state, action, reward, terminal) 105 | self.train_stats.add_reward(raw_reward) 106 | self.checkGameOver() 107 | 108 | self.total_steps += 1 109 | self.current_act_steps += 1 110 | 111 | logging.info("act_steps: {0}".format(self.current_act_steps)) 112 | logging.info("learn_steps: {0}".format(self.current_train_steps)) 113 | self.train_stats.record(self.random_exploration_length + ( 114 | self.train_steps * self.training_frequency)) 115 | self.network.record_params(self.random_exploration_length + ( 116 | self.train_steps * self.training_frequency)) 117 | self.network.save_model(epoch) 118 | self.current_act_steps = 0 119 | self.current_train_steps = 0 120 | 121 | def test_step(self, observation): 122 | 123 | if len(self.test_state) < self.history_length: 124 | self.test_state.append(observation) 125 | 126 | # choose action 127 | q_values = None 128 | action = None 129 | if random.random() >= self.test_exploration_rate: 130 | state = np.expand_dims(np.transpose(self.test_state, [1, 2, 0]), 131 | axis=0) 132 | q_values = self.network.gpu_inference(state) 133 | action = np.argmax(q_values) 134 | else: 135 | action = random.randrange(self.num_actions) 136 | 137 | self.test_state.pop(0) 138 | return [action, q_values] 139 | 140 | def save_model(self, epoch): 141 | self.network.save_model(epoch) 142 | -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/record_stats.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import tensorflow as tf 4 | import numpy as np 5 | 6 | 7 | class RecordStats: 8 | def __init__(self, args, test): 9 | 10 | self.test = test 11 | self.reward = 0 12 | self.step_count = 0 13 | self.loss = 0.0 14 | self.loss_count = 0 15 | self.games = 0 16 | self.q_values = 0.0 17 | self.q_count = 0 18 | self.current_score = 0 19 | self.max_score = -1000000000 20 | self.min_score = 1000000000 21 | self.recording_frequency = args.recording_frequency 22 | 23 | with tf.device('/cpu:0'): 24 | self.spg = tf.placeholder(tf.float32, shape=[], 25 | name="score_per_game") 26 | self.mean_q = tf.placeholder(tf.float32, shape=[]) 27 | self.total_gp = tf.placeholder(tf.float32, shape=[]) 28 | self.max_r = tf.placeholder(tf.float32, shape=[]) 29 | self.min_r = tf.placeholder(tf.float32, shape=[]) 30 | self.time = tf.placeholder(tf.float32, shape=[]) 31 | 32 | self.spg_summ = tf.scalar_summary('score_per_game', self.spg) 33 | self.q_summ = tf.scalar_summary('q_values', self.mean_q) 34 | self.gp_summ = tf.scalar_summary('steps_per_game', self.total_gp) 35 | self.max_summ = tf.scalar_summary('maximum_score', self.max_r) 36 
| self.min_summ = tf.scalar_summary('minimum_score', self.min_r) 37 | self.time_summ = tf.scalar_summary('steps_per_second', self.time) 38 | 39 | if not test: 40 | self.mean_l = tf.placeholder(tf.float32, shape=[], name='loss') 41 | self.l_summ = tf.scalar_summary('loss', self.mean_l) 42 | self.summary_op = tf.merge_summary( 43 | [self.spg_summ, self.q_summ, self.gp_summ, self.l_summ, 44 | self.max_summ, self.min_summ, self.time_summ]) 45 | self.path = ( 46 | args.save_path + '/records/' + args.game + '/' + args.agent_type + '/' + args.agent_name + '/train') 47 | else: 48 | self.summary_op = tf.merge_summary( 49 | [self.spg_summ, self.q_summ, self.gp_summ, self.max_summ, 50 | self.min_summ, self.time_summ]) 51 | self.path = ( 52 | args.save_path + '/records/' + args.game + '/' + args.agent_type + '/' + args.agent_name + '/test') 53 | 54 | # self.summary_op = tf.merge_all_summaries() 55 | self.sess = tf.Session() 56 | self.summary_writer = tf.train.SummaryWriter(self.path) 57 | self.start_time = time.time() 58 | 59 | def record(self, epoch): 60 | 61 | seconds = time.time() - self.start_time 62 | 63 | avg_loss = 0 64 | if self.loss_count != 0: 65 | avg_loss = self.loss / self.loss_count 66 | # print("average loss: {0}".format(avg_loss)) 67 | 68 | mean_q_values = 0 69 | if self.q_count > 0: 70 | mean_q_values = self.q_values / self.q_count 71 | # print("average q_values: {0}".format(mean_q_values)) 72 | 73 | score_per_game = 0.0 74 | steps_per_game = 0 75 | 76 | if self.games == 0: 77 | score_per_game = self.reward 78 | steps_per_game = self.step_count 79 | else: 80 | score_per_game = self.reward / self.games 81 | steps_per_game = self.step_count / self.games 82 | 83 | score_per_game = float(score_per_game) 84 | 85 | if not self.test: 86 | step_per_sec = self.recording_frequency / seconds 87 | summary_str = self.sess.run(self.summary_op, 88 | feed_dict={self.spg: score_per_game, 89 | self.mean_l: avg_loss, 90 | self.mean_q: mean_q_values, 91 | self.total_gp: steps_per_game, 92 | self.max_r: self.max_score, 93 | self.min_r: self.min_score, 94 | self.time: step_per_sec}) 95 | self.summary_writer.add_summary(summary_str, global_step=epoch) 96 | else: 97 | step_per_sec = self.step_count / seconds 98 | summary_str = self.sess.run(self.summary_op, 99 | feed_dict={self.spg: score_per_game, 100 | self.mean_q: mean_q_values, 101 | self.total_gp: steps_per_game, 102 | self.max_r: self.max_score, 103 | self.min_r: self.min_score, 104 | self.time: step_per_sec}) 105 | self.summary_writer.add_summary(summary_str, global_step=epoch) 106 | current_score = 0 107 | 108 | self.reward = 0 109 | self.step_count = 0 110 | self.loss = 0 111 | self.loss_count = 0 112 | self.games = 0 113 | self.q_values = 0 114 | self.q_count = 0 115 | self.max_score = -1000000000 116 | self.min_score = 1000000000 117 | 118 | def add_reward(self, r): 119 | self.reward += r 120 | self.current_score += r 121 | 122 | if self.step_count == 0: 123 | self.start_time = time.time() 124 | 125 | self.step_count += 1 126 | 127 | def add_loss(self, l): 128 | self.loss += l 129 | self.loss_count += 1 130 | 131 | def add_game(self): 132 | self.games += 1 133 | 134 | if self.current_score > self.max_score: 135 | self.max_score = self.current_score 136 | if self.current_score < self.min_score: 137 | self.min_score = self.current_score 138 | 139 | self.current_score = 0 140 | 141 | def add_q_values(self, q_vals): 142 | mean_q = np.mean(q_vals) 143 | self.q_values += mean_q 144 | self.q_count += 1 145 | 
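146 | # Usage sketch (illustrative only; these calls are made by the DQN agent, and the 147 | # names `reward`, `loss`, `game_over` and `total_steps` below are placeholders for 148 | # values supplied by the surrounding training loop): 149 | # 150 | #     stats = RecordStats(args, test=False) 151 | #     stats.add_reward(reward)      # raw (unclipped) reward for the current step 152 | #     stats.add_loss(loss)          # loss of the latest training batch 153 | #     if game_over: 154 | #         stats.add_game()          # updates max/min score, resets the per-game score 155 | #     stats.record(total_steps)     # writes one summary point and resets the accumulators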
-------------------------------------------------------------------------------- /showcase-studies/dqn-gym/states_examples/state_0_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/states_examples/state_0_0.png -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/states_examples/state_0_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/states_examples/state_0_1.png -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/states_examples/state_0_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/states_examples/state_0_2.png -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/states_examples/state_0_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/states_examples/state_0_3.png -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/states_examples/state_12_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/states_examples/state_12_0.png -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/states_examples/state_12_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/states_examples/state_12_1.png -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/states_examples/state_12_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/states_examples/state_12_2.png -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/states_examples/state_12_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/states_examples/state_12_3.png -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/states_examples/state_19_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/states_examples/state_19_0.png -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/states_examples/state_19_1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/states_examples/state_19_1.png -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/states_examples/state_19_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/states_examples/state_19_2.png -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/states_examples/state_19_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/states_examples/state_19_3.png -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/states_examples/state_6_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/states_examples/state_6_0.png -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/states_examples/state_6_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/states_examples/state_6_1.png -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/states_examples/state_6_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/states_examples/state_6_2.png -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/states_examples/state_6_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/states_examples/state_6_3.png -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/training_results/saved_models/breakout/dqn/brick_hunter_example/brick_hunter_qsub.ckpt-24: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/training_results/saved_models/breakout/dqn/brick_hunter_example/brick_hunter_qsub.ckpt-24 -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/training_results/saved_models/breakout/dqn/brick_hunter_example/brick_hunter_qsub.ckpt-24.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/dqn-gym/training_results/saved_models/breakout/dqn/brick_hunter_example/brick_hunter_qsub.ckpt-24.meta -------------------------------------------------------------------------------- 
/showcase-studies/dqn-gym/training_results/saved_models/breakout/dqn/brick_hunter_example/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "brick_hunter_qsub.ckpt-24" 2 | all_model_checkpoint_paths: "brick_hunter_qsub.ckpt-24" 3 | -------------------------------------------------------------------------------- /showcase-studies/dqn-gym/visuals.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib 3 | 4 | matplotlib.use('TKAgg') 5 | from matplotlib import pyplot as plt 6 | import seaborn as sns 7 | 8 | 9 | class Visuals: 10 | def __init__(self, actions): 11 | 12 | all_action_names = ['no-op', 'fire', 'up', 'right', 'left', 'down', 13 | 'up_right', 'up_left', 'down-right', 'down-left', 14 | 'up-fire', 'right-fire', 'left-fire', 'down-fire', 15 | 'up-right-fire', 'up-left-fire', 'down-right-fire', 16 | 'down-left-fire'] 17 | 18 | action_names = [all_action_names[i] for i in actions] 19 | self.num_actions = len(actions) 20 | self.max_q = 1 21 | self.min_q = 0 22 | # self.max_avg_q = 1 23 | 24 | xlocations = np.linspace(0.5, self.num_actions - 0.5, 25 | num=self.num_actions) 26 | xlocations = np.append(xlocations, self.num_actions + 0.05) 27 | if self.num_actions > 7: 28 | self.fig = plt.figure(figsize=(self.num_actions * 1.1, 6.0)) 29 | else: 30 | self.fig = plt.figure() 31 | self.bars = plt.bar(np.arange(self.num_actions), 32 | np.zeros(self.num_actions), 0.9) 33 | plt.xticks(xlocations, action_names + ['']) 34 | plt.ylabel('Expected Future Reward') 35 | plt.xlabel('Action') 36 | plt.title("State-Action Values") 37 | color_palette = sns.color_palette(n_colors=self.num_actions) 38 | for bar, color in zip(self.bars, color_palette): 39 | bar.set_color(color) 40 | self.fig.show() 41 | 42 | def update(self, q_values): 43 | 44 | for bar, q_value in zip(self.bars, q_values): 45 | bar.set_height(q_value) 46 | step_max = np.amax(q_values) 47 | step_min = np.amin(q_values) 48 | if step_max > self.max_q: 49 | self.max_q = step_max 50 | plt.gca().set_ylim([self.min_q, self.max_q]) 51 | if step_min < self.min_q: 52 | self.min_q = step_min 53 | plt.gca().set_ylim([self.min_q, self.max_q]) 54 | 55 | self.fig.canvas.draw() 56 | -------------------------------------------------------------------------------- /showcase-studies/dqn-simple/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | out-* 3 | training-progression 4 | __pycache__ -------------------------------------------------------------------------------- /showcase-studies/dqn-simple/Makefile: -------------------------------------------------------------------------------- 1 | train: 2 | python ./play_atari.py ./../atari-roms/space_invaders.bin | tee train.log -------------------------------------------------------------------------------- /showcase-studies/dqn-simple/README.md: -------------------------------------------------------------------------------- 1 | Originally from https://github.com/gtoubassi/dqn-atari 2 | 3 | # DQN Atari 4 | 5 | [![Click to play video](https://img.youtube.com/vi/DqzSrEuA2Jw/1.jpg)](https://www.youtube.com/watch?v=DqzSrEuA2Jw) 6 | 7 | This repo represents my attempt to reproduce the DeepMind Atari playing agent described in the recent [Nature paper](http://home.uchicago.edu/~arij/journalclub/papers/2015_Mnih_et_al.pdf). 
8 | 9 | While the DeepMind implementation is built in [lua with torch7](https://github.com/kuz/DeepMind-Atari-Deep-Q-Learner), this implementation uses [TensorFlow](http://tensorflow.org). Like DeepMind, it also depends on the [Arcade Learning Environment](http://www.arcadelearningenvironment.org/) (technically I believe DeepMind uses their [Xitari](https://github.com/deepmind/xitari) fork of ALE). 10 | 11 | ### Results 12 | 13 | I have been focused on attempting to match DeepMind's performance on Space Invaders, which in their publication is 1976+/-800, though I do not know exactly how they compute those results. For my results I compute average/stdev over the final 20 evals of the training regime. I did a run with the DeepMind code ([results here](https://docs.google.com/spreadsheets/d/1IKfiD9wQVXtx8q9RJk52x8HtTnqsbeJqa1ioS_bh-k8/edit?usp=sharing)) and by this measure saw results of 1428+/-189. My current results are far short at 1139+/-138 (random agent scores ~150). Thus far I have not found anyone who has reproduced the DeepMind results using the approach described in the Nature paper. If you've done it, particularly with TensorFlow, let me know! 14 | 15 | I have also tried Breakout and got a score of 284+/-78, but that was an older version with the wrong target network update frequency. (DeepMind reported 400+/-30 using their eval method.) 16 | 17 | I have also experimented with compressing experience replay to allow a larger capacity than 1M. Both Breakout and Space Invaders show ~10% improvement with 4M and 3M respectively. 18 | 19 | A publicly viewable Google spreadsheet has [results](https://docs.google.com/spreadsheets/d/1RZM2qhKQaXaud4S2ILsRVukmiPCjM-xtJTuPRpb96HY/edit#gid=2001383367) for various experiments I have run. 20 | 21 | ### Running 22 | 23 | 1. Get Python and TensorFlow running, preferably on a GPU (see notes on [AWS setup](https://github.com/gtoubassi/dqn-atari/wiki/Setting-up-TensorFlow-on-AWS-GPU)). 24 | 2. Install the Arcade Learning Environment (see [wiki](https://github.com/gtoubassi/dqn-atari/wiki/Installing-ALE)). 25 | 3. Install dqn-atari specific dependencies, currently just ``sudo pip install blosc``. 26 | 4. Download a game ROM and name it properly, like space_invaders.bin (all lower case, ending in .bin -- the names must match for ALE). 27 | 5. Get the repo: 28 | 29 | git clone https://github.com/gtoubassi/dqn-atari.git 30 | 31 | 6. Run it! The default parameters attempt to mimic the Nature paper configuration: 32 | 33 | cd dqn-atari 34 | python ./play_atari.py ~/space_invaders.bin | tee train.log 35 | 36 | 7.
Periodically check progress 37 | 38 | ./logstats.sh train.log 39 | 40 | ### References 41 | 42 | The following were very helpful: 43 | 44 | * [Overview of Deep Q Learning](http://www.nervanasys.com/demystifying-deep-reinforcement-learning/) 45 | * David Silver's [Introduction to Reinforcement Learning](https://www.youtube.com/watch?v=2pWv7GOvuf0&list=PL5X3mDkKaJrL42i_jhE4N-p6E2Ol62Ofa) 46 | * [deep_rl_ale](https://github.com/Jabberwockyll/deep_rl_ale) 47 | * [Flabbybird agent using TensorFlow](https://github.com/yenchenlin1994/DeepLearningFlappyBird) 48 | * [Space Invaders using Theano](http://maciejjaskowski.github.io/2016/03/09/space-invaders.html) 49 | * [Deep Q Learning Google Group](https://groups.google.com/forum/#!forum/deep-q-learning) 50 | -------------------------------------------------------------------------------- /showcase-studies/dqn-simple/atari_environment.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import numpy as np 3 | import os 4 | import random 5 | from state import State 6 | from ale_python_interface import ALEInterface 7 | 8 | # Terminology in this class: 9 | # Episode: the span of one game life 10 | # Game: an ALE game (e.g. in space invaders == 3 Episodes or 3 Lives) 11 | # Frame: An ALE frame (e.g. 60 fps) 12 | # Step: An Environment step (e.g. covers 4 frames) 13 | # 14 | class AtariEnvironment: 15 | 16 | def __init__(self, args, outputDir): 17 | 18 | self.outputDir = outputDir 19 | self.screenCaptureFrequency = args.screen_capture_freq 20 | 21 | self.ale = ALEInterface() 22 | self.ale.setInt(b'random_seed', 123456) 23 | random.seed(123456) 24 | # Fix https://groups.google.com/forum/#!topic/deep-q-learning/p4FAIaabwlo 25 | self.ale.setFloat(b'repeat_action_probability', 0.0) 26 | 27 | # Load the ROM file 28 | self.ale.loadROM(args.rom.encode('UTF-8')) 29 | 30 | self.actionSet = self.ale.getMinimalActionSet() 31 | self.gameNumber = 0 32 | self.stepNumber = 0 33 | self.resetGame() 34 | 35 | def getNumActions(self): 36 | return len(self.actionSet) 37 | 38 | def getState(self): 39 | return self.state 40 | 41 | def getGameNumber(self): 42 | return self.gameNumber 43 | 44 | def getFrameNumber(self): 45 | return self.ale.getFrameNumber() 46 | 47 | def getEpisodeFrameNumber(self): 48 | return self.ale.getEpisodeFrameNumber() 49 | 50 | def getEpisodeStepNumber(self): 51 | return self.episodeStepNumber 52 | 53 | def getStepNumber(self): 54 | return self.stepNumber 55 | 56 | def getGameScore(self): 57 | return self.gameScore 58 | 59 | def isGameOver(self): 60 | return self.ale.game_over() 61 | 62 | def step(self, action): 63 | previousLives = self.ale.lives() 64 | reward = 0 65 | isTerminal = 0 66 | self.stepNumber += 1 67 | self.episodeStepNumber += 1 68 | 69 | for i in range(4): 70 | prevScreenRGB = self.ale.getScreenRGB() 71 | reward += self.ale.act(self.actionSet[action]) 72 | screenRGB = self.ale.getScreenRGB() 73 | 74 | # Detect end of episode, I don't think I'm handling this right in terms 75 | # of the overall game loop (??) 
76 | if self.ale.lives() < previousLives or self.ale.game_over(): 77 | isTerminal = 1 78 | break 79 | 80 | if self.gameNumber % self.screenCaptureFrequency == 0: 81 | dir = self.outputDir + '/screen_cap/game-%06d' % (self.gameNumber) 82 | if not os.path.isdir(dir): 83 | logging.info("Capturing screen this iteration into "+self.outputDir + '/screen_cap/') 84 | os.makedirs(dir) 85 | self.ale.saveScreenPNG((dir + '/frame-%06d.png' % (self.getEpisodeFrameNumber())).encode('UTF-8')) 86 | 87 | 88 | maxedScreen = np.maximum(screenRGB, prevScreenRGB) 89 | self.state = self.state.stateByAddingScreen(maxedScreen, self.ale.getFrameNumber()) 90 | self.gameScore += reward 91 | return reward, self.state, isTerminal 92 | 93 | def resetGame(self): 94 | if self.ale.game_over(): 95 | self.gameNumber += 1 96 | self.ale.reset_game() 97 | self.state = State().stateByAddingScreen(self.ale.getScreenRGB(), self.ale.getFrameNumber()) 98 | self.gameScore = 0 99 | self.episodeStepNumber = 0 # environment steps vs ALE frames. Will probably be 4*frame number 100 | -------------------------------------------------------------------------------- /showcase-studies/dqn-simple/encode_ffmpeg: -------------------------------------------------------------------------------- 1 | ffmpeg -r 60 -f image2 -i frame-%06d.png -vcodec libx264 -crf 25 -pix_fmt yuv420p video.mp4 -------------------------------------------------------------------------------- /showcase-studies/dqn-simple/logging_colorer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Console coloring of logs 3 | """ 4 | 5 | # thanks stackoverflow :-P https://stackoverflow.com/questions/384076/how-can-i-color-python-logging-output 6 | import logging 7 | 8 | # now we patch Python code to add color support to logging.StreamHandler 9 | import sys 10 | 11 | 12 | def add_coloring_to_emit_windows(fn): 13 | # add methods we need to the class 14 | def _out_handle(self): 15 | import ctypes 16 | return ctypes.windll.kernel32.GetStdHandle(self.STD_OUTPUT_HANDLE) 17 | out_handle = property(_out_handle) 18 | 19 | def _set_color(self, code): 20 | import ctypes 21 | # Constants from the Windows API 22 | self.STD_OUTPUT_HANDLE = -11 23 | hdl = ctypes.windll.kernel32.GetStdHandle(self.STD_OUTPUT_HANDLE) 24 | ctypes.windll.kernel32.SetConsoleTextAttribute(hdl, code) 25 | 26 | setattr(logging.StreamHandler, '_set_color', _set_color) 27 | 28 | def new(*args): 29 | FOREGROUND_BLUE = 0x0001 # text color contains blue. 30 | FOREGROUND_GREEN = 0x0002 # text color contains green. 31 | FOREGROUND_RED = 0x0004 # text color contains red. 32 | FOREGROUND_INTENSITY = 0x0008 # text color is intensified. 33 | FOREGROUND_WHITE = FOREGROUND_BLUE|FOREGROUND_GREEN |FOREGROUND_RED 34 | # winbase.h 35 | STD_INPUT_HANDLE = -10 36 | STD_OUTPUT_HANDLE = -11 37 | STD_ERROR_HANDLE = -12 38 | 39 | # wincon.h 40 | FOREGROUND_BLACK = 0x0000 41 | FOREGROUND_BLUE = 0x0001 42 | FOREGROUND_GREEN = 0x0002 43 | FOREGROUND_CYAN = 0x0003 44 | FOREGROUND_RED = 0x0004 45 | FOREGROUND_MAGENTA = 0x0005 46 | FOREGROUND_YELLOW = 0x0006 47 | FOREGROUND_GREY = 0x0007 48 | FOREGROUND_INTENSITY = 0x0008 # foreground color is intensified. 49 | 50 | BACKGROUND_BLACK = 0x0000 51 | BACKGROUND_BLUE = 0x0010 52 | BACKGROUND_GREEN = 0x0020 53 | BACKGROUND_CYAN = 0x0030 54 | BACKGROUND_RED = 0x0040 55 | BACKGROUND_MAGENTA = 0x0050 56 | BACKGROUND_YELLOW = 0x0060 57 | BACKGROUND_GREY = 0x0070 58 | BACKGROUND_INTENSITY = 0x0080 # background color is intensified. 
59 | 60 | levelno = args[1].levelno 61 | if(levelno>=50): 62 | color = BACKGROUND_YELLOW | FOREGROUND_RED | FOREGROUND_INTENSITY | BACKGROUND_INTENSITY 63 | elif(levelno>=40): 64 | color = FOREGROUND_RED | FOREGROUND_INTENSITY 65 | elif(levelno>=30): 66 | color = FOREGROUND_YELLOW | FOREGROUND_INTENSITY 67 | elif(levelno>=20): 68 | color = FOREGROUND_GREEN 69 | elif(levelno>=10): 70 | color = FOREGROUND_MAGENTA 71 | else: 72 | color = FOREGROUND_WHITE 73 | args[0]._set_color(color) 74 | 75 | ret = fn(*args) 76 | args[0]._set_color( FOREGROUND_WHITE ) 77 | #print "after" 78 | return ret 79 | return new 80 | 81 | def add_coloring_to_emit_ansi(fn): 82 | # add methods we need to the class 83 | def new(*args): 84 | levelno = args[1].levelno 85 | if(levelno>=50): 86 | color = '\x1b[31m' # red 87 | elif(levelno>=40): 88 | color = '\x1b[31m' # red 89 | elif(levelno>=30): 90 | color = '\x1b[33m' # yellow 91 | elif(levelno>=20): 92 | color = '\x1b[32m' # green 93 | elif(levelno>=10): 94 | color = '\x1b[35m' # pink 95 | else: 96 | color = '\x1b[0m' # normal 97 | args[1].msg = color + args[1].msg[:200] + '\x1b[0m' # normal 98 | #print "after" 99 | return fn(*args) 100 | return new 101 | 102 | import platform 103 | if platform.system()=='Windows': 104 | # Windows does not support ANSI escapes and we are using API calls to set the console color 105 | logging.StreamHandler.emit = add_coloring_to_emit_windows(logging.StreamHandler.emit) 106 | else: 107 | # all non-Windows platforms are supporting ANSI escapes so we use them 108 | logging.StreamHandler.emit = add_coloring_to_emit_ansi(logging.StreamHandler.emit) 109 | #log = logging.getLogger() 110 | #log.addFilter(log_filter()) 111 | #//hdlr = logging.StreamHandler() 112 | #//hdlr.setFormatter(formatter()) 113 | 114 | def init_logging(verbose=True, log_file='app.log'): 115 | """ 116 | Initialize settings for Python logger 117 | 118 | This allows for logging into console as well as specified log_file. 119 | After you can use in the code just:: 120 | 121 | import logging 122 | logging.info("hello world!") 123 | # will produce 124 | # 2016-07-19 16:13:02,931 [MainThread ][INFO ]: hello world! 
125 | 126 | :param bool verbose: 127 | :param str log_file: 128 | """ 129 | 130 | # let's log the same output to console and to file 131 | log_file = log_file 132 | format = "%(asctime)s [%(threadName)-12.12s][%(levelname)-5.5s]: %(message)s" 133 | 134 | root_logger = logging.getLogger() 135 | logging.basicConfig( 136 | format=format, 137 | level=logging.DEBUG if verbose else logging.WARNING, 138 | stream=sys.stderr) 139 | 140 | log_formatter = logging.Formatter(format) 141 | file_handler = logging.FileHandler(log_file) 142 | file_handler.setFormatter(log_formatter) 143 | root_logger.addHandler(file_handler) -------------------------------------------------------------------------------- /showcase-studies/dqn-simple/logstats.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo -n "Number of episodes: " 4 | grep Episode $1 | awk '{print $2}' | tail -1 5 | 6 | echo -n "Number of frames: " 7 | grep Episode $1 | awk '{print $7}' | tr -d '(' | awk '{SUM=SUM+$1;COUNT=COUNT+1} END{printf "%'"'"'d\n", SUM}' 8 | 9 | echo -n "Average score first 50: " 10 | grep Episode $1 | head -50 | awk '{print $6}' | awk '{SUM=SUM+$1;COUNT=COUNT+1} END{print SUM/COUNT}' 11 | 12 | echo -n "Average score last 50: " 13 | grep Episode $1 | tail -50 | awk '{print $6}' | awk '{SUM=SUM+$1;COUNT=COUNT+1} END{print SUM/COUNT}' 14 | 15 | echo -n "Average score first 100: " 16 | grep Episode $1 | head -100 | awk '{print $6}' | awk '{SUM=SUM+$1;COUNT=COUNT+1} END{print SUM/COUNT}' 17 | 18 | echo -n "Average score last 100: " 19 | grep Episode $1 | tail -100 | awk '{print $6}' | awk '{SUM=SUM+$1;COUNT=COUNT+1} END{print SUM/COUNT}' 20 | 21 | echo -n "Average score first 250: " 22 | grep Episode $1 | head -250 | awk '{print $6}' | awk '{SUM=SUM+$1;COUNT=COUNT+1} END{print SUM/COUNT}' 23 | 24 | echo -n "Average score last 250: " 25 | grep Episode $1 | tail -250 | awk '{print $6}' | awk '{SUM=SUM+$1;COUNT=COUNT+1} END{print SUM/COUNT}' 26 | 27 | echo -n "Average score first 500: " 28 | grep Episode $1 | head -500 | awk '{print $6}' | awk '{SUM=SUM+$1;COUNT=COUNT+1} END{print SUM/COUNT}' 29 | 30 | echo -n "Average score last 500: " 31 | grep Episode $1 | tail -500 | awk '{print $6}' | awk '{SUM=SUM+$1;COUNT=COUNT+1} END{print SUM/COUNT}' 32 | 33 | echo -n "Average score first 1000: " 34 | grep Episode $1 | head -1000 | awk '{print $6}' | awk '{SUM=SUM+$1;COUNT=COUNT+1} END{print SUM/COUNT}' 35 | 36 | echo -n "Average score last 1000: " 37 | grep Episode $1 | tail -1000 | awk '{print $6}' | awk '{SUM=SUM+$1;COUNT=COUNT+1} END{print SUM/COUNT}' 38 | 39 | echo -n "Average score for ALL: " 40 | grep Episode $1 | awk '{print $6}' | awk '{SUM=SUM+$1;COUNT=COUNT+1} END{print SUM/COUNT}' 41 | 42 | echo -n "Best scores ever: " 43 | grep Episode $1 | awk '{print $6}' | sort -n | tail -5 | sort -rn | tr '\n' ',' | sed 's#,$#\n#g' | sed 's#,#, #g' 44 | 45 | echo -n "Recent eval runs: " 46 | grep Average.eval $1 | tail -5 | awk '{print $NF}' | tr '\n' ',' | sed 's#,$#\n#g' | sed 's#,#, #g' 47 | 48 | echo -n "Average of last 20 evals: " 49 | grep Average.eval $1 | tail -20 | awk '{print $NF}' | awk '{SUM=SUM+$1;COUNT=COUNT+1} END{print SUM/COUNT}' 50 | 51 | -------------------------------------------------------------------------------- /showcase-studies/dqn-simple/play_atari.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | import logging 4 | import sys 5 | import numpy as np 6 | import os 7 | import random 8 | 
from tqdm import tqdm 9 | import replay 10 | import time 11 | import argparse 12 | import dqn 13 | from atari_environment import AtariEnvironment 14 | from state import State 15 | import logging_colorer 16 | 17 | logging_colorer.init_logging() 18 | 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument("--train-epoch-steps", type=int, default=250000, help="how many steps (=4 frames) to run during a training epoch (approx -- will finish current game)") 21 | parser.add_argument("--eval-epoch-steps", type=int, default=125000, help="how many steps (=4 frames) to run during an eval epoch (approx -- will finish current game)") 22 | parser.add_argument("--replay-capacity", type=int, default=1000000, help="how many states to store for future training") 23 | parser.add_argument("--prioritized-replay", action='store_true', help="Prioritize interesting states when training (e.g. terminal or non-zero rewards)") 24 | parser.add_argument("--compress-replay", action='store_true', help="if set, replay memory will be compressed with blosc, allowing much larger replay capacity") 25 | parser.add_argument("--normalize-weights", action='store_true', default=True, help="if set, weights/biases are normalized like torch, with std scaled by fan-in to the node") 26 | parser.add_argument("--screen-capture-freq", type=int, default=50, help="record screens for a game this often") 27 | parser.add_argument("--save-model-freq", type=int, default=5000, help="save the model once per this many training steps") 28 | parser.add_argument("--observation-steps", type=int, default=50000, help="train only after this many steps (=4 frames)") 29 | parser.add_argument("--learning-rate", type=float, default=0.00025, help="learning rate (step size for optimization algo)") 30 | parser.add_argument("--target-model-update-freq", type=int, default=10000, help="how often (in steps) to update the target model. Note the Nature paper says this is in 'number of parameter updates' but their code says steps.
see tinyurl.com/hokp4y8") 31 | parser.add_argument("--model", help="tensorflow model checkpoint file to initialize from") 32 | parser.add_argument("rom", help="rom file to run") 33 | args = parser.parse_args() 34 | 35 | print('Arguments: %s' % (args)) 36 | 37 | game_name = os.path.splitext(os.path.split(args.rom)[1])[0] 38 | baseOutputDir = 'out-'+ game_name + '-' + time.strftime("%Y-%m-%d-%H-%M-%S") 39 | os.makedirs(baseOutputDir) 40 | logging.info("Training game "+game_name) 41 | logging.info("Storing training into "+baseOutputDir) 42 | 43 | State.setup(args) 44 | 45 | environment = AtariEnvironment(args, baseOutputDir) 46 | 47 | dqn_network = dqn.DeepQNetwork(environment.getNumActions(), baseOutputDir, args) 48 | 49 | replayMemory = replay.ReplayMemory(args) 50 | 51 | def runEpoch(minEpochSteps, evalWithEpsilon=None): 52 | logging.info('Running epoch with min epoch steps: %d' % minEpochSteps) 53 | stepStart = environment.getStepNumber() 54 | isTraining = True if evalWithEpsilon is None else False 55 | startGameNumber = environment.getGameNumber() 56 | epochTotalScore = 0 57 | 58 | pbar = tqdm(total=minEpochSteps) 59 | while environment.getStepNumber() - stepStart < minEpochSteps: 60 | startTime = lastLogTime = time.time() 61 | stateReward = 0 62 | state = None 63 | 64 | while not environment.isGameOver(): 65 | # Choose next action 66 | if evalWithEpsilon is None: 67 | epsilon = max(.1, 1.0 - 0.9 * environment.getStepNumber() / 1e6) 68 | else: 69 | epsilon = evalWithEpsilon 70 | 71 | if state is None or random.random() > (1 - epsilon): 72 | action = random.randrange(environment.getNumActions()) 73 | else: 74 | screens = np.reshape(state.getScreens(), (1, 84, 84, 4)) 75 | action = dqn_network.inference(screens) 76 | 77 | # Make the move 78 | oldState = state 79 | reward, state, isTerminal = environment.step(action) 80 | pbar.update(4) 81 | 82 | # Record experience in replay memory and train 83 | if isTraining and oldState is not None: 84 | clippedReward = min(1, max(-1, reward)) 85 | replayMemory.addSample(replay.Sample(oldState, action, clippedReward, state, isTerminal)) 86 | 87 | if environment.getStepNumber() > args.observation_steps and environment.getEpisodeStepNumber() % 4 == 0: 88 | batch = replayMemory.drawBatch(32) 89 | dqn_network.train(batch, environment.getStepNumber()) 90 | 91 | if time.time() - lastLogTime > 60: 92 | print((' ...frame %d' % environment.getEpisodeFrameNumber())) 93 | lastLogTime = time.time() 94 | 95 | if isTerminal: 96 | state = None 97 | 98 | episodeTime = time.time() - startTime 99 | logging.info(('%s %d ended with score: %d (%d frames in %fs for %d fps)' % 100 | ('Episode' if isTraining else 'Eval', environment.getGameNumber(), environment.getGameScore(), 101 | environment.getEpisodeFrameNumber(), episodeTime, environment.getEpisodeFrameNumber() / episodeTime))) 102 | 103 | epochTotalScore += environment.getGameScore() 104 | environment.resetGame() 105 | 106 | pbar.close() 107 | 108 | # return the average score 109 | return epochTotalScore / (environment.getGameNumber() - startGameNumber) 110 | 111 | 112 | try: 113 | while True: 114 | aveScore = runEpoch(args.train_epoch_steps) #train 115 | print(('Average training score: %d' % (aveScore))) 116 | aveScore = runEpoch(args.eval_epoch_steps, evalWithEpsilon=.05) #eval 117 | print(('Average eval score: %d' % (aveScore))) 118 | except KeyboardInterrupt or TypeError: 119 | dqn_network.save_snapshot(environment.getStepNumber()) -------------------------------------------------------------------------------- 
/showcase-studies/dqn-simple/replay.py: -------------------------------------------------------------------------------- 1 | import bisect 2 | import math 3 | import random 4 | 5 | class Sample: 6 | 7 | def __init__(self, state1, action, reward, state2, terminal): 8 | self.state1 = state1 9 | self.action = action 10 | self.reward = reward 11 | self.state2 = state2 12 | self.terminal = terminal 13 | self.weight = 1 14 | self.cumulativeWeight = 1 15 | 16 | def isInteresting(self): 17 | return self.terminal or self.reward != 0 18 | 19 | def __cmp__(self, obj): 20 | return self.cumulativeWeight - obj.cumulativeWeight 21 | 22 | 23 | class ReplayMemory: 24 | 25 | def __init__(self, args): 26 | self.samples = [] 27 | self.maxSamples = args.replay_capacity 28 | self.prioritizedReplay = args.prioritized_replay 29 | self.numInterestingSamples = 0 30 | self.batchesDrawn = 0 31 | 32 | def numSamples(self): 33 | return len(self.samples) 34 | 35 | def addSample(self, sample): 36 | self.samples.append(sample) 37 | self._updateWeightsForNewlyAddedSample() 38 | self._truncateListIfNecessary() 39 | 40 | def _updateWeightsForNewlyAddedSample(self): 41 | if len(self.samples) > 1: 42 | self.samples[-1].cumulativeWeight = self.samples[-1].weight + self.samples[-2].cumulativeWeight 43 | 44 | if self.samples[-1].isInteresting(): 45 | self.numInterestingSamples += 1 46 | 47 | # Boost the neighboring samples. How many samples? Roughly the number of samples 48 | # that are "uninteresting". Meaning if interesting samples occur 3% of the time, then boost 33 49 | uninterestingSampleRange = max(1, len(self.samples) / max(1, self.numInterestingSamples)) 50 | for i in range(int(uninterestingSampleRange), 0, -1): 51 | index = len(self.samples) - i 52 | if index < 1: 53 | break 54 | # This is an exponential that ranges from 3.0 to 1.01 over the domain of [0, uninterestingSampleRange] 55 | # So the interesting sample gets a 3x boost, and the one furthest away gets a 1% boost 56 | boost = 1.0 + 3.0/(math.exp(i/(uninterestingSampleRange/6.0))) 57 | self.samples[index].weight *= boost 58 | self.samples[index].cumulativeWeight = self.samples[index].weight + self.samples[index - 1].cumulativeWeight 59 | 60 | def _truncateListIfNecessary(self): 61 | # premature optimization alert :-), don't truncate on each 62 | # added sample since (I assume) it requires a memcopy of the list (probably 8mb) 63 | if len(self.samples) > self.maxSamples * 1.05: 64 | truncatedWeight = 0 65 | # Before truncating the list, correct self.numInterestingSamples, and prepare 66 | # for correcting the cumulativeWeights of the remaining samples 67 | for i in range(self.maxSamples, len(self.samples)): 68 | truncatedWeight += self.samples[i].weight 69 | if self.samples[i].isInteresting(): 70 | self.numInterestingSamples -= 1 71 | 72 | # Truncate the list 73 | self.samples = self.samples[(len(self.samples) - self.maxSamples):] 74 | 75 | # Correct cumulativeWeights 76 | for sample in self.samples: 77 | sample.cumulativeWeight -= truncatedWeight 78 | 79 | def drawBatch(self, batchSize): 80 | if batchSize > len(self.samples): 81 | raise IndexError('Too few samples (%d) to draw a batch of %d' % (len(self.samples), batchSize)) 82 | 83 | self.batchesDrawn += 1 84 | 85 | if self.prioritizedReplay: 86 | return self._drawPrioritizedBatch(batchSize) 87 | else: 88 | return random.sample(self.samples, batchSize) 89 | 90 | # The Nature paper doesn't do this but they mention the idea.
91 | # This particular approach and the weighting I am using is a total 92 | # uninformed fabrication on my part. There is probably a more 93 | # principled way to do this 94 | def _drawPrioritizedBatch(self, batchSize): 95 | batch = [] 96 | probe = Sample(None, 0, 0, None, False) 97 | while len(batch) < batchSize: 98 | probe.cumulativeWeight = random.uniform(0, self.samples[-1].cumulativeWeight) 99 | index = bisect.bisect_right(self.samples, probe, 0, len(self.samples) - 1) 100 | sample = self.samples[index] 101 | sample.weight = max(1, .8 * sample.weight) 102 | if sample not in batch: 103 | batch.append(sample) 104 | 105 | if self.batchesDrawn % 100 == 0: 106 | cumulative = 0 107 | for sample in self.samples: 108 | cumulative += sample.weight 109 | sample.cumulativeWeight = cumulative 110 | return batch 111 | 112 | def _printBatchWeight(self, batch): 113 | batchWeight = 0 114 | for i in range(0, len(batch)): 115 | batchWeight += batch[i].weight 116 | print(('batch weight: %f' % batchWeight)) 117 | -------------------------------------------------------------------------------- /showcase-studies/dqn-simple/state.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.ndimage as ndimage 3 | import blosc 4 | import png 5 | 6 | class State: 7 | 8 | useCompression = False 9 | 10 | @staticmethod 11 | def setup(args): 12 | State.useCompression = args.compress_replay 13 | 14 | def stateByAddingScreen(self, screen, frameNumber): 15 | screen = np.dot(screen, np.array([.299, .587, .114])).astype(np.uint8) 16 | screen = ndimage.zoom(screen, (0.4, 0.525)) 17 | screen.resize((84, 84, 1)) 18 | #self.saveScreenAsPNG('screen', screen, frameNumber) 19 | 20 | if State.useCompression: 21 | screen = blosc.compress(np.reshape(screen, 84 * 84).tobytes(), typesize=1) 22 | 23 | newState = State() 24 | if hasattr(self, 'screens'): 25 | newState.screens = self.screens[:3] 26 | newState.screens.insert(0, screen) 27 | else: 28 | newState.screens = [screen, screen, screen, screen] 29 | return newState 30 | 31 | def getScreens(self): 32 | if State.useCompression: 33 | s = [] 34 | for i in range(4): 35 | s.append(np.reshape(np.fromstring(blosc.decompress(self.screens[i]), dtype=np.uint8), (84, 84, 1))) 36 | else: 37 | s = self.screens 38 | return np.concatenate(s, axis=2) 39 | 40 | def saveScreenAsPNG(self, basefilename, screen, frameNumber): 41 | pngfile = open(basefilename + ('-%08d.png' % frameNumber), 'wb') 42 | pngWriter = png.Writer(screen.shape[1], screen.shape[0], greyscale=True) 43 | pngWriter.write(pngfile, screen) 44 | pngfile.close() 45 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.sgf 3 | *.tar.bz2 4 | *.npz 5 | *.swp 6 | *.bin 7 | data/ 8 | play/ 9 | work/ 10 | cgos/ 11 | kgsGtp/ 12 | 13 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/CGOSEngine.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | from Engine import * 4 | from HelperEngine import HelperEngine 5 | 6 | # forwards commands to both a main engine 7 | # and a helper engine. When picking a move, 8 | # we first ask the helper engine. If it passes, 9 | # we pass. 
Otherwise we ask the main engine 10 | class CGOSEngine(BaseEngine): 11 | def __init__(self, engine): 12 | self.engine = engine 13 | self.helper= HelperEngine() 14 | 15 | # subclasses must override this 16 | def name(self): 17 | return self.engine.name() 18 | 19 | # subclasses must override this 20 | def version(self): 21 | return self.engine.version() 22 | 23 | def set_board_size(self, N): 24 | return self.engine.set_board_size(N) and \ 25 | self.helper.set_board_size(N) 26 | 27 | def clear_board(self): 28 | self.engine.clear_board() 29 | self.helper.clear_board() 30 | self.cleanup_mode = False 31 | 32 | def set_komi(self, komi): 33 | self.engine.set_komi(komi) 34 | self.helper.set_komi(komi) 35 | 36 | def player_passed(self, color): 37 | self.engine.player_passed(color) 38 | self.helper.player_passed(color) 39 | 40 | def stone_played(self, x, y, color): 41 | self.engine.stone_played(x, y, color) 42 | self.helper.stone_played(x, y, color) 43 | 44 | def generate_move(self, color, cleanup=False): 45 | # enter cleanup mode if helper passes. 46 | # if it resigns, resign. 47 | if not self.cleanup_mode: 48 | self.helper.set_level(5) 49 | move = self.helper.generate_move(color, cleanup=False) 50 | if move.is_pass(): 51 | print "CGOSEngine: helper passed! Entering cleanup mode." 52 | self.cleanup_mode = True 53 | elif move.is_resign(): 54 | print "CGOSEngine: helper resigned! Resigning." 55 | return Move.Resign 56 | else: # helper didn't pass or resign 57 | self.helper.undo() # helper must support this 58 | 59 | # in cleanup mode, moves are made by helper_cleanup 60 | if self.cleanup_mode: 61 | print "CGOSEngine: In cleanup mode: using helper to generate move." 62 | self.helper.set_level(10) 63 | move = self.helper.generate_move(color, cleanup=True) 64 | self.engine.move_was_played(move) 65 | return move 66 | 67 | # otherwise, moves are made by the main engine 68 | print "CGOSEngine: Generating move using main engine." 
69 | move = self.engine.generate_move(color) 70 | if move.is_play(): 71 | self.helper.stone_played(move.x, move.y, color) 72 | elif move.is_pass(): 73 | self.helper.player_passed(color) 74 | return move 75 | 76 | def undo(self): 77 | self.engine.undo() 78 | self.helper.undo() 79 | 80 | def quit(self): 81 | self.engine.quit() 82 | self.helper.quit() 83 | 84 | def supports_final_status_list(self): 85 | return True 86 | 87 | def final_status_list(self, status): 88 | return self.helper.final_status_list(status) 89 | 90 | def final_score(self): 91 | return self.helper.final_score() 92 | 93 | 94 | if __name__ == '__main__': 95 | import GTP 96 | fclient = GTP.redirect_all_output("log_engine.txt") 97 | 98 | import MoveModels 99 | from TFEngine import TFEngine 100 | from Book import PositionRecord 101 | from Book import MoveRecord 102 | 103 | engine = CGOSEngine(TFEngine("conv12posdepELU", MoveModels.Conv12PosDepELU(N=19, Nfeat=21))) 104 | 105 | gtp = GTP.GTP(engine, fclient) 106 | gtp.loop() 107 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/Checkpoint.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | def restore_from_checkpoint(sess, saver, ckpt_dir): 4 | print "Trying to restore from checkpoint in dir", ckpt_dir 5 | ckpt = tf.train.get_checkpoint_state(ckpt_dir) 6 | if ckpt and ckpt.model_checkpoint_path: 7 | print "Checkpoint file is ", ckpt.model_checkpoint_path 8 | saver.restore(sess, ckpt.model_checkpoint_path) 9 | global_step = int(ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]) 10 | print "Restored from checkpoint %s" % global_step 11 | return global_step 12 | else: 13 | print "No checkpoint file found" 14 | assert False 15 | 16 | def optionally_restore_from_checkpoint(sess, saver, train_dir): 17 | while True: 18 | response = raw_input("Restore from checkpoint [y/n]? ").lower() 19 | if response == 'y': 20 | return restore_from_checkpoint(sess, saver, train_dir) 21 | if response == 'n': 22 | return 0 23 | 24 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/Engine.py: -------------------------------------------------------------------------------- 1 | from Board import Board 2 | from GTP import Move 3 | import copy 4 | 5 | class BaseEngine(object): 6 | def __init__(self): 7 | self.board = None 8 | self.opponent_passed = False 9 | self.state_stack = [] 10 | 11 | def push_state(self): 12 | self.state_stack.append(copy.deepcopy(self.board)) 13 | 14 | def pop_state(self): 15 | self.board = self.state_stack.pop() 16 | self.opponent_passed = False 17 | 18 | def undo(self): 19 | if len(self.state_stack) > 0: 20 | self.pop_state() 21 | print "BaseEngine: after undo, board is" 22 | self.board.show() 23 | else: 24 | print "BaseEngine: undo called, but state_stack is empty. Board is" 25 | self.board.show() 26 | 27 | # subclasses must override this 28 | def name(self): 29 | assert False 30 | 31 | # subclasses must override this 32 | def version(self): 33 | assert False 34 | 35 | # subclasses may override to only accept 36 | # certain board sizes. They should call this 37 | # base method. 
38 | def set_board_size(self, N): 39 | self.board = Board(N) 40 | return True 41 | 42 | def clear_board(self): 43 | self.board.clear() 44 | self.state_stack = [] 45 | self.opponent_passed = False 46 | 47 | def set_komi(self, komi): 48 | self.komi = float(komi) 49 | 50 | def player_passed(self, color): 51 | self.push_state() 52 | self.board.play_pass() 53 | self.opponent_passed = True 54 | 55 | def stone_played(self, x, y, color): 56 | self.push_state() 57 | self.board.play_stone(x, y, color) 58 | self.opponent_passed = False 59 | self.board.show() 60 | 61 | def move_was_played(self, move): 62 | if move.is_play(): 63 | self.stone_played(move.x, move.y, self.board.color_to_play) 64 | elif move.is_pass(): 65 | self.player_passed(self.board.color_to_play) 66 | 67 | # subclasses must override this 68 | def pick_move(self, color): 69 | assert False 70 | 71 | def generate_move(self, color, cleanup=False): 72 | move = self.pick_move(color) 73 | self.push_state() 74 | if move.is_play(): 75 | self.board.play_stone(move.x, move.y, color) 76 | self.board.show() 77 | return move 78 | 79 | def quit(self): 80 | pass 81 | 82 | def supports_final_status_list(self): 83 | return False 84 | 85 | 86 | class IdiotEngine(BaseEngine): 87 | def __init__(self): 88 | super(IdiotEngine,self).__init__() 89 | 90 | def name(self): 91 | return "IdiotEngine" 92 | 93 | def version(self): 94 | return "1.0" 95 | 96 | def pick_move(self, color): 97 | for x in xrange(self.board.N): 98 | for y in xrange(self.board.N): 99 | if self.board.play_is_legal(x, y, color): 100 | return Move(x,y) 101 | return Move.Pass() 102 | 103 | 104 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/Eval.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import os.path 4 | import Features 5 | import Normalization 6 | 7 | 8 | def restore_from_checkpoint(sess, saver, ckpt_dir): 9 | print "Trying to restore from checkpoint in dir", ckpt_dir 10 | ckpt = tf.train.get_checkpoint_state(ckpt_dir) 11 | if ckpt and ckpt.model_checkpoint_path: 12 | print "Checkpoint file is ", ckpt.model_checkpoint_path 13 | saver.restore(sess, ckpt.model_checkpoint_path) 14 | global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1] 15 | print "Restored from checkpoint %s" % global_step 16 | else: 17 | print "No checkpoint file found" 18 | assert False 19 | 20 | class TFEval: 21 | def __init__(self, model): 22 | self.model = model 23 | 24 | # build the graph 25 | with tf.Graph().as_default(): 26 | with tf.device('/cpu:0'): 27 | self.feature_planes = tf.placeholder(tf.float32, shape=[None, self.model.N, self.model.N, self.model.Nfeat], name='feature_planes') 28 | self.score_op = model.inference(self.feature_planes, self.model.N, self.model.Nfeat) 29 | saver = tf.train.Saver(tf.trainable_variables()) 30 | init = tf.initialize_all_variables() 31 | self.sess = tf.Session(config=tf.ConfigProto(log_device_placement=False)) 32 | self.sess.run(init) 33 | checkpoint_dir = os.path.join(model.train_dir, 'checkpoints') 34 | restore_from_checkpoint(self.sess, saver, checkpoint_dir) 35 | 36 | def evaluate(self, board): 37 | board_feature_planes = Features.make_feature_planes_stones_4liberties_4history_ko_4captures(board, board.color_to_play).astype(np.float32) 38 | Normalization.apply_featurewise_normalization_C(board_feature_planes) 39 | feed_dict = {self.feature_planes: 
board_feature_planes.reshape(1,self.model.N,self.model.N,self.model.Nfeat)} 40 | score = np.asscalar(self.sess.run(self.score_op, feed_dict)) 41 | return score 42 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/EvalEngine.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import tensorflow as tf 3 | import numpy as np 4 | import os 5 | from Engine import * 6 | from Board import * 7 | import Features 8 | import Normalization 9 | import Symmetry 10 | import Checkpoint 11 | 12 | def average_probs_over_symmetries(probs): 13 | assert probs.size == 8 14 | return probs.mean() 15 | 16 | class EvalEngine(BaseEngine): 17 | def name(self): 18 | return "EvalEngine" 19 | 20 | def version(self): 21 | return "1.0" 22 | 23 | def __init__(self, model): 24 | BaseEngine.__init__(self) 25 | self.model = model 26 | with tf.Graph().as_default(): 27 | with tf.device('/cpu:0'): 28 | self.feature_planes = tf.placeholder(tf.float32, shape=[None, self.model.N, self.model.N, self.model.Nfeat], name='feature_planes') 29 | self.probs_op = model.inference(self.feature_planes, self.model.N, self.model.Nfeat) 30 | saver = tf.train.Saver(tf.trainable_variables()) 31 | init = tf.initialize_all_variables() 32 | self.sess = tf.Session(config=tf.ConfigProto(log_device_placement=False)) 33 | self.sess.run(init) 34 | checkpoint_dir = os.path.join(model.train_dir, 'checkpoints') 35 | Checkpoint.restore_from_checkpoint(self.sess, saver, checkpoint_dir) 36 | 37 | def get_position_eval(self): 38 | #assert self.model.Nfeat == 21 39 | #board_feature_planes = Features.make_feature_planes_stones_4liberties_4history_ko_4captures(self.board, self.board.color_to_play).astype(np.float32) 40 | #Normalization.apply_featurewise_normalization_C(board_feature_planes) 41 | assert self.model.Nfeat == 22 42 | board_feature_planes = Features.make_feature_planes_stones_4liberties_4history_ko_4captures_komi(self.board, self.board.color_to_play, self.komi).astype(np.float32) 43 | Normalization.apply_featurewise_normalization_D(board_feature_planes) 44 | feature_batch = Symmetry.make_symmetry_batch(board_feature_planes) 45 | feed_dict = {self.feature_planes: feature_batch} 46 | probs_batch = self.sess.run(self.probs_op, feed_dict) 47 | prob = average_probs_over_symmetries(probs_batch) 48 | if self.board.color_to_play == Color.White: 49 | prob *= -1 50 | return prob 51 | 52 | def pick_move(self, color): 53 | for i in xrange(10000): 54 | x = np.random.randint(0, self.board.N-1) 55 | y = np.random.randint(0, self.board.N-1) 56 | if self.board.play_is_legal(x, y, color): 57 | return Move(x,y) 58 | return Move.Pass 59 | 60 | 61 | if __name__ == '__main__': 62 | import GTP 63 | fclient = GTP.redirect_all_output("log_engine.txt") 64 | 65 | import EvalModels 66 | 67 | engine = EvalEngine(EvalModels.Conv11PosDepFC1ELU(N=19, Nfeat=22)) 68 | 69 | gtp = GTP.GTP(engine, fclient) 70 | gtp.loop() 71 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/EvalModels.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from Layers import * 3 | 4 | class Conv5PosDepFC1ELU: 5 | def __init__(self, N, Nfeat): 6 | self.train_dir = "/home/greg/coding/ML/go/NN/work/train_dirs/eval_conv5posdepfc1ELU_N%d_fe%d" % (N, Nfeat) 7 | self.N = N 8 | self.Nfeat = Nfeat 9 | def inference(self, feature_planes, N, Nfeat): 10 | NK = 64 11 | NKfirst 
= 64 12 | Nfc = 256 13 | conv1 = ELU_conv_pos_dep_bias(feature_planes, 5, Nfeat, NKfirst, N, 'conv1') 14 | conv2 = ELU_conv_pos_dep_bias(conv1, 3, NKfirst, NK, N, 'conv2') 15 | conv3 = ELU_conv_pos_dep_bias(conv2, 3, NK, NK, N, 'conv3') 16 | conv4 = ELU_conv_pos_dep_bias(conv3, 3, NK, NK, N, 'conv4') 17 | conv5 = ELU_conv_pos_dep_bias(conv4, 3, NK, NK, N, 'conv5') 18 | conv6 = ELU_conv_pos_dep_bias(conv5, 3, NK, NK, N, 'conv6') 19 | conv7 = ELU_conv_pos_dep_bias(conv6, 3, NK, NK, N, 'conv7') 20 | conv8 = ELU_conv_pos_dep_bias(conv7, 3, NK, NK, N, 'conv8') 21 | conv9 = ELU_conv_pos_dep_bias(conv8, 3, NK, NK, N, 'conv9') 22 | conv10 = ELU_conv_pos_dep_bias(conv9, 3, NK, NK, N, 'conv10') 23 | conv11 = ELU_conv_pos_dep_bias(conv10, 3, NK, NK, N, 'conv11') 24 | conv11_flat = tf.reshape(conv11, [-1, NK*N*N]) 25 | fc = ELU_fully_connected_layer(conv11_flat, NK*N*N, Nfc) 26 | score = tf.tanh(linear_layer(fc, Nfc, 1)) 27 | return score 28 | 29 | class Conv11PosDepFC1ELU: 30 | def __init__(self, N, Nfeat): 31 | self.train_dir = "/home/greg/coding/ML/go/NN/work/train_dirs/eval_conv11posdepfc1ELU_N%d_fe%d" % (N, Nfeat) 32 | self.N = N 33 | self.Nfeat = Nfeat 34 | def inference(self, feature_planes, N, Nfeat): 35 | NK = 256 36 | NKfirst = 256 37 | Nfc = 256 38 | conv1 = ELU_conv_pos_dep_bias(feature_planes, 5, Nfeat, NKfirst, N, 'conv1') 39 | conv2 = ELU_conv_pos_dep_bias(conv1, 3, NKfirst, NK, N, 'conv2') 40 | conv3 = ELU_conv_pos_dep_bias(conv2, 3, NK, NK, N, 'conv3') 41 | conv4 = ELU_conv_pos_dep_bias(conv3, 3, NK, NK, N, 'conv4') 42 | conv5 = ELU_conv_pos_dep_bias(conv4, 3, NK, NK, N, 'conv5') 43 | conv6 = ELU_conv_pos_dep_bias(conv5, 3, NK, NK, N, 'conv6') 44 | conv7 = ELU_conv_pos_dep_bias(conv6, 3, NK, NK, N, 'conv7') 45 | conv8 = ELU_conv_pos_dep_bias(conv7, 3, NK, NK, N, 'conv8') 46 | conv9 = ELU_conv_pos_dep_bias(conv8, 3, NK, NK, N, 'conv9') 47 | conv10 = ELU_conv_pos_dep_bias(conv9, 3, NK, NK, N, 'conv10') 48 | conv11 = ELU_conv_pos_dep_bias(conv10, 3, NK, NK, N, 'conv11') 49 | conv11_flat = tf.reshape(conv11, [-1, NK*N*N]) 50 | fc = ELU_fully_connected_layer(conv11_flat, NK*N*N, Nfc) 51 | score = tf.tanh(linear_layer(fc, Nfc, 1)) 52 | return score 53 | 54 | class Linear: 55 | def __init__(self, N, Nfeat): 56 | self.train_dir = "/home/greg/coding/ML/go/NN/work/train_dirs/linear_N%d_fe%d" % (N, Nfeat) 57 | self.N = N 58 | self.Nfeat = Nfeat 59 | def inference(self, feature_planes, N, Nfeat): 60 | features_flat = tf.reshape(feature_planes, [-1, N*N*Nfeat]) 61 | weights = tf.Variable(tf.constant(0.0, shape=[N*N*Nfeat, 1]), name='weights') 62 | #weights = tf.constant(0.0, shape=[N*N*Nfeat, 1]) 63 | bias = tf.Variable(tf.constant(0.0, shape=[1])) 64 | out = tf.matmul(features_flat, weights) + bias 65 | #out = tf.matmul(features_flat, weights) 66 | score = tf.tanh(out) 67 | return score 68 | 69 | class Zero: 70 | def __init__(self, N, Nfeat): 71 | self.train_dir = "/home/greg/coding/ML/go/NN/work/train_dirs/zero_N%d_fe%d" % (N, Nfeat) 72 | def inference(self, feature_planes, N, Nfeat): 73 | dummy = tf.Variable(tf.constant(0.0, dtype=tf.float32), name='dummy') 74 | return dummy * tf.constant(0.0, dtype=tf.float32, shape=[128]) 75 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/EvalStats.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | 4 | 5 | def do_game(sgf, correct, tries): 6 | reader = SGFReader(sgf) 7 | 8 | if reader.komi == None: 9 | print "skiping %s b/c there's 
no komi given" % sgf 10 | return 11 | komi = float(reader.komi) 12 | if not komi_allowed(komi): 13 | print "skipping %s b/c of non-allowed komi \"%s\"" % (sgf, reader.komi) 14 | 15 | if reader.result == None: 16 | print "skipping %s because there's no result given" % sgf 17 | return 18 | elif "B+" in reader.result: 19 | winner = Color.Black 20 | elif "W+" in reader.result: 21 | winner = Color.White 22 | else: 23 | print "skipping %s because I can't figure out the winner from \"%s\"" % (sgf, reader.result) 24 | return 25 | 26 | turn_num = 0 27 | while True: 28 | feature_planes = feature_maker(reader.board, reader.next_play_color(), komi) 29 | final_score = +1 if reader.next_play_color() == winner else -1 30 | final_score_arr = np.array([final_score], dtype=np.int8) 31 | 32 | writer.push_example((feature_planes, final_score_arr)) 33 | if reader.has_more(): 34 | reader.play_next_move() 35 | else: 36 | break 37 | 38 | def do_stats_on_sgfs(sgfs): 39 | for sgf in sgfs: 40 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/EvalTraining.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import time 4 | import multiprocessing 5 | import random 6 | import Symmetry 7 | import NPZ 8 | import gc 9 | 10 | def apply_random_symmetries(many_feature_planes): 11 | N = many_feature_planes.shape[1] 12 | for i in range(many_feature_planes.shape[0]): 13 | s = random.randint(0, 7) 14 | #Symmetry.apply_symmetry_planes(many_feature_planes[i,:,:,:], s) 15 | Symmetry.apply_symmetry_features_example(many_feature_planes, i, s) 16 | 17 | 18 | """ 19 | def build_feed_dict(loader, apply_normalization, feature_planes, final_scores): 20 | a = time.time() 21 | batch = loader.next_minibatch(('feature_planes', 'final_scores')) 22 | b = time.time() 23 | loaded_feature_planes = batch['feature_planes'].astype(np.float32) 24 | loaded_scores = batch['final_scores'].astype(np.float32) # BIT ME HARD. 25 | c = time.time() 26 | 27 | loaded_scores = np.ravel(loaded_scores) # flatten to 1D 28 | d = time.time() 29 | 30 | apply_normalization(loaded_feature_planes) 31 | e = time.time() 32 | 33 | #print "WARNING: NOT APPLYING SYMMETRIES!!!!!!!!!!!!!!!!" 34 | apply_random_symmetries(loaded_feature_planes) 35 | f = time.time() 36 | 37 | print "b-a = %f, c-b = %f, d-c = %f, e-d = %f, f-e = %f" % ((b-a,c-b,d-c,e-d,f-e)) 38 | 39 | #N = loaded_feature_planes.shape[1] 40 | 41 | #print "loaded_feature_planes =" 42 | #print loaded_feature_planes 43 | #print "loaded_scores =" 44 | #print loaded_scores 45 | 46 | return { feature_planes: loaded_feature_planes, 47 | final_scores: loaded_scores } 48 | """ 49 | 50 | def build_feed_dict_strings(loader, apply_normalization): 51 | a = time.time() 52 | batch = loader.next_minibatch(('feature_planes', 'final_scores')) 53 | b = time.time() 54 | loaded_feature_planes = batch['feature_planes'].astype(np.float32) 55 | loaded_scores = batch['final_scores'].astype(np.float32) # BIT ME HARD. 
56 | loaded_scores = np.ravel(loaded_scores) # flatten to 1D 57 | c = time.time() 58 | apply_normalization(loaded_feature_planes) 59 | d = time.time() 60 | apply_random_symmetries(loaded_feature_planes) 61 | e = time.time() 62 | print "b-a=%f, c-b=%f, d-c=%f, e-d=%f" % (b-a, c-b, d-c, e-d) 63 | return { 'feature_planes': loaded_feature_planes, 64 | 'final_scores': loaded_scores } 65 | 66 | def dict_strings_to_ops(feed_dict_strings, feature_planes_ph, final_scores_ph): 67 | return { feature_planes_ph: feed_dict_strings['feature_planes'], 68 | final_scores_ph: feed_dict_strings['final_scores'] } 69 | 70 | def build_feed_dict(loader, apply_normalization, feature_planes_ph, final_scores_ph): 71 | return dict_strings_to_ops(build_feed_dict_strings(loader, apply_normalization), feature_planes_ph, final_scores_ph) 72 | 73 | def async_worker(q, npz_dir, minibatch_size, apply_normalization): 74 | print "Hello from EvalTraining async_worker process!!!" 75 | gc.set_debug(gc.DEBUG_STATS) 76 | loader = NPZ.RandomizingLoader(npz_dir, minibatch_size) 77 | names = ('feature_planes', 'final_scores') 78 | while True: 79 | feed_dict_strings = build_feed_dict_strings(loader, apply_normalization) 80 | q.put(feed_dict_strings, block=True) # will block if queue is full 81 | 82 | class AsyncRandomBatchQueue: 83 | def __init__(self, feature_planes_ph, final_scores_ph, npz_dir, minibatch_size, apply_normalization): 84 | self.q = multiprocessing.Queue(maxsize=5) 85 | self.process = multiprocessing.Process(target=async_worker, args=(self.q, npz_dir, minibatch_size, apply_normalization)) 86 | self.process.daemon = True 87 | self.process.start() 88 | self.feature_planes_ph = feature_planes_ph 89 | self.final_scores_ph = final_scores_ph 90 | def next_feed_dict(self): 91 | feed_dict_strings = self.q.get(block=True, timeout=30) 92 | feed_dict = dict_strings_to_ops(feed_dict_strings, self.feature_planes_ph, self.final_scores_ph) 93 | return feed_dict 94 | 95 | 96 | def loss_func(score_op): 97 | final_scores = tf.placeholder(tf.float32, shape=[None]) 98 | 99 | squared_errors = tf.square(tf.reshape(score_op, [-1]) - final_scores) 100 | #mean_sq_err = tf.reduce_mean(squared_errors, name='mean_sq_err') 101 | cross_entropy_ish_loss = tf.reduce_mean(-tf.log(tf.constant(1.0) - tf.constant(0.5) * tf.abs(tf.reshape(score_op, [-1]) - final_scores), name='cross-entropy-ish-loss')) 102 | 103 | correct_prediction = tf.equal(tf.sign(tf.reshape(score_op, [-1])), tf.sign(final_scores)) 104 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy') 105 | #return final_scores, mean_sq_err, accuracy, squared_errors 106 | return final_scores, cross_entropy_ish_loss, accuracy 107 | 108 | 109 | 110 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/Game.py: -------------------------------------------------------------------------------- 1 | from Board import * 2 | 3 | def str_from_vertex(x, y): 4 | return chr(ord('a')+x) + chr(ord('a'))+y 5 | 6 | class Game: 7 | def __init__(self, N, komi, rules): 8 | self.moves = [] 9 | self.N = N 10 | self.komi = komi 11 | self.rules = rules 12 | self.result = None 13 | 14 | def add_move(self, move): 15 | self.moves.append(move) 16 | 17 | def set_result(self, move): 18 | self.result = result 19 | 20 | def write_sgf(self, filename): 21 | assert self.result != None 22 | with open(filename, 'w') as f: 23 | f.write("(;GM[1]FF[4]") 24 | f.write("RU[%s]SZ[%d]KM[%0.2f]\n" % self.rules, self.N, self.komi) 25 | f.write("RE[%s]\n" 
% self.result) 26 | color = Color.Black 27 | for move in moves: 28 | if move.is_resign(): continue 29 | color_str = "B" if color == Color.Black else "W" 30 | vert_str = "" if move.is_pass() else str_from_vertex(move.x, move.y) 31 | f.write(";%s[%s]\n" % (color_str, vert_str)) 32 | color = flipped_color[color] 33 | f.write(")\n") 34 | 35 | 36 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/HelperEngine.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import subprocess 3 | from GTP import * 4 | from Board import * 5 | 6 | # Using gnugo to determine when to pass and to play cleanup moves 7 | 8 | class HelperEngine: 9 | def __init__(self, level=10): 10 | command = ["gnugo", "--mode", "gtp", "--level", str(level), "--chinese-rules", "--positional-superko"] 11 | self.proc = subprocess.Popen(command, bufsize=1, stdin=subprocess.PIPE, stdout=subprocess.PIPE) # bufsize=1 is line buffered 12 | 13 | def send_command(self, command): 14 | print "HelperEngine: sending command \"%s\"" % command 15 | self.proc.stdin.write(command) 16 | self.proc.stdin.write('\n') 17 | 18 | response = "" 19 | while True: 20 | line = self.proc.stdout.readline() 21 | if line.startswith('='): 22 | response += line[2:] 23 | elif line.startswith('?'): 24 | print "HelperEngine: error response! line is \"%s\"" % line 25 | response += line[2:] 26 | elif len(line.strip()) == 0: 27 | # blank line ends response 28 | break 29 | else: 30 | response += line 31 | response = response.strip() 32 | print "HelperEngine: got response \"%s\"" % response 33 | return response 34 | 35 | def set_board_size(self, N): 36 | self.send_command("boardsize %d" % N) 37 | return True # could parse helper response 38 | 39 | def clear_board(self): 40 | self.send_command("clear_board") 41 | 42 | def set_komi(self, komi): 43 | self.send_command("komi %.2f" % komi) 44 | 45 | def player_passed(self, color): 46 | self.send_command("play %s pass" % color_names[color]) 47 | 48 | def stone_played(self, x, y, color): 49 | self.send_command("play %s %s" % (color_names[color], str_from_coords(x, y))) 50 | 51 | def set_level(self, level): 52 | self.send_command("level %d" % level) 53 | 54 | def generate_move(self, color, cleanup=False): 55 | cmd = "kgs-genmove_cleanup" if cleanup else "genmove" 56 | response = self.send_command("%s %s" % (cmd, color_names[color])) 57 | if 'pass' in response.lower(): 58 | return Move.Pass 59 | elif 'resign' in response.lower(): 60 | return Move.Resign 61 | else: 62 | x, y= coords_from_str(response) 63 | return Move(x, y) 64 | 65 | def undo(self): 66 | self.send_command('undo') 67 | 68 | def quit(self): 69 | pass 70 | 71 | def final_status_list(self, status): 72 | return self.send_command("final_status_list %s" % status) 73 | 74 | def final_score(self): 75 | return self.send_command("final_score") 76 | 77 | 78 | if __name__ == '__main__': 79 | helper = HelperEngine() 80 | 81 | helper.set_board_size(19) 82 | helper.clear_board() 83 | helper.set_komi(6.5) 84 | helper.stone_played(5, 5, Color.Black) 85 | move = helper.generate_move(Color.White) 86 | print "move =", move 87 | helper.undo() 88 | move = helper.pick_move(Color.White) 89 | print "move =", move 90 | 91 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/InfluenceEngine.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import 
numpy as np 3 | import os 4 | from Engine import * 5 | from Board import * 6 | import Features 7 | import Symmetry 8 | import Checkpoint 9 | 10 | class InfluenceEngine(BaseEngine): 11 | def name(self): 12 | return "InfluenceEngine" 13 | 14 | def version(self): 15 | return "1.0" 16 | 17 | def __init__(self, model): 18 | BaseEngine.__init__(self) 19 | self.model = model 20 | with tf.Graph().as_default(): 21 | with tf.device('/cpu:0'): 22 | self.feature_planes = tf.placeholder(tf.float32, shape=[None, self.model.N, self.model.N, self.model.Nfeat], name='feature_planes') 23 | self.logits = model.inference(self.feature_planes, self.model.N, self.model.Nfeat) 24 | saver = tf.train.Saver(tf.trainable_variables()) 25 | init = tf.initialize_all_variables() 26 | self.sess = tf.Session(config=tf.ConfigProto(log_device_placement=False)) 27 | self.sess.run(init) 28 | checkpoint_dir = os.path.join(model.train_dir, 'checkpoints') 29 | Checkpoint.restore_from_checkpoint(self.sess, saver, checkpoint_dir) 30 | 31 | def make_influence_map(self): 32 | if self.model.Nfeat == 15: 33 | board_feature_planes = Features.make_feature_planes_stones_3liberties_4history_ko(self.board, self.board.color_to_play) 34 | assert False, "for some reason I commented out the normalization???" 35 | #Normalization.apply_featurewise_normalization_B(board_feature_planes) 36 | else: 37 | assert False 38 | feature_batch = make_symmetry_batch(board_feature_planes) 39 | feed_dict = {self.feature_planes: feature_batch} 40 | logit_batch = self.sess.run(self.logits, feed_dict) 41 | move_logits = Symmetry.average_plane_over_symmetries(logit_batch, self.model.N) 42 | move_logits = move_logits.reshape((self.model.N, self.model.N)) 43 | influence_map = np.tanh(move_logits) 44 | if self.board.color_to_play == Color.White: 45 | influence_map *= -1 46 | #influence_map = -1 * np.ones((self.model.N, self.model.N), dtype=np.float32) 47 | return influence_map 48 | 49 | 50 | def pick_move(self, color): 51 | for i in xrange(10000): 52 | x = np.random.randint(0, self.board.N-1) 53 | y = np.random.randint(0, self.board.N-1) 54 | if self.board.play_is_legal(x, y, color): 55 | return Move(x,y) 56 | return Move.Pass() 57 | 58 | 59 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/InfluenceModels.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from Layers import * 3 | 4 | class Conv4PosDep: 5 | def __init__(self, N, Nfeat): 6 | self.train_dir = "/home/greg/coding/ML/go/NN/work/train_dirs/influence_conv4posdep_N%d_fe%d" % (N, Nfeat) 7 | self.N = N 8 | self.Nfeat = Nfeat 9 | def inference(self, feature_planes, N, Nfeat): 10 | NK = 16 11 | NKfirst = 16 12 | conv1 = ELU_conv_pos_dep_bias(feature_planes, 5, Nfeat, NKfirst, N, 'conv1') 13 | conv2 = ELU_conv_pos_dep_bias(conv1, 3, NKfirst, NK, N, 'conv2') 14 | conv3 = ELU_conv_pos_dep_bias(conv2, 3, NK, NK, N, 'conv3') 15 | conv4 = conv_pos_dep_bias(conv3, 3, NK, 1, N, 'conv4') 16 | logits = tf.reshape(conv4, [-1, N*N]) 17 | return logits # use with sigmoid and sigmoid_cross_entropy_with_logits 18 | 19 | 20 | class Conv12PosDepELU: 21 | def __init__(self, N, Nfeat): 22 | self.train_dir = "/home/greg/coding/ML/go/NN/work/train_dirs/influence_conv12posdep_N%d_fe%d" % (N, Nfeat) 23 | self.N = N 24 | self.Nfeat = Nfeat 25 | def inference(self, feature_planes, N, Nfeat): 26 | NK = 192 27 | NKfirst = 192 28 | conv1 = ELU_conv_pos_dep_bias(feature_planes, 5, Nfeat, NKfirst, N, 'conv1') 29 | 
conv2 = ELU_conv_pos_dep_bias(conv1, 3, NKfirst, NK, N, 'conv2') 30 | conv3 = ELU_conv_pos_dep_bias(conv2, 3, NK, NK, N, 'conv3') 31 | conv4 = ELU_conv_pos_dep_bias(conv3, 3, NK, NK, N, 'conv4') 32 | conv5 = ELU_conv_pos_dep_bias(conv4, 3, NK, NK, N, 'conv5') 33 | conv6 = ELU_conv_pos_dep_bias(conv5, 3, NK, NK, N, 'conv6') 34 | conv7 = ELU_conv_pos_dep_bias(conv6, 3, NK, NK, N, 'conv7') 35 | conv8 = ELU_conv_pos_dep_bias(conv7, 3, NK, NK, N, 'conv8') 36 | conv9 = ELU_conv_pos_dep_bias(conv8, 3, NK, NK, N, 'conv9') 37 | conv10 = ELU_conv_pos_dep_bias(conv9, 3, NK, NK, N, 'conv10') 38 | conv11 = ELU_conv_pos_dep_bias(conv10, 3, NK, NK, N, 'conv11') 39 | conv12 = conv_pos_dep_bias(conv11, 3, NK, 1, N, 'conv12') 40 | logits = tf.reshape(conv12, [-1, N*N]) 41 | return logits 42 | 43 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/InfluenceTraining.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import random 4 | import Symmetry 5 | 6 | def apply_random_symmetries(many_feature_planes, many_final_maps): 7 | for i in range(many_feature_planes.shape[0]): 8 | s = random.randint(0, 7) 9 | Symmetry.apply_symmetry_planes(many_feature_planes[i,:,:,:], s) 10 | Symmetry.apply_symmetry_plane(many_final_maps[i,:,:], s) 11 | 12 | def build_feed_dict(loader, apply_normalization, feature_planes, final_maps): 13 | batch = loader.next_minibatch(('feature_planes', 'final_maps')) 14 | loaded_feature_planes = batch['feature_planes'].astype(np.float32) 15 | loaded_final_maps = batch['final_maps'].astype(np.float32) 16 | 17 | apply_normalization(loaded_feature_planes) 18 | 19 | apply_random_symmetries(loaded_feature_planes, loaded_final_maps) 20 | 21 | minibatch_size = loaded_feature_planes.shape[0] 22 | N = loaded_feature_planes.shape[1] 23 | return { feature_planes: loaded_feature_planes, 24 | final_maps: loaded_final_maps.reshape((minibatch_size, N*N)) } 25 | 26 | def loss_func(logits): 27 | final_maps = tf.placeholder(tf.float32, shape=[None, 361]) 28 | 29 | # final maps are originally -1 to 1. rescale them to 0 to 1 probabilities: 30 | final_prob_maps = final_maps * tf.constant(0.5) + tf.constant(0.5) 31 | cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, targets=final_prob_maps) 32 | cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy_mean') 33 | 34 | correct_prediction = tf.equal(tf.sign(logits), tf.sign(final_maps)) 35 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 36 | return final_maps, cross_entropy_mean, accuracy 37 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/KGSEngine.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | from Engine import * 3 | from HelperEngine import HelperEngine 4 | 5 | # forwards commands to both a main engine 6 | # and a helper engine. When picking a move, 7 | # we first ask the helper engine. If it passes, 8 | # we pass. 
Otherwise we ask the main engine 9 | class KGSEngine(BaseEngine): 10 | def __init__(self, engine): 11 | self.engine = engine 12 | self.helper = HelperEngine() 13 | 14 | # subclasses must override this 15 | def name(self): 16 | return self.engine.name() 17 | 18 | # subclasses must override this 19 | def version(self): 20 | return self.engine.version() 21 | 22 | def set_board_size(self, N): 23 | return self.engine.set_board_size(N) and \ 24 | self.helper.set_board_size(N) 25 | 26 | def clear_board(self): 27 | self.engine.clear_board() 28 | self.helper.clear_board() 29 | 30 | def set_komi(self, komi): 31 | self.engine.set_komi(komi) 32 | self.helper.set_komi(komi) 33 | 34 | def player_passed(self, color): 35 | self.engine.player_passed(color) 36 | self.helper.player_passed(color) 37 | 38 | def stone_played(self, x, y, color): 39 | self.engine.stone_played(x, y, color) 40 | self.helper.stone_played(x, y, color) 41 | 42 | def generate_move(self, color, cleanup=False): 43 | pass_checking_level = 5 44 | cleanup_level = 10 45 | self.helper.set_level(cleanup_level if cleanup else pass_checking_level) 46 | 47 | move = self.helper.generate_move(color, cleanup) 48 | if move.is_resign(): 49 | return Move.Resign 50 | elif move.is_pass() or cleanup: 51 | self.engine.move_was_played(move) 52 | return move 53 | else: 54 | self.helper.undo() 55 | 56 | move = self.engine.generate_move(color) 57 | if move.is_play(): 58 | self.helper.stone_played(move.x, move.y, color) 59 | elif move.is_pass(): 60 | self.helper.player_passed(color) 61 | return move 62 | 63 | def undo(self): 64 | self.engine.undo() 65 | self.helper.undo() 66 | 67 | def quit(self): 68 | self.engine.quit() 69 | self.helper.quit() 70 | 71 | def supports_final_status_list(self): 72 | return True 73 | 74 | def final_status_list(self, status): 75 | return self.helper.final_status_list(status) 76 | 77 | def get_last_move_probs(self): 78 | return self.engine.get_last_move_probs() 79 | 80 | def toggle_kibitz_mode(self): 81 | return self.engine.toggle_kibitz_mode() 82 | 83 | if __name__ == '__main__': 84 | import GTP 85 | fclient = GTP.redirect_all_output("log_engine.txt") 86 | 87 | from GTP import GTP 88 | from TFEngine import TFEngine 89 | import MoveModels 90 | from Book import PositionRecord 91 | from Book import MoveRecord 92 | 93 | engine = KGSEngine(TFEngine("conv12posdepELU", MoveModels.Conv12PosDepELU(N=19, Nfeat=21))) 94 | 95 | gtp = GTP(engine, fclient) 96 | gtp.loop() 97 | 98 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/Layers.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import math 3 | 4 | def conv(inputs, diameter, Nin, Nout, name): 5 | fan_in = diameter * diameter * Nin 6 | #stddev = math.sqrt(2.0 / fan_in) 7 | print "WARNING: USING DIFFERENT STDDEV FOR CONV!" 
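    # The kernel is drawn from a truncated normal whose stddev scales with the
    # fan-in of the filter; the line below uses sqrt(1/fan_in) instead of the
    # He-initialization value sqrt(2/fan_in) commented out above, which is what
    # the warning print refers to.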
8 | stddev = math.sqrt(1.0 / fan_in) 9 | kernel = tf.Variable(tf.truncated_normal([diameter, diameter, Nin, Nout], stddev=stddev), name=name+'_kernel') 10 | return tf.nn.conv2d(inputs, kernel, [1, 1, 1, 1], padding='SAME') 11 | 12 | def conv_uniform_bias(inputs, diameter, Nin, Nout, name): 13 | bias = tf.Variable(tf.constant(0.0, shape=[Nout]), name=name+'_bias') 14 | return conv(inputs, diameter, Nin, Nout, name) + bias 15 | 16 | def conv_pos_dep_bias(inputs, diameter, Nin, Nout, N, name): 17 | bias = tf.Variable(tf.constant(0.0, shape=[N, N, Nout]), name=name+'_bias') 18 | return conv(inputs, diameter, Nin, Nout, name) + bias 19 | 20 | def ReLU_conv_uniform_bias(inputs, diameter, Nin, Nout, name): 21 | return tf.nn.relu(conv_uniform_bias(inputs, diameter, Nin, Nout, name)) 22 | 23 | def ReLU_conv_pos_dep_bias(inputs, diameter, Nin, Nout, N, name): 24 | return tf.nn.relu(conv_pos_dep_bias(inputs, diameter, Nin, Nout, N, name)) 25 | 26 | def ELU_conv_pos_dep_bias(inputs, diameter, Nin, Nout, N, name): 27 | return tf.nn.elu(conv_pos_dep_bias(inputs, diameter, Nin, Nout, N, name)) 28 | 29 | def linear_layer(inputs, Nin, Nout): 30 | #stddev = math.sqrt(2.0 / Nin) 31 | print "WARNING: USING DIFFERENT STDDEV FOR LINEAR!" 32 | stddev = math.sqrt(1.0 / Nin) 33 | print "linear layer using stddev =", stddev 34 | weights = tf.Variable(tf.truncated_normal([Nin, Nout], stddev=0.1)) 35 | bias = tf.Variable(tf.constant(0.0, shape=[Nout])) 36 | out = tf.matmul(inputs, weights) + bias 37 | return out 38 | 39 | def ReLU_fully_connected_layer(inputs, Nin, Nout): 40 | return tf.nn.relu(linear_layer(inputs, Nin, Nout)) 41 | 42 | def ELU_fully_connected_layer(inputs, Nin, Nout): 43 | return tf.nn.elu(linear_layer(inputs, Nin, Nout)) 44 | 45 | 46 | def preReLU_conv_pos_dep_bias(inputs, diameter, Nin, Nout, N, name): 47 | return conv_pos_dep_bias(tf.nn.relu(inputs), diameter, Nin, Nout, N, name) 48 | 49 | def preELU_conv_pos_dep_bias(inputs, diameter, Nin, Nout, N, name): 50 | return conv_pos_dep_bias(tf.nn.elu(inputs), diameter, Nin, Nout, N, name) 51 | 52 | def residual_block_preReLU_2convs_pos_dep_bias(inputs, diameter, Nfeat, N, name): 53 | conv1 = preReLU_conv_pos_dep_bias(inputs, diameter, Nfeat, Nfeat, N, name + '_1') 54 | conv2 = preReLU_conv_pos_dep_bias(conv1, diameter, Nfeat, Nfeat, N, name + '_2') 55 | return inputs + conv2 56 | 57 | def residual_block_preELU_2convs_pos_dep_bias(inputs, diameter, Nfeat, N, name): 58 | conv1 = preELU_conv_pos_dep_bias(inputs, diameter, Nfeat, Nfeat, N, name + '_1') 59 | conv2 = preELU_conv_pos_dep_bias(conv1, diameter, Nfeat, Nfeat, N, name + '_2') 60 | return inputs + conv2 61 | 62 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/MakeEvalData.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import numpy as np 3 | import os 4 | import os.path 5 | import random 6 | from Board import * 7 | from SGFReader import SGFReader 8 | import Features 9 | import NPZ 10 | 11 | 12 | def write_game_data(sgf, writer, feature_maker, rank_allowed, komi_allowed): 13 | reader = SGFReader(sgf) 14 | 15 | if not rank_allowed(reader.black_rank) or not rank_allowed(reader.white_rank): 16 | print "skipping %s b/c of disallowed rank. 
ranks are %s, %s" % (sgf, reader.black_rank, reader.white_rank) 17 | return 18 | 19 | if reader.komi == None: 20 | print "skiping %s b/c there's no komi given" % sgf 21 | return 22 | komi = float(reader.komi) 23 | if not komi_allowed(komi): 24 | print "skipping %s b/c of non-allowed komi \"%s\"" % (sgf, reader.komi) 25 | 26 | if reader.result == None: 27 | print "skipping %s because there's no result given" % sgf 28 | return 29 | elif "B+" in reader.result: 30 | winner = Color.Black 31 | elif "W+" in reader.result: 32 | winner = Color.White 33 | else: 34 | print "skipping %s because I can't figure out the winner from \"%s\"" % (sgf, reader.result) 35 | return 36 | 37 | while True: 38 | feature_planes = feature_maker(reader.board, reader.next_play_color(), komi) 39 | final_score = +1 if reader.next_play_color() == winner else -1 40 | final_score_arr = np.array([final_score], dtype=np.int8) 41 | 42 | writer.push_example((feature_planes, final_score_arr)) 43 | if reader.has_more(): 44 | reader.play_next_move() 45 | else: 46 | break 47 | 48 | def make_KGS_eval_data(): 49 | N = 19 50 | Nfeat = 22 51 | feature_maker = Features.make_feature_planes_stones_4liberties_4history_ko_4captures_komi 52 | 53 | #for set_name in ['train', 'val', 'test']: 54 | print "WARNING: ONLY DOING VAL AND TEST SETS!" 55 | for set_name in ['val', 'test']: 56 | games_dir = "/home/greg/coding/ML/go/NN/data/KGS/SGFs/%s" % set_name 57 | out_dir = "/home/greg/coding/ML/go/NN/data/KGS/eval_examples/stones_4lib_4hist_ko_4cap_komi_Nf22/%s" % set_name 58 | 59 | writer = NPZ.RandomizingWriter(out_dir=out_dir, 60 | names=['feature_planes', 'final_scores'], 61 | shapes=[(N,N,Nfeat), (1,)], 62 | dtypes=[np.int8, np.int8], 63 | Nperfile=128, buffer_len=50000) 64 | 65 | rank_allowed = lambda rank: True 66 | 67 | komi_allowed = lambda komi: komi in [0.5, 5.5, 6.5, 7.5] 68 | 69 | sgfs = [] 70 | for sub_dir in os.listdir(games_dir): 71 | for fn in os.listdir(os.path.join(games_dir, sub_dir)): 72 | sgfs.append(os.path.join(games_dir, sub_dir, fn)) 73 | random.shuffle(sgfs) 74 | 75 | num_games = 0 76 | for sgf in sgfs: 77 | #print "making eval data from %s" % sgf 78 | write_game_data(sgf, writer, feature_maker, rank_allowed, komi_allowed) 79 | num_games += 1 80 | if num_games % 100 == 0: print "Finished %d games of %d" % (num_games, len(sgfs)) 81 | 82 | writer.drain() 83 | 84 | 85 | def komi_test(): 86 | games_dir = "/home/greg/coding/ML/go/NN/data/KGS/SGFs/train" 87 | sgfs = [] 88 | for sub_dir in os.listdir(games_dir): 89 | for fn in os.listdir(os.path.join(games_dir, sub_dir)): 90 | sgfs.append(os.path.join(games_dir, sub_dir, fn)) 91 | random.shuffle(sgfs) 92 | counts = {} 93 | num_games = 0 94 | for sgf in sgfs: 95 | reader = SGFReader(sgf) 96 | print "komi =", reader.komi 97 | if reader.komi in counts: 98 | counts[reader.komi] += 1 99 | else: 100 | counts[reader.komi] = 1 101 | num_games += 1 102 | if num_games % 100 == 0: 103 | print "counts:", counts 104 | 105 | 106 | 107 | if __name__ == '__main__': 108 | make_KGS_eval_data() 109 | #komi_test() 110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/MakeInfluenceData.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import numpy as np 3 | import os 4 | import os.path 5 | import random 6 | from Board import * 7 | from SGFReader import SGFReader 8 | #from MakeMoveData import show_plane 9 | import Features 10 | import NPZ 11 | 12 | def 
find_vertices_reached_by_color(board, color): 13 | visited = np.zeros((board.N, board.N), dtype=np.bool_) 14 | reached = np.zeros((board.N, board.N), dtype=np.int8) 15 | 16 | for x in xrange(board.N): 17 | for y in xrange(board.N): 18 | if not visited[x,y] and board[x,y] == color: 19 | q = [(x,y)] 20 | visited[x,y] = True 21 | reached[x,y] = 1 22 | while q: 23 | vert = q.pop() 24 | for adj in board.adj_vertices(vert): 25 | if not visited[adj] and (board[adj] == color or board[adj] == Color.Empty): 26 | q.append(adj) 27 | visited[adj] = True 28 | reached[adj] = True 29 | return reached 30 | 31 | 32 | 33 | 34 | def get_final_territory_map(sgf): 35 | reader = SGFReader(sgf) 36 | while reader.has_more(): 37 | reader.play_next_move() 38 | 39 | #reader.board.show() 40 | 41 | reached_by_black = find_vertices_reached_by_color(reader.board, Color.Black) 42 | reached_by_white = find_vertices_reached_by_color(reader.board, Color.White) 43 | 44 | #print "reached_by_black:" 45 | #show_plane(reached_by_black) 46 | #print "reached_by_white:" 47 | #show_plane(reached_by_white) 48 | 49 | territory_map = reached_by_black - reached_by_white 50 | #print "territory_map:\n", territory_map 51 | return territory_map 52 | 53 | 54 | def write_game_data(sgf, sgf_aftermath, writer, feature_maker, rank_allowed): 55 | final_map_black_positive = get_final_territory_map(sgf_aftermath) 56 | final_map_white_positive = (-1) * final_map_black_positive 57 | reader = SGFReader(sgf) 58 | 59 | if not rank_allowed(reader.black_rank) or not rank_allowed(reader.white_rank): 60 | #print "skipping game b/c of disallowed rank. ranks are %s, %s" % (reader.black_rank, reader.white_rank) 61 | return 62 | 63 | while True: 64 | feature_planes = feature_maker(reader.board, reader.next_play_color()) 65 | final_map = final_map_black_positive if reader.next_play_color() == Color.Black else final_map_white_positive 66 | writer.push_example((feature_planes, final_map)) 67 | if reader.has_more(): 68 | reader.play_next_move() 69 | else: 70 | break 71 | 72 | def make_KGS_influence_data(): 73 | N = 19 74 | Nfeat = 15 75 | feature_maker = Features.make_feature_planes_stones_3liberties_4history_ko 76 | 77 | for set_name in ['train', 'val', 'test']: 78 | games_dir = "/home/greg/coding/ML/go/NN/data/KGS/influence/games/played_out/%s" % set_name 79 | aftermath_dir = "/home/greg/coding/ML/go/NN/data/KGS/influence/games/playouts" 80 | out_dir = "/home/greg/coding/ML/go/NN/data/KGS/influence/examples/stones_3lib_4hist_ko_Nf15/%s" % set_name 81 | 82 | writer = NPZ.RandomizingWriter(out_dir=out_dir, 83 | names=['feature_planes', 'final_maps'], 84 | shapes=[(N,N,Nfeat), (N,N)], 85 | dtypes=[np.int8, np.int8], 86 | Nperfile=128, buffer_len=50000) 87 | 88 | rank_allowed = lambda rank: rank in ['1d', '2d', '3d', '4d', '5d', '6d', '7d', '8d', '9d', '10d', 89 | '1p', '2p', '3p', '4p', '5p', '6p', '7p', '8p', '9p', '10p'] 90 | 91 | game_fns = os.listdir(games_dir) 92 | random.shuffle(game_fns) 93 | num_games = 0 94 | for fn in game_fns: 95 | print "making influence data from %s" % fn 96 | sgf = os.path.join(games_dir, fn) 97 | sgf_aftermath = os.path.join(aftermath_dir, 'played_out_' + fn) 98 | assert os.path.isfile(sgf_aftermath) 99 | 100 | write_game_data(sgf, sgf_aftermath, writer, feature_maker, rank_allowed) 101 | 102 | num_games += 1 103 | if num_games % 100 == 0: print "Finished %d games of %d" % (num_games, len(game_fns)) 104 | 105 | writer.drain() 106 | 107 | 108 | 109 | if __name__ == '__main__': 110 | 
#get_final_territory_map("/home/greg/coding/ML/go/NN/data/KGS/influence/games/playouts/played_out_2001-05-01-2.sgf") 111 | #make_KGS_influence_data() 112 | import cProfile 113 | cProfile.run('make_KGS_influence_data()', sort='cumtime') 114 | 115 | 116 | 117 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/MakeMoveData.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import numpy as np 3 | import sys 4 | import os 5 | import os.path 6 | import time 7 | import random 8 | from SGFReader import SGFReader 9 | from Board import * 10 | import Features 11 | import NPZ 12 | 13 | def make_move_arr(x, y): 14 | return np.array([x,y], dtype=np.int8) 15 | 16 | def show_plane(array): 17 | assert len(array.shape) == 2 18 | N = array.shape[0] 19 | print "=" * N 20 | for y in xrange(N): 21 | for x in xrange(N): 22 | sys.stdout.write('1' if array[x,y]==1 else '0') 23 | sys.stdout.write('\n') 24 | print "=" * array.shape[1] 25 | 26 | def show_all_planes(array): 27 | assert len(array.shape) == 3 28 | for i in xrange(array.shape[2]): 29 | print "PLANE %d:" % i 30 | show_plane(array[:,:,i]) 31 | 32 | def show_feature_planes_and_move(feature_planes, move): 33 | print "FEATURE PLANES:" 34 | show_all_planes(feature_planes) 35 | print "MOVE:" 36 | print move 37 | 38 | def show_batch(all_feature_planes, all_moves): 39 | batch_size = all_feature_planes.shape[0] 40 | print "MINIBATCH OF SIZE", batch_size 41 | for i in xrange(batch_size): 42 | print "EXAMPLE", i 43 | show_feature_planes_and_move(all_feature_planes[i,:,:,:], all_moves[i,:]) 44 | 45 | def test_feature_planes(): 46 | board = Board(5) 47 | moves = [(0,0), (1,1), (2,2), (3,3), (4,4)] 48 | play_color = Color.Black 49 | for x,y in moves: 50 | board.show() 51 | feature_planes = make_feature_planes(board, play_color) 52 | move_arr = make_move_arr(x, y) 53 | show_feature_planes_and_move(feature_planes, move_arr) 54 | print 55 | board.play_stone(x, y, play_color) 56 | play_color = flipped_color[play_color] 57 | 58 | def write_game_data(sgf, writer, feature_maker, rank_allowed): 59 | reader = SGFReader(sgf) 60 | 61 | color_is_good = { Color.Black: rank_allowed(reader.black_rank), 62 | Color.White: rank_allowed(reader.white_rank) } 63 | if (not color_is_good[Color.White]) and (not color_is_good[Color.Black]): 64 | print "skipping game b/c of disallowed rank. ranks are B=%s, W=%s" % (reader.black_rank, reader.white_rank) 65 | return 66 | elif not color_is_good[Color.White]: 67 | print "ignoring white because of rank. ranks are B=%s, W=%s" % (reader.black_rank, reader.white_rank) 68 | elif not color_is_good[Color.Black]: 69 | print "ignoring black because of rank. ranks are B=%s, W=%s" % (reader.black_rank, reader.white_rank) 70 | 71 | try: 72 | while reader.has_more(): 73 | vertex, color = reader.peek_next_move() 74 | if vertex and color_is_good[color]: # if not pass, and if player is good enough 75 | x, y = vertex 76 | if reader.board.play_is_legal(x, y, color): 77 | feature_planes = feature_maker(reader.board, color) 78 | move_arr = make_move_arr(x, y) 79 | writer.push_example((feature_planes, move_arr)) 80 | else: 81 | raise IllegalMoveException("playing a %s stone at (%d,%d) is illegal" % (color_names[color], x, y)) 82 | reader.play_next_move() 83 | except IllegalMoveException, e: 84 | print "Aborting b/c of illegal move!" 
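        # An illegal move here means the SGF record and the Board logic disagree,
        # so the run is aborted (exit below) instead of silently writing bad
        # training examples.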
85 | print "sgf causing exception is %s" % sgf 86 | print e 87 | exit(-1) 88 | 89 | def make_move_prediction_data(sgf_list, N, Nfeat, out_dir, feature_maker, rank_allowed): 90 | sgf_list = list(sgf_list) # make local copy to permute 91 | random.shuffle(sgf_list) 92 | 93 | writer = NPZ.RandomizingWriter(out_dir=out_dir, 94 | names=['feature_planes', 'moves'], 95 | shapes=[(N,N,Nfeat), (2,)], 96 | dtypes=[np.int8, np.int8], 97 | Nperfile=128, buffer_len=50000) 98 | 99 | num_games = 0 100 | for sgf in sgf_list: 101 | print "processing %s" % sgf 102 | write_game_data(sgf, writer, feature_maker, rank_allowed) 103 | num_games += 1 104 | if num_games % 100 == 0: print "num_games =", num_games 105 | writer.drain() 106 | 107 | def make_KGS_move_data(): 108 | N = 19 109 | Nfeat = 21 110 | feature_maker = Features.make_feature_planes_stones_4liberties_4history_ko_4captures 111 | rank_allowed = lambda rank: rank in ['6d', '7d', '8d', '9d', '10d', '11d', 112 | '1p', '2p', '3p', '4p', '5p', '6p', '7p', '8p', '9p', '10p', '11p'] 113 | 114 | for set_name in ['train', 'val', 'test']: 115 | base_dir = "/home/greg/coding/ML/go/NN/data/KGS/SGFs/%s" % set_name 116 | sgfs = [os.path.join(base_dir, sub_dir, fn) for sub_dir in os.listdir(base_dir) for fn in os.listdir(os.path.join(base_dir, sub_dir))] 117 | out_dir = "/home/greg/coding/ML/go/NN/data/KGS/move_examples/6dan_stones_4lib_4hist_ko_4cap_Nf21/%s" % set_name 118 | 119 | make_move_prediction_data(sgfs, N, Nfeat, out_dir, feature_maker, rank_allowed) 120 | 121 | def make_GoGoD_move_data(): 122 | N = 19 123 | Nfeat = 21 124 | feature_maker = Features.make_feature_planes_stones_4liberties_4history_ko_4captures 125 | rank_allowed = lambda rank: rank in [ '1d', '2d', '3d', '4d', '5d', '6d', '7d', '8d', '9d', '10d', '11d' ] 126 | 127 | for set_name in ['train', 'val', 'test']: 128 | with open('/home/greg/coding/ML/go/NN/data/GoGoD/bad_sgfs/bad_sgfs.txt', 'r') as f: 129 | excluded_sgfs = [line.strip() for line in f.readlines()] 130 | print "excluded_sgfs =\n", excluded_sgfs 131 | base_dir = "/home/greg/coding/ML/go/NN/data/GoGoD/sets/%s" % set_name 132 | sgfs = [os.path.join(base_dir, sub_dir, fn) for sub_dir in os.listdir(base_dir) for fn in os.listdir(os.path.join(base_dir, sub_dir)) if fn not in excluded_sgfs] 133 | out_dir = "/home/greg/coding/ML/go/NN/data/GoGoD/move_examples/stones_4lib_4hist_ko_4cap_Nf21/%s" % set_name 134 | make_move_prediction_data(sgfs, N, Nfeat, out_dir, feature_maker, rank_allowed) 135 | 136 | 137 | 138 | if __name__ == "__main__": 139 | #test_feature_planes() 140 | #test_minibatch_read_write() 141 | #test_TrainingDataWrite() 142 | #run_PlaneTester() 143 | 144 | #make_KGS_move_data() 145 | make_GoGoD_move_data() 146 | #make_CGOS9x9_training_data() 147 | 148 | #import cProfile 149 | #cProfile.run('make_KGS_training_data()', sort='cumtime') 150 | 151 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/MirrorEngine.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import numpy as np 3 | from Engine import * 4 | from Board import * 5 | 6 | class MirrorEngine(BaseEngine): 7 | def __init__(self): 8 | super(BaseEngine,self).__init__() 9 | self.last_opponent_play = None 10 | 11 | def name(self): 12 | return "MirrorEngine" 13 | 14 | def version(self): 15 | return "1.0" 16 | 17 | def stone_played(self, x, y, color): 18 | super(MirrorEngine, self).stone_played(x, y, color) 19 | self.last_opponent_play = (x,y) 20 | 21 | def 
pick_move(self, color): 22 | if not self.opponent_passed and self.last_opponent_play: 23 | mirror_x = self.board.N - self.last_opponent_play[0] - 1 24 | mirror_y = self.board.N - self.last_opponent_play[1] - 1 25 | if self.board.play_is_legal(mirror_x, mirror_y, color): 26 | return (mirror_x, mirror_y) 27 | 28 | enemy_stones = (self.board.vertices == flipped_color[color]) 29 | our_stones = (self.board.vertices == color) 30 | rot_enemy_stones = np.rot90(enemy_stones, 2) 31 | 32 | play_vertices = np.logical_and(rot_enemy_stones, np.logical_not(our_stones)) 33 | play_vertices = np.logical_and(play_vertices, np.logical_not(enemy_stones)) 34 | 35 | for x in xrange(self.board.N): 36 | for y in xrange(self.board.N): 37 | if play_vertices[x,y] and self.board.play_is_legal(x, y, color): 38 | return (x,y) 39 | 40 | center = (self.board.N/2, self.board.N/2) 41 | if self.board[center] == Color.Empty and self.board.play_is_legal(center[0], center[1], color): 42 | return center 43 | 44 | return None 45 | 46 | 47 | if __name__ == '__main__': 48 | import sys 49 | import os 50 | from GTP import GTP 51 | 52 | # Redirect stuff that would normally go to stdout 53 | # and stderr to a file. 54 | fclient = sys.stdout 55 | logfile = "log_mirror.txt" 56 | sys.stdout = sys.stderr = open(logfile, 'w', 0) # 0 = unbuffered 57 | 58 | engine = MirrorEngine() 59 | gtp = GTP(engine, fclient) 60 | gtp.loop() 61 | 62 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/MoveTraining.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import random 4 | import Symmetry 5 | 6 | def apply_random_symmetries(many_feature_planes, many_move_arrs): 7 | N = many_feature_planes.shape[1] 8 | for i in range(many_feature_planes.shape[0]): 9 | s = random.randint(0, 7) 10 | Symmetry.apply_symmetry_planes(many_feature_planes[i,:,:,:], s) 11 | Symmetry.apply_symmetry_vertex(many_move_arrs[i,:], N, s) 12 | 13 | 14 | def build_feed_dict(loader, apply_normalization, feature_planes, move_indices): 15 | batch = loader.next_minibatch(('feature_planes', 'moves') ) 16 | loaded_feature_planes = batch['feature_planes'].astype(np.float32) 17 | loaded_move_arrs = batch['moves'].astype(np.int32) # BIT ME HARD. 
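    # 'moves' are stored as int8 (x, y) pairs (see make_move_arr in
    # MakeMoveData.py); they are cast to int32 here and flattened below into
    # single board indices N*x + y, the form expected by the sparse softmax
    # cross-entropy loss in loss_func.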
18 | 19 | apply_normalization(loaded_feature_planes) 20 | 21 | apply_random_symmetries(loaded_feature_planes, loaded_move_arrs) 22 | 23 | N = loaded_feature_planes.shape[1] 24 | loaded_move_indices = N * loaded_move_arrs[:,0] + loaded_move_arrs[:,1] 25 | 26 | return { feature_planes: loaded_feature_planes.astype(np.float32), 27 | move_indices: loaded_move_indices } 28 | 29 | def loss_func(logits): 30 | move_indices = tf.placeholder(tf.int64, shape=[None]) 31 | 32 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, move_indices) 33 | cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy_mean') 34 | 35 | correct_prediction = tf.equal(tf.argmax(logits,1), move_indices) 36 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 37 | return move_indices, cross_entropy_mean, accuracy 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/OnlineExampleMaker.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class QueueWriter: 4 | def __init__(self, batch_queue, names, shapes, dtypes, minibatch_size, buffer_len): 5 | assert buffer_len >= Nperfile 6 | assert len(names) == len(shapes) == len(dtypes) 7 | self.batch_queue = batch_queue 8 | self.names = names 9 | self.shapes = shapes 10 | self.dtypes = dtypes 11 | self.minibatch_size = minibatch_size 12 | self.buffer_len = buffer_len 13 | self.examples = [] 14 | 15 | def push_example(self, example): 16 | assert len(example) == len(self.names) 17 | for i in xrange(len(example)): 18 | assert example[i].dtype == self.dtypes[i] 19 | self.examples.append(example) 20 | if len(self.examples) >= self.buffer_len: 21 | self.write_minibatch_to_queue() 22 | 23 | def write_minibatch_to_queue(self): 24 | assert len(self.examples) >= self.minibatch_size 25 | 26 | # put minibatch_size random examples at the end of the list 27 | for i in xrange(self.minibatch_size): 28 | a = len(self.examples) - i - 1 29 | if a > 0: 30 | b = random.randint(0, a-1) 31 | self.examples[a], self.examples[b] = self.examples[b], self.examples[a] 32 | 33 | # pop minibatch_size examples off the end of the list 34 | # put each component into a separate numpy batch array 35 | save_dict = {} 36 | for c,name in enumerate(self.names): 37 | batch_shape = (self.minibatch_size,) + self.shapes[c] 38 | batch = np.empty(batch_shape, dtype=self.dtypes[c]) 39 | for i in xrange(self.Nperfile): 40 | batch[i,:] = self.examples[-1-i][c] 41 | save_dict[name] = batch 42 | 43 | del self.examples[-self.Nperfile:] 44 | 45 | self.batch_queue.put(batch, block=True) 46 | 47 | 48 | 49 | def make_game_data_eval(sgf, writer, feature_maker, apply_normalization, rank_allowed, komi_allowed): 50 | reader = SGFReader(sgf) 51 | 52 | if not rank_allowed(reader.black_rank) or not rank_allowed(reader.white_rank): 53 | print "skipping %s b/c of disallowed rank. 
ranks are %s, %s" % (sgf, reader.black_rank, reader.white_rank) 54 | return 55 | 56 | if reader.komi == None: 57 | print "skiping %s b/c there's no komi given" % sgf 58 | return 59 | komi = float(reader.komi) 60 | if not komi_allowed(komi): 61 | print "skipping %s b/c of non-allowed komi \"%s\"" % (sgf, reader.komi) 62 | 63 | if reader.result == None: 64 | print "skipping %s because there's no result given" % sgf 65 | return 66 | elif "B+" in reader.result: 67 | winner = Color.Black 68 | elif "W+" in reader.result: 69 | winner = Color.White 70 | else: 71 | print "skipping %s because I can't figure out the winner from \"%s\"" % (sgf, reader.result) 72 | return 73 | 74 | while True: 75 | feature_planes = feature_maker(reader.board, reader.next_play_color(), komi) 76 | final_score = +1 if reader.next_play_color() == winner else -1 77 | final_score_arr = np.array([final_score], dtype=np.int8) 78 | 79 | feature_planes_normalized = feature_plane.astype(np.float32) 80 | apply_normalization(feature_planes_normalized) 81 | 82 | assert False, "need to add random symmetries and maybe other stuff" 83 | 84 | writer.push_example((feature_planes_normalized, final_score_arr)) 85 | if reader.has_more(): 86 | reader.play_next_move() 87 | else: 88 | break 89 | 90 | 91 | def async_worker_eval(self, batch_queue, sgfs, make_game_data): 92 | writer = QueueWriter(batch_queue=batch_queue, 93 | names=['feature_planes', 'final_scores'], 94 | shapes=[(N,N,Nfeat), (1,)], 95 | dtypes=[np.int8, np.int8], 96 | minibatch_size=128, buffer_len=50000) 97 | while True: 98 | random.shuffle(sgfs) 99 | for sgf in sgfs: 100 | make_game_data(sgf, writer) 101 | 102 | 103 | 104 | 105 | class OnlineExampleQueue: 106 | def __init__(self, sgfs, make_example): 107 | base_dir = '/home/greg/coding/ML/go/NN/data/4dKGS/SGFs/train' 108 | sgfs = [] 109 | for sub_dir in os.listdir(base_dir): 110 | for fn in os.listdir(os.path.join(base_dir, sub_dir)): 111 | sgfs.append(os.path.join(base_dir, sub_dir, fn)) 112 | 113 | self.q = multiprocessing.Queue(maxsize=5) 114 | 115 | make_game_data = functools.partial(make_game_data_eval( 116 | feature_maker=Features.make_feature_planes_stones_4liberties_4history_ko_4captures_komi, 117 | apply_normalization=Normalization.apply_featurewise_normalization_D, 118 | rank_allowed=lambda rank: rank in ['1d', '2d', '3d', '4d', '5d', '6d', '7d', '8d', '9d', '10d'], 119 | komi_allowed=lambda komi: komi in [0.5, 5.5, 6.5, 7.5]) 120 | 121 | self.process = multiprocessing.Process(target=async_worker_eval, args=(self.q, sgfs, make_game_data)) 122 | self.process.daemon = True 123 | self.process.start() 124 | 125 | def next_feed_dict(self, feature_planes_ph, final_scores_ph): 126 | feed_dict_strings = self.q.get(block=True, timeout=5) 127 | return { feature_planes_ph: feed_dict_strings['feature_planes'], 128 | final_scores_ph: feed_dict_strings['final_scores'] } 129 | 130 | 131 | 132 | 133 | 134 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/Policy.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import os.path 4 | import Checkpoint 5 | import Features 6 | import Normalization 7 | 8 | class AllPolicy: 9 | def suggest_moves(self, board): 10 | moves = [] 11 | for x in xrange(board.N): 12 | for y in xrange(board.N): 13 | if board.play_is_legal(x, y, board.color_to_play): 14 | moves.append((x,y)) 15 | return moves 16 | 17 | def softmax(E, temp): 18 | #print "E =\n", E 19 
| expE = np.exp(temp * (E - max(E))) # subtract max to avoid overflow 20 | return expE / np.sum(expE) 21 | 22 | class TFPolicy: 23 | def __init__(self, model, threshold_prob, softmax_temp): 24 | self.model = model 25 | self.threshold_prob = threshold_prob 26 | self.softmax_temp = softmax_temp 27 | 28 | # build the graph 29 | with tf.Graph().as_default(): 30 | with tf.device('/cpu:0'): 31 | self.feature_planes = tf.placeholder(tf.float32, shape=[None, self.model.N, self.model.N, self.model.Nfeat], name='feature_planes') 32 | self.logits = model.inference(self.feature_planes, self.model.N, self.model.Nfeat) 33 | saver = tf.train.Saver(tf.trainable_variables()) 34 | init = tf.initialize_all_variables() 35 | self.sess = tf.Session(config=tf.ConfigProto(log_device_placement=False)) 36 | self.sess.run(init) 37 | checkpoint_dir = os.path.join(model.train_dir, 'checkpoints') 38 | Checkpoint.restore_from_checkpoint(self.sess, saver, checkpoint_dir) 39 | 40 | def suggest_moves(self, board): 41 | board_feature_planes = Features.make_feature_planes_stones_4liberties_4history_ko_4captures(board, board.color_to_play).astype(np.float32) 42 | Normalization.apply_featurewise_normalization_C(board_feature_planes) 43 | feed_dict = {self.feature_planes: board_feature_planes.reshape(1,self.model.N,self.model.N,self.model.Nfeat)} 44 | move_logits = self.sess.run(self.logits, feed_dict).ravel() # ravel flattens to 1D 45 | # zero out illegal moves 46 | for x in xrange(self.model.N): 47 | for y in xrange(self.model.N): 48 | ind = self.model.N * x + y 49 | if not board.play_is_legal(x, y, board.color_to_play): 50 | move_logits[ind] = -1e99 51 | move_probs = softmax(move_logits, self.softmax_temp) 52 | sum_probs = np.sum(move_probs) 53 | if sum_probs == 0: return [] # no legal moves 54 | move_probs /= sum_probs # re-normalize probabilities 55 | 56 | good_moves = [] 57 | cum_prob = 0.0 58 | while cum_prob < self.threshold_prob: 59 | ind = np.argmax(move_probs) 60 | x,y = ind / self.model.N, ind % self.model.N 61 | good_moves.append((x,y)) 62 | prob = move_probs[ind] 63 | cum_prob += prob 64 | move_probs[ind] = 0 65 | 66 | return good_moves 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/SGFReader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | from Board import * 4 | 5 | READING_NAME = 1 6 | READING_DATA = 2 7 | 8 | separators = set(['(', ')', ' ', '\n', '\r', '\t', ';']) 9 | 10 | properties_taking_lists = set(['AB', # add black stone (handicap) 11 | 'AW', # add white stone (handicap) 12 | ]) 13 | 14 | def parse_property_name(file_data, ptr): 15 | while file_data[ptr] in separators: 16 | ptr += 1 17 | if ptr >= len(file_data): return (None, ptr) 18 | name = '' 19 | while file_data[ptr] != '[': 20 | name += file_data[ptr] 21 | ptr += 1 22 | return (name, ptr) 23 | 24 | def parse_property_data(file_data, ptr): 25 | while file_data[ptr].isspace(): 26 | ptr += 1 27 | if file_data[ptr] != '[': 28 | return (None, ptr) 29 | ptr += 1 30 | data = '' 31 | while file_data[ptr] != ']': 32 | data += file_data[ptr] 33 | ptr += 1 34 | ptr += 1 35 | return (data, ptr) 36 | 37 | def parse_property_data_list(file_data, ptr): 38 | data_list = [] 39 | while True: 40 | (data, ptr) = parse_property_data(file_data, ptr) 41 | if data == None: 42 | return (data_list, ptr) 43 | else: 44 | data_list.append(data) 45 | 46 | def parse_vertex(s): 47 | if len(s) == 0: 48 | return None # pass 49 | if 
s == "tt": # GoGoD sometimes uses this to indicate a pass 50 | return None # We are sacrificing >19x19 support here 51 | x = ord(s[0]) - ord('a') 52 | y = ord(s[1]) - ord('a') 53 | return (x,y) 54 | 55 | class SGFParser: 56 | def __init__(self, filename): 57 | with open(filename, 'r') as f: 58 | self.file_data = f.read() 59 | self.ptr = 0 60 | 61 | def __iter__(self): 62 | return self 63 | 64 | def next(self): 65 | (property_name, self.ptr) = parse_property_name(self.file_data, self.ptr) 66 | if property_name == None: 67 | raise StopIteration 68 | elif property_name in properties_taking_lists: 69 | (property_data, self.ptr) = parse_property_data_list(self.file_data, self.ptr) 70 | else: 71 | (property_data, self.ptr) = parse_property_data(self.file_data, self.ptr) 72 | return (property_name, property_data) 73 | 74 | 75 | def test_SGFParser(): 76 | #sgf = "../data/KGS/SGFs/KGS2001/2000-10-10-1.sgf" 77 | sgf = "/home/greg/coding/ML/go/NN/data/GoGoD/modern_games/2007/2007-08-21g.sgf" 78 | parser = SGFParser(sgf) 79 | for property_name, property_data in parser: 80 | print "\"%s\" = \"%s\"" % (property_name, property_data) 81 | 82 | 83 | class SGFReader: 84 | def __init__(self, filename): 85 | self.filename = filename 86 | parser = SGFParser(filename) 87 | self.initial_stones = [] 88 | self.moves = [] 89 | self.black_rank = None 90 | self.white_rank = None 91 | self.result = None 92 | self.board = None 93 | self.komi = None 94 | for property_name, property_data in parser: 95 | if property_name == "SZ": # board size 96 | self.board = Board(int(property_data)) 97 | elif property_name == "AB": # black initial stones 98 | for vertex_str in property_data: 99 | self.initial_stones.append((parse_vertex(vertex_str), Color.Black)) 100 | elif property_name == "AW": # white initial stones 101 | for vertex_str in property_data: 102 | self.initial_stones.append((parse_vertex(vertex_str), Color.White)) 103 | elif property_name == "B": # black plays 104 | self.moves.append((parse_vertex(property_data), Color.Black)) 105 | elif property_name == "W": # white plays 106 | self.moves.append((parse_vertex(property_data), Color.White)) 107 | elif property_name == "BR": # black rank 108 | self.black_rank = property_data 109 | elif property_name == "WR": # white rank 110 | self.white_rank = property_data 111 | elif property_name == "RE": # result 112 | self.result = property_data 113 | elif property_name == "KM": # komi 114 | self.komi = property_data 115 | 116 | if not self.board: 117 | self.board = Board(19) # assume 19x19 if we didn't see a size 118 | 119 | for (x,y), color in self.initial_stones: 120 | self.board.play_stone(x, y, color) 121 | 122 | self.moves_played = 0 123 | 124 | def has_more(self): 125 | return self.moves_played < len(self.moves) 126 | 127 | def peek_next_move(self): 128 | return self.moves[self.moves_played] 129 | 130 | def play_next_move(self): 131 | move = self.moves[self.moves_played] 132 | self.moves_played += 1 133 | vertex, color = move 134 | if vertex: 135 | x,y = vertex 136 | self.board.play_stone(x, y, color) 137 | else: 138 | self.board.play_pass() 139 | return move 140 | 141 | def next_play_color(self): 142 | if self.has_more(): 143 | return self.moves[self.moves_played][1] 144 | elif self.moves: 145 | return flipped_color[self.moves[-1][1]] 146 | elif self.initial_stones: 147 | return flipped_color[self.initial_stones[-1][1]] 148 | else: 149 | return Color.Black 150 | 151 | 152 | def test_SGFReader(): 153 | #sgf = 
"/home/greg/coding/ML/go/NN/data/KGS/SGFs/kgs-19-2008-02-new/2008-02-09-18.sgf" 154 | sgf = "/home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1995/1995-07-01c.sgf" 155 | reader = SGFReader(sgf) 156 | 157 | print "initial position:" 158 | reader.board.show() 159 | 160 | while reader.has_more(): 161 | print "before move, next play color is", color_names[reader.next_play_color()] 162 | vertex, color = reader.play_next_move() 163 | print "after move", vertex, "by", color_names[color], "board is" 164 | reader.board.show() 165 | print "after move, next play color is", color_names[reader.next_play_color()] 166 | 167 | print "Game over." 168 | 169 | if __name__ == "__main__": 170 | #test_SGFParser() 171 | test_SGFReader() 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/SelfPlay.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Self play games as used by DeepMind to train AlphaGo's value network. Play a 4 | # policy against itself, but insert single random move somewhere in the game. 5 | # Use the position immediately after the random move together with the final 6 | # game result as a single training example for the value network. 7 | 8 | def run_self_play_game_with_random_move(engine, N=19, komi=7.5): 9 | board = Board(N) 10 | 11 | engine.clear_board() 12 | engine.set_board_size(N) 13 | engine.set_komi(komi) 14 | 15 | random_policy = RandomPolicy() 16 | 17 | example_feature_planes = None 18 | example_color_to_play = None 19 | 20 | random_move_num = np.randint(0, 450) 21 | print "random_move_num = ", random_move_num 22 | move_num = 0 23 | consecutive_passes = 0 24 | result = None 25 | while consecutive_passes < 2: 26 | if move_num == random_move_num: 27 | move = random_policy.pick_move(board) 28 | board.play_move(move) 29 | engine.move_was_played(move) 30 | example_color_to_play = board.color_to_play 31 | print "chose random move (%d,%d) for %s on move #%d" % (move.x, move.y, color_names[example_color_to_play], move_num) 32 | example_feature_planes = Features.make_feature_planes_stones_3liberties_4history_ko(board, example_color_to_play) 33 | else: 34 | move = engine.generate_move(board) 35 | if move.is_resign(): 36 | result = "B+Resign" if board.color_to_play == Color.Black else "W+Resign" 37 | break 38 | elif move.is_pass(): 39 | consecutive_passes += 1 40 | else: 41 | consecutive_passes = 0 42 | board.play_move(move) 43 | move_num += 1 44 | 45 | if result == None: 46 | result = engine.final_score() 47 | 48 | print "self play game finished. result is", result 49 | 50 | if example_feature_planes != None: 51 | winner = Color.Black if "B+" in result else Color.White 52 | example_outcome = +1 if winner == example_color_to_play else -1 53 | print "produced example with example_outcome = %d" % example_outcome 54 | return (example_feature_planes, example_outcome) 55 | else: 56 | print "game didn't go long enough: no example produced." 
57 | return None 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/Symmetry.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | # in place, hopefully 5 | def apply_symmetry_features_example(many_planes, i, s): 6 | assert len(many_planes.shape) == 4 7 | if (s & 1) != 0: # flip x 8 | many_planes[i,:,:,:] = many_planes[i,::-1,:,:] 9 | if (s & 2) != 0: # flip y 10 | many_planes[i,:,:,:] = many_planes[i,:,::-1,:] 11 | if (s & 4) != 0: # swap x and y 12 | many_planes[i,:,:,:] = many_planes[i,:,:,:].transpose(1, 0, 2) 13 | 14 | 15 | def apply_symmetry_planes(planes, s): 16 | assert len(planes.shape) == 3 17 | if (s & 1) != 0: # flip x 18 | np.copyto(planes, planes[::-1,:,:]) 19 | if (s & 2) != 0: # flip y 20 | np.copyto(planes, planes[:,::-1,:]) 21 | if (s & 4) != 0: # swap x and y 22 | np.copyto(planes, np.transpose(planes[:,:,:], (1,0,2))) 23 | 24 | def apply_symmetry_plane(plane, s): 25 | assert len(plane.shape) == 2 26 | if (s & 1) != 0: # flip x 27 | np.copyto(plane, plane[::-1,:]) 28 | if (s & 2) != 0: # flip y 29 | np.copyto(plane, plane[:,::-1]) 30 | if (s & 4) != 0: # swap x and y 31 | np.copyto(plane, np.transpose(plane[:,:], (1,0))) 32 | 33 | def invert_symmetry_plane(plane, s): 34 | assert len(plane.shape) == 2 35 | # note reverse order of 4,2,1 36 | if (s & 4) != 0: # swap x and y 37 | np.copyto(plane, np.transpose(plane[:,:], (1,0))) 38 | if (s & 2) != 0: # flip y 39 | np.copyto(plane, plane[:,::-1]) 40 | if (s & 1) != 0: # flip x 41 | np.copyto(plane, plane[::-1,:]) 42 | 43 | def apply_symmetry_vertex(vertex, N, s): 44 | assert vertex.size == 2 45 | if (s & 1) != 0: # flip x 46 | vertex[0] = N - vertex[0] - 1 47 | if (s & 2) != 0: # flip y 48 | vertex[1] = N - vertex[1] - 1 49 | if (s & 4) != 0: # swap x and y 50 | np.copyto(vertex, vertex[::-1]) 51 | assert 0 <= vertex[0] < N 52 | assert 0 <= vertex[1] < N 53 | 54 | def get_symmetry_vertex_tuple(vertex, N, s): 55 | x,y = vertex 56 | if (s & 1) != 0: # flip x 57 | x = N - x - 1 58 | if (s & 2) != 0: # flip y 59 | y = N - y - 1 60 | if (s & 4) != 0: # swap x and y 61 | x,y = y,x 62 | assert 0 <= x < N 63 | assert 0 <= y < N 64 | return (x,y) 65 | 66 | def get_inverse_symmetry_vertex_tuple(vertex, N, s): 67 | x,y = vertex 68 | # note reverse order of 4,2,1 69 | if (s & 4) != 0: # swap x and y 70 | x,y = y,x 71 | if (s & 2) != 0: # flip y 72 | y = N - y - 1 73 | if (s & 1) != 0: # flip x 74 | x = N - x - 1 75 | assert 0 <= x < N 76 | assert 0 <= y < N 77 | return (x,y) 78 | 79 | def make_symmetry_batch(features): 80 | assert len(features.shape) == 3 81 | N = features.shape[0] 82 | Nfeat = features.shape[2] 83 | feature_batch = np.empty((8, N, N, Nfeat), dtype=features.dtype) 84 | for s in xrange(8): 85 | feature_batch[s,:,:,:] = features 86 | apply_symmetry_planes(feature_batch[s,:,:,:], s) 87 | return feature_batch 88 | 89 | def average_plane_over_symmetries(planes, N): 90 | assert planes.shape == (8, N*N) 91 | planes = planes.reshape((8, N, N)) 92 | for s in xrange(8): 93 | invert_symmetry_plane(planes[s,:,:], s) 94 | mean_plane = planes.mean(axis=0) 95 | mean_plane = mean_plane.reshape((N*N,)) 96 | return mean_plane 97 | 98 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/TFEngine.py: -------------------------------------------------------------------------------- 1 | import 
tensorflow as tf 2 | import numpy as np 3 | import random 4 | import os 5 | from Engine import * 6 | import Book 7 | import Features 8 | import Normalization 9 | import Symmetry 10 | import Checkpoint 11 | from GTP import Move, true_stderr 12 | from Board import * 13 | 14 | def softmax(E, temp): 15 | #print "E =\n", E 16 | expE = np.exp(temp * (E - max(E))) # subtract max to avoid overflow 17 | return expE / np.sum(expE) 18 | 19 | def sample_from(probs): 20 | cumsum = np.cumsum(probs) 21 | r = random.random() 22 | for i in xrange(len(probs)): 23 | if r <= cumsum[i]: 24 | return i 25 | assert False, "problem with sample_from" 26 | 27 | 28 | class TFEngine(BaseEngine): 29 | def __init__(self, eng_name, model): 30 | super(TFEngine,self).__init__() 31 | self.eng_name = eng_name 32 | self.model = model 33 | self.book = Book.load_GoGoD_book() 34 | 35 | self.last_move_probs = np.zeros((self.model.N, self.model.N,)) 36 | self.kibitz_mode = False 37 | 38 | # build the graph 39 | with tf.Graph().as_default(): 40 | with tf.device('/cpu:0'): 41 | self.feature_planes = tf.placeholder(tf.float32, shape=[None, self.model.N, self.model.N, self.model.Nfeat], name='feature_planes') 42 | self.logits = model.inference(self.feature_planes, self.model.N, self.model.Nfeat) 43 | saver = tf.train.Saver(tf.trainable_variables()) 44 | init = tf.initialize_all_variables() 45 | self.sess = tf.Session(config=tf.ConfigProto(log_device_placement=False)) 46 | self.sess.run(init) 47 | checkpoint_dir = os.path.join(model.train_dir, 'checkpoints') 48 | Checkpoint.restore_from_checkpoint(self.sess, saver, checkpoint_dir) 49 | 50 | 51 | def name(self): 52 | return self.eng_name 53 | 54 | def version(self): 55 | return "1.0" 56 | 57 | def set_board_size(self, N): 58 | if N != self.model.N: 59 | return False 60 | return BaseEngine.set_board_size(self, N) 61 | 62 | def pick_book_move(self, color): 63 | if self.book: 64 | book_move = Book.get_book_move(self.board, self.book) 65 | if book_move: 66 | print "playing book move", book_move 67 | return Move(book_move[0], book_move[1]) 68 | print "no book move" 69 | else: 70 | print "no book" 71 | return None 72 | 73 | def pick_model_move(self, color): 74 | if self.model.Nfeat == 15: 75 | board_feature_planes = Features.make_feature_planes_stones_3liberties_4history_ko(self.board, color) 76 | Normalization.apply_featurewise_normalization_B(board_feature_planes) 77 | elif self.model.Nfeat == 21: 78 | board_feature_planes = Features.make_feature_planes_stones_4liberties_4history_ko_4captures(self.board, color).astype(np.float32) 79 | Normalization.apply_featurewise_normalization_C(board_feature_planes) 80 | else: 81 | assert False 82 | feature_batch = Symmetry.make_symmetry_batch(board_feature_planes) 83 | 84 | feed_dict = {self.feature_planes: feature_batch} 85 | 86 | logit_batch = self.sess.run(self.logits, feed_dict) 87 | move_logits = Symmetry.average_plane_over_symmetries(logit_batch, self.model.N) 88 | softmax_temp = 1.0 89 | move_probs = softmax(move_logits, softmax_temp) 90 | 91 | # zero out illegal moves 92 | for x in xrange(self.model.N): 93 | for y in xrange(self.model.N): 94 | ind = self.model.N * x + y 95 | if not self.board.play_is_legal(x, y, color): 96 | move_probs[ind] = 0 97 | sum_probs = np.sum(move_probs) 98 | if sum_probs == 0: return Move.Pass() # no legal moves, pass 99 | move_probs /= sum_probs # re-normalize probabilities 100 | 101 | pick_best = True 102 | if pick_best: 103 | move_ind = np.argmax(move_probs) 104 | else: 105 | move_ind = sample_from(move_probs) 106 
| move_x = move_ind // self.model.N 107 | move_y = move_ind % self.model.N 108 | 109 | self.last_move_probs = move_probs.reshape((self.board.N, self.board.N)) 110 | 111 | return Move(move_x, move_y) 112 | 113 | def pick_move(self, color): 114 | book_move = self.pick_book_move(color) 115 | if book_move: 116 | if self.kibitz_mode: # in kibitz mode compute model probabilities anyway 117 | self.pick_model_move(color) # ignore the model move 118 | return book_move 119 | return self.pick_model_move(color) 120 | 121 | def get_last_move_probs(self): 122 | return self.last_move_probs 123 | 124 | def stone_played(self, x, y, color): 125 | # if we are in kibitz mode, we want to compute model probabilities for ALL turns 126 | if self.kibitz_mode: 127 | self.pick_model_move(color) 128 | true_stderr.write("probability of played move %s (%d, %d) was %.2f%%\n" % (color_names[color], x, y, 100*self.last_move_probs[x,y])) 129 | 130 | BaseEngine.stone_played(self, x, y, color) 131 | 132 | def toggle_kibitz_mode(self): 133 | self.kibitz_mode = not self.kibitz_mode 134 | return self.kibitz_mode 135 | 136 | 137 | 138 | 139 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/TreeSearch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import copy 3 | from Engine import BaseEngine 4 | from GTP import Move 5 | 6 | # TODO: should the policy network influence the evaluation? 7 | # We could modify the score by the policy probability, possibly in a depth-dependent way. 8 | 9 | def get_board_after_move(board, move): 10 | ret = copy.deepcopy(board) 11 | ret.play_stone(move[0], move[1], board.color_to_play) 12 | return ret 13 | 14 | def minimax_eval(board, policy, value, depth): 15 | if depth == 0: 16 | score = value.evaluate(board) 17 | print " "*(3-depth), "leaf node, score =", score 18 | return score 19 | 20 | moves = policy.suggest_moves(board) 21 | assert len(moves) > 0 22 | best_score = -99 23 | for move in moves: 24 | next_board = get_board_after_move(board, move) 25 | print " "*(3-depth), "trying move", move 26 | score = -1 * minimax_eval(next_board, policy, value, depth-1) 27 | print " "*(3-depth), "move", move, "has score", score 28 | if score > best_score: 29 | best_score = score 30 | return best_score 31 | 32 | def choose_move_minimax(board, policy, value, depth): 33 | assert depth > 0 34 | 35 | moves = policy.suggest_moves(board) 36 | best_score = -99 37 | best_move = None 38 | for move in moves: 39 | next_board = get_board_after_move(board, move) 40 | print "minimax root node: trying (%d,%d)..." % (move[0], move[1]) 41 | score = -1 * minimax_eval(next_board, policy, value, depth-1) 42 | print "minimax root node: (%d,%d) gives score %f" % (move[0], move[1], score) 43 | if score > best_score: 44 | best_score, best_move = score, move 45 | return best_move 46 | 47 | 48 | # Return the value of the position if it's between lower and upper. 49 | # If it's <= lower, return lower; if it's >= upper, return upper.
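# (This is a negamax-style, fail-hard alpha-beta search: scores are always from the
# point of view of the side to move, so each recursive call negates the child's score
# and searches it with the negated, swapped window (-upper, -lower). A score >= upper
# "fails high" and is clamped to upper, since the opponent would avoid this line.
# choose_move_alphabeta below starts from the window (-1, +1), which presumably
# matches the range of scores returned by value.evaluate().)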
50 | def alphabeta_eval(board, policy, value, lower, upper, depth): 51 | if depth == 0: 52 | score = value.evaluate(board) 53 | print " "*(3-depth), "leaf node, score =", score 54 | return score 55 | 56 | moves = policy.suggest_moves(board) 57 | assert len(moves) > 0 58 | for move in moves: 59 | next_board = get_board_after_move(board, move) 60 | print " "*(3-depth), "trying move", move 61 | score = -1 * alphabeta_eval(next_board, policy, value, -upper, -lower, depth-1) 62 | print " "*(3-depth), "move", move, "has score", score 63 | if score >= upper: 64 | print " "*(3-depth), "fail high!" 65 | return upper 66 | if score > lower: 67 | lower = score 68 | return lower 69 | 70 | def choose_move_alphabeta(board, policy, value, depth): 71 | assert depth > 0 72 | 73 | moves = policy.suggest_moves(board) 74 | lower = -1 75 | upper = +1 76 | best_move = None 77 | for move in moves: 78 | next_board = get_board_after_move(board, move) 79 | print "alpha-beta root node: trying (%d,%d)..." % (move[0], move[1]) 80 | score = -1 * alphabeta_eval(next_board, policy, value, -upper, -lower, depth-1) 81 | print "alpha-beta root node: (%d,%d) gives score %f" % (move[0], move[1], score) 82 | if score > lower: 83 | lower, best_move = score, move 84 | return best_move 85 | 86 | 87 | 88 | class TreeSearchEngine(BaseEngine): 89 | def __init__(self, policy, value): 90 | self.policy = policy 91 | self.value = value 92 | def name(self): 93 | return "TreeSearch" 94 | def version(self): 95 | return "1.0" 96 | def pick_move(self, color): 97 | x,y = choose_move_alphabeta(self.board, self.policy, self.value, depth=3) 98 | return Move(x,y) 99 | def get_position_eval(self): 100 | return self.value.evaluate(self.board) 101 | 102 | if __name__ == '__main__': 103 | import GTP 104 | fclient = GTP.redirect_all_output("log_engine.txt") 105 | 106 | import Policy 107 | import MoveModels 108 | import Eval 109 | import EvalModels 110 | 111 | #policy = Policy.AllPolicy() 112 | policy = Policy.TFPolicy(model=MoveModels.Conv12PosDepELU(N=19, Nfeat=21), threshold_prob=0.8, softmax_temp=1.0) 113 | value = Eval.TFEval(EvalModels.Conv11PosDepFC1ELU(N=19, Nfeat=21)) 114 | 115 | engine = TreeSearchEngine(policy, value) 116 | 117 | gtp = GTP.GTP(engine, fclient) 118 | gtp.loop() 119 | 120 | 121 | 122 | -------------------------------------------------------------------------------- /showcase-studies/go-NN/engine/games_with_illegal_moves_sorted.txt: -------------------------------------------------------------------------------- 1 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1800-49/1802-01-22a.sgf 2 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1970-75/1971-06-05a.sgf 3 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1976-79/1976-09-08a.sgf 4 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1980/1980-10-08a.sgf 5 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1982/1982-09-29b.sgf 6 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1983/1983-08-09b.sgf 7 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1985/1985-11-07a.sgf 8 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1988/1988-01-12b.sgf 9 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1988/1988-10-27a.sgf 10 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1991/1991-07-10b.sgf 11 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1992/1992-06-11a.sgf 12 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1992/1992-12-00c.sgf 13 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1993/1993-06-08b.sgf 14 | 
/home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1993/1993-06-10d.sgf 15 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1994/1994-06-30f.sgf 16 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1995/1995-03-12g.sgf 17 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1995/1995-05-15c.sgf 18 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1995/1995-06-08j.sgf 19 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1996/1996-07-31b.sgf 20 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1997/1997-03-18o.sgf 21 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1997/1997-12-04a.sgf 22 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1998/1998-05-11b.sgf 23 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/1999/1999-04-10h.sgf 24 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/2001/2001-04-05aa.sgf 25 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/2001/2001-04-23a.sgf 26 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/2001/2001-05-10aa.sgf 27 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/2002/2002-04-25q.sgf 28 | /home/greg/coding/ML/go/NN/data/GoGoD/sets/train/2004/2004-05-10f.sgf 29 | -------------------------------------------------------------------------------- /showcase-studies/samuel_checkers/README.md: -------------------------------------------------------------------------------- 1 | Checkers AI: a look at Arthur Samuel's ideas 2 | ===================================== 3 | 4 | Usage 5 | --- 6 | Download project. Navigate to directory. Do `python game.py`, and type in `arthur` when prompted for agent module. 7 | 8 | (Note: to adjust how long the computer player "thinks" about its next move, you can vary the default depth parameter of the look ahead search. Go into `arthur.py` and change `depth=x` parameter of the function `move_function`.) 9 | 10 | Files 11 | --- 12 | 13 | 14 | `checkers.py` 15 | 16 | This file contains the definition of the CheckerBoard class. Its methods include new game 17 | initialization, ASCII printed output, and getting legal moves from a given state. 18 | 19 | `agent.py` 20 | 21 | This file contains the implementation of the AI CheckersAgent class. All that is required 22 | of a CheckersAgent is that it have a function move_function() that takes in a game state and 23 | returns a legal move. 24 | 25 | `arthur.py` 26 | 27 | This file contains the implementation of an agent that is inspired by Arthur Samuel's 28 | historic machine learning checkers program. 29 | 30 | `game.py` 31 | 32 | This file contains the harness for running an actual game of checkers. 33 | 34 | > Written with [StackEdit](https://stackedit.io/). 35 | -------------------------------------------------------------------------------- /showcase-studies/samuel_checkers/agent.py: -------------------------------------------------------------------------------- 1 | # Andrew Edwards -- almostimplemented.com 2 | # ======================================= 3 | # A checkers agent class. 
4 | # 5 | # Last updated: July 21, 2014 6 | 7 | 8 | class CheckersAgent: 9 | def __init__(self, move_function): 10 | self.move_function = move_function 11 | 12 | def make_move(self, board): 13 | return self.move_function(board) 14 | -------------------------------------------------------------------------------- /showcase-studies/samuel_checkers/random_agent.py: -------------------------------------------------------------------------------- 1 | # Andrew Edwards -- almostimplemented.com 2 | # ======================================= 3 | # A checkers agent that picks a random move 4 | # 5 | # Last updated: July 21, 2014 6 | import random 7 | 8 | def move_function(board): 9 | return random.choice(board.get_moves()) 10 | -------------------------------------------------------------------------------- /showcase-studies/samuel_checkers/test.py: -------------------------------------------------------------------------------- 1 | import checkers 2 | import agent 3 | import arthur 4 | import random_agent 5 | 6 | BLACK, WHITE = 0, 1 7 | 8 | f = open('logfile', 'w') 9 | 10 | for i in range(100): 11 | print "game: " + str(i) 12 | B = checkers.CheckerBoard() 13 | cpu_1 = agent.CheckersAgent(lambda board: arthur.move_function(board, 4)) 14 | cpu_2 = agent.CheckersAgent(lambda board: arthur.move_function(board, 6)) 15 | current_player = B.active 16 | turn = 1 17 | while not B.is_over(): 18 | f.write(str(B)) 19 | if turn % 100 == 0: 20 | print "# of turns: " + str(turn) 21 | B.make_move(cpu_1.make_move(B)) 22 | if B.active == current_player: 23 | continue 24 | current_player = B.active 25 | turn += 1 26 | while not B.is_over() and B.active == current_player: 27 | B.make_move(cpu_2.make_move(B)) 28 | current_player = B.active 29 | if B.active == WHITE: 30 | print "Congrats Black, you win!" 31 | else: 32 | print "Congrats White, you win!" 33 | print "Game took %i turns" % turn 34 | -------------------------------------------------------------------------------- /showcase-studies/td-gammon/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .env/ 3 | logs/ 4 | summaries/ 5 | models/ 6 | *.pyc 7 | -------------------------------------------------------------------------------- /showcase-studies/td-gammon/Makefile: -------------------------------------------------------------------------------- 1 | train: 2 | python main.py 3 | 4 | play: 5 | python main.py --play --restore -------------------------------------------------------------------------------- /showcase-studies/td-gammon/README.md: -------------------------------------------------------------------------------- 1 | # TD-Gammon 2 | 3 | Originally from https://github.com/fomorians/td-gammon 4 | 5 | Implementation of [TD-Gammon](http://www.bkgm.com/articles/tesauro/tdl.html) in TensorFlow. 6 | 7 | Before DeepMind tackled playing Atari games or built AlphaGo there was TD-Gammon, the first algorithm to reach an expert level of play in backgammon. Gerald Tesauro published his paper in 1992 describing TD-Gammon as a neural network trained with reinforcement learning. It is referenced in both Atari and AlphaGo research papers and helped set the groundwork for many of the advancements made in the last few years. 8 | 9 | The code features [eligibility traces](https://webdocs.cs.ualberta.ca/~sutton/book/ebook/node87.html#fig:GDTDl) on the gradients which are an elegant way to assign credit to actions made in the past. 10 | 11 | ## Training 12 | 13 | 1. 
[Install TensorFlow](https://www.tensorflow.org/versions/r0.7/get_started/os_setup.html#pip-installation). 14 | 2. Clone the repo: `git clone https://github.com/fomorians/td-gammon.git && cd td-gammon` 15 | 3. Run training: `python main.py` 16 | 17 | ## Play 18 | 19 | To play against a trained model: `python main.py --play --restore` 20 | 21 | ## Things to try 22 | 23 | - Compare with and without eligibility traces by replacing the trace with the unmodified gradient. 24 | - Try different activation functions on the hidden layer. 25 | - Expand the board representation. Currently it uses the "compact" representation from the paper. A full board representation should remove some ambiguity between board states. 26 | - Increase the number of turns the agent will look at before making a move. The paper used a 2-ply and 3-ply search while this implementation only uses 1-ply. 27 | -------------------------------------------------------------------------------- /showcase-studies/td-gammon/backgammon/README.md: -------------------------------------------------------------------------------- 1 | Backgammon 2 | === 3 | 4 | This is a fork of https://github.com/awni/backgammon. 5 | -------------------------------------------------------------------------------- /showcase-studies/td-gammon/backgammon/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /showcase-studies/td-gammon/backgammon/agents/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/td-gammon/backgammon/agents/__init__.py -------------------------------------------------------------------------------- /showcase-studies/td-gammon/backgammon/agents/human_agent.py: -------------------------------------------------------------------------------- 1 | from ..game import Game 2 | 3 | class HumanAgent(object): 4 | def __init__(self, player): 5 | self.player = player 6 | self.name = 'Human' 7 | 8 | def get_action(self, moves, game=None): 9 | if not moves: 10 | input("No moves for you...(hit enter)") 11 | return None 12 | 13 | while True: 14 | while True: 15 | mv1 = input('Please enter a move "," ("%s" for on the board, "%s" for off the board): ' % (Game.ON, Game.OFF)) 16 | mv1 = self.get_formatted_move(mv1) 17 | if not mv1: 18 | print('Bad format enter e.g. "3,4"') 19 | else: 20 | break 21 | 22 | while True: 23 | mv2 = input('Please enter a second move (enter to skip): ') 24 | if mv2 == '': 25 | mv2 = None 26 | break 27 | mv2 = self.get_formatted_move(mv2) 28 | if not mv2: 29 | print('Bad format enter e.g. 
"3,4"') 30 | else: 31 | break 32 | 33 | if mv2: 34 | move = (mv1, mv2) 35 | else: 36 | move = (mv1,) 37 | 38 | if move in moves: 39 | return move 40 | elif move[::-1] in moves: 41 | move = move[::-1] 42 | return move 43 | else: 44 | print("You can't play that move") 45 | 46 | return None 47 | 48 | def get_formatted_move(self, move): 49 | try: 50 | start, end = move.split(",") 51 | if start == Game.ON: 52 | return (start, int(end)) 53 | if end == Game.OFF: 54 | return (int(start), end) 55 | return (int(start), int(end)) 56 | except: 57 | return False 58 | -------------------------------------------------------------------------------- /showcase-studies/td-gammon/backgammon/agents/random_agent.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | class RandomAgent(object): 4 | 5 | def __init__(self, player): 6 | self.player = player 7 | self.name = 'Random' 8 | 9 | def get_action(self, moves, game=None): 10 | return random.choice(list(moves)) if moves else None 11 | -------------------------------------------------------------------------------- /showcase-studies/td-gammon/backgammon/agents/td_gammon_agent.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class TDAgent(object): 4 | 5 | def __init__(self, player, model): 6 | self.player = player 7 | self.model = model 8 | self.name = 'TD-Gammon' 9 | 10 | def get_action(self, actions, game): 11 | """ 12 | Return best action according to self.evaluationFunction, 13 | with no lookahead. 14 | """ 15 | v_best = 0 16 | a_best = None 17 | 18 | for a in actions: 19 | ateList = game.take_action(a, self.player) 20 | features = game.extract_features(game.opponent(self.player)) 21 | v = self.model.get_output(features) 22 | v = 1. 
- v if self.player == game.players[0] else v 23 | if v > v_best: 24 | v_best = v 25 | a_best = a 26 | game.undo_action(a, self.player, ateList) 27 | 28 | return a_best 29 | -------------------------------------------------------------------------------- /showcase-studies/td-gammon/checkpoints/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "checkpoint-178" 2 | all_model_checkpoint_paths: "checkpoint-178" 3 | -------------------------------------------------------------------------------- /showcase-studies/td-gammon/checkpoints/checkpoint-178: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/td-gammon/checkpoints/checkpoint-178 -------------------------------------------------------------------------------- /showcase-studies/td-gammon/checkpoints/checkpoint-178.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/td-gammon/checkpoints/checkpoint-178.meta -------------------------------------------------------------------------------- /showcase-studies/td-gammon/checkpoints/checkpoint-447880: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/td-gammon/checkpoints/checkpoint-447880 -------------------------------------------------------------------------------- /showcase-studies/td-gammon/checkpoints/checkpoint-447880.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/td-gammon/checkpoints/checkpoint-447880.meta -------------------------------------------------------------------------------- /showcase-studies/td-gammon/checkpoints/checkpoint-7894: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/td-gammon/checkpoints/checkpoint-7894 -------------------------------------------------------------------------------- /showcase-studies/td-gammon/checkpoints/checkpoint-7894.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/showcase-studies/td-gammon/checkpoints/checkpoint-7894.meta -------------------------------------------------------------------------------- /showcase-studies/td-gammon/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tensorflow as tf 3 | 4 | from model import Model 5 | 6 | flags = tf.app.flags 7 | FLAGS = flags.FLAGS 8 | 9 | flags.DEFINE_boolean('test', False, 'If true, test against a random strategy.') 10 | flags.DEFINE_boolean('play', False, 'If true, play against a trained TD-Gammon strategy.') 11 | flags.DEFINE_boolean('restore', False, 'If true, restore a checkpoint before training.') 12 | 13 | model_path = os.environ.get('MODEL_PATH', 'models/') 14 | summary_path = os.environ.get('SUMMARY_PATH', 'summaries/') 15 | checkpoint_path = os.environ.get('CHECKPOINT_PATH', 
'checkpoints/') 16 | 17 | if not os.path.exists(model_path): 18 | os.makedirs(model_path) 19 | 20 | if not os.path.exists(checkpoint_path): 21 | os.makedirs(checkpoint_path) 22 | 23 | if not os.path.exists(summary_path): 24 | os.makedirs(summary_path) 25 | 26 | if __name__ == '__main__': 27 | graph = tf.Graph() 28 | sess = tf.Session(graph=graph) 29 | with sess.as_default(), graph.as_default(): 30 | model = Model(sess, model_path, summary_path, checkpoint_path, restore=FLAGS.restore) 31 | if FLAGS.test: 32 | model.test(episodes=1000) 33 | elif FLAGS.play: 34 | model.play() 35 | else: 36 | model.train() 37 | -------------------------------------------------------------------------------- /slides/CNNs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/slides/CNNs.pdf -------------------------------------------------------------------------------- /slides/tensorflow_intro.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/slides/tensorflow_intro.pdf -------------------------------------------------------------------------------- /tf_examples/convnet_mnist.py: -------------------------------------------------------------------------------- 1 | ''' 2 | A Convolutional Network implementation example using TensorFlow library. 3 | This example is using the MNIST database of handwritten digits 4 | (http://yann.lecun.com/exdb/mnist/) 5 | Author: Aymeric Damien 6 | Project: https://github.com/aymericdamien/TensorFlow-Examples/ 7 | ''' 8 | 9 | from __future__ import print_function 10 | 11 | import tensorflow as tf 12 | 13 | # Import MNIST data 14 | from tensorflow.examples.tutorials.mnist import input_data 15 | mnist = input_data.read_data_sets("/tmp/data/", one_hot=True) 16 | 17 | # Parameters 18 | learning_rate = 0.001 19 | training_iters = 200000 20 | batch_size = 128 21 | display_step = 10 22 | 23 | # Network Parameters 24 | n_input = 784 # MNIST data input (img shape: 28*28) 25 | n_classes = 10 # MNIST total classes (0-9 digits) 26 | dropout = 0.75 # Dropout, probability to keep units 27 | 28 | # tf Graph input 29 | x = tf.placeholder(tf.float32, [None, n_input]) 30 | y = tf.placeholder(tf.float32, [None, n_classes]) 31 | keep_prob = tf.placeholder(tf.float32) #dropout (keep probability) 32 | 33 | 34 | # Create some wrappers for simplicity 35 | def conv2d(x, W, b, strides=1): 36 | # Conv2D wrapper, with bias and relu activation 37 | x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME') 38 | x = tf.nn.bias_add(x, b) 39 | return tf.nn.relu(x) 40 | 41 | 42 | def maxpool2d(x, k=2): 43 | # MaxPool2D wrapper 44 | return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], 45 | padding='SAME') 46 | 47 | 48 | # Create model 49 | def conv_net(x, weights, biases, dropout): 50 | # Reshape input picture 51 | x = tf.reshape(x, shape=[-1, 28, 28, 1]) 52 | 53 | # Convolution Layer 54 | conv1 = conv2d(x, weights['wc1'], biases['bc1']) 55 | # Max Pooling (down-sampling) 56 | conv1 = maxpool2d(conv1, k=2) 57 | 58 | # Convolution Layer 59 | conv2 = conv2d(conv1, weights['wc2'], biases['bc2']) 60 | # Max Pooling (down-sampling) 61 | conv2 = maxpool2d(conv2, k=2) 62 | 63 | # Fully connected layer 64 | # Reshape conv2 output to fit fully connected layer input 65 | fc1 = tf.reshape(conv2, [-1, 
weights['wd1'].get_shape().as_list()[0]]) 66 | fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1']) 67 | fc1 = tf.nn.relu(fc1) 68 | # Apply Dropout 69 | fc1 = tf.nn.dropout(fc1, dropout) 70 | 71 | # Output, class prediction 72 | out = tf.add(tf.matmul(fc1, weights['out']), biases['out']) 73 | return out 74 | 75 | # Store layers weight & bias 76 | weights = { 77 | # 5x5 conv, 1 input, 32 outputs 78 | 'wc1': tf.Variable(tf.random_normal([5, 5, 1, 32])), 79 | # 5x5 conv, 32 inputs, 64 outputs 80 | 'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])), 81 | # fully connected, 7*7*64 inputs, 1024 outputs 82 | 'wd1': tf.Variable(tf.random_normal([7*7*64, 1024])), 83 | # 1024 inputs, 10 outputs (class prediction) 84 | 'out': tf.Variable(tf.random_normal([1024, n_classes])) 85 | } 86 | 87 | biases = { 88 | 'bc1': tf.Variable(tf.random_normal([32])), 89 | 'bc2': tf.Variable(tf.random_normal([64])), 90 | 'bd1': tf.Variable(tf.random_normal([1024])), 91 | 'out': tf.Variable(tf.random_normal([n_classes])) 92 | } 93 | 94 | # Construct model 95 | pred = conv_net(x, weights, biases, keep_prob) 96 | 97 | # Define loss and optimizer 98 | cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y)) 99 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) 100 | 101 | # Evaluate model 102 | correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1)) 103 | accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32)) 104 | 105 | # Initializing the variables 106 | init = tf.initialize_all_variables() 107 | 108 | # Launch the graph 109 | with tf.Session() as sess: 110 | sess.run(init) 111 | step = 1 112 | # Keep training until reach max iterations 113 | while step * batch_size < training_iters: 114 | batch_x, batch_y = mnist.train.next_batch(batch_size) 115 | # Run optimization op (backprop) 116 | sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, 117 | keep_prob: dropout}) 118 | if step % display_step == 0: 119 | # Calculate batch loss and accuracy 120 | loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x, 121 | y: batch_y, 122 | keep_prob: 1.}) 123 | print("Iter " + str(step*batch_size) + ", Minibatch Loss= " + \ 124 | "{:.6f}".format(loss) + ", Training Accuracy= " + \ 125 | "{:.5f}".format(acc)) 126 | step += 1 127 | print("Optimization Finished!") 128 | 129 | # Calculate accuracy for 256 mnist test images 130 | print("Testing Accuracy:", \ 131 | sess.run(accuracy, feed_dict={x: mnist.test.images[:256], 132 | y: mnist.test.labels[:256], 133 | keep_prob: 1.})) -------------------------------------------------------------------------------- /tf_examples/linear_regression.py: -------------------------------------------------------------------------------- 1 | ''' 2 | A linear regression learning algorithm example using TensorFlow library. 
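Fits a line y = W*x + b to a small 1-D dataset by minimizing the mean squared error with plain gradient descent, then plots the fitted line against the training and test data.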
3 | Author: Aymeric Damien 4 | Project: https://github.com/aymericdamien/TensorFlow-Examples/ 5 | ''' 6 | 7 | from __future__ import print_function 8 | 9 | import tensorflow as tf 10 | import numpy 11 | import matplotlib.pyplot as plt 12 | rng = numpy.random 13 | 14 | # Parameters 15 | learning_rate = 0.01 16 | training_epochs = 1000 17 | display_step = 50 18 | 19 | # Training Data 20 | train_X = numpy.asarray([3.3,4.4,5.5,6.71,6.93,4.168,9.779,6.182,7.59,2.167, 21 | 7.042,10.791,5.313,7.997,5.654,9.27,3.1]) 22 | train_Y = numpy.asarray([1.7,2.76,2.09,3.19,1.694,1.573,3.366,2.596,2.53,1.221, 23 | 2.827,3.465,1.65,2.904,2.42,2.94,1.3]) 24 | n_samples = train_X.shape[0] 25 | 26 | # tf Graph Input 27 | X = tf.placeholder("float") 28 | Y = tf.placeholder("float") 29 | 30 | # Set model weights 31 | W = tf.Variable(rng.randn(), name="weight") 32 | b = tf.Variable(rng.randn(), name="bias") 33 | 34 | # Construct a linear model 35 | pred = tf.add(tf.mul(X, W), b) 36 | 37 | # Mean squared error 38 | cost = tf.reduce_sum(tf.pow(pred-Y, 2))/(2*n_samples) 39 | # Gradient descent 40 | optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost) 41 | 42 | # Initializing the variables 43 | init = tf.initialize_all_variables() 44 | 45 | # Launch the graph 46 | with tf.Session() as sess: 47 | sess.run(init) 48 | 49 | # Fit all training data 50 | for epoch in range(training_epochs): 51 | for (x, y) in zip(train_X, train_Y): 52 | sess.run(optimizer, feed_dict={X: x, Y: y}) 53 | 54 | # Display logs per epoch step 55 | if (epoch+1) % display_step == 0: 56 | c = sess.run(cost, feed_dict={X: train_X, Y:train_Y}) 57 | print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(c), \ 58 | "W=", sess.run(W), "b=", sess.run(b)) 59 | 60 | print("Optimization Finished!") 61 | training_cost = sess.run(cost, feed_dict={X: train_X, Y: train_Y}) 62 | print("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n') 63 | 64 | # Graphic display 65 | plt.plot(train_X, train_Y, 'ro', label='Original data') 66 | plt.plot(train_X, sess.run(W) * train_X + sess.run(b), label='Fitted line') 67 | plt.legend() 68 | plt.show() 69 | 70 | # Testing example, as requested (Issue #2) 71 | test_X = numpy.asarray([6.83, 4.668, 8.9, 7.91, 5.7, 8.7, 3.1, 2.1]) 72 | test_Y = numpy.asarray([1.84, 2.273, 3.2, 2.831, 2.92, 3.24, 1.35, 1.03]) 73 | 74 | print("Testing... (Mean square loss Comparison)") 75 | testing_cost = sess.run( 76 | tf.reduce_sum(tf.pow(pred - Y, 2)) / (2 * test_X.shape[0]), 77 | feed_dict={X: test_X, Y: test_Y}) # same function as cost above 78 | print("Testing cost=", testing_cost) 79 | print("Absolute mean square loss difference:", abs( 80 | training_cost - testing_cost)) 81 | 82 | plt.plot(test_X, test_Y, 'bo', label='Testing data') 83 | plt.plot(train_X, sess.run(W) * train_X + sess.run(b), label='Fitted line') 84 | plt.legend() 85 | plt.show() -------------------------------------------------------------------------------- /tf_examples/mlp_mnist.py: -------------------------------------------------------------------------------- 1 | ''' 2 | A Multilayer Perceptron implementation example using TensorFlow library. 
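The model is a fully connected network with two hidden layers of 256 ReLU units each and a linear output layer, trained with a softmax cross-entropy loss and the Adam optimizer.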
3 | This example is using the MNIST database of handwritten digits 4 | (http://yann.lecun.com/exdb/mnist/) 5 | Author: Aymeric Damien 6 | Project: https://github.com/aymericdamien/TensorFlow-Examples/ 7 | ''' 8 | 9 | from __future__ import print_function 10 | 11 | # Import MNIST data 12 | from tensorflow.examples.tutorials.mnist import input_data 13 | mnist = input_data.read_data_sets("/tmp/data/", one_hot=True) 14 | 15 | import tensorflow as tf 16 | 17 | # Parameters 18 | learning_rate = 0.001 19 | training_epochs = 15 20 | batch_size = 100 21 | display_step = 1 22 | 23 | # Network Parameters 24 | n_hidden_1 = 256 # 1st layer number of features 25 | n_hidden_2 = 256 # 2nd layer number of features 26 | n_input = 784 # MNIST data input (img shape: 28*28) 27 | n_classes = 10 # MNIST total classes (0-9 digits) 28 | 29 | # tf Graph input 30 | x = tf.placeholder("float", [None, n_input]) 31 | y = tf.placeholder("float", [None, n_classes]) 32 | 33 | 34 | # Create model 35 | def multilayer_perceptron(x, weights, biases): 36 | # Hidden layer with RELU activation 37 | layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1']) 38 | layer_1 = tf.nn.relu(layer_1) 39 | # Hidden layer with RELU activation 40 | layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2']) 41 | layer_2 = tf.nn.relu(layer_2) 42 | # Output layer with linear activation 43 | out_layer = tf.matmul(layer_2, weights['out']) + biases['out'] 44 | return out_layer 45 | 46 | # Store layers weight & bias 47 | weights = { 48 | 'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])), 49 | 'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])), 50 | 'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes])) 51 | } 52 | biases = { 53 | 'b1': tf.Variable(tf.random_normal([n_hidden_1])), 54 | 'b2': tf.Variable(tf.random_normal([n_hidden_2])), 55 | 'out': tf.Variable(tf.random_normal([n_classes])) 56 | } 57 | 58 | # Construct model 59 | pred = multilayer_perceptron(x, weights, biases) 60 | 61 | # Define loss and optimizer 62 | cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y)) 63 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) 64 | 65 | # Initializing the variables 66 | init = tf.initialize_all_variables() 67 | 68 | # Launch the graph 69 | with tf.Session() as sess: 70 | sess.run(init) 71 | 72 | # Training cycle 73 | for epoch in range(training_epochs): 74 | avg_cost = 0. 
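        # avg_cost accumulates c / total_batch over the epoch's minibatches, so after the inner loop it equals the epoch's average minibatch cross-entropy loss.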
75 | total_batch = int(mnist.train.num_examples/batch_size) 76 | # Loop over all batches 77 | for i in range(total_batch): 78 | batch_x, batch_y = mnist.train.next_batch(batch_size) 79 | # Run optimization op (backprop) and cost op (to get loss value) 80 | _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, 81 | y: batch_y}) 82 | # Compute average loss 83 | avg_cost += c / total_batch 84 | # Display logs per epoch step 85 | if epoch % display_step == 0: 86 | print("Epoch:", '%04d' % (epoch+1), "cost=", \ 87 | "{:.9f}".format(avg_cost)) 88 | print("Optimization Finished!") 89 | 90 | # Test model 91 | correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1)) 92 | # Calculate accuracy 93 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float")) 94 | print("Accuracy:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels})) -------------------------------------------------------------------------------- /theory/MC/MCTS_(English).svg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/MC/MCTS_(English).svg.png -------------------------------------------------------------------------------- /theory/alfa-beta/Selection_087.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/Selection_087.png -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img001.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img001.gif -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img002.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img002.gif -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img003.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img003.gif -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img004.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img004.gif -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img005.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img005.gif -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img006.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img006.gif 
-------------------------------------------------------------------------------- /theory/alfa-beta/tree/img007.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img007.gif -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img008.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img008.gif -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img009.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img009.gif -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img010.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img010.gif -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img011.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img011.gif -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img012.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img012.gif -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img013.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img013.gif -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img014.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img014.gif -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img015.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img015.gif -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img016.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img016.gif -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img017.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img017.gif -------------------------------------------------------------------------------- /theory/alfa-beta/tree/img018.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/alfa-beta/tree/img018.gif -------------------------------------------------------------------------------- /theory/mini-max/Selection_086.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michalsustr/RL_workshop/557aca58bd8341fb778c2ac42319311d3093614e/theory/mini-max/Selection_086.png --------------------------------------------------------------------------------