├── .gitignore
├── MNIST.ipynb
├── Mock AlphaGo (1) Preprocess Pipeline.ipynb
├── Mock AlphaGo (2) Policy Network.ipynb
├── Mock AlphaGo (3) Reinforced Learning.ipynb
├── Mock AlphaGo (3B) Policy Network - Reinforced Learning in mass production.ipynb
├── Mock AlphaGo Zero (1) Preprocess Pipeline.ipynb
├── Mock AlphaGo Zero (2) Policy and Value Network.ipynb
├── Mock AlphaGo Zero (3B) Reinforced Learning.ipynb
├── Monitoring.ipynb
├── README.md
├── RocAlphaGo
├── go.pxd
├── go.pyx
├── go_data.pxd
├── go_data.pyx
├── preprocessing.pxd
├── preprocessing.pyx
├── preprocessing_rollout.pxd
└── preprocessing_rollout.pyx
├── game.py
├── modeling.py
├── modelingZero.py
├── setup.py
└── sgfutil.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 |
49 | # Translations
50 | *.mo
51 | *.pot
52 |
53 | # Django stuff:
54 | *.log
55 | local_settings.py
56 |
57 | # Flask stuff:
58 | instance/
59 | .webassets-cache
60 |
61 | # Scrapy stuff:
62 | .scrapy
63 |
64 | # Sphinx documentation
65 | docs/_build/
66 |
67 | # PyBuilder
68 | target/
69 |
70 | # Jupyter Notebook
71 | .ipynb_checkpoints
72 |
73 | # pyenv
74 | .python-version
75 |
76 | # celery beat schedule file
77 | celerybeat-schedule
78 |
79 | # SageMath parsed files
80 | *.sage.py
81 |
82 | # dotenv
83 | .env
84 |
85 | # virtualenv
86 | .venv
87 | venv/
88 | ENV/
89 |
90 | # Spyder project settings
91 | .spyderproject
92 | .spyproject
93 |
94 | # Rope project settings
95 | .ropeproject
96 |
97 | # mkdocs documentation
98 | /site
99 |
100 | # mypy
101 | .mypy_cache/
102 |
103 | ### nohup ###
104 | nohup.out
105 |
106 | ### RocAlphaGo Cython ###
107 | /RocAlphaGo/*.c
108 |
109 |
--------------------------------------------------------------------------------
/Mock AlphaGo (1) Preprocess Pipeline.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Preprocess\n",
8 | " Go game datasets are usually stored in the [SGF](http://www.red-bean.com/sgf/go.html) file format. We need to transform each SGF file into Caffe2 tensors made of 48 feature planes of size 19x19, as described by DeepMind.\n",
9 | "\n",
10 | "| Feature | # of planes | Description\n",
11 | "|--------------|-------------|-------------------------\n",
12 | "| Stone colour | 3 | Player stone / opponent stone / empty\n",
13 | "| Ones | 1 | A constant plane filled with 1\n",
14 | "| Turns since | 8 | How many turns since a move was played\n",
15 | "| Liberties | 8 | Number of liberties (empty adjacent points)\n",
16 | "| Capture size | 8 | How many opponent stones would be captured\n",
17 | "| Self-atari size | 8 | How many of own stones would be captured\n",
18 | "| Liberties after move | 8 | Number of liberties after this move is played\n",
19 | "| Ladder capture | 1 | Whether a move at this point is a successful ladder capture\n",
20 | "| Ladder escape | 1 | Whether a move at this point is a successful ladder escape\n",
21 | "| Sensibleness | 1 | Whether a move is legal and does not fill its own eyes\n",
22 | "| Zeros | 1 | A constant plane filled with 0\n",
23 | "| Player color | 1 | Whether current player is black"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": 4,
29 | "metadata": {
30 | "collapsed": true
31 | },
32 | "outputs": [],
33 | "source": [
34 | "from preprocessing import Preprocess\n",
35 | "from go import GameState, BLACK, WHITE, EMPTY\n",
36 | "import os, sgf\n",
37 | "import numpy as np\n",
38 | "\n",
39 | "# input SGF files\n",
40 | "FILE_FOLDER = os.path.join(os.path.expanduser('~'), 'python', 'tutorial_files','go')\n",
41 | "# output archive SGF files\n",
42 | "SUCCEED_FOLDER = os.path.join(os.path.expanduser('~'), 'python', 'tutorial_files','succeed')\n",
43 | "FAIL_FOLDER = os.path.join(os.path.expanduser('~'), 'python', 'tutorial_files','fail')\n",
44 | "# output database\n",
45 | "DATA_FOLDER = os.path.join(os.path.expanduser('~'), 'python', 'tutorial_data','go')\n",
46 | "TRAIN_DATA = os.path.join(DATA_FOLDER,'train_data')\n",
47 | "TEST_DATA = os.path.join(DATA_FOLDER,'test_data')\n",
48 | "\n",
49 | "# BOARD_POSITION contains SGF symbol which represents each row (or column) of the board\n",
50 | "# It can be used to convert between 0,1,2,3... and a,b,c,d...\n",
51 | "# Symbol [tt] or [] represents PASS in SGF, therefore is omitted\n",
52 | "BOARD_POSITION = 'abcdefghijklmnopqrs'\n",
53 | "\n",
54 | "DEFAULT_FEATURES = [\n",
55 | " \"board\", \"ones\", \"turns_since\", \"liberties\", \"capture_size\",\n",
56 | " \"self_atari_size\", \"liberties_after\", \"ladder_capture\", \"ladder_escape\",\n",
57 | " \"sensibleness\", \"zeros\"]"
58 | ]
59 | },
60 | {
61 | "cell_type": "markdown",
62 | "metadata": {},
63 | "source": [
64 | "## Define DB output\n",
65 | " [LevelDB](http://leveldb.org/) is the preferred database because it automatically uses [Snappy](https://github.com/google/snappy) to compress the data."
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": null,
71 | "metadata": {},
72 | "outputs": [],
73 | "source": [
74 | "from caffe2.python import core, utils\n",
75 | "from caffe2.proto import caffe2_pb2\n",
76 | "\n",
77 | "def write_db(db_type, db_name, base_name, features, labels):\n",
78 | " db = core.C.create_db(db_type, db_name, core.C.Mode.write)\n",
79 | " transaction = db.new_transaction()\n",
80 | " for i in range(features.shape[0]):\n",
81 | " feature_and_label = caffe2_pb2.TensorProtos()\n",
82 | " feature_and_label.protos.extend([\n",
83 | " utils.NumpyArrayToCaffe2Tensor(features[i]),\n",
84 | " utils.NumpyArrayToCaffe2Tensor(labels[i])\n",
85 | " ])\n",
86 | " transaction.put(\n",
87 | " '{}_{:0=3}'.format(base_name,i),\n",
88 | " feature_and_label.SerializeToString())\n",
89 | " # Close the transaction, and then close the db.\n",
90 | " del transaction\n",
91 | " del db"
92 | ]
93 | },
94 | {
95 | "cell_type": "markdown",
96 | "metadata": {},
97 | "source": [
98 | "## Parse SGF game file\n",
99 | " Parse each SGF file. SGF files use the characters a to s to represent lines 1 to 19. We convert each game to Caffe2 tensors and write them to the database one game at a time."
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": null,
105 | "metadata": {},
106 | "outputs": [],
107 | "source": [
108 | "#%%capture output\n",
109 | "p = Preprocess(DEFAULT_FEATURES)\n",
110 | "for dirname, subDirList, fileList in os.walk(FILE_FOLDER):\n",
111 | " for filename in fileList:\n",
112 | " with open(os.path.join(dirname, filename)) as f:\n",
113 | " collection = sgf.parse(f.read())\n",
114 | " for game in collection:\n",
115 | " if game.nodes[0].properties['SZ'] == ['19']: # Size of the Board should only be 19x19\n",
116 | " state = GameState() # Initialize GameState\n",
117 | " features = np.empty(shape=(0,48,19,19), dtype=np.int8)\n",
118 | " labels = np.empty(shape=(0,1), dtype=np.uint16)\n",
119 | " try:\n",
120 | " for node in game.nodes[1:]: # Except nodes[0] for game properties\n",
121 | " features = np.append(features, p.state_to_tensor(state).astype(np.int8), axis=0)\n",
122 | " if 'B' in node.properties and len(node.properties['B'][0]) == 2: # Black move\n",
123 | " x = BOARD_POSITION.index(node.properties['B'][0][0])\n",
124 | " y = BOARD_POSITION.index(node.properties['B'][0][1])\n",
125 | " state.do_move(action=(x,y),color = BLACK)\n",
126 | " elif 'W' in node.properties and len(node.properties['W'][0]) == 2: # White move\n",
127 | " x = BOARD_POSITION.index(node.properties['W'][0][0])\n",
128 | " y = BOARD_POSITION.index(node.properties['W'][0][1])\n",
129 | " state.do_move(action=(x,y),color = WHITE)\n",
130 | " labels = np.append(labels, np.asarray([[x * 19 + y]], dtype=np.uint16), axis=0)\n",
131 | " write_db(\n",
132 | " db_type = 'leveldb',\n",
133 | " db_name = TRAIN_DATA, # replace this with TRAIN_DATA or TEST_DATA if you want to separate the dataset\n",
134 | " base_name = os.path.basename(filename),\n",
135 | " features = features,\n",
136 | " labels = labels\n",
137 | " )\n",
138 | " os.rename(f.name,os.path.join(SUCCEED_FOLDER,filename)) # move the file to SUCCEED_FOLDER, so Preprocess can resume after interrupted\n",
139 | " print('{} succeeded'.format(filename))\n",
140 | " except Exception as e:\n",
141 | " os.rename(f.name,os.path.join(FAIL_FOLDER,filename)) # move the file to FAIL_FOLDER, so Preprocess can resume after interrupted\n",
142 | " print('{} failed dues to {}'.format(filename, e))"
143 | ]
144 | },
145 | {
146 | "cell_type": "code",
147 | "execution_count": null,
148 | "metadata": {
149 | "collapsed": true
150 | },
151 | "outputs": [],
152 | "source": []
153 | }
154 | ],
155 | "metadata": {
156 | "kernelspec": {
157 | "display_name": "Python 2",
158 | "language": "python",
159 | "name": "python2"
160 | },
161 | "language_info": {
162 | "codemirror_mode": {
163 | "name": "ipython",
164 | "version": 2
165 | },
166 | "file_extension": ".py",
167 | "mimetype": "text/x-python",
168 | "name": "python",
169 | "nbconvert_exporter": "python",
170 | "pygments_lexer": "ipython2",
171 | "version": "2.7.12"
172 | },
173 | "toc": {
174 | "colors": {
175 | "hover_highlight": "#DAA520",
176 | "navigate_num": "#000000",
177 | "navigate_text": "#333333",
178 | "running_highlight": "#FF0000",
179 | "selected_highlight": "#FFD700",
180 | "sidebar_border": "#EEEEEE",
181 | "wrapper_background": "#FFFFFF"
182 | },
183 | "moveMenuLeft": true,
184 | "nav_menu": {
185 | "height": "30px",
186 | "width": "252px"
187 | },
188 | "navigate_menu": true,
189 | "number_sections": true,
190 | "sideBar": true,
191 | "threshold": 4,
192 | "toc_cell": false,
193 | "toc_section_display": "block",
194 | "toc_window_display": false,
195 | "widenNotebook": false
196 | }
197 | },
198 | "nbformat": 4,
199 | "nbformat_minor": 1
200 | }
201 |
--------------------------------------------------------------------------------
/Mock AlphaGo (3B) Policy Network - Reinforced Learning in mass production.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Mock AlphaGo (3B) Policy Network - Reinforced Learning in mass production\n",
8 | "In this notebook, we will train the policy network by letting policy networks compete against each other, as described by DeepMind:\n",
9 | "\n",
10 | "> We further trained the policy network by policy gradient reinforcement learning.\n",
11 | "Each iteration consisted of a mini-batch of n games played in parallel, between\n",
12 | "the current policy network $p_\\rho$ that is being trained, and an opponent $p_{\\rho^-}$\n",
13 | "that uses parameters $\\rho^-$ from a previous iteration, randomly sampled from\n",
14 | "a pool $O$ of opponents, so as to increase the stability of training. Weights were\n",
15 | "initialized to $\\rho = \\rho^- = \\sigma$. Every 500 iterations, we added the current\n",
16 | "parameters $\\rho$ to the opponent pool. Each game $i$ in the mini-batch was played\n",
17 | "out until termination at step $T^i$, and then scored to determine the outcome\n",
18 | "$z^i_t = \\pm r(s_{T^i})$ from each player’s perspective. The games were then replayed\n",
19 | "to determine the policy gradient update, $\\Delta\\rho = \\frac{\\alpha}{n}\\sum^n_{i=1}\n",
20 | "\\sum^{T^i}_{t=1}\\frac{\\partial\\log p_\\rho(a^i_t|s^i_t)}{\\partial\\rho}(z^i_t-v(s^i_t))$, using the REINFORCE \n",
21 | "algorithm with baseline $v(s^i_t)$ for variance reduction. On the first pass \n",
22 | "through the training pipeline, the baseline was set to zero; on the second pass\n",
23 | "we used the value network $v_\\theta(s)$ as a baseline; this provided a small\n",
24 | "performance boost. The policy network was trained in this way for 10,000 \n",
25 | "mini-batches of 128 games, using 50 GPUs, for one day."
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": null,
31 | "metadata": {},
32 | "outputs": [],
33 | "source": [
34 | "import os, numpy as np\n",
35 | "from caffe2.python import core, model_helper, workspace, brew, utils\n",
36 | "from caffe2.proto import caffe2_pb2\n",
37 | "from sgfutil import BOARD_POSITION\n",
38 | "\n",
39 | "%matplotlib inline\n",
40 | "from matplotlib import pyplot\n",
41 | "\n",
42 | "# how many games will be run in one minibatch\n",
43 | "GAMES_BATCHES = 16 # [1,infinity) depends on your hardware\n",
44 | "SEARCH_WIDE = 1600 # [1, infinity) for each step, run MCTS to obtain better distribution\n",
45 | "# how many iterations for this tournament\n",
46 | "TOURNAMENT_ITERS = 10000 # [1,infinity)\n",
47 | "\n",
48 | "if workspace.has_gpu_support:\n",
49 | " device_opts = core.DeviceOption(caffe2_pb2.CUDA, workspace.GetDefaultGPUID())\n",
50 | " print('Running in GPU mode on default device {}'.format(workspace.GetDefaultGPUID()))\n",
51 | "else :\n",
52 | " device_opts = core.DeviceOption(caffe2_pb2.CPU, 0)\n",
53 | " print('Running in CPU mode')\n",
54 | "\n",
55 | "arg_scope = {\"order\": \"NCHW\"}\n",
56 | "\n",
57 | "ROOT_FOLDER = os.path.join(os.path.expanduser('~'), 'python', 'tutorial_data','go','param') # folder stores the loss/accuracy log"
58 | ]
59 | },
60 | {
61 | "cell_type": "markdown",
62 | "metadata": {},
63 | "source": [
64 | "We need to differentiate between the primary player and the sparring partner. The primary player will learn from the game results."
65 | ]
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": null,
70 | "metadata": {},
71 | "outputs": [],
72 | "source": [
73 | "### Config for primary player\n",
74 | "PRIMARY_WORKSPACE = os.path.join(ROOT_FOLDER, 'primary')\n",
75 | "PRIMARY_CONV_LEVEL = 4\n",
76 | "PRIMARY_FILTERS = 128\n",
77 | "PRIMARY_PRE_TRAINED_ITERS = 1\n",
78 | "# before traning, where to load the params\n",
79 | "PRIMARY_LOAD_FOLDER = os.path.join(ROOT_FOLDER, \"RL-conv={}-k={}-iter={}\"\n",
80 | " .format(PRIMARY_CONV_LEVEL,PRIMARY_FILTERS,PRIMARY_PRE_TRAINED_ITERS))\n",
81 | "BASE_LR = -0.01 # (-0.003,0) The base Learning Rate; 0 to disable it.\n",
82 | "NEGATIVE_BASE_LR = 0.0 # [BASE_LR,0] Dues to multi-class softmax, this param is usually smaller than BASE_LR; 0 to disable it.\n",
83 | "TRAIN_BATCHES = 16 # how many samples will be trained within one mini-batch, depends on your hardware\n",
84 | "# after training, where to store the params\n",
85 | "PRIMARY_SAVE_FOLDER = os.path.join(ROOT_FOLDER, \"RL-conv={}-k={}-iter={}\"\n",
86 | " .format(PRIMARY_CONV_LEVEL,PRIMARY_FILTERS,PRIMARY_PRE_TRAINED_ITERS+TOURNAMENT_ITERS))\n",
87 | "if not os.path.exists(PRIMARY_SAVE_FOLDER):\n",
88 | " os.makedirs(PRIMARY_SAVE_FOLDER)\n",
89 | "\n",
90 | "### Config for sparring partner\n",
91 | "SPARR_WORKSPACE = os.path.join(ROOT_FOLDER, 'sparring')\n",
92 | "SPARR_LOAD_FOLDER = os.path.join(ROOT_FOLDER, \"conv={}-k={}-iter={}\".format(4,128,1))\n",
93 | "\n",
94 | "print('Training model from {} to {} iterations'.format(PRIMARY_PRE_TRAINED_ITERS,PRIMARY_PRE_TRAINED_ITERS+TOURNAMENT_ITERS))"
95 | ]
96 | },
97 | {
98 | "cell_type": "markdown",
99 | "metadata": {},
100 | "source": [
101 | "## AlphaGo Neural Network Architecture"
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": null,
107 | "metadata": {
108 | "collapsed": true
109 | },
110 | "outputs": [],
111 | "source": [
112 | "from modeling import AddConvModel, AddTrainingOperators"
113 | ]
114 | },
115 | {
116 | "cell_type": "markdown",
117 | "metadata": {},
118 | "source": [
119 | "## Build the actual network"
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": null,
125 | "metadata": {
126 | "collapsed": true
127 | },
128 | "outputs": [],
129 | "source": [
130 | "import caffe2.python.predictor.predictor_exporter as pe\n",
131 | "\n",
132 | "data = np.empty(shape=(TRAIN_BATCHES,48,19,19), dtype=np.float32)\n",
133 | "label = np.empty(shape=(TRAIN_BATCHES,), dtype=np.int32)"
134 | ]
135 | },
136 | {
137 | "cell_type": "markdown",
138 | "metadata": {},
139 | "source": [
140 | "### Primary player\n",
141 | ">Train Net: Blob('data','label') ==> Predict Net ==> Loss ==> Backward Propagation"
142 | ]
143 | },
144 | {
145 | "cell_type": "code",
146 | "execution_count": null,
147 | "metadata": {},
148 | "outputs": [],
149 | "source": [
150 | "workspace.SwitchWorkspace(PRIMARY_WORKSPACE, True)\n",
151 | "# for learning from winner\n",
152 | "with core.DeviceScope(device_opts):\n",
153 | " primary_train_model = model_helper.ModelHelper(name=\"primary_train_model\", arg_scope=arg_scope, init_params=True)\n",
154 | " workspace.FeedBlob(\"data\", data)\n",
155 | " predict = AddConvModel(primary_train_model, \"data\", conv_level=PRIMARY_CONV_LEVEL, filters=PRIMARY_FILTERS)\n",
156 | " workspace.FeedBlob(\"label\", label)\n",
157 | " AddTrainingOperators(primary_train_model, predict, \"label\", None, base_lr=BASE_LR)\n",
158 | " workspace.RunNetOnce(primary_train_model.param_init_net)\n",
159 | " workspace.CreateNet(primary_train_model.net, overwrite=True)\n",
160 | "# for learning from negative examples\n",
161 | "with core.DeviceScope(device_opts):\n",
162 | " primary_train_neg_model = model_helper.ModelHelper(name=\"primary_train_neg_model\", arg_scope=arg_scope, init_params=True)\n",
163 | " workspace.FeedBlob(\"data\", data)\n",
164 | " predict = AddConvModel(primary_train_neg_model, \"data\", conv_level=PRIMARY_CONV_LEVEL, filters=PRIMARY_FILTERS)\n",
165 | " ONES = primary_train_neg_model.ConstantFill([], \"ONES\", shape=[TRAIN_BATCHES,361], value=1.0)\n",
166 | " negative = primary_train_neg_model.Sub([ONES, predict], 'negative')\n",
167 | " workspace.FeedBlob(\"label\", label)\n",
168 | " AddTrainingOperators(primary_train_neg_model, negative, None, expect, base_lr=NEGATIVE_BASE_LR)\n",
169 | " workspace.RunNetOnce(primary_train_neg_model.param_init_net)\n",
170 | " workspace.CreateNet(primary_train_neg_model.net, overwrite=True)\n",
171 | " \n",
172 | " primary_predict_net = pe.prepare_prediction_net(os.path.join(PRIMARY_LOAD_FOLDER, \"policy_model.minidb\"), \"minidb\")"
173 | ]
174 | },
175 | {
176 | "cell_type": "markdown",
177 | "metadata": {},
178 | "source": [
179 | "Functions `LearnFromWinningGames` and `LearnFromLosingGames` take the results of a tournament and train the primary player with them."
180 | ]
181 | },
182 | {
183 | "cell_type": "code",
184 | "execution_count": null,
185 | "metadata": {
186 | "collapsed": true
187 | },
188 | "outputs": [],
189 | "source": [
190 | "def LearnFromWinningGames(history, winner, mini_batch=TRAIN_BATCHES):\n",
191 | " data = np.empty(shape=(mini_batch,48,19,19), dtype=np.float32)\n",
192 | " label = np.empty(shape=(mini_batch,), dtype=np.int32)\n",
193 | " #iter = 0\n",
194 | " k = 0\n",
195 | " for i in range(len(winner)):\n",
196 | " #print('Learning {} steps in {} of {} games'.format(iter * TRAIN_BATCHES, i, GAMES_BATCHES))\n",
197 | " for step in history[i]:\n",
198 | " if (step[0] == 'B' and winner[i] == 'B+') or (step[0] == 'W' and winner[i] == 'W+'):\n",
199 | " data[k] = step[2]\n",
200 | " label[k] = step[1]\n",
201 | " k += 1\n",
202 | " #iter += 1\n",
203 | " if k == mini_batch:\n",
204 | " k = 0\n",
205 | " workspace.SwitchWorkspace(PRIMARY_WORKSPACE)\n",
206 | " with core.DeviceScope(device_opts):\n",
207 | " workspace.FeedBlob(\"data\", data)\n",
208 | " workspace.FeedBlob(\"label\", label)\n",
209 | " workspace.RunNet(primary_train_model.net)\n",
210 | "\n",
211 | "def LearnFromLosingGames(history, winner, mini_batch=TRAIN_BATCHES):\n",
212 | " data = np.empty(shape=(mini_batch,48,19,19), dtype=np.float32)\n",
213 | " label = np.empty(shape=(mini_batch,), dtype=np.int32)\n",
214 | " #iter = 0\n",
215 | " k = 0\n",
216 | " for i in range(len(winner)):\n",
217 | " #print('Learning {} steps in {} of {} games'.format(iter * TRAIN_BATCHES, i, GAMES_BATCHES))\n",
218 | " for step in history[i]:\n",
219 | " if (step[0] == 'B' and winner[i] == 'W+') or (step[0] == 'W' and winner[i] == 'B+'):\n",
220 | " data[k] = step[2]\n",
221 | " label[k] = step[1]\n",
222 | " k += 1\n",
223 | " #iter += 1\n",
224 | " if k == mini_batch:\n",
225 | " k = 0\n",
226 | " workspace.SwitchWorkspace(PRIMARY_WORKSPACE)\n",
227 | " with core.DeviceScope(device_opts):\n",
228 | " workspace.FeedBlob(\"data\", data)\n",
229 | " workspace.FeedBlob(\"label\", label)\n",
230 | " workspace.RunNet(primary_train_neg_model.net)"
231 | ]
232 | },
233 | {
234 | "cell_type": "markdown",
235 | "metadata": {},
236 | "source": [
237 | "### Sparring partner\n",
238 | ">Predict Net: Blob('data') ==> Predict Net ==> Blob('softmax')"
239 | ]
240 | },
241 | {
242 | "cell_type": "markdown",
243 | "metadata": {},
244 | "source": [
245 | "## Run the tournament and training\n",
246 | ">We use a reward function $r(s)$ that is zero for all non-terminal time-steps $t < T$.\n",
247 | "The outcome $z_t = \\pm r(s_T)$ is the terminal reward at the end of the game from the perspective of the\n",
248 | "current player at time-step $t$: $+1$ for winning and $-1$ for losing. Weights are then updated at each\n",
249 | "time-step $t$ by stochastic gradient ascent in the direction that maximizes expected outcome."
250 | ]
251 | },
252 | {
253 | "cell_type": "code",
254 | "execution_count": null,
255 | "metadata": {
256 | "collapsed": true
257 | },
258 | "outputs": [],
259 | "source": [
260 | "from go import GameState, BLACK, WHITE, EMPTY, PASS\n",
261 | "from preprocessing import Preprocess\n",
262 | "from game import DEFAULT_FEATURES\n",
263 | "from datetime import datetime\n",
264 | "from sgfutil import GetWinner, WriteBackSGF\n",
265 | "import sgf\n",
266 | "\n",
267 | "np.random.seed(datetime.now().microsecond)\n",
268 | "\n",
269 | "# construct the model to be exported\n",
270 | "pe_meta = pe.PredictorExportMeta(\n",
271 | " predict_net=primary_predict_net.Proto(),\n",
272 | " parameters=[str(b) for b in primary_train_model.params],\n",
273 | " inputs=[\"data\"],\n",
274 | " outputs=[\"softmax\"],\n",
275 | ")"
276 | ]
277 | },
278 | {
279 | "cell_type": "code",
280 | "execution_count": null,
281 | "metadata": {
282 | "scrolled": true
283 | },
284 | "outputs": [],
285 | "source": [
286 | "for tournament in range(PRIMARY_PRE_TRAINED_ITERS, PRIMARY_PRE_TRAINED_ITERS+TOURNAMENT_ITERS):\n",
287 | " # Every 20 tournaments, save a checkpoint and add the current player to the sparring pool (DeepMind used 500 iterations)\n",
288 | " if tournament > 0 and tournament % 20 == 0:\n",
289 | " pe.save_to_db(\"minidb\", os.path.join(PRIMARY_SAVE_FOLDER, \"policy_model.minidb\"), pe_meta)\n",
290 | " print('Checkpoint saved to {}'.format(PRIMARY_SAVE_FOLDER))\n",
291 | " pe.save_to_db(\"minidb\", os.path.join(SPARR_LOAD_FOLDER, \"policy_model_RL_{}.minidb\".format(PRIMARY_PRE_TRAINED_ITERS+tournament)), pe_meta)\n",
292 | " print('Checkpoint saved to {}'.format(SPARR_LOAD_FOLDER))\n",
293 | " \n",
294 | " # Randomly change color of player\n",
295 | " PRIMARY_PLAYER = np.random.choice(['B','W'])\n",
296 | " if PRIMARY_PLAYER == 'B':\n",
297 | " SPARRING_PLAYER = 'W'\n",
298 | " else:\n",
299 | " SPARRING_PLAYER = 'B'\n",
300 | " \n",
301 | " # Randomly pickup sparring partner\n",
302 | " workspace.SwitchWorkspace(SPARR_WORKSPACE, True)\n",
303 | " sparring_param_file = np.random.choice(os.listdir(SPARR_LOAD_FOLDER))\n",
304 | " with core.DeviceScope(device_opts):\n",
305 | " sparring_predict_net = pe.prepare_prediction_net(os.path.join(SPARR_LOAD_FOLDER, sparring_param_file), \"minidb\")\n",
306 | " print('Tournament {} Primary({}) vs Sparring({}|{}) started @{}'\n",
307 | " .format(tournament, PRIMARY_PLAYER, SPARRING_PLAYER, sparring_param_file, datetime.now()))\n",
308 | "\n",
309 | " \n",
310 | " # Initialize game board and game state\n",
311 | " game_state = [ GameState() for i in range(GAMES_BATCHES) ]\n",
312 | " game_result = [0] * GAMES_BATCHES # 0 - Not Ended; BLACK - Black Wins; WHITE - White Wins\n",
313 | " p = Preprocess(DEFAULT_FEATURES) # Singleton\n",
314 | " history = [ [] for i in range(GAMES_BATCHES) ] # history[n][step] stores tuple of (player, x, y, board[n])\n",
315 | " board = None # The preprocessed board with shape Nx48x19x19\n",
316 | " \n",
317 | " # for each step in all games\n",
318 | " for step in range(0,722):\n",
319 | " \n",
320 | " # Preprocess the board\n",
321 | " board = np.concatenate([p.state_to_tensor(game_state[i]).astype(np.float32) for i in range(GAMES_BATCHES)])\n",
322 | "\n",
323 | " if step % 2 == 0:\n",
324 | " current_player = BLACK\n",
325 | " current_color = 'B'\n",
326 | " else:\n",
327 | " current_player = WHITE\n",
328 | " current_color = 'W'\n",
329 | "\n",
330 | " if step % 2 == (PRIMARY_PLAYER == 'W'): # if step %2 == 0 and Primary is Black, or vice versa.\n",
331 | " # primary player make move\n",
332 | " workspace.SwitchWorkspace(PRIMARY_WORKSPACE)\n",
333 | " with core.DeviceScope(device_opts):\n",
334 | " workspace.FeedBlob('data', board)\n",
335 | " workspace.RunNet(primary_predict_net)\n",
336 | " else:\n",
337 | " # sparring partner make move\n",
338 | " workspace.SwitchWorkspace(SPARR_WORKSPACE)\n",
339 | " with core.DeviceScope(device_opts):\n",
340 | " workspace.FeedBlob('data', board)\n",
341 | " workspace.RunNet(sparring_predict_net)\n",
342 | "\n",
343 | " predict = workspace.FetchBlob('softmax') # [0.01, 0.02, ...] in shape (N,361)\n",
344 | "\n",
345 | " for i in range(GAMES_BATCHES):\n",
346 | " if game_result[i]: # game end\n",
347 | " continue\n",
348 | " else: # game not end\n",
349 | " legal_moves = [ x*19+y for (x,y) in game_state[i].get_legal_moves(include_eyes=False)] # [59, 72, ...] in 1D\n",
350 | " if len(legal_moves) > 0: # at least 1 legal move\n",
351 | " probabilities = predict[i][legal_moves] # [0.02, 0.01, ...]\n",
352 | " # use numpy.random.choice to randomize the step,\n",
353 | " # otherwise use np.argmax to get best choice\n",
354 | " # current_choice = legal_moves[np.argmax(probabilities)]\n",
355 | " if np.sum(probabilities) > 0:\n",
356 | " current_choice = np.random.choice(legal_moves, 1, p=probabilities/np.sum(probabilities))[0]\n",
357 | " else:\n",
358 | " current_choice = np.random.choice(legal_moves, 1)[0]\n",
359 | " (x, y) = (current_choice/19, current_choice%19)\n",
360 | " history[i].append((current_color, current_choice, board[i]))\n",
361 | " game_state[i].do_move(action = (x, y), color = current_player) # End of Game?\n",
362 | " #print('game({}) step({}) {} move({},{})'.format(i, step, current_color, x, y))\n",
363 | " else:\n",
364 | " game_state[i].do_move(action = PASS, color = current_player)\n",
365 | " #print('game({}) step({}) {} PASS'.format(i, step, current_color))\n",
366 | " game_result[i] = game_state[i].is_end_of_game\n",
367 | "\n",
368 | " if np.all(game_result):\n",
369 | " break\n",
370 | " \n",
371 | " # Get the winner\n",
372 | " winner = [ GetWinner(game_state[i]) for i in range(GAMES_BATCHES) ] # B+, W+, T\n",
373 | " print('Tournament {} Finished with Primary({}) {}:{} Sparring({}) @{}'.\n",
374 | " format(tournament, PRIMARY_PLAYER, sum(np.char.count(winner, PRIMARY_PLAYER)),\n",
375 | " sum(np.char.count(winner, SPARRING_PLAYER)), SPARRING_PLAYER, datetime.now()))\n",
376 | " \n",
377 | " # Save the games(optional)\n",
378 | " for i in range(GAMES_BATCHES):\n",
379 | " filename = os.path.join(\n",
380 | " os.path.expanduser('~'), 'python', 'tutorial_files','selfplay',\n",
381 | " '({}_{}_{})vs({})_{}_{}_{}'.format(PRIMARY_CONV_LEVEL, PRIMARY_FILTERS, PRIMARY_PRE_TRAINED_ITERS+tournament,\n",
382 | " sparring_param_file, i, winner[i],\n",
383 | " datetime.now().strftime(\"%Y-%m-%dT%H:%M:%S%Z\")))\n",
384 | " WriteBackSGF(winner, history[i], filename)\n",
385 | " \n",
386 | " # After each tournament, learn from the winner\n",
387 | " if BASE_LR != 0:\n",
388 | " LearnFromWinningGames(history, winner, mini_batch=TRAIN_BATCHES)\n",
389 | " \n",
390 | " # And learn from negative examples\n",
391 | " if NEGATIVE_BASE_LR != 0:\n",
392 | " LearnFromLosingGames(history, winner, mini_batch=TRAIN_BATCHES)"
393 | ]
394 | },
395 | {
396 | "cell_type": "code",
397 | "execution_count": null,
398 | "metadata": {
399 | "collapsed": true
400 | },
401 | "outputs": [],
402 | "source": [
403 | "if TOURNAMENT_ITERS>0 :\n",
404 | " pe.save_to_db(\"minidb\", os.path.join(PRIMARY_SAVE_FOLDER, \"policy_model.minidb\"), pe_meta)\n",
405 | " print('Results saved to {}'.format(PRIMARY_SAVE_FOLDER))\n",
406 | " pe.save_to_db(\"minidb\", os.path.join(SPARR_LOAD_FOLDER, \"policy_model_RL_{}.minidb\".format(PRIMARY_PRE_TRAINED_ITERS+TOURNAMENT_ITERS)), pe_meta)\n",
407 | " print('Results saved to {}'.format(SPARR_LOAD_FOLDER))"
408 | ]
409 | }
410 | ],
411 | "metadata": {
412 | "kernelspec": {
413 | "display_name": "Python 2",
414 | "language": "python",
415 | "name": "python2"
416 | },
417 | "language_info": {
418 | "codemirror_mode": {
419 | "name": "ipython",
420 | "version": 2
421 | },
422 | "file_extension": ".py",
423 | "mimetype": "text/x-python",
424 | "name": "python",
425 | "nbconvert_exporter": "python",
426 | "pygments_lexer": "ipython2",
427 | "version": "2.7.12"
428 | },
429 | "toc": {
430 | "colors": {
431 | "hover_highlight": "#DAA520",
432 | "navigate_num": "#000000",
433 | "navigate_text": "#333333",
434 | "running_highlight": "#FF0000",
435 | "selected_highlight": "#FFD700",
436 | "sidebar_border": "#EEEEEE",
437 | "wrapper_background": "#FFFFFF"
438 | },
439 | "moveMenuLeft": true,
440 | "nav_menu": {
441 | "height": "315px",
442 | "width": "367px"
443 | },
444 | "navigate_menu": true,
445 | "number_sections": true,
446 | "sideBar": true,
447 | "threshold": 4,
448 | "toc_cell": false,
449 | "toc_position": {
450 | "height": "544px",
451 | "left": "0px",
452 | "right": "1723px",
453 | "top": "107px",
454 | "width": "130px"
455 | },
456 | "toc_section_display": "block",
457 | "toc_window_display": true,
458 | "widenNotebook": false
459 | }
460 | },
461 | "nbformat": 4,
462 | "nbformat_minor": 1
463 | }
464 |
--------------------------------------------------------------------------------
/Mock AlphaGo Zero (1) Preprocess Pipeline.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Preprocess\n",
8 | "**Neural Network Architecture.** The input to the neural network is a 19 × 19 × 17 image stack\n",
9 | "comprising 17 binary feature planes. 8 feature planes $X_t$ consist of binary values indicating the\n",
10 | "presence of the current player’s stones ($X_t^i = 1$ if intersection $i$ contains a stone of the player’s\n",
11 | "colour at time-step $t$; $0$ if the intersection is empty, contains an opponent stone, or if $t < 0$). A\n",
12 | "further 8 feature planes, $Y_t$ , represent the corresponding features for the opponent’s stones. The\n",
13 | "final feature plane, $C$, represents the colour to play, and has a constant value of either 1 if black\n",
14 | "is to play or 0 if white is to play. These planes are concatenated together to give input features\n",
15 | "$s_t = [X_t , Y_t , X_{t−1} , Y_{t−1} , ..., X_{t−7} , Y_{t−7} , C]$."
16 | ]
17 | },
18 | {
19 | "cell_type": "code",
20 | "execution_count": null,
21 | "metadata": {},
22 | "outputs": [],
23 | "source": [
24 | "from preprocessing import Preprocess\n",
25 | "from go import GameState, BLACK, WHITE, EMPTY\n",
26 | "import os, sgf\n",
27 | "import numpy as np\n",
28 | "\n",
29 | "# input SGF files\n",
30 | "FILE_FOLDER = os.path.join(os.path.expanduser('~'), 'python', 'tutorial_files','go')\n",
31 | "# output archive SGF files\n",
32 | "SUCCEED_FOLDER = os.path.join(os.path.expanduser('~'), 'python', 'tutorial_files','succeed')\n",
33 | "FAIL_FOLDER = os.path.join(os.path.expanduser('~'), 'python', 'tutorial_files','fail')\n",
34 | "# output database\n",
35 | "TRAIN_DATA = os.path.join(os.path.expanduser('~'), 'python', 'tutorial_data', 'zero', 'train_data')\n",
36 | "TEST_DATA = os.path.join(os.path.expanduser('~'), 'python', 'tutorial_data', 'zero', 'test_data')\n",
37 | "\n",
38 | "# Config this to indicate whether it's training or testing data\n",
39 | "DATA_FOLDER = TRAIN_DATA\n",
40 | "\n",
41 | "# BOARD_POSITION contains SGF symbol which represents each row (or column) of the board\n",
42 | "# It can be used to convert between 0,1,2,3... and a,b,c,d...\n",
43 | "# Symbol [tt] or [] represents PASS in SGF, therefore is omitted\n",
44 | "BOARD_POSITION = 'abcdefghijklmnopqrs'\n",
45 | "\n",
46 | "# Only 3 features are needed for AlphaGo Zero\n",
47 | "# 0 - Player Stone, 1 - Opponent Stone, 3 - Current Player Color\n",
48 | "DEFAULT_FEATURES = [\"board\", \"color\"]\n",
49 | "\n",
50 | "# reverse the index of player/opponent\n",
51 | "# 0,2,4,6... are player, 1,3,5,7... are opponent\n",
52 | "OPPONENT_INDEX = [1,0,3,2,5,4,7,6,9,8,11,10,13,12]"
53 | ]
54 | },
55 | {
56 | "cell_type": "markdown",
57 | "metadata": {},
58 | "source": [
59 | "## Define DB output\n",
60 | " [LevelDB](http://leveldb.org/) is the preferred database because it automatically uses [Snappy](https://github.com/google/snappy) to compress the data."
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": null,
66 | "metadata": {},
67 | "outputs": [],
68 | "source": [
69 | "from caffe2.python import core, utils\n",
70 | "from caffe2.proto import caffe2_pb2\n",
71 | "\n",
72 | "def write_db(db_type, db_name, base_name, features, labels, rewards):\n",
73 | " db = core.C.create_db(db_type, db_name, core.C.Mode.write)\n",
74 | " transaction = db.new_transaction()\n",
75 | " for i in range(features.shape[0]):\n",
76 | " feature_and_label = caffe2_pb2.TensorProtos()\n",
77 | " feature_and_label.protos.extend([\n",
78 | " utils.NumpyArrayToCaffe2Tensor(features[i]),\n",
79 | " utils.NumpyArrayToCaffe2Tensor(labels[i]),\n",
80 | " utils.NumpyArrayToCaffe2Tensor(rewards[i])\n",
81 | " ])\n",
82 | " transaction.put(\n",
83 | " '{}_{:0=3}'.format(base_name,i),\n",
84 | " feature_and_label.SerializeToString())\n",
85 | " # Close the transaction, and then close the db.\n",
86 | " del transaction\n",
87 | " del db"
88 | ]
89 | },
90 | {
91 | "cell_type": "markdown",
92 | "metadata": {},
93 | "source": [
94 | "## Parse SGF game file\n",
95 | " Parse the SGF file. An SGF file uses the characters a to s to represent lines 1 to 19. We convert each SGF game to Caffe2 tensors and write them to the database in per-game batches."
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": null,
101 | "metadata": {},
102 | "outputs": [],
103 | "source": [
104 | "#%%capture output\n",
105 | "p = Preprocess(DEFAULT_FEATURES)\n",
106 | "for dirname, subDirList, fileList in os.walk(FILE_FOLDER):\n",
107 | " for filename in fileList:\n",
108 | " with open(os.path.join(dirname, filename)) as f:\n",
109 | " collection = sgf.parse(f.read())\n",
110 | " for game in collection:\n",
111 | " # Size of the Board should only be 19x19, Komi should be 7.5 according to Chinese rule\n",
112 | " if (game.nodes[0].properties['SZ'] == ['19']\n",
113 | "# and game.nodes[0].properties['RU'] == ['Chinese']\n",
114 | "# and game.nodes[0].properties['KM'] == ['7.50']\n",
115 | " ):\n",
116 | " try:\n",
117 | " state = GameState() # Initialize GameState\n",
118 | " features = np.empty(shape=(0,17,19,19), dtype=np.int8)\n",
119 | " feature_history = np.zeros(shape=(1,17,19,19), dtype=np.int8)\n",
120 | " labels = np.empty(shape=(0,), dtype=np.int32)\n",
121 | " rewards = np.empty(shape=(0,), dtype=np.float32)\n",
122 | " result = 'B' if game.nodes[0].properties['RE'][0:2] == ['B+'] else 'W'\n",
123 | " for node in game.nodes[1:]: # Except nodes[0] for game properties\n",
124 | " feature_current = p.state_to_tensor(state).astype(np.int8) # Player/Opponent/Empty/Color\n",
125 | " feature_history = np.concatenate((feature_current[0:1,0:2], # Xt, Yt\n",
126 | " feature_history[0:1,OPPONENT_INDEX],\n",
127 | " feature_current[0:1,3:4]), # Color\n",
128 | " axis=1)\n",
129 | " if 'B' in node.properties and len(node.properties['B'][0]) == 2: # Black move\n",
130 | " x = BOARD_POSITION.index(node.properties['B'][0][0])\n",
131 | " y = BOARD_POSITION.index(node.properties['B'][0][1])\n",
132 | " state.do_move(action=(x,y),color = BLACK)\n",
133 | " elif 'W' in node.properties and len(node.properties['W'][0]) == 2: # White move\n",
134 | " x = BOARD_POSITION.index(node.properties['W'][0][0])\n",
135 | " y = BOARD_POSITION.index(node.properties['W'][0][1])\n",
136 | " state.do_move(action=(x,y),color = WHITE)\n",
137 | " reward = np.asarray([1.0 if result in node.properties else -1.0], dtype=np.float32)\n",
138 | " features = np.append(features, feature_history, axis=0)\n",
139 | " labels = np.append(labels, np.asarray([x * 19 + y], dtype=np.int32), axis=0)\n",
140 | " rewards = np.append(rewards, reward, axis=0)\n",
141 | " write_db(\n",
142 | " db_type = 'leveldb',\n",
143 | " db_name = DATA_FOLDER, # replace this with TRAIN_DATA or TEST_DATA if you want to separate the dataset\n",
144 | " base_name = os.path.basename(filename),\n",
145 | " features = features,\n",
146 | " labels = labels,\n",
147 | " rewards = rewards\n",
148 | " )\n",
149 | " os.rename(f.name,os.path.join(SUCCEED_FOLDER,filename)) # move the file to SUCCEED_FOLDER, so Preprocess can resume after interrupted\n",
150 | " print('{} succeeded'.format(filename))\n",
151 | " except Exception as e:\n",
152 | " os.rename(f.name,os.path.join(FAIL_FOLDER,filename)) # move the file to FAIL_FOLDER, so Preprocess can resume after interrupted\n",
153 | " print('{} failed dues to {}'.format(filename, e))\n",
154 | " else:\n",
155 | " os.rename(f.name,os.path.join(FAIL_FOLDER,filename)) # move the file to FAIL_FOLDER, so Preprocess can resume after interrupted\n",
156 | " print('{} unqualified dues to Size, Rule or Komi'.format(filename))"
157 | ]
158 | }
159 | ],
160 | "metadata": {
161 | "kernelspec": {
162 | "display_name": "Python 2",
163 | "language": "python",
164 | "name": "python2"
165 | },
166 | "language_info": {
167 | "codemirror_mode": {
168 | "name": "ipython",
169 | "version": 2
170 | },
171 | "file_extension": ".py",
172 | "mimetype": "text/x-python",
173 | "name": "python",
174 | "nbconvert_exporter": "python",
175 | "pygments_lexer": "ipython2",
176 | "version": "2.7.12"
177 | },
178 | "toc": {
179 | "colors": {
180 | "hover_highlight": "#DAA520",
181 | "navigate_num": "#000000",
182 | "navigate_text": "#333333",
183 | "running_highlight": "#FF0000",
184 | "selected_highlight": "#FFD700",
185 | "sidebar_border": "#EEEEEE",
186 | "wrapper_background": "#FFFFFF"
187 | },
188 | "moveMenuLeft": true,
189 | "nav_menu": {
190 | "height": "30px",
191 | "width": "252px"
192 | },
193 | "navigate_menu": true,
194 | "number_sections": true,
195 | "sideBar": true,
196 | "threshold": 4,
197 | "toc_cell": false,
198 | "toc_section_display": "block",
199 | "toc_window_display": false,
200 | "widenNotebook": false
201 | }
202 | },
203 | "nbformat": 4,
204 | "nbformat_minor": 1
205 | }
206 |
--------------------------------------------------------------------------------
/Mock AlphaGo Zero (2) Policy and Value Network.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Mock AlphaGo Zero (2) - Policy and Value Network\n",
8 | "In this notebook, we will build the model of AlphaGo Zero's Policy and Value Network, which is based on ResNet.\n",
9 | "\n",
10 | "**Supervised Learning.** For comparison, we also trained neural network parameters $\\theta_{SL}$ by\n",
11 | "supervised learning. The neural network architecture was identical to AlphaGo Zero. Mini-batches of\n",
12 | "data $(s,\\pi,z)$ were sampled at random from the KGS data-set, setting $\\pi_a = 1$ for the human expert\n",
13 | "move $a$. Parameters were optimised by stochastic gradient descent with momentum and learning\n",
14 | "rate annealing, using the same loss as in Equation 1, but weighting the mean-squared error\n",
15 | "component by a factor of $0.01$. The learning rate was annealed according to the standard schedule\n",
16 | "in Extended Data Table 3. The momentum parameter was set to $0.9$, and the L2 regularisation\n",
17 | "parameter was set to $c = 10^{−4}$."
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 1,
23 | "metadata": {},
24 | "outputs": [
25 | {
26 | "name": "stderr",
27 | "output_type": "stream",
28 | "text": [
29 | "WARNING:root:This caffe2 python run does not have GPU support. Will run in CPU only mode.\n",
30 | "WARNING:root:Debug message: No module named caffe2_pybind11_state_gpu\n"
31 | ]
32 | },
33 | {
34 | "name": "stdout",
35 | "output_type": "stream",
36 | "text": [
37 | "Running in CPU mode\n",
38 | "Training model from 0 to 10000 iterations\n"
39 | ]
40 | }
41 | ],
42 | "source": [
43 | "import os, numpy as np\n",
44 | "from caffe2.python import core, model_helper, workspace, brew, utils\n",
45 | "from caffe2.proto import caffe2_pb2\n",
46 | "\n",
47 | "%matplotlib inline\n",
48 | "from matplotlib import pyplot\n",
49 | "\n",
50 | "NUM_RES_BLOCKS = 2 # [19(alphago zero),39] How many Residual Blocks will be used in the model\n",
51 | "FILTERS = 128 # 128/192/256(alphago zero)/384 How many K will be used in the model\n",
52 | "BASE_LR = -0.1 # (-0.1,0) The base Learning Rate, alphago zero uses -0.1 and times 0.1 every 200K iters\n",
53 | "\n",
54 | "if workspace.has_gpu_support:\n",
55 | " device_opts = core.DeviceOption(caffe2_pb2.CUDA, workspace.GetDefaultGPUID())\n",
56 | " print('Running in GPU mode on default device {}'.format(workspace.GetDefaultGPUID()))\n",
57 | "else:\n",
58 | " device_opts = core.DeviceOption(caffe2_pb2.CPU, 0)\n",
59 | " print('Running in CPU mode')\n",
60 | "\n",
61 | "TRAIN_BATCHES = 16 # how many samples will be trained within one mini-batch, depends on your hardware\n",
62 | "PRE_TRAINED_ITERS = 0 # [0, infinity) how many batches the model has been trained before\n",
63 | "SKIP_TRAINED_DATA = 0 # [0, infinity) if this is a resumed training, how many input data will be skipped\n",
64 | "TRAIN_ITERS = 10000 # [0, infinity) how many batches the model will be trained\n",
65 | "TEST_BATCHES = 100 # how many samples will be tested within one mini-batch\n",
66 | "TEST_ITERS = 100 # how many batches the model will be tested\n",
67 | "\n",
68 | "ROOT_FOLDER = os.path.join(os.path.expanduser('~'), 'python', 'tutorial_data','zero','param') # folder stores the loss/accuracy log\n",
69 | "TRAIN_DATA = os.path.join(os.path.expanduser('~'), 'python', 'tutorial_data','zero','train_data') # db folder stores the preprocessed games\n",
70 | "TEST_DATA = os.path.join(os.path.expanduser('~'), 'python', 'tutorial_data','zero','test_data') # db folder stores the preprocessed test data\n",
71 | "\n",
72 | "# if this is a resumed training, where to load the init_param from\n",
73 | "LOAD_FOLDER = os.path.join(ROOT_FOLDER, \"res={}-k={}-iter={}\".format(NUM_RES_BLOCKS,FILTERS,PRE_TRAINED_ITERS))\n",
74 | "\n",
75 | "# if the model will be saved for future resume training, where to store it\n",
76 | "SAVE_FOLDER = os.path.join(ROOT_FOLDER, \"res={}-k={}-iter={}\".format(NUM_RES_BLOCKS,FILTERS,PRE_TRAINED_ITERS+TRAIN_ITERS))\n",
77 | "\n",
78 | "workspace.ResetWorkspace(ROOT_FOLDER)\n",
79 | "\n",
80 | "print('Training model from {} to {} iterations'.format(PRE_TRAINED_ITERS,PRE_TRAINED_ITERS+TRAIN_ITERS))"
81 | ]
82 | },
83 | {
84 | "cell_type": "markdown",
85 | "metadata": {},
86 | "source": [
87 | "## AlphaGo Neural Network Architecture\n"
88 | ]
89 | },
90 | {
91 | "cell_type": "markdown",
92 | "metadata": {},
93 | "source": [
94 | "### Data Input\n",
95 | ">The input to the neural network is a 19 × 19 × 17 image stack\n",
96 | "comprising 17 binary feature planes. 8 feature planes $X_t$ consist of binary values indicating the\n",
97 | "presence of the current player’s stones ($X_t^i = 1$ if intersection $i$ contains a stone of the player’s\n",
98 | "colour at time-step $t$; $0$ if the intersection is empty, contains an opponent stone, or if $t < 0$). A\n",
99 | "further 8 feature planes, $Y_t$ , represent the corresponding features for the opponent’s stones. The\n",
100 | "final feature plane, $C$, represents the colour to play, and has a constant value of either 1 if black\n",
101 | "is to play or 0 if white is to play. These planes are concatenated together to give input features\n",
102 | "$s_t = [X_t , Y_t , X_{t−1} , Y_{t−1} , ..., X_{t−7} , Y_{t−7} , C]$."
103 | ]
104 | },
105 | {
106 | "cell_type": "code",
107 | "execution_count": 2,
108 | "metadata": {
109 | "collapsed": true
110 | },
111 | "outputs": [],
112 | "source": [
113 | "from modelingZero import AddInput, AddOneHot"
114 | ]
115 | },
116 | {
117 | "cell_type": "markdown",
118 | "metadata": {},
119 | "source": [
120 | "### DCNN\n",
121 | ">The input features $s_t$ are processed by a residual tower that consists of a single convolutional block followed by either 19 or 39 residual blocks. The convolutional block applies the following modules:\n",
122 | "1. A convolution of 256 filters of kernel size 3 x 3 with stride 1\n",
123 | "2. Batch normalisation\n",
124 | "3. A rectifier non-linearity\n",
125 | "\n",
126 | ">Each residual block applies the following modules sequentially to its input:\n",
127 | "1. A convolution of 256 filters of kernel size 3 x 3 with stride 1\n",
128 | "2. Batch normalisation\n",
129 | "3. A rectifier non-linearity\n",
130 | "4. A convolution of 256 filters of kernel size 3 x 3 with stride 1\n",
131 | "5. Batch normalisation\n",
132 | "6. A skip connection that adds the input to the block\n",
133 | "7. A rectifier non-linearity\n",
134 | "\n",
135 | ">The output of the residual tower is passed into two separate “heads” for computing the policy and value respectively. \n",
136 | ">The policy head applies the following modules:\n",
137 | "1. A convolution of 2 filters of kernel size 1 x 1 with stride 1\n",
138 | "2. Batch normalisation\n",
139 | "3. A rectifier non-linearity\n",
140 | "4. A fully connected linear layer that outputs a vector of size $19^2 + 1 = 362$ corresponding to logit probabilities for all intersections and the pass move\n",
141 | "\n",
142 | ">The value head applies the following modules:\n",
143 | "1. A convolution of 1 filter of kernel size 1 x 1 with stride 1\n",
144 | "2. Batch normalisation\n",
145 | "3. A rectifier non-linearity\n",
146 | "4. A fully connected linear layer to a hidden layer of size 256\n",
147 | "5. A rectifier non-linearity\n",
148 | "6. A fully connected linear layer to a scalar\n",
149 | "7. A tanh non-linearity outputting a scalar in the range `[-1; 1]`"
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": 3,
155 | "metadata": {
156 | "collapsed": true
157 | },
158 | "outputs": [],
159 | "source": [
160 | "from modelingZero import AddResNetModel, AddSoftmax"
161 | ]
162 | },
163 | {
164 | "cell_type": "markdown",
165 | "metadata": {},
166 | "source": [
167 | "### Accuracy\n",
168 | "Please note that predict is a 4-dimensional tensor of shape N x 1 x 19 x 19, and label is a 2-dimensional tensor of shape N x 1."
169 | ]
170 | },
171 | {
172 | "cell_type": "code",
173 | "execution_count": 4,
174 | "metadata": {
175 | "collapsed": true
176 | },
177 | "outputs": [],
178 | "source": [
179 | "from modelingZero import AddAccuracy"
180 | ]
181 | },
182 | {
183 | "cell_type": "markdown",
184 | "metadata": {},
185 | "source": [
186 | "### Training Operator (Backward Propagation)\n",
187 | "\n",
188 | "A game terminates at step $T$ when both players pass, when the search value drops below a \n",
189 | "resignation threshold (`10%`), or when the game exceeds a maximum length (`722 steps`); \n",
190 | "the game is then scored to give a final reward of $r_T\\in\\{-1,+1\\}$ (see\n",
191 | "Methods for details). The data for each time-step $t$ is stored as $(s_t, \\pi_t, z_t)$ \n",
192 | "where $z_t = \\pm r_T$ is the game winner from the perspective of the current player at step $t$.\n",
193 | "In parallel (Figure 1b), new network parameters $\\theta_i$ are trained from data $(s,\\pi,z)$\n",
194 | "sampled uniformly among all time-steps of the last iteration(s) of self-play. The neural \n",
195 | "network $(p,v) = f_{\\theta _i}(s)$ is adjusted to minimise the error between the predicted \n",
196 | "value $v$ and the self-play winner $z$, and to maximise the similarity of the neural network \n",
197 | "move probabilities $p$ to the search probabilities $\\pi$. Specifically, the parameters $\\theta$\n",
198 | "are adjusted by gradient descent on a loss function $l$ that sums over mean-squared error and\n",
199 | "cross-entropy losses respectively,\n",
200 | "\n",
201 | ">$(p,v) = f_\\theta(s), \\quad l = (z - v)^2 - \\pi^T \\log p + c|| \\theta ||^2$"
202 | ]
203 | },
204 | {
205 | "cell_type": "code",
206 | "execution_count": 5,
207 | "metadata": {
208 | "collapsed": true
209 | },
210 | "outputs": [],
211 | "source": [
212 | "from modelingZero import AddTrainingOperators"
213 | ]
214 | },
215 | {
216 | "cell_type": "markdown",
217 | "metadata": {},
218 | "source": [
219 | "## Build the actual network"
220 | ]
221 | },
222 | {
223 | "cell_type": "code",
224 | "execution_count": 6,
225 | "metadata": {
226 | "collapsed": true
227 | },
228 | "outputs": [],
229 | "source": [
230 | "arg_scope = {\"order\": \"NCHW\"}"
231 | ]
232 | },
233 | {
234 | "cell_type": "markdown",
235 | "metadata": {},
236 | "source": [
237 | "### Train Net"
238 | ]
239 | },
240 | {
241 | "cell_type": "code",
242 | "execution_count": 7,
243 | "metadata": {},
244 | "outputs": [
245 | {
246 | "data": {
247 | "text/plain": [
248 | "True"
249 | ]
250 | },
251 | "execution_count": 7,
252 | "metadata": {},
253 | "output_type": "execute_result"
254 | }
255 | ],
256 | "source": [
257 | "# Skip model only has DBInput to waste the input\n",
258 | "skip_model = model_helper.ModelHelper(name=\"skip_model\", arg_scope=arg_scope, init_params=True)\n",
259 | "_d, _l, _r = AddInput(\n",
260 | " skip_model, batch_size=TRAIN_BATCHES,\n",
261 | " db=TRAIN_DATA,\n",
262 | " db_type='leveldb')\n",
263 | "# Initialize params and create network\n",
264 | "workspace.RunNetOnce(skip_model.param_init_net)\n",
265 | "workspace.CreateNet(skip_model.net, overwrite=True)"
266 | ]
267 | },
268 | {
269 | "cell_type": "code",
270 | "execution_count": 8,
271 | "metadata": {},
272 | "outputs": [
273 | {
274 | "name": "stderr",
275 | "output_type": "stream",
276 | "text": [
277 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: OneHot.\n",
278 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n",
279 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n",
280 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n",
281 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n",
282 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n",
283 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: OneHot.\n"
284 | ]
285 | },
286 | {
287 | "data": {
288 | "text/plain": [
289 | "True"
290 | ]
291 | },
292 | "execution_count": 8,
293 | "metadata": {},
294 | "output_type": "execute_result"
295 | }
296 | ],
297 | "source": [
298 | "# Train Net: DBInput ==> Predict Net ==> Loss ==> Backward Propagation\n",
299 | "with core.DeviceScope(device_opts):\n",
300 | " train_model = model_helper.ModelHelper(name=\"policy_train\", arg_scope=arg_scope, init_params=True)\n",
301 | " data, label, reward = AddInput(\n",
302 | " train_model, batch_size=TRAIN_BATCHES,\n",
303 | " db=TRAIN_DATA,\n",
304 | " db_type='leveldb')\n",
305 | " onehot = AddOneHot(train_model, label)\n",
306 | " predict, value = AddResNetModel(train_model, data, num_blocks=NUM_RES_BLOCKS, filters=FILTERS, is_test=False)\n",
307 | " softmax = AddSoftmax(train_model, predict)\n",
308 | " AddAccuracy(train_model, softmax, label)\n",
309 | " AddTrainingOperators(train_model, predict, onehot, value, reward, \n",
310 | " base_lr=BASE_LR, policy='fixed') #policy='step', stepsize=200000, gamma=0.1)\n",
311 | "# Initialize params and create network\n",
312 | "workspace.RunNetOnce(train_model.param_init_net)\n",
313 | "workspace.CreateNet(train_model.net, overwrite=True)"
314 | ]
315 | },
316 | {
317 | "cell_type": "markdown",
318 | "metadata": {},
319 | "source": [
320 | "### Test Net"
321 | ]
322 | },
323 | {
324 | "cell_type": "code",
325 | "execution_count": 9,
326 | "metadata": {},
327 | "outputs": [
328 | {
329 | "name": "stderr",
330 | "output_type": "stream",
331 | "text": [
332 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n",
333 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n",
334 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n",
335 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n",
336 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n"
337 | ]
338 | },
339 | {
340 | "data": {
341 | "text/plain": [
342 | "True"
343 | ]
344 | },
345 | "execution_count": 9,
346 | "metadata": {},
347 | "output_type": "execute_result"
348 | }
349 | ],
350 | "source": [
351 | "# Test Net: DBInput ==> Predict Net ==> Accuracy\n",
352 | "with core.DeviceScope(device_opts):\n",
353 | " test_model = model_helper.ModelHelper(name=\"policy_test\", arg_scope=arg_scope, init_params=False)\n",
354 | " data, label, reward = AddInput(\n",
355 | " test_model, batch_size=TEST_BATCHES,\n",
356 | " db=TEST_DATA,\n",
357 | " db_type='leveldb')\n",
358 | " predict, value = AddResNetModel(test_model, data, num_blocks=NUM_RES_BLOCKS, filters=FILTERS)\n",
359 | " softmax = AddSoftmax(test_model, predict)\n",
360 | " AddAccuracy(test_model, softmax, label)\n",
361 | "# Initialize params and create network\n",
362 | "workspace.RunNetOnce(test_model.param_init_net)\n",
363 | "workspace.CreateNet(test_model.net, overwrite=True)"
364 | ]
365 | },
366 | {
367 | "cell_type": "markdown",
368 | "metadata": {},
369 | "source": [
370 | "### Deploy Net"
371 | ]
372 | },
373 | {
374 | "cell_type": "code",
375 | "execution_count": 10,
376 | "metadata": {},
377 | "outputs": [
378 | {
379 | "name": "stderr",
380 | "output_type": "stream",
381 | "text": [
382 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n",
383 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n",
384 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n",
385 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n",
386 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n"
387 | ]
388 | },
389 | {
390 | "data": {
391 | "text/plain": [
392 | "True"
393 | ]
394 | },
395 | "execution_count": 10,
396 | "metadata": {},
397 | "output_type": "execute_result"
398 | }
399 | ],
400 | "source": [
401 | "# Deploy Net: Blob('data') ==> Predict Net ==> Blob('predict')\n",
402 | "with core.DeviceScope(device_opts):\n",
403 | " deploy_model = model_helper.ModelHelper(name=\"policy_deploy\", arg_scope=arg_scope, init_params=False)\n",
404 | " predict, value = AddResNetModel(deploy_model, 'data', num_blocks=NUM_RES_BLOCKS, filters=FILTERS)\n",
405 | " AddSoftmax(deploy_model, predict)\n",
406 | "# Initialize params and create network\n",
407 | "workspace.RunNetOnce(deploy_model.param_init_net)\n",
408 | "workspace.CreateNet(deploy_model.net, overwrite=True)"
409 | ]
410 | },
411 | {
412 | "cell_type": "markdown",
413 | "metadata": {},
414 | "source": [
415 | "## Run the training and testing\n",
416 | "### resume from last training\n",
417 | " Training a DCNN takes quite a long time. To pause and resume training, set PRE_TRAINED_ITERS so the program continues from where the previous run stopped."
418 | ]
419 | },
420 | {
421 | "cell_type": "code",
422 | "execution_count": null,
423 | "metadata": {},
424 | "outputs": [],
425 | "source": [
426 | "#import caffe2.python.predictor.predictor_exporter as pe\n",
427 | "from modelingZero import LoadParams, SaveParams\n",
428 | "# construct the model to be exported\n",
429 | "#pe_meta = pe.PredictorExportMeta(\n",
430 | "# predict_net=deploy_model.net.Proto(),\n",
431 | "# parameters=[str(b) for b in deploy_model.params], \n",
432 | "# inputs=[\"data\"],\n",
433 | "# outputs=[\"softmax\", \"value\"],\n",
434 | "#)\n",
435 | "\n",
436 | "if PRE_TRAINED_ITERS > 0:\n",
437 | " # load the predict net\n",
438 | " with core.DeviceScope(device_opts):\n",
439 | " #deploy_model.net = pe.prepare_prediction_net(os.path.join(LOAD_FOLDER, \"policy_model.minidb\"), \"minidb\")\n",
440 | " LoadParams(os.path.join(LOAD_FOLDER, \"policy_model.pb\"))\n",
441 | " print('Params loaded from {}'.format(LOAD_FOLDER))"
442 | ]
443 | },
444 | {
445 | "cell_type": "markdown",
446 | "metadata": {},
447 | "source": [
448 | "### Train the model"
449 | ]
450 | },
451 | {
452 | "cell_type": "code",
453 | "execution_count": null,
454 | "metadata": {
455 | "scrolled": true
456 | },
457 | "outputs": [],
458 | "source": [
459 | "#%%capture output # Jupyter magic command to capture the output\n",
460 | "\n",
461 | "# set the number of iterations and track the accuracy & loss\n",
462 | "accuracy = np.zeros(TRAIN_ITERS)\n",
463 | "loss = np.zeros(TRAIN_ITERS)\n",
464 | "loss1 = np.zeros(TRAIN_ITERS)\n",
465 | "loss2 = np.zeros(TRAIN_ITERS)\n",
466 | "\n",
467 | "if TRAIN_ITERS > 0:\n",
468 | " # skip the data which should not be trained again\n",
469 | " for i in range(SKIP_TRAINED_DATA):\n",
470 | " workspace.RunNet(skip_model.net)\n",
471 | " \n",
472 | " # Now, run the network \n",
473 | " for i in range(0, TRAIN_ITERS):\n",
474 | " workspace.RunNet(train_model.net)\n",
475 | " accuracy[i] = workspace.FetchBlob('accuracy')\n",
476 | " loss[i] = workspace.FetchBlob('loss')\n",
477 | " loss1[i] = workspace.FetchBlob('xent')\n",
478 | " loss2[i] = workspace.FetchBlob('msqrl2')\n",
479 | " # checkpoint every 10000 iterations\n",
480 | " if i > 0 and i % 10000 == 0:\n",
481 | " if not os.path.exists(SAVE_FOLDER):\n",
482 | " os.makedirs(SAVE_FOLDER)\n",
483 | " #pe.save_to_db(\"minidb\", os.path.join(SAVE_FOLDER, \"policy_model_checkpoint_{}.minidb\".format(PRE_TRAINED_ITERS+i)), pe_meta)\n",
484 | " SaveParams(deploy_model, os.path.join(SAVE_FOLDER, \"policy_model_checkpoint_{}.pb\".format(PRE_TRAINED_ITERS+i)))\n",
485 | " print('Checkpoint {} saved to {}'.format(PRE_TRAINED_ITERS+i,SAVE_FOLDER))"
486 | ]
487 | },
488 | {
489 | "cell_type": "code",
490 | "execution_count": null,
491 | "metadata": {},
492 | "outputs": [],
493 | "source": [
494 | "if TRAIN_ITERS > 0:\n",
495 | " # After the execution is done, plot the values.\n",
496 | " pyplot.plot(loss, 'b')\n",
497 | " pyplot.plot(loss1, 'darkgreen')\n",
498 | " pyplot.plot(loss2, 'lightgreen')\n",
499 | " pyplot.plot(accuracy, 'r')\n",
500 | " pyplot.legend(('Loss', 'Accuracy'), loc='upper right')"
501 | ]
502 | },
503 | {
504 | "cell_type": "markdown",
505 | "metadata": {},
506 | "source": [
507 | "### Test the model"
508 | ]
509 | },
510 | {
511 | "cell_type": "code",
512 | "execution_count": null,
513 | "metadata": {},
514 | "outputs": [],
515 | "source": [
516 | "test_accuracy = np.zeros(TEST_ITERS)\n",
517 | "for i in range(TEST_ITERS):\n",
518 | " workspace.RunNet(test_model.net)\n",
519 | " test_accuracy[i] = workspace.FetchBlob('accuracy')\n",
520 | "# After the execution is done, let's plot the values.\n",
521 | "pyplot.plot(test_accuracy, 'r')\n",
522 | "pyplot.title('Acuracy over test batches.')\n",
523 | "print('test_accuracy: %f' % test_accuracy.mean())"
524 | ]
525 | },
526 | {
527 | "cell_type": "markdown",
528 | "metadata": {},
529 | "source": [
530 | "### Save the work for future use"
531 | ]
532 | },
533 | {
534 | "cell_type": "code",
535 | "execution_count": null,
536 | "metadata": {},
537 | "outputs": [],
538 | "source": [
539 | "if TRAIN_ITERS > 0:\n",
540 | " if not os.path.exists(SAVE_FOLDER):\n",
541 | " os.makedirs(SAVE_FOLDER)\n",
542 | " # save the model to a file. Use minidb as the file format\n",
543 | " #pe.save_to_db(\"minidb\", os.path.join(SAVE_FOLDER, \"policy_model.minidb\"), pe_meta)\n",
544 | " SaveParams(deploy_model, os.path.join(SAVE_FOLDER, \"policy_model.pb\"))\n",
545 | " print('Params saved to {}'.format(SAVE_FOLDER))"
546 | ]
547 | },
548 | {
549 | "cell_type": "code",
550 | "execution_count": null,
551 | "metadata": {
552 | "collapsed": true
553 | },
554 | "outputs": [],
555 | "source": []
556 | },
557 | {
558 | "cell_type": "code",
559 | "execution_count": null,
560 | "metadata": {
561 | "collapsed": true
562 | },
563 | "outputs": [],
564 | "source": []
565 | }
566 | ],
567 | "metadata": {
568 | "kernelspec": {
569 | "display_name": "Python 2",
570 | "language": "python",
571 | "name": "python2"
572 | },
573 | "language_info": {
574 | "codemirror_mode": {
575 | "name": "ipython",
576 | "version": 2
577 | },
578 | "file_extension": ".py",
579 | "mimetype": "text/x-python",
580 | "name": "python",
581 | "nbconvert_exporter": "python",
582 | "pygments_lexer": "ipython2",
583 | "version": "2.7.12"
584 | },
585 | "toc": {
586 | "colors": {
587 | "hover_highlight": "#DAA520",
588 | "navigate_num": "#000000",
589 | "navigate_text": "#333333",
590 | "running_highlight": "#FF0000",
591 | "selected_highlight": "#FFD700",
592 | "sidebar_border": "#EEEEEE",
593 | "wrapper_background": "#FFFFFF"
594 | },
595 | "moveMenuLeft": true,
596 | "nav_menu": {
597 | "height": "30px",
598 | "width": "252px"
599 | },
600 | "navigate_menu": true,
601 | "number_sections": true,
602 | "sideBar": true,
603 | "threshold": 4,
604 | "toc_cell": false,
605 | "toc_position": {
606 | "height": "856px",
607 | "left": "0px",
608 | "right": "20px",
609 | "top": "107px",
610 | "width": "179px"
611 | },
612 | "toc_section_display": "block",
613 | "toc_window_display": true,
614 | "widenNotebook": false
615 | }
616 | },
617 | "nbformat": 4,
618 | "nbformat_minor": 1
619 | }
620 |
--------------------------------------------------------------------------------
/Mock AlphaGo Zero (3B) Reinforced Learning.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Mock AlphaGo Zero (3B) Reinforced Learning\n",
8 | "In this notebook, we will train the policy network by letting policy networks compete against each other, as described by DeepMind:"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": 1,
14 | "metadata": {},
15 | "outputs": [
16 | {
17 | "name": "stderr",
18 | "output_type": "stream",
19 | "text": [
20 | "WARNING:root:This caffe2 python run does not have GPU support. Will run in CPU only mode.\n",
21 | "WARNING:root:Debug message: No module named caffe2_pybind11_state_gpu\n"
22 | ]
23 | },
24 | {
25 | "name": "stdout",
26 | "output_type": "stream",
27 | "text": [
28 | "Running in CPU mode\n"
29 | ]
30 | }
31 | ],
32 | "source": [
33 | "import os, numpy as np\n",
34 | "from caffe2.python import core, model_helper, workspace, brew, utils\n",
35 | "from caffe2.proto import caffe2_pb2\n",
36 | "from sgfutil import BOARD_POSITION\n",
37 | "\n",
38 | "%matplotlib inline\n",
39 | "from matplotlib import pyplot\n",
40 | "\n",
41 | "# how many games will be run in one minibatch\n",
42 | "GAMES_BATCHES = 16 # [1,infinity) depends on your hardware\n",
43 | "SEARCH_WIDE = 1600 # [1, infinity) for each step, run MCTS to obtain better distribution\n",
44 | "# how many iterations for this tournament\n",
45 | "TOURNAMENT_ITERS = 1 # [1,infinity)\n",
46 | "\n",
47 | "if workspace.has_gpu_support:\n",
48 | " device_opts = core.DeviceOption(caffe2_pb2.CUDA, workspace.GetDefaultGPUID())\n",
49 | " print('Running in GPU mode on default device {}'.format(workspace.GetDefaultGPUID()))\n",
50 | "else :\n",
51 | " device_opts = core.DeviceOption(caffe2_pb2.CPU, 0)\n",
52 | " print('Running in CPU mode')\n",
53 | "\n",
54 | "arg_scope = {\"order\": \"NCHW\"}\n",
55 | "\n",
56 | "ROOT_FOLDER = os.path.join(os.path.expanduser('~'), 'python', 'tutorial_data','zero','param') # folder stores the loss/accuracy log"
57 | ]
58 | },
59 | {
60 | "cell_type": "code",
61 | "execution_count": 2,
62 | "metadata": {
63 | "collapsed": true
64 | },
65 | "outputs": [],
66 | "source": [
67 | "# Only 3 features are needed for AlphaGo Zero\n",
68 | "# 0 - Player Stone, 1 - Opponent Stone, 3 - Current Player Color\n",
69 | "DEFAULT_FEATURES = [\"board\", \"color\"]\n",
70 | "\n",
71 | "# reverse the index of player/opponent\n",
72 | "# 0,2,4,6... are player, 1,3,5,7... are opponent\n",
73 | "OPPONENT_INDEX = [1,0,3,2,5,4,7,6,9,8,11,10,13,12]"
74 | ]
75 | },
76 | {
77 | "cell_type": "markdown",
78 | "metadata": {},
79 | "source": [
80 | "We need to differentiate between the primary player and the sparring partner. The primary player will learn from the game result."
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": 3,
86 | "metadata": {},
87 | "outputs": [
88 | {
89 | "name": "stdout",
90 | "output_type": "stream",
91 | "text": [
92 | "Training model from 0 to 1 iterations\n"
93 | ]
94 | }
95 | ],
96 | "source": [
97 | "### Config for primary player\n",
98 | "PRIMARY_WORKSPACE = os.path.join(ROOT_FOLDER, 'primary')\n",
99 | "PRIMARY_RES_BLOCKS = 1 # [1,19(AlphaGo Zero),39]\n",
100 | "PRIMARY_FILTERS = 128 # [128, 192, 256(AlphaGo Zero), 384]\n",
101 | "PRIMARY_PRE_TRAINED_ITERS = 0\n",
102 | "# before training, where to load the params\n",
103 | "PRIMARY_LOAD_FOLDER = os.path.join(ROOT_FOLDER, \"RL-res={}-k={}-iter={}\"\n",
104 | " .format(PRIMARY_RES_BLOCKS,PRIMARY_FILTERS,PRIMARY_PRE_TRAINED_ITERS))\n",
105 | "BASE_LR = -0.01 # (-0.01,0) The base Learning Rate; 0 to disable it.\n",
106 | "TRAIN_BATCHES = 16 # how many samples will be trained within one mini-batch, depends on your hardware\n",
107 | "# after training, where to store the params\n",
108 | "PRIMARY_SAVE_FOLDER = os.path.join(ROOT_FOLDER, \"RL-res={}-k={}-iter={}\"\n",
109 | " .format(PRIMARY_RES_BLOCKS,PRIMARY_FILTERS,PRIMARY_PRE_TRAINED_ITERS+TOURNAMENT_ITERS))\n",
110 | "if not os.path.exists(PRIMARY_SAVE_FOLDER):\n",
111 | " os.makedirs(PRIMARY_SAVE_FOLDER)\n",
112 | "\n",
113 | "### Config for sparring partner\n",
114 | "SPARR_WORKSPACE = os.path.join(ROOT_FOLDER, 'sparring')\n",
115 | "SPARR_LOAD_FOLDER = os.path.join(ROOT_FOLDER, \"res={}-k={}-iter={}\".format(1,128,1))\n",
116 | "\n",
117 | "print('Training model from {} to {} iterations'.format(PRIMARY_PRE_TRAINED_ITERS,PRIMARY_PRE_TRAINED_ITERS+TOURNAMENT_ITERS))"
118 | ]
119 | },
120 | {
121 | "cell_type": "markdown",
122 | "metadata": {},
123 | "source": [
124 | "## AlphaGo Neural Network Architecture"
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": 4,
130 | "metadata": {
131 | "collapsed": true
132 | },
133 | "outputs": [],
134 | "source": [
135 | "from modelingZero import AddResNetModel, AddSoftmax, AddTrainingOperators"
136 | ]
137 | },
138 | {
139 | "cell_type": "markdown",
140 | "metadata": {},
141 | "source": [
142 | "## Build the actual network"
143 | ]
144 | },
145 | {
146 | "cell_type": "code",
147 | "execution_count": 5,
148 | "metadata": {
149 | "collapsed": true
150 | },
151 | "outputs": [],
152 | "source": [
153 | "import caffe2.python.predictor.predictor_exporter as pe\n",
154 | "\n",
155 | "data = np.empty(shape=(TRAIN_BATCHES,17,19,19), dtype=np.float32)\n",
156 | "expect = np.empty(shape=(TRAIN_BATCHES,362), dtype=np.float32) # expected distribution of probability\n",
157 | "reward = np.empty(shape=(TRAIN_BATCHES,), dtype=np.float32) # scalar values between [-1,1]"
158 | ]
159 | },
160 | {
161 | "cell_type": "markdown",
162 | "metadata": {},
163 | "source": [
164 | "### Primary player\n",
165 | ">Train Net: Blob('data','label') ==> Predict Net ==> Loss ==> Backward Propagation"
166 | ]
167 | },
168 | {
169 | "cell_type": "code",
170 | "execution_count": 6,
171 | "metadata": {},
172 | "outputs": [
173 | {
174 | "name": "stderr",
175 | "output_type": "stream",
176 | "text": [
177 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n",
178 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: Normalize.\n",
179 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n",
180 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: Normalize.\n",
181 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n",
182 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: Normalize.\n",
183 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: Normalize.\n",
184 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: Normalize.\n"
185 | ]
186 | }
187 | ],
188 | "source": [
189 | "workspace.SwitchWorkspace(PRIMARY_WORKSPACE, True)\n",
190 | "\n",
191 | "with core.DeviceScope(device_opts):\n",
192 | " workspace.FeedBlob(\"data\", data)\n",
193 | " workspace.FeedBlob('expect', expect)\n",
194 | " workspace.FeedBlob('reward', reward)\n",
195 | " # for learning from winner\n",
196 | " primary_train_model = model_helper.ModelHelper(name=\"primary_train_model\", arg_scope=arg_scope, init_params=True)\n",
197 | " predict, value = AddResNetModel(primary_train_model, 'data', num_blocks=PRIMARY_RES_BLOCKS, filters=PRIMARY_FILTERS)\n",
198 | " AddTrainingOperators(primary_train_model, predict, None, 'expect', value, 'reward', base_lr=BASE_LR)\n",
199 | " workspace.RunNetOnce(primary_train_model.param_init_net)\n",
200 | " workspace.CreateNet(primary_train_model.net, overwrite=True)\n",
201 | " # \n",
202 | " primary_predict_net = pe.prepare_prediction_net(os.path.join(PRIMARY_LOAD_FOLDER, \"policy_model.minidb\"), \"minidb\")"
203 | ]
204 | },
205 | {
206 | "cell_type": "markdown",
207 | "metadata": {},
208 | "source": [
209 | "Function `LearnFromWinningGames` takes the result of the tournament and trains the primary player with it."
210 | ]
211 | },
212 | {
213 | "cell_type": "code",
214 | "execution_count": 7,
215 | "metadata": {
216 | "collapsed": true
217 | },
218 | "outputs": [],
219 | "source": [
220 | "def LearnFromWinningGames(history, winner, mini_batch=TRAIN_BATCHES):\n",
221 | " data = np.empty(shape=(mini_batch,17,19,19), dtype=np.float32)\n",
222 | " label = np.empty(shape=(mini_batch,), dtype=np.int32)\n",
223 | " #iter = 0\n",
224 | " k = 0\n",
225 | " for i in range(len(winner)):\n",
226 | " #print('Learning {} steps in {} of {} games'.format(iter * TRAIN_BATCHES, i, GAMES_BATCHES))\n",
227 | " for step in history[i]:\n",
228 | " if (step[0] == 'B' and winner[i] == 'B+') or (step[0] == 'W' and winner[i] == 'W+'):\n",
229 | " data[k] = step[2]\n",
230 | " label[k] = step[1]\n",
231 | " k += 1\n",
232 | " #iter += 1\n",
233 | " if k == mini_batch:\n",
234 | " k = 0\n",
235 | " workspace.SwitchWorkspace(PRIMARY_WORKSPACE)\n",
236 | " with core.DeviceScope(device_opts):\n",
237 | " workspace.FeedBlob(\"data\", data)\n",
238 | " workspace.FeedBlob(\"label\", label)\n",
239 | " workspace.RunNet(primary_train_model.net)"
240 | ]
241 | },
242 | {
243 | "cell_type": "markdown",
244 | "metadata": {},
245 | "source": [
246 | "### Sparring partner\n",
247 | " Load on the fly"
248 | ]
249 | },
250 | {
251 | "cell_type": "markdown",
252 | "metadata": {},
253 | "source": [
254 | "## Run the tournament and training\n",
255 | ">We use a reward function $r(s)$ that is zero for all non-terminal time-steps $t < T$.\n",
256 | "The outcome $z_t = \\pm r(s_T)$ is the terminal reward at the end of the game from the perspective of the\n",
257 | "current player at time-step $t$: $+1$ for winning and $-1$ for losing. Weights are then updated at each\n",
258 | "time-step $t$ by stochastic gradient ascent in the direction that maximizes expected outcome."
259 | ]
260 | },
261 | {
262 | "cell_type": "code",
263 | "execution_count": 8,
264 | "metadata": {
265 | "collapsed": true
266 | },
267 | "outputs": [],
268 | "source": [
269 | "from go import GameState, BLACK, WHITE, EMPTY, PASS\n",
270 | "from preprocessing import Preprocess\n",
271 | "from datetime import datetime\n",
272 | "from sgfutil import GetWinner, WriteBackSGF\n",
273 | "import sgf\n",
274 | "\n",
275 | "np.random.seed(datetime.now().microsecond)\n",
276 | "\n",
277 | "# construct the model to be exported\n",
278 | "pe_meta = pe.PredictorExportMeta(\n",
279 | " predict_net=primary_predict_net.Proto(),\n",
280 | " parameters=[str(b) for b in primary_train_model.params],\n",
281 | " inputs=[\"data\"],\n",
282 | " outputs=[\"softmax\"],\n",
283 | ")"
284 | ]
285 | },
286 | {
287 | "cell_type": "code",
288 | "execution_count": 9,
289 | "metadata": {
290 | "scrolled": true
291 | },
292 | "outputs": [
293 | {
294 | "name": "stdout",
295 | "output_type": "stream",
296 | "text": [
297 | "Tournament 0 Primary(B) vs Sparring(W|policy_model.minidb) started @2017-11-01 18:30:04.795610\n",
298 | "Traceback for operator 1 in network policy_deploy_1\n"
299 | ]
300 | },
301 | {
302 | "ename": "RuntimeError",
303 | "evalue": "[enforce fail at conv_op_impl.h:46] C == filter.dim32(1) * group_. Convolution op: input channels does not match: # of input channels 4 is not equal to kernel channels * group:17*1 Error from operator: \ninput: \"pad1\" input: \"conv1_w\" input: \"conv1_b\" output: \"conv1\" name: \"\" type: \"Conv\" arg { name: \"kernel\" i: 3 } arg { name: \"exhaustive_search\" i: 0 } arg { name: \"order\" s: \"NCHW\" } device_option { device_type: 0 cuda_gpu_id: 0 } engine: \"CUDNN\"",
304 | "output_type": "error",
305 | "traceback": [
306 | "\u001b[0;31m\u001b[0m",
307 | "\u001b[0;31mRuntimeError\u001b[0mTraceback (most recent call last)",
308 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 48\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mcore\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDeviceScope\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdevice_opts\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 49\u001b[0m \u001b[0mworkspace\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mFeedBlob\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'data'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mboard\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 50\u001b[0;31m \u001b[0mworkspace\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mRunNet\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprimary_predict_net\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 51\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 52\u001b[0m \u001b[0;31m# sparring partner make move\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
309 | "\u001b[0;32m/usr/local/caffe2/python/workspace.py\u001b[0m in \u001b[0;36mRunNet\u001b[0;34m(name, num_iter, allow_fail)\u001b[0m\n\u001b[1;32m 222\u001b[0m \u001b[0mC\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mWorkspace\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcurrent\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_last_failed_op_net_position\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 223\u001b[0m \u001b[0mGetNetName\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 224\u001b[0;31m \u001b[0mStringifyNetName\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnum_iter\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mallow_fail\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 225\u001b[0m )\n\u001b[1;32m 226\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
310 | "\u001b[0;32m/usr/local/caffe2/python/workspace.py\u001b[0m in \u001b[0;36mCallWithExceptionIntercept\u001b[0;34m(func, op_id_fetcher, net_name, *args, **kwargs)\u001b[0m\n\u001b[1;32m 187\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mCallWithExceptionIntercept\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mop_id_fetcher\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnet_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 188\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 189\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 190\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 191\u001b[0m \u001b[0mop_id\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mop_id_fetcher\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
311 | "\u001b[0;31mRuntimeError\u001b[0m: [enforce fail at conv_op_impl.h:46] C == filter.dim32(1) * group_. Convolution op: input channels does not match: # of input channels 4 is not equal to kernel channels * group:17*1 Error from operator: \ninput: \"pad1\" input: \"conv1_w\" input: \"conv1_b\" output: \"conv1\" name: \"\" type: \"Conv\" arg { name: \"kernel\" i: 3 } arg { name: \"exhaustive_search\" i: 0 } arg { name: \"order\" s: \"NCHW\" } device_option { device_type: 0 cuda_gpu_id: 0 } engine: \"CUDNN\""
312 | ]
313 | }
314 | ],
315 | "source": [
316 | "for tournament in range(PRIMARY_PRE_TRAINED_ITERS, PRIMARY_PRE_TRAINED_ITERS+TOURNAMENT_ITERS):\n",
317 | "    # Every 20 tournaments, save a checkpoint of the current player (also stored as a new sparring partner)\n",
318 | " if tournament > 0 and tournament % 20 == 0:\n",
319 | " pe.save_to_db(\"minidb\", os.path.join(PRIMARY_SAVE_FOLDER, \"policy_model.minidb\"), pe_meta)\n",
320 | " print('Checkpoint saved to {}'.format(PRIMARY_SAVE_FOLDER))\n",
321 | " pe.save_to_db(\"minidb\", os.path.join(SPARR_LOAD_FOLDER, \"policy_model_RL_{}.minidb\".format(PRIMARY_PRE_TRAINED_ITERS+tournament)), pe_meta)\n",
322 | " print('Checkpoint saved to {}'.format(SPARR_LOAD_FOLDER))\n",
323 | " \n",
324 | " # Randomly change color of player\n",
325 | " PRIMARY_PLAYER = np.random.choice(['B','W'])\n",
326 | " if PRIMARY_PLAYER == 'B':\n",
327 | " SPARRING_PLAYER = 'W'\n",
328 | " else:\n",
329 | " SPARRING_PLAYER = 'B'\n",
330 | " \n",
331 | " # Randomly pickup sparring partner\n",
332 | " workspace.SwitchWorkspace(SPARR_WORKSPACE, True)\n",
333 | " sparring_param_file = np.random.choice(os.listdir(SPARR_LOAD_FOLDER))\n",
334 | " with core.DeviceScope(device_opts):\n",
335 | " sparring_predict_net = pe.prepare_prediction_net(os.path.join(SPARR_LOAD_FOLDER, sparring_param_file), \"minidb\")\n",
336 | " print('Tournament {} Primary({}) vs Sparring({}|{}) started @{}'\n",
337 | " .format(tournament, PRIMARY_PLAYER, SPARRING_PLAYER, sparring_param_file, datetime.now()))\n",
338 | "\n",
339 | " \n",
340 | " # Initialize game board and game state\n",
341 | " game_state = [ GameState() for i in range(GAMES_BATCHES) ]\n",
342 | " game_result = [0] * GAMES_BATCHES # 0 - Not Ended; BLACK - Black Wins; WHITE - White Wins\n",
343 | " p = Preprocess(DEFAULT_FEATURES) # Singleton\n",
344 | " history = [ [] for i in range(GAMES_BATCHES) ] # history[n][step] stores tuple of (player, x, y, board[n])\n",
345 | " board = None # The preprocessed board with shape Nx17x19x19\n",
346 | " \n",
347 | " # for each step in all games\n",
348 | " for step in range(0,722):\n",
349 | " \n",
350 | " # Preprocess the board\n",
351 | " board = np.concatenate([p.state_to_tensor(game_state[i]).astype(np.float32) for i in range(GAMES_BATCHES)])\n",
352 | "\n",
353 | " if step % 2 == 0:\n",
354 | " current_player = BLACK\n",
355 | " current_color = 'B'\n",
356 | " else:\n",
357 | " current_player = WHITE\n",
358 | " current_color = 'W'\n",
359 | "\n",
360 | " if step % 2 == (PRIMARY_PLAYER == 'W'): # if step %2 == 0 and Primary is Black, or vice versa.\n",
361 | " # primary player make move\n",
362 | " workspace.SwitchWorkspace(PRIMARY_WORKSPACE)\n",
363 | " with core.DeviceScope(device_opts):\n",
364 | " workspace.FeedBlob('data', board)\n",
365 | " workspace.RunNet(primary_predict_net)\n",
366 | " else:\n",
367 | " # sparring partner make move\n",
368 | " workspace.SwitchWorkspace(SPARR_WORKSPACE)\n",
369 | " with core.DeviceScope(device_opts):\n",
370 | " workspace.FeedBlob('data', board)\n",
371 | " workspace.RunNet(sparring_predict_net)\n",
372 | "\n",
373 | " predict = workspace.FetchBlob('softmax') # [0.01, 0.02, ...] in shape (N,361)\n",
374 | "\n",
375 | " for i in range(GAMES_BATCHES):\n",
376 | " if game_result[i]: # game end\n",
377 | " continue\n",
378 | " else: # game not end\n",
379 | " legal_moves = [ x*19+y for (x,y) in game_state[i].get_legal_moves(include_eyes=False)] # [59, 72, ...] in 1D\n",
380 | " if len(legal_moves) > 0: # at least 1 legal move\n",
381 | " probabilities = predict[i][legal_moves] # [0.02, 0.01, ...]\n",
382 | " # use numpy.random.choice to randomize the step,\n",
383 | " # otherwise use np.argmax to get best choice\n",
384 | " # current_choice = legal_moves[np.argmax(probabilities)]\n",
385 | " if np.sum(probabilities) > 0:\n",
386 | " current_choice = np.random.choice(legal_moves, 1, p=probabilities/np.sum(probabilities))[0]\n",
387 | " else:\n",
388 | " current_choice = np.random.choice(legal_moves, 1)[0]\n",
389 | " (x, y) = (current_choice/19, current_choice%19)\n",
390 | " history[i].append((current_color, current_choice, board[i]))\n",
391 | " game_state[i].do_move(action = (x, y), color = current_player) # End of Game?\n",
392 | " #print('game({}) step({}) {} move({},{})'.format(i, step, current_color, x, y))\n",
393 | " else:\n",
394 | " game_state[i].do_move(action = PASS, color = current_player)\n",
395 | " #print('game({}) step({}) {} PASS'.format(i, step, current_color))\n",
396 | " game_result[i] = game_state[i].is_end_of_game\n",
397 | "\n",
398 | " if np.all(game_result):\n",
399 | " break\n",
400 | " \n",
401 | " # Get the winner\n",
402 | " winner = [ GetWinner(game_state[i]) for i in range(GAMES_BATCHES) ] # B+, W+, T\n",
403 | " print('Tournament {} Finished with Primary({}) {}:{} Sparring({}) @{}'.\n",
404 | " format(tournament, PRIMARY_PLAYER, sum(np.char.count(winner, PRIMARY_PLAYER)),\n",
405 | " sum(np.char.count(winner, SPARRING_PLAYER)), SPARRING_PLAYER, datetime.now()))\n",
406 | " \n",
407 | " # Save the games(optional)\n",
408 | " for i in range(GAMES_BATCHES):\n",
409 | " filename = os.path.join(\n",
410 | " os.path.expanduser('~'), 'python', 'tutorial_files','selfplay',\n",
411 | " '({}_{}_{})vs({})_{}_{}_{}'.format(PRIMARY_CONV_LEVEL, PRIMARY_FILTERS, PRIMARY_PRE_TRAINED_ITERS+tournament,\n",
412 | " sparring_param_file, i, winner[i],\n",
413 | " datetime.now().strftime(\"%Y-%m-%dT%H:%M:%S%Z\")))\n",
414 | " WriteBackSGF(winner, history[i], filename)\n",
415 | " \n",
416 | " # After each tournament, learn from the winner\n",
417 | " if BASE_LR != 0:\n",
418 | " LearnFromWinningGames(history, winner, mini_batch=TRAIN_BATCHES)"
419 | ]
420 | },
421 | {
422 | "cell_type": "code",
423 | "execution_count": null,
424 | "metadata": {
425 | "collapsed": true
426 | },
427 | "outputs": [],
428 | "source": [
429 | "if TOURNAMENT_ITERS>0 :\n",
430 | " pe.save_to_db(\"minidb\", os.path.join(PRIMARY_SAVE_FOLDER, \"policy_model.minidb\"), pe_meta)\n",
431 | " print('Results saved to {}'.format(PRIMARY_SAVE_FOLDER))\n",
432 | " pe.save_to_db(\"minidb\", os.path.join(SPARR_LOAD_FOLDER, \"policy_model_RL_{}.minidb\".format(PRIMARY_PRE_TRAINED_ITERS+TOURNAMENT_ITERS)), pe_meta)\n",
433 | " print('Results saved to {}'.format(SPARR_LOAD_FOLDER))"
434 | ]
435 | }
436 | ],
437 | "metadata": {
438 | "kernelspec": {
439 | "display_name": "Python 2",
440 | "language": "python",
441 | "name": "python2"
442 | },
443 | "language_info": {
444 | "codemirror_mode": {
445 | "name": "ipython",
446 | "version": 2
447 | },
448 | "file_extension": ".py",
449 | "mimetype": "text/x-python",
450 | "name": "python",
451 | "nbconvert_exporter": "python",
452 | "pygments_lexer": "ipython2",
453 | "version": "2.7.12"
454 | },
455 | "toc": {
456 | "colors": {
457 | "hover_highlight": "#DAA520",
458 | "navigate_num": "#000000",
459 | "navigate_text": "#333333",
460 | "running_highlight": "#FF0000",
461 | "selected_highlight": "#FFD700",
462 | "sidebar_border": "#EEEEEE",
463 | "wrapper_background": "#FFFFFF"
464 | },
465 | "moveMenuLeft": true,
466 | "nav_menu": {
467 | "height": "315px",
468 | "width": "367px"
469 | },
470 | "navigate_menu": true,
471 | "number_sections": true,
472 | "sideBar": true,
473 | "threshold": 4,
474 | "toc_cell": false,
475 | "toc_position": {
476 | "height": "544px",
477 | "left": "0px",
478 | "right": "1723px",
479 | "top": "107px",
480 | "width": "130px"
481 | },
482 | "toc_section_display": "block",
483 | "toc_window_display": true,
484 | "widenNotebook": false
485 | }
486 | },
487 | "nbformat": 4,
488 | "nbformat_minor": 1
489 | }
490 |
--------------------------------------------------------------------------------
/Monitoring.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Monitoring Caffe2 Learning Status\n",
8 | " This notebook will help you monitoring Caffe2 learning status."
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": null,
14 | "metadata": {
15 | "collapsed": true
16 | },
17 | "outputs": [],
18 | "source": [
19 | "from matplotlib import pyplot\n",
20 | "import numpy as np\n",
21 | "import os\n",
22 | "from StringIO import StringIO\n",
23 | "\n",
24 | "# Let's show all plots inline.\n",
25 | "%matplotlib inline\n",
26 | "\n",
27 | "BASE_FOLDER = os.path.join(os.path.expanduser('~'), 'python', 'tutorial_data', 'go', 'param')"
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "metadata": {},
33 | "source": [
34 | "\n",
35 | "## Loss\n",
36 | " This program will first load Loss from log file and then plot them."
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "execution_count": null,
42 | "metadata": {
43 | "scrolled": false
44 | },
45 | "outputs": [],
46 | "source": [
47 | "loss_t = open(os.path.join(BASE_FOLDER, 'loss.log'), 'r').read()\n",
48 | "\n",
49 | "loss = np.genfromtxt(StringIO(loss_t), usecols=(7))\n",
50 | "\n",
51 | "weight = np.ones(100)/100\n",
52 | "sma = np.convolve(weight, loss)[100:-100]\n",
53 | "\n",
54 | "pyplot.plot(loss, 'b')\n",
55 | "pyplot.plot(sma, 'g')\n",
56 | "pyplot.legend(('Loss', 'SMA'), loc='upper right')\n",
57 | "\n",
58 | "nighty = len(loss)*9/10 # monitor the trend: last 10% of the data\n",
59 | "loss[nighty:].mean()"
60 | ]
61 | },
62 | {
63 | "cell_type": "markdown",
64 | "metadata": {},
65 | "source": [
66 | "## Training Accuracy\n",
67 | " This program will first load Accuracy from log file, and then plot them."
68 | ]
69 | },
70 | {
71 | "cell_type": "code",
72 | "execution_count": null,
73 | "metadata": {
74 | "scrolled": false
75 | },
76 | "outputs": [],
77 | "source": [
78 | "accuracy_t = open(os.path.join(BASE_FOLDER, 'accuracy.log'), 'r').read()\n",
79 | "#loss_t = open(os.path.join(BASE_FOLDER, 'param', 'loss.log'), 'r').read()\n",
80 | "\n",
81 | "accuracy = np.genfromtxt(StringIO(accuracy_t), usecols=(7))\n",
82 | "#loss = np.genfromtxt(StringIO(loss_t), usecols=(7))\n",
83 | "\n",
84 | "weight = np.ones(100)/100\n",
85 | "\n",
86 | "sma = np.convolve(weight, accuracy)[100:-100]\n",
87 | "\n",
88 | "#pyplot.plot(loss, 'b')\n",
89 | "pyplot.plot(accuracy, 'red')\n",
90 | "pyplot.plot(sma, 'g')\n",
91 | "pyplot.legend(('Accuracy', 'SMA'), loc='upper right')\n",
92 | "\n",
93 | "nighty = len(accuracy)*9/10 # monitor the trend: last 10% of the data\n",
94 | "accuracy[nighty:].mean()"
95 | ]
96 | },
97 | {
98 | "cell_type": "markdown",
99 | "metadata": {},
100 | "source": [
101 | "# Monitoring Hardware\n",
102 | "## CPU and Memory\n",
103 | " This program will display top processes"
104 | ]
105 | },
106 | {
107 | "cell_type": "code",
108 | "execution_count": null,
109 | "metadata": {
110 | "scrolled": true
111 | },
112 | "outputs": [],
113 | "source": [
114 | "!top -n 1"
115 | ]
116 | },
117 | {
118 | "cell_type": "markdown",
119 | "metadata": {
120 | "collapsed": true
121 | },
122 | "source": [
123 | "## GPU"
124 | ]
125 | },
126 | {
127 | "cell_type": "code",
128 | "execution_count": null,
129 | "metadata": {},
130 | "outputs": [],
131 | "source": [
132 | "!nvidia-smi"
133 | ]
134 | }
135 | ],
136 | "metadata": {
137 | "kernelspec": {
138 | "display_name": "Python 2",
139 | "language": "python",
140 | "name": "python2"
141 | },
142 | "language_info": {
143 | "codemirror_mode": {
144 | "name": "ipython",
145 | "version": 2
146 | },
147 | "file_extension": ".py",
148 | "mimetype": "text/x-python",
149 | "name": "python",
150 | "nbconvert_exporter": "python",
151 | "pygments_lexer": "ipython2",
152 | "version": "2.7.12"
153 | },
154 | "toc": {
155 | "colors": {
156 | "hover_highlight": "#DAA520",
157 | "navigate_num": "#000000",
158 | "navigate_text": "#333333",
159 | "running_highlight": "#FF0000",
160 | "selected_highlight": "#FFD700",
161 | "sidebar_border": "#EEEEEE",
162 | "wrapper_background": "#FFFFFF"
163 | },
164 | "moveMenuLeft": true,
165 | "nav_menu": {
166 | "height": "105px",
167 | "width": "252px"
168 | },
169 | "navigate_menu": true,
170 | "number_sections": true,
171 | "sideBar": true,
172 | "threshold": 4,
173 | "toc_cell": false,
174 | "toc_position": {
175 | "height": "576px",
176 | "left": "0px",
177 | "right": "auto",
178 | "top": "107px",
179 | "width": "212px"
180 | },
181 | "toc_section_display": "block",
182 | "toc_window_display": false,
183 | "widenNotebook": false
184 | }
185 | },
186 | "nbformat": 4,
187 | "nbformat_minor": 2
188 | }
189 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # C2TutorialsGo
2 | This is a tutorial written for Caffe2 which mocks Google's AlphaGo Fan and AlphaGo Zero.
3 | v0.2.0 is released, with ResNet based AlphaGo Zero model.
4 |
5 | ## Installation
6 | This program so far relies on the [RocAlphaGo](https://github.com/Rochester-NRT/RocAlphaGo) Cython implementation for feature preprocessing and Go rules. Cython compilation can be done by running the shell command `python setup.py build_ext --inplace`.
7 |
8 | # New updates from AlphaGo Zero
9 | ## Preprocess
10 | Go game datasets are usually stored in the [SGF](http://www.red-bean.com/sgf/go.html) file format. We need to transform SGF files into Caffe2 Tensors. AlphaGo Zero requires 17 feature planes of 19x19 size, which do not include 'human knowledge' like Liberties or Escape.
11 | [This preprocess program](http://nbviewer.jupyter.org/github/davinwang/C2TutorialsGo/blob/master/Mock%20AlphaGo%20Zero%20%281%29%20Preprocess%20Pipeline.ipynb) still relies on [RocAlphaGo](https://github.com/Rochester-NRT/RocAlphaGo) for Go rules, but no longer depends on it for feature generation. I'm looking for a better (more accurate) Go rule implementation which can support Chinese/Korean/Japanese Go rules and different Komi; please feel free to recommend one.
12 |
13 | ## Dual Policy and Value network with ResNet
14 | The Supervised Learning program is used to evaluate whether the network architecture is correct. Due to a bug in the Caffe2 spatial_BN op, the program cannot resume from a previous run. Since each epoch requires 200~250 GPU hours, it is not viable to run it on a personal computer.
15 |
16 | | epochs | LR | loss | train/test accu | epochs | LR | loss | train/test accu |
17 | |--------|--------|--------|-----------------|--------|--------|--------|-----------------|
18 | | 0.2 | 0.1 | - | - / 0.1698 | 11 | | | / |
19 | | 0.4 | | | / | 12 | | | / |
20 | | 0.6 | | | / | 13 | | | / |
21 | | 0.8 | | | / | 14 | | | / |
22 | | 1 | | | / | 15 | | | / |
23 | | 6 | | | / | 16 | | | / |
24 | | 7 | | | / | 17 | | | / |
25 | | 8 | | | / | 18 | | | / |
26 | | 9 | | | / | 19 | | | / |
27 | | 10 | | | / | * | | | 0.60/0.57(alphago zero)|
28 |
29 | ## Reinforced Learning pipeline
30 | Ongoing. This will be different from AlphaGo Fan in many ways:
31 | 1. Always use the best primary player to generate data.
32 | 2. Before each move, do wide search to obtain better distribution than Policy predict.
33 | 3. MCTS only relies on Policy and Value network, no more Rollout.
34 | 4. more detail will be added during implementation
35 |
36 | # About AlphaGo Fan
37 | ## Preprocess
38 | Go game datasets are usually stored in the [SGF](http://www.red-bean.com/sgf/go.html) file format. We need to transform SGF files into Caffe2 Tensors, which are 48 feature planes of 19x19 size, according to [DeepMind](http://www.nature.com/nature/journal/v529/n7587/full/nature16961.html?foxtrotcallback=true).
39 | [The preprocess program](http://nbviewer.jupyter.org/github/davinwang/C2TutorialsGo/blob/master/Mock%20AlphaGo%20%281%29%20Preprocess%20Pipeline.ipynb) relies on the `Cython` implementation of the [RocAlphaGo](https://github.com/Rochester-NRT/RocAlphaGo) project for Go rules and feature plane generation. It is estimated to take 60 CPU hours to preprocess the complete KGS data set.
40 |
41 | ## Supervised Learning - Policy Network
42 | According to [DeepMind](http://www.nature.com/nature/journal/v529/n7587/full/nature16961.html?foxtrotcallback=true), AlphaGo can achieve 55.4% test accuracy after 20 epochs of training. The test set is the first 1 million steps, i.e. KGS2004. The speed of each prediction is 4.8ms (on Kepler K40 GPU).
43 | [This program](http://nbviewer.jupyter.org/github/davinwang/C2TutorialsGo/blob/master/Mock%20AlphaGo%20%282%29%20Policy%20Network.ipynb) achieves 52.83% by 11 epochs so far. The test set is the latest 1M steps, i.e. KGS201705-KGS201709. It also achieved a speed of around 4.5ms for each single prediction (on Maxwell GTX980m GPU). Therefore each epoch takes ~40 GPU hours. Running in GPU mode is around 100x faster than in CPU mode.
44 |
45 | | epochs | LR | loss | train/test accu | epochs | LR | loss | train/test accu |
46 | |--------|--------|--------|-----------------|--------|--------|--------|-----------------|
47 | | 1 | 0.003 | 1.895 | 0.4800 / 0.4724 | 11 | 0.0002 | 1.5680 | 0.5416 / 0.5283 |
48 | | 2 | 0.003 | 1.7782 | 0.5024 / 0.4912 | 12 | 0.0001 | 1.5639 | 0.5424 / 0.5291 |
49 | | 3 | 0.002 | 1.7110 | 0.5157 / 0.5029 | 13 | | | / |
50 | | 4 | 0.002 | 1.6803 | 0.5217 / 0.5079 | 14 | | | / |
51 | | 5 | 0.002 | 1.6567 | - / 0.5119 | 15 | | | / |
52 | | 6 | 0.002 | 1.6376 | 0.5302 / 0.5146 | 16 | | | / |
53 | | 7 | 0.001 | 1.6022 | 0.5377 / 0.5202 | 17 | | | / |
54 | | 8 | 0.0005 | 1.5782 | - / 0.5273 | 18 | | | / |
55 | | 9 | 0.0005 | 1.6039 | 0.5450 / 0.5261 | 19 | | | / |
56 | | 10 | 0.0002 | 1.5697 | 0.5447 / 0.5281 | 20 | | | 0.569/0.554(alphago)|
57 |
58 | > The training accuracy records of epochs 5 and 8 were lost.
59 | > Intel Broadwell CPU can provide around 30 GFlops compute power per core. Nvidia Kepler K40 and Maxwell GTX980m GPU can provide around 3 TFlops compute power.
60 |
61 | ## Reinforced Learning - Policy Network
62 | [The RL program](http://nbviewer.jupyter.org/github/davinwang/C2TutorialsGo/blob/master/Mock%20AlphaGo%20%283B%29%20Policy%20Network%20-%20Reinforced%20Learning%20in%20mass%20production.ipynb) is runnable now but still under evaluation. For now it also relies on the RocAlphaGo project for Go rules. A new program is under construction that implements the first 12 features in GPU mode to replace RocAlphaGo. It is expected to be at least 10x faster than RocAlphaGo (Python implementation).
63 |
64 | ## Supervised Learning - Value Network
65 | TBD. Depends on Reinforced Learning to generate 30 million games, picking 1 state from each game.
66 |
67 | ## Supervised Learning - Fast Rollout
68 | TBD. AlphaGo achieved 24.2% accuracy at a speed of 2us.
69 |
70 | ## MCTS
71 | tbd. Depends on Fast Rollout.
72 |
--------------------------------------------------------------------------------
/RocAlphaGo/go.pxd:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | cimport numpy as np
3 | from go_data cimport *
4 |
5 |
6 | cdef class GameState:
7 |
8 |     ############################################################################
9 |     # variables declarations #
10 |     # #
11 |     ############################################################################
12 |
13 |     # number of locations along one side of the board
14 |     cdef char size
15 |     # number of locations on the board, size * size
16 |     cdef short board_size
17 |
18 |     # possible ko location
19 |     cdef short ko
20 |
21 |     # list with all groups
22 |     cdef Groups_List *groups_list
23 |     # pointer to the empty group
24 |     cdef Group *group_empty
25 |
26 |     # list representing board locations as groups
27 |     # a Group contains all group stone locations and group liberty locations
28 |     cdef Group **board_groups
29 |
30 |     cdef char player_current
31 |     cdef char player_opponent
32 |
33 |     # number of black stones captured
34 |     cdef short capture_black
35 |     # number of white stones captured
36 |     cdef short capture_white
37 |
38 |     # number of passes by black
39 |     cdef short passes_black
40 |     # number of passes by white
41 |     cdef short passes_white
42 |
43 |     # list with move history
44 |     cdef Locations_List *moves_history
45 |
46 |     # list with legal moves
47 |     cdef Locations_List *moves_legal
48 |
49 |     # pointers to the neighbor lookup arrays
50 |     cdef short *neighbor
51 |     cdef short *neighbor3x3
52 |     cdef short *neighbor12d
53 |
54 |     # zobrist hash of the current position and the lookup table it is built from
55 |     cdef unsigned long long zobrist_current
56 |     cdef unsigned long long *zobrist_lookup
57 |
58 |     cdef bint enforce_superko
59 |     cdef set previous_hashes
60 |
61 |     ############################################################################
62 |     # init functions #
63 |     # #
64 |     ############################################################################
65 |
66 |     cdef void initialize_new(self, char size)
67 |     """
68 |         initialize this state as an empty state
69 |     """
70 |
71 |     cdef void initialize_duplicate(self, GameState copyState)
72 |     """
73 |         initialize all variables as a copy of copyState
74 |     """
75 |
76 |
77 |     ############################################################################
78 |     # private cdef functions used for game-play #
79 |     # #
80 |     ############################################################################
81 |
82 |     cdef void update_hash(self, short location, char colour)
83 |     """
84 |         xor current hash with location + colour action value
85 |     """
86 |
87 |     cdef bint is_positional_superko(self, short location, Group **board)
88 |     """
89 |         NOTE(review): judging by the name this checks whether a play at location
90 |         would violate positional superko; the text that used to be here described
91 |         move-history extraction instead - confirm against go.pyx
92 |     """
93 |
94 |     cdef bint is_legal_move(self, short location, Group **board, short ko)
95 |     """
96 |         check if playing at location is a legal move to make
97 |     """
98 |
99 |     cdef bint is_legal_move_superko(self, short location, Group **board, short ko)
100 |     """
101 |         check if playing at location is a legal move (presumably incl. superko, confirm)
102 |     """
103 |
104 |     cdef bint has_liberty_after(self, short location, Group **board)
105 |     """
106 |         check if a play at location results in an alive group
107 |         - has liberty
108 |         - connects to group with >= 2 liberty
109 |         - captures enemy group
110 |     """
111 |
112 |     cdef short calculate_board_location(self, char x, char y)
113 |     """
114 |         return location on board
115 |         no checks on outside board
116 |         x = columns
117 |         y = rows
118 |     """
119 |
120 |     cdef tuple calculate_tuple_location(self, short location)
121 |     """
122 |         return location on board as a tuple
123 |         no checks on outside board
124 |     """
125 |
126 |     cdef void set_moves_legal_list(self, Locations_List *moves_legal)
127 |     """
128 |         generate moves_legal list
129 |     """
130 |
131 |     cdef void combine_groups(self, Group* group_keep, Group* group_remove, Group **board)
132 |     """
133 |         combine group_keep and group_remove and replace group_remove on the board
134 |     """
135 |
136 |     cdef void remove_group(self, Group* group_remove, Group **board, short* ko)
137 |     """
138 |         remove group from board -> set all locations to group_empty
139 |     """
140 |
141 |     cdef void add_to_group(self, short location, Group **board, short* ko, short* count_captures)
142 |     """
143 |         check if a stone on location is connected to a group, kills a group
144 |         or is a new group on the board
145 |     """
146 |
147 |     ############################################################################
148 |     # private cdef functions used for feature generation #
149 |     # #
150 |     ############################################################################
151 |
152 |     cdef long generate_12d_hash(self, short centre)
153 |     """
154 |         generate 12d hash around centre location
155 |     """
156 |
157 |     cdef long generate_3x3_hash(self, short centre)
158 |     """
159 |         generate 3x3 hash around centre location
160 |     """
161 |
162 |     cdef void get_group_after_pointer(self, short* stones, short* liberty, short* capture, char* locations, char* captures, short location)
163 |     cdef void get_group_after(self, char* groups_after, char* locations, char* captures, short location)
164 |     """
165 |         groups_after is a board_size * 3 array representing STONES, LIBERTY, CAPTURE for every location
166 |
167 |         calculate group after a play on location and set
168 |         groups_after[ location * 3 + 0 ] to stone count
169 |         groups_after[ location * 3 + 1 ] to liberty count
170 |         groups_after[ location * 3 + 2 ] to capture count
171 |     """
172 |
173 |     cdef bint is_true_eye(self, short location, Locations_List* eyes, char owner)
174 |     """
175 |         check if location is a real eye
176 |     """
177 |
178 |     ############################################################################
179 |     # private cdef Ladder functions #
180 |     # #
181 |     ############################################################################
182 |
183 |     """
184 |         Ladder evaluation consumes a lot of time duplicating data, the original
185 |         version (still can be found in go_python.py) made a copy of the whole
186 |         GameState for every move played.
187 |
188 |         This version only duplicates self.board_groups (so the list with pointers to groups)
189 |         the add_ladder_move plays a move like the add_to_group function but it
190 |         does not change the original groups and creates a list with groups removed
191 |
192 |         with this groups removed list undo_ladder_move will return the board state to
193 |         be the same as before add_ladder_move was called
194 |
195 |         get_removed_groups and unremove_group are being used by add/undo_ladder_move
196 |
197 |         nb.
198 |         duplicating self.board_groups is not strictly necessary but
199 |         it is safer to do so in a threaded environment. as soon as mcts is
200 |         implemented this duplication could be removed if the mcts ensures a
201 |         GameState is not accessed while performing a ladder evaluation
202 |
203 |         TODO validate no changes are being made!
204 |
205 |         TODO self.player colour is used, should become a pointer
206 |     """
207 |
208 |     cdef Groups_List* add_ladder_move(self, short location, Group **board, short* ko)
209 |     """
210 |         create a new group for location move and add all connected groups to it
211 |
212 |         similar to add_to_group except no groups are changed or killed and a list
213 |         with groups removed is returned so the board can be restored to original
214 |         position
215 |     """
216 |
217 |     cdef void remove_ladder_group(self, Group* group_remove, Group **board, short* ko)
218 |     """
219 |         remove group from board -> set all locations to group_empty
220 |         does not update zobrist hash
221 |     """
222 |
223 |     cdef void undo_ladder_move(self, short location, Groups_List* removed_groups, short removed_ko, Group **board, short* ko)
224 |     """
225 |         use removed_groups list to return board state to be the same as before
226 |         add_ladder_move was used (removed_ko: presumably the ko value to restore, confirm)
227 |     """
228 |
229 |     cdef void unremove_group(self, Group* group_remove, Group **board)
230 |     """
231 |         unremove group from board
232 |         loop over all stones in this group and set board to group_unremove
233 |         remove liberty from neighbor locations
234 |     """
235 |
236 |     cdef dict get_capture_moves(self, Group* group, char color, Group **board)
237 |     """
238 |         create a dict with all moves that capture a group surrounding group
239 |     """
240 |
241 |     cdef void get_removed_groups(self, short location, Groups_List* removed_groups, Group **board, short* ko)
242 |     """
243 |         create a new group for location move and add all connected groups to it
244 |
245 |         similar to add_to_group except no groups are changed or killed
246 |         all changes to the board are stored in removed_groups
247 |     """
248 |
249 |     cdef bint is_ladder_escape_move(self, Group **board, short* ko, Locations_List *list_ko, short location_group, dict capture, short location, int maxDepth, char colour_group, char colour_chase)
250 |     """
251 |         play a ladder move on location, check if group has escaped,
252 |         if the group has 2 liberty it is undetermined ->
253 |         try to capture it by playing at both liberty
254 |     """
255 |
256 |     cdef bint is_ladder_capture_move(self, Group **board, short* ko, Locations_List *list_ko, short location_group, dict capture, short location, int maxDepth, char colour_group, char colour_chase)
257 |     """
258 |         play a ladder move on location, try capture and escape moves
259 |         and see if the group is able to escape ladder
260 |     """
261 |
262 |     ############################################################################
263 |     # public cdef functions used by preprocessing #
264 |     # #
265 |     ############################################################################
266 |
267 |     cdef char* get_groups_after(self)
268 |     """
269 |         return a char array of size board_size * 3 representing
270 |         STONES, LIBERTY, CAPTURE for every board location
271 |
272 |         max count values are 100
273 |
274 |         loop over all legal moves and determine stone count, liberty count and
275 |         capture count of a play on that location
276 |     """
277 |
278 |     cdef long get_hash_12d(self, short centre)
279 |     """
280 |         return hash for 12d star pattern around location
281 |     """
282 |
283 |     cdef long get_hash_3x3(self, short location)
284 |     """
285 |         return 3x3 pattern hash + current player
286 |     """
287 |
288 |     cdef char* get_ladder_escapes(self, int maxDepth)
289 |     """
290 |         return char array with size board_size
291 |         every location represents a location on the board where:
292 |         _FREE = no ladder escape
293 |         _STONE = ladder escape
294 |     """
295 |
296 |     cdef char* get_ladder_captures(self, int maxDepth)
297 |     """
298 |         return char array with size board_size
299 |         every location represents a location on the board where:
300 |         _FREE = no ladder capture
301 |         _STONE = ladder capture
302 |     """
303 |
304 |     ############################################################################
305 |     # public cdef functions used for game play #
306 |     # #
307 |     ############################################################################
308 |
309 |     cdef void add_move(self, short location)
310 |     """
311 |         !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
312 |         Move should be legal!
313 |         !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
314 |
315 |         play move on location, move should be legal!
316 |
317 |         update player_current, history and moves_legal
318 |     """
319 |
320 |     cdef GameState new_state_add_move(self, short location)
321 |     """
322 |         copy this gamestate and play move at location
323 |     """
324 |
325 |     cdef float get_score(self, float komi)
326 |     """
327 |         Calculate score of board state. Uses 'Area scoring'.
328 |
329 |         http://senseis.xmp.net/?Passing#1
330 |
331 |         negative value indicates black win
332 |         positive value indicates white win
333 |     """
334 |
335 |     cdef char get_winner_colour(self, float komi)
336 |     """
337 |         Calculate score of board state and return player ID (1, -1, or 0 for tie)
338 |         corresponding to winner. Uses 'Area scoring'.
339 |
340 |         http://senseis.xmp.net/?Passing#1
341 |     """
342 |
343 |     ############################################################################
344 |     # public def functions used for game play (Python) #
345 |     # #
346 |     ############################################################################
347 |
348 |     cdef Locations_List* get_sensible_moves(self)
349 |     """
350 |         only used for def get_legal_moves
351 |     """
352 |
--------------------------------------------------------------------------------
/RocAlphaGo/go_data.pxd:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | cimport numpy as np
3 |
4 | ############################################################################
5 | # constants #
6 | # #
7 | ############################################################################
8 |
9 | # TODO find out whether these are really used as compile-time constants
10 |
11 | # value for PASS move
12 | cdef char _PASS
13 |
14 | # ordering to rely on: stones > EMPTY
15 | #                      border < EMPTY
16 | # so do NOT test for a stone with != EMPTY, that also matches border locations
17 | cdef char _BORDER
18 | cdef char _EMPTY
19 | cdef char _WHITE
20 | cdef char _BLACK
21 |
22 | # markers used for group stone, liberty locations, legal move and eye locations
23 | cdef char _FREE
24 | cdef char _STONE
25 | cdef char _LIBERTY
26 | cdef char _CAPTURE
27 | cdef char _LEGAL
28 | cdef char _EYE
29 |
30 | # value used to generate pattern hashes
31 | cdef char _HASHVALUE
32 |
33 |
34 | ############################################################################
35 | # Structs #
36 | # #
37 | ############################################################################
38 |
39 | """
40 | a struct has the advantage of being completely C, no python wrapper so
41 | no python overhead.
42 |
43 | compared to a cdef class a struct has some advantages:
44 | - C only, no python overhead
45 | - able to get a pointer to it
46 | - smaller in size
47 |
48 | drawbacks
49 | - have to be Malloc created and freed after use -> memory leak
50 | - no convenient functions available
51 | - no boundchecks
52 | """
53 |
54 | """
55 |     struct to store group stone and liberty locations
56 |
57 |     locations is a char pointer array of size board_size and initialized
58 |     to _FREE. after adding a stone/liberty that location is set to
59 |     _STONE/_LIBERTY and count_stones/count_liberty is incremented
60 |
61 |     note that a stone location can never be a liberty location,
62 |     if a stone is placed on a liberty location count_liberty is decremented
63 |
64 |     it works as a dictionary so lookup time for a location is O(1)
65 |     looping over all stone/liberty locations could be optimized by adding
66 |     two lists containing stone/liberty locations
67 |
68 |     TODO check if this dictionary implementation is faster on average
69 |          than a two-list implementation
70 | """
71 | cdef struct Group:
72 |     char *locations
73 |     short count_stones
74 |     short count_liberty
75 |     char colour
76 |
77 | """
78 |     struct to store a list of Group pointers
79 |
80 |     board_groups is a Group pointer array with room for #size entries,
81 |     of which the first #count_groups are in use
82 |
83 |     TODO convert to c++ list?
84 | """
85 | cdef struct Groups_List:
86 |     Group **board_groups
87 |     short count_groups
88 |     short size
89 |
90 | """
91 |     struct to store a list of short (board locations)
92 |
93 |     locations is a short pointer array with room for #size entries,
94 |     of which the first #count are in use
95 |
96 |     TODO convert to c++ list and/or set
97 | """
98 | cdef struct Locations_List:
99 |     short *locations
100 |     short count
101 |     short size
102 |
103 |
104 | ############################################################################
105 | # group functions #
106 | # #
107 | ############################################################################
108 |
109 | cdef Group* group_new(char colour, short size)
110 | """
111 |     allocate a new struct Group of the given colour with both counts set to 0
112 |     locations is #size chars long and every entry is initialized to FREE
113 | """
114 |
115 | cdef Group* group_duplicate(Group* group, short size)
116 | """
117 |     allocate a new struct Group copying counts, colour and #size locations of group
118 | """
119 |
120 | cdef void group_destroy(Group* group)
121 | """
122 |     free memory of group.locations and of group itself, a NULL group is ignored
123 | """
124 |
125 | cdef void group_add_stone(Group* group, short location)
126 | """
127 |     update location as STONE, stone count is incremented for a FREE or LIBERTY
128 |     location and liberty count is decremented if it was a LIBERTY location
129 |
130 |     n.b. stone count is not incremented if a stone was present already
131 | """
132 |
133 | cdef void group_remove_stone(Group* group, short location)
134 | """
135 |     update location as FREE and decrement stone count
136 |     only if it was a STONE location, otherwise nothing changes
137 | """
138 |
139 | cdef short group_location_stone(Group* group, short size)
140 | """
141 |     return first location within the first #size entries where a STONE is located
142 | """
143 |
144 | cdef void group_add_liberty(Group* group, short location)
145 | """
146 |     update location as LIBERTY
147 |     and increment liberty count, only if it was a FREE location
148 |
149 |     n.b. nothing changes if a stone or a liberty was present already
150 | """
151 |
152 | cdef void group_remove_liberty(Group* group, short location)
153 | """
154 |     update location as FREE
155 |     and decrement liberty count, only if it was a LIBERTY location
156 |
157 |     n.b. nothing changes if location is a FREE or a STONE location
158 | """
159 |
160 | cdef short group_location_liberty(Group* group, short size)
161 | """
162 |     return first location within the first #size entries where a LIBERTY is located
163 | """
164 |
165 | ############################################################################
166 | # Groups_List functions #
167 | # #
168 | ############################################################################
169 |
170 | cdef Groups_List* groups_list_new(short size)
171 | """
172 |     create new struct Groups_List
173 |     with room for #size Group pointers and count_groups set to 0
174 | """
175 |
176 | cdef void groups_list_add(Group* group, Groups_List* groups_list)
177 | """
178 |     add group to list and increment groups count
179 | """
180 |
181 | cdef void groups_list_add_unique(Group* group, Groups_List* groups_list)
182 | """
183 |     check if group is already in the list, return if so
184 |     add group to the list if not
185 | """
186 |
187 | cdef void groups_list_remove(Group* group, Groups_List* groups_list)
188 | """
189 |     remove group from list and decrement groups count
190 | """
191 |
192 | ############################################################################
193 | # Locations_List functions #
194 | # #
195 | ############################################################################
196 |
197 | cdef Locations_List* locations_list_new(short size)
198 | """
199 |     create new struct Locations_List
200 |     with room for #size shorts and count set to 0
201 | """
202 |
203 | cdef void locations_list_destroy(Locations_List* locations_list)
204 | """
205 |     free memory of locations_list and of its locations array
206 | """
207 |
208 | cdef void locations_list_remove_location(Locations_List* locations_list, short location)
209 | """
210 |     remove location from list
211 | """
212 |
213 | cdef void locations_list_add_location(Locations_List* locations_list, short location)
214 | """
215 |     add location to list and increment count
216 | """
217 |
218 | cdef void locations_list_add_location_increment(Locations_List* locations_list, short location)
219 | """
220 |     check if list can hold one more location, resize list if not
221 |     add location to list and increment count
222 | """
223 |
224 | cdef void locations_list_add_location_unique(Locations_List* locations_list, short location)
225 | """
226 |     check if location is present in list, return if so
227 |     add location to list if not
228 | """
229 |
230 | ############################################################################
231 | # neighbor creation functions #
232 | # #
233 | ############################################################################
234 |
235 | cdef short calculate_board_location(char x, char y, char size)
236 | """
237 |     return location on board
238 |     no checks on outside board
239 |     x = columns
240 |     y = rows
241 | """
242 |
243 | cdef short calculate_board_location_or_border(char x, char y, char size)
244 | """
245 |     return location on board or border location
246 |     board locations = [ 0, size * size)
247 |     border location = size * size
248 |     x = columns
249 |     y = rows
250 | """
251 |
252 | cdef short* get_neighbors(char size)
253 | """
254 |     create array for every board location with all 4 direct neighbour locations
255 |     neighbor order: left - right - above - below
256 |
257 |     -1     x
258 |          x   x
259 |     +1     x
260 |
261 |     order:
262 |     -1     2
263 |          0   1
264 |     +1     3
265 |
266 |     TODO neighbors is obsolete as neighbor3x3 contains the same values
267 | """
268 |
269 | cdef short* get_3x3_neighbors(char size)
270 | """
271 |     create for every board location array with all 8 surrounding neighbour locations
272 |     neighbor order: above middle - middle left - middle right - below middle
273 |                     above left - above right - below left - below right
274 |     this order is more useful as it separates neighbors and then diagonals
275 |     -1    xxx
276 |           x x
277 |     +1    xxx
278 |
279 |     order:
280 |     -1    405
281 |           1 2
282 |     +1    637
283 |
284 |     0-3 contains neighbors
285 |     4-7 contains diagonals
286 | """
287 |
288 | cdef short* get_12d_neighbors(char size)
289 | """
290 |     create array for every board location with 12d star neighbour locations
291 |     neighbor order: top star tip
292 |                     above left - above middle - above right
293 |                     left star tip - left - right - right star tip
294 |                     below left - below middle - below right
295 |                     below star tip
296 |
297 |     -2      x
298 |     -1     xxx
299 |           xx xx
300 |     +1     xxx
301 |     +2      x
302 |
303 |     order:
304 |     -2      0
305 |     -1     123
306 |           45 67
307 |     +1     89a
308 |     +2      b
309 | """
310 |
311 | ############################################################################
312 | # zobrist creation functions #
313 | # #
314 | ############################################################################
315 |
316 |
317 | cdef unsigned long long* get_zobrist_lookup(char size)
318 | """
319 |     NOTE(review): presumably creates the zobrist hash lookup array for a size * size board - confirm in go_data.pyx
320 | """
321 |
--------------------------------------------------------------------------------
/RocAlphaGo/go_data.pyx:
--------------------------------------------------------------------------------
1 | cimport cython
2 | import numpy as np
3 | cimport numpy as np
4 | from libc.stdlib cimport malloc, free, realloc
5 | from libc.string cimport memcpy, memset, memchr
6 |
7 | """
8 | Future speedups: right now the usage of C dicts and lists is copied from the original
9 | Java implementation, and not all usages have been tested for maximum performance.
10 |
11 | Possible speedups could come from swapping certain dicts for lists and vice versa;
12 | more testing should be done to find out where this applies.
13 |
14 | some notes:
15 | - using list for Group stone&liberty locations?
16 | - do we need to consider 25*25 boards?
17 | - dict for moves_legal instead of list?
18 | - create mixed short+char arrays to store location+value in one array?
19 | - implement dict+list struct to get fast lookup and fast looping over all elements
20 | - store one liberty&stone location in group for fast lookup of group location/liberty
21 | - implement faster loop over all elements for dict using memchr and offset pointer
22 | """
23 |
24 | ############################################################################
25 | # constants #
26 | # #
27 | ############################################################################
28 |
29 |
30 | # value for PASS move
31 | _PASS = -1
32 |
33 | # ordering to rely on: stones > EMPTY
34 | #                      border < EMPTY
35 | # so do NOT test for a stone with != EMPTY, that also matches border locations
36 | _BORDER = 1
37 | _EMPTY = 2
38 | _WHITE = 3
39 | _BLACK = 4
40 |
41 | # markers used for group stone, liberty locations, legal move and sensible move
42 | _FREE = 3
43 | _STONE = 0
44 | _LIBERTY = 1
45 | _CAPTURE = 2
46 | _LEGAL = 4
47 | _EYE = 5
48 |
49 | # value used to generate pattern hashes
50 | _HASHVALUE = 33
51 |
52 |
53 | ############################################################################
54 | # Structs #
55 | # #
56 | ############################################################################
57 |
58 | """ -> structs, declared in go_data.pxd
59 |
60 | # a struct has the advantage of being completely C, no python wrapper so
61 | # no python overhead.
62 | #
63 | # compared to a cdef class a struct has some advantages:
64 | # - C only, no python overhead
65 | # - able to get a pointer to it
66 | # - smaller in size
67 | #
68 | # drawbacks
69 | # - have to be Malloc created and freed after use -> memory leak
70 | # - no convenient functions available
71 | # - no boundchecks
72 |
73 |
74 | # struct to store group stone and liberty locations
75 | #
76 | # locations is a char pointer array of size board_size and initialized
77 | # to _FREE. after adding a stone/liberty that location is set to
78 | # _STONE/_LIBERTY and count_stones/count_liberty is incremented
79 | #
80 | # note that a stone location can never be a liberty location,
81 | # if a stone is placed on a liberty location liberty_count is decremented
82 | #
83 | # it works as a dictionary so lookup time for a location is O(1)
84 | # looping over all stone/liberty location could be optimized by adding
85 | # two lists containing stone/liberty locations
86 | #
87 | # TODO check if this dictionary implementation is faster on average
88 | # use as a two list implementation
89 |
90 | cdef struct Group:
91 | char *locations
92 | short count_stones
93 | short count_liberty
94 | char colour
95 |
96 |
97 | # struct to store a list of Group
98 | #
99 | # board_groups is a Group pointer array of size #size and containing
100 | # #count_groups groups
101 | #
102 | # TODO convert to c++ list?
103 |
104 | cdef struct Groups_List:
105 | Group **board_groups
106 | short count_groups
107 | short size
108 |
109 |
110 | # struct to store a list of short (board locations)
111 | #
112 | # locations is a short pointer array of size #size and containing
113 | # #count locations
114 |
115 | TODO convert to c++ list and/or set
116 |
117 | cdef struct Locations_List:
118 | short *locations
119 | short count
120 | short size
121 | """
122 |
123 | ############################################################################
124 | # group functions #
125 | # #
126 | ############################################################################
127 |
128 |
129 | @cython.boundscheck(False)
130 | @cython.wraparound(False)
131 | cdef Group* group_new(char colour, short size):
132 |     """
133 |        create new struct Group of the given colour, both counts set to 0,
134 |        with locations #size char long initialized to FREE
135 |        raises MemoryError if an allocation fails (nothing is leaked in that case)
136 |     """
137 |
138 |     # malloc returns void*, Cython needs an explicit cast to the target pointer type
139 |     cdef Group *group = <Group *>malloc(sizeof(Group))
140 |     if not group:
141 |         raise MemoryError()
142 |
143 |     # allocate memory for array locations, one char per location
144 |     group.locations = <char *>malloc(size)
145 |     if not group.locations:
146 |         # release the half-built struct before reporting the failure
147 |         free(group)
148 |         raise MemoryError()
149 |
150 |     # set counts to 0 and colour to colour
151 |     group.count_stones = 0
152 |     group.count_liberty = 0
153 |     group.colour = colour
154 |
155 |     # initialize locations with FREE
156 |     memset(group.locations, _FREE, size)
157 |     return group
158 |
159 |
160 | @cython.boundscheck(False)
161 | @cython.wraparound(False)
162 | cdef Group* group_duplicate(Group* group, short size):
163 |     """
164 |        create new struct Group initialized as a duplicate of group:
165 |        counts, colour and the first #size bytes of locations are copied
166 |        raises MemoryError if an allocation fails (nothing is leaked in that case)
167 |     """
168 |     # malloc returns void*, Cython needs an explicit cast to the target pointer type
169 |     cdef Group *duplicate = <Group *>malloc(sizeof(Group))
170 |     if not duplicate:
171 |         raise MemoryError()
172 |
173 |     # the duplicate gets its own locations array, nothing is shared with group
174 |     duplicate.locations = <char *>malloc(size)
175 |     if not duplicate.locations:
176 |         # release the half-built struct before reporting the failure
177 |         free(duplicate)
178 |         raise MemoryError()
179 |
180 |     # set counts and colour values
181 |     duplicate.count_stones = group.count_stones
182 |     duplicate.count_liberty = group.count_liberty
183 |     duplicate.colour = group.colour
184 |
185 |     # duplicate locations array in memory, memcpy is optimized to do this quickly
186 |     memcpy(duplicate.locations, group.locations, size)
187 |
188 |     return duplicate
189 |
190 |
191 | @cython.boundscheck(False)
192 | @cython.wraparound(False)
193 | cdef void group_destroy(Group* group):
194 |     """
195 |        release the memory held by group: its locations array and the struct itself
196 |        a NULL group is silently ignored
197 |     """
198 |
199 |     # nothing to release
200 |     if group is NULL:
201 |         return
202 |
203 |     # the locations array belongs to the group, release it first
204 |     if group.locations is not NULL:
205 |         free(group.locations)
206 |
207 |     # now the struct itself can go
208 |     free(group)
209 |
210 |
211 | @cython.boundscheck(False)
212 | @cython.wraparound(False)
213 | cdef void group_add_stone(Group* group, short location):
214 |     """
215 |        place a stone on location
216 |        stone count goes up when the location was FREE or a LIBERTY,
217 |        liberty count goes down when the location was a LIBERTY
218 |
219 |        n.b. counts are left untouched if a stone was present already
220 |     """
221 |
222 |     # remember what the location held before the stone is placed
223 |     cdef char previous = group.locations[ location ]
224 |
225 |     if previous == _FREE or previous == _LIBERTY:
226 |         group.count_stones += 1
227 |
228 |     # an occupied liberty is not a liberty anymore
229 |     if previous == _LIBERTY:
230 |         group.count_liberty -= 1
231 |
232 |     # whatever was there, the location is a STONE now
233 |     group.locations[ location ] = _STONE
234 |
235 |
236 | @cython.boundscheck(False)
237 | @cython.wraparound(False)
238 | cdef void group_remove_stone(Group* group, short location):
239 |     """
240 |        take the stone on location away: the location becomes FREE and
241 |        stone count is decremented, nothing happens without a stone on location
242 |     """
243 |
244 |     # no stone here, location and count stay as they are
245 |     if group.locations[ location ] != _STONE:
246 |         return
247 |
248 |     group.locations[ location ] = _FREE
249 |     group.count_stones -= 1
250 |
251 |
252 | @cython.boundscheck(False)
253 | @cython.wraparound(False)
254 | cdef short group_location_stone(Group* group, short size):
255 |     """
256 |        return first location where a STONE is located, -1 if there is no stone
257 |     """
258 |     # memchr searches at most #size continuous bytes starting at group.locations
259 |     # and returns a pointer to the first byte equal to _STONE, or NULL if none is found
260 |     cdef char *found = <char *>memchr(group.locations, _STONE, size)
261 |     if found is NULL:
262 |         return -1
263 |     # subtracting the array start from the found pointer gives the location
264 |     return <short>(found - group.locations)
265 |
266 |
267 | @cython.boundscheck(False)
268 | @cython.wraparound(False)
269 | cdef void group_add_liberty(Group* group, short location):
270 |     """
271 |        mark location as a LIBERTY of this group and increment liberty count
272 |        only a FREE location can become a liberty
273 |
274 |        n.b. nothing changes if a stone or a liberty was present already
275 |     """
276 |
277 |     # anything but FREE: leave location and count untouched
278 |     if group.locations[ location ] != _FREE:
279 |         return
280 |
281 |     group.locations[ location ] = _LIBERTY
282 |     group.count_liberty += 1
283 |
284 |
@cython.boundscheck(False)
@cython.wraparound(False)
cdef void group_remove_liberty(Group* group, short location):
    """
       drop a liberty of this group

       when #location is a LIBERTY it is marked FREE and the liberty count
       is lowered by one

       n.b. a location that is not a LIBERTY is not changed and the
            liberty count stays the same
    """

    # only an existing liberty can be removed
    if group.locations[ location ] != _LIBERTY:
        return

    # set location to FREE, decrement liberty count
    group.locations[ location ] = _FREE
    group.count_liberty -= 1
301 |
302 |
@cython.boundscheck(False)
@cython.wraparound(False)
cdef short group_location_liberty(Group* group, short size):
    """
       return first location where a LIBERTY is located

       group : group whose locations array is searched
       size  : number of consecutive bytes of #group.locations to search,
               starting at location 0

       returns the index of the first byte equal to _LIBERTY, or -1 when none
       of the first #size bytes is a liberty
    """

    # memchr is an in-memory search function: it scans at most #size
    # consecutive bytes starting at #group.locations and returns a pointer to
    # the first byte equal to _LIBERTY, or NULL when no such byte exists
    # it returns void*, so an explicit cast is needed before pointer arithmetic
    cdef char* first = <char*>group.locations
    cdef char* hit = <char*>memchr(first, _LIBERTY, size)

    # no liberty found, a NULL pointer must not be used to calculate a location
    if hit is NULL:
        return -1

    # the distance between the hit and the start of the array is the location
    return <short>(hit - first)
316 |
317 |
318 | ############################################################################
319 | # Groups_List functions #
320 | # #
321 | ############################################################################
322 |
323 |
@cython.boundscheck(False)
@cython.wraparound(False)
cdef Groups_List* groups_list_new(short size):
    """
       create new struct Groups_List
       with room for #size Group* entries and count_groups set to 0

       the entries of board_groups are not initialised

       raises MemoryError when one of the two allocations fails, in that
       case nothing allocated by this function stays behind
       NOTE(review): whether the MemoryError reaches the caller depends on
       the exception declaration of this function in the .pxd - confirm
    """

    cdef Groups_List* list_new

    # malloc returns void*, cython needs an explicit cast to the target type
    list_new = <Groups_List*>malloc(sizeof(Groups_List))
    if not list_new:
        raise MemoryError()

    list_new.board_groups = <Group**>malloc(size * sizeof(Group*))
    if not list_new.board_groups:

        # release the struct itself, it would leak otherwise
        free(list_new)
        raise MemoryError()

    list_new.count_groups = 0

    return list_new
345 |
346 |
@cython.boundscheck(False)
@cython.wraparound(False)
cdef void groups_list_add(Group* group, Groups_List* groups_list):
    """
       append group to the list and raise the groups count by one

       n.b. no check is done on duplicates or on available space
    """

    # first unused slot is at index count_groups
    cdef int slot = groups_list.count_groups

    groups_list.board_groups[ slot ] = group
    groups_list.count_groups = slot + 1
356 |
357 |
@cython.boundscheck(False)
@cython.wraparound(False)
cdef void groups_list_add_unique(Group* group, Groups_List* groups_list):
    """
       append group to the list unless the same Group pointer is already in it

       the groups count is only raised when the group is actually added
    """

    cdef int total = groups_list.count_groups
    cdef int index = 0

    # walk over all stored groups
    while index < total:

        # same pointer found, group is present already -> done
        if groups_list.board_groups[ index ] == group:
            return

        index += 1

    # group is not present, store it in the first unused slot
    groups_list.board_groups[ total ] = group
    groups_list.count_groups = total + 1
380 |
381 |
@cython.boundscheck(False)
@cython.wraparound(False)
cdef void groups_list_remove(Group* group, Groups_List* groups_list):
    """
       remove the first occurrence of group from the list and lower the
       groups count by one

       the freed slot is filled with the last group, order is not preserved
       a message is printed when the group is not in the list
    """

    cdef int index
    cdef int last = groups_list.count_groups - 1

    # walk over all stored groups
    for index in range(last + 1):

        # not the group we are looking for, try next
        if groups_list.board_groups[ index ] != group:
            continue

        # group found, move last group to this slot and shrink the list
        groups_list.board_groups[ index ] = groups_list.board_groups[ last ]
        groups_list.count_groups = last
        return

    # TODO this should not happen, create error for this??
    print("Group not found!!!!!!!!!!!!!!")
405 |
406 |
407 | ############################################################################
408 | # Locations_List functions #
409 | # #
410 | ############################################################################
411 |
412 |
@cython.boundscheck(False)
@cython.wraparound(False)
cdef Locations_List* locations_list_new(short size):
    """
       create new struct Locations_List
       with room for #size short locations, count set to 0 and size set to #size

       the entries of locations are not initialised

       raises MemoryError when one of the two allocations fails, in that
       case nothing allocated by this function stays behind
       NOTE(review): whether the MemoryError reaches the caller depends on
       the exception declaration of this function in the .pxd - confirm
    """

    cdef Locations_List* list_new

    # allocate memory for the struct
    # malloc returns void*, cython needs an explicit cast to the target type
    list_new = <Locations_List*>malloc(sizeof(Locations_List))
    if not list_new:
        raise MemoryError()

    # allocate memory for locations
    list_new.locations = <short*>malloc(size * sizeof(short))
    if not list_new.locations:

        # release the struct itself, it would leak otherwise
        free(list_new)
        raise MemoryError()

    # set count to 0
    list_new.count = 0

    # set size
    list_new.size = size

    return list_new
440 |
@cython.boundscheck(False)
@cython.wraparound(False)
cdef void locations_list_destroy(Locations_List* locations_list):
    """
       release the locations array and the Locations_List struct itself

       a NULL locations_list is accepted and ignored
    """

    # nothing to release
    if locations_list is NULL:
        return

    # release the locations array first, the struct holds the only pointer
    if locations_list.locations is not NULL:
        free(locations_list.locations)

    # release the struct
    free(locations_list)
459 |
@cython.boundscheck(False)
@cython.wraparound(False)
cdef void locations_list_remove_location(Locations_List* locations_list, short location):
    """
       remove the first occurrence of location from the list and lower
       count by one

       the freed slot is filled with the last value, order is not preserved
       a message is printed when the location is not in the list
    """

    cdef int index
    cdef int last = locations_list.count - 1

    # walk over all stored locations
    for index in range(last + 1):

        # not the location we are looking for, try next
        if locations_list.locations[ index ] != location:
            continue

        # location found, move last value to this slot and shrink the list
        locations_list.locations[ index ] = locations_list.locations[ last ]
        locations_list.count = last
        return

    # TODO this should not happen, create error for this??
    print("location not found!!!!!!!!!!!!!!")
483 |
484 |
@cython.boundscheck(False)
@cython.wraparound(False)
cdef void locations_list_add_location(Locations_List* locations_list, short location):
    """
       append location to the list and raise count by one

       n.b. no check is done on duplicates or on available space
    """

    # first unused slot is at index count
    cdef int slot = locations_list.count

    locations_list.locations[ slot ] = location
    locations_list.count = slot + 1
494 |
495 |
@cython.boundscheck(False)
@cython.wraparound(False)
cdef void locations_list_add_location_increment(Locations_List* locations_list, short location):
    """
       append location to the list and raise count by one
       the list is enlarged by 10 entries first when it is full

       when enlarging fails "MEM ERROR" is printed and MemoryError is raised,
       the list keeps its old buffer, size and count and location is not added
       NOTE(review): whether the MemoryError reaches the caller depends on
       the exception declaration of this function in the .pxd - confirm
    """

    cdef short* grown

    if locations_list.count == locations_list.size:

        # keep the realloc result in a temporary: on failure realloc returns
        # NULL and leaves the old buffer valid, overwriting the only pointer
        # to it would leak the buffer and leave size out of sync
        grown = <short*>realloc(locations_list.locations, (locations_list.size + 10) * sizeof(short))
        if not grown:
            print("MEM ERROR")
            raise MemoryError()

        # enlarging succeeded, only now update pointer and size
        locations_list.locations = grown
        locations_list.size += 10

    locations_list.locations[ locations_list.count ] = location
    locations_list.count += 1
515 |
516 |
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.nonecheck(False)
cdef void locations_list_add_location_unique(Locations_List* locations_list, short location):
    """
       append location to the list unless it is already in it

       count is only raised when the location is actually added
    """

    cdef int total = locations_list.count
    cdef int index = 0

    # walk over all stored locations
    while index < total:

        # location is present already -> done
        if locations_list.locations[ index ] == location:
            return

        index += 1

    # location is not present, store it in the first unused slot
    locations_list.locations[ total ] = location
    locations_list.count = total + 1
540 |
541 |
542 | ############################################################################
543 | # neighbor creation functions #
544 | # #
545 | ############################################################################
546 |
547 |
@cython.boundscheck(False)
@cython.wraparound(False)
cdef short calculate_board_location(char x, char y, char size):
    """
       convert a column / row pair to a board location

       x = column
       y = row

       n.b. no check is done on x or y being outside the board
    """

    # rows are stored one after another, every row is #size locations long
    cdef short row_start = y * size

    return row_start + x
560 |
561 |
@cython.boundscheck(False)
@cython.wraparound(False)
cdef short calculate_board_location_or_border(char x, char y, char size):
    """
       convert a column / row pair to a board location, every position
       outside the board maps to the single border location

       board locations = [ 0, size * size )
       border location = size * size
       x = column
       y = row
    """

    # both coordinates inside [ 0, size ) -> regular board location
    if x >= 0 and y >= 0 and x < size and y < size:
        return x + (y * size)

    # x or y is outside the board -> border location
    return size * size
581 |
582 |
@cython.boundscheck(False)
@cython.wraparound(False)
cdef short* get_neighbors(char size):
    """
       create array for every board location with all 4 direct neighbor locations
       neighbor order: left - right - above - below

       -1     x
             x x
       +1     x

       order:
       -1     2
             0 1
       +1     3

       the array holds size * size * 4 shorts, the neighbors of board location
       #loc are stored at [ loc * 4, loc * 4 + 4 )
       a neighbor outside the board is stored as the border location size * size

       the array is allocated with malloc and is not freed in this function
       raises MemoryError when the allocation fails

       TODO neighbors is obsolete as neighbor3x3 contains the same values
    """

    # create array
    # malloc returns void*, cython needs an explicit cast to the target type
    cdef short* neighbor = <short*>malloc(size * size * 4 * sizeof(short))
    if not neighbor:
        raise MemoryError()

    cdef short location
    cdef char x, y

    # add all direct neighbors to every board location
    for y in range(size):

        for x in range(size):

            # first of the 4 entries belonging to board location x + y * size
            location = (x + (y * size)) * 4
            neighbor[ location + 0 ] = calculate_board_location_or_border(x - 1, y    , size)
            neighbor[ location + 1 ] = calculate_board_location_or_border(x + 1, y    , size)
            neighbor[ location + 2 ] = calculate_board_location_or_border(x    , y - 1, size)
            neighbor[ location + 3 ] = calculate_board_location_or_border(x    , y + 1, size)

    return neighbor
622 |
@cython.boundscheck(False)
@cython.wraparound(False)
cdef short* get_3x3_neighbors(char size):
    """
       create for every board location array with all 8 surrounding neighbor locations
       neighbor order: above middle - middle left - middle right - below middle
                       above left - above right - below left - below right
       this order is more useful as it separates neighbors and then diagonals

       -1    xxx
             x x
       +1    xxx

       order:
       -1    405
             1 2
       +1    637

       0-3 contains neighbors
       4-7 contains diagonals

       the array holds size * size * 8 shorts, the neighbors of board location
       #loc are stored at [ loc * 8, loc * 8 + 8 )
       a neighbor outside the board is stored as the border location size * size

       the array is allocated with malloc and is not freed in this function
       raises MemoryError when the allocation fails
    """

    # create array
    # malloc returns void*, cython needs an explicit cast to the target type
    cdef short* neighbor3x3 = <short*>malloc(size * size * 8 * sizeof(short))
    if not neighbor3x3:
        raise MemoryError()

    cdef short location
    cdef char x, y

    # add all surrounding neighbors to every board location
    # rows in the outer loop (as in get_neighbors) so the array is filled
    # front to back, the stored values do not depend on the loop order
    for y in range(size):

        for x in range(size):

            # first of the 8 entries belonging to board location x + y * size
            location = (x + (y * size)) * 8

            # direct neighbors
            neighbor3x3[ location + 0 ] = calculate_board_location_or_border(x    , y - 1, size)
            neighbor3x3[ location + 1 ] = calculate_board_location_or_border(x - 1, y    , size)
            neighbor3x3[ location + 2 ] = calculate_board_location_or_border(x + 1, y    , size)
            neighbor3x3[ location + 3 ] = calculate_board_location_or_border(x    , y + 1, size)

            # diagonals
            neighbor3x3[ location + 4 ] = calculate_board_location_or_border(x - 1, y - 1, size)
            neighbor3x3[ location + 5 ] = calculate_board_location_or_border(x + 1, y - 1, size)
            neighbor3x3[ location + 6 ] = calculate_board_location_or_border(x - 1, y + 1, size)
            neighbor3x3[ location + 7 ] = calculate_board_location_or_border(x + 1, y + 1, size)

    return neighbor3x3
669 |
@cython.boundscheck(False)
@cython.wraparound(False)
cdef short* get_12d_neighbors(char size):
    """
       create array for every board location with 12d star neighbor locations

       -2     x
       -1    xxx
            xx xx
       +1    xxx
       +2     x

       the neighbors are stored column by column (left to right), inside a
       column from top to bottom:
       left star tip
       above left   - left         - below left
       top star tip - above middle - below middle - below star tip
       above right  - right        - below right
       right star tip

       order (a = 10, b = 11):
       -2     4
       -1    158
            02 9b
       +1    36a
       +2     7

       NOTE(review): the previous docstring described a row by row order
       ( 0 / 123 / 45 67 / 89a / b ) which does not match the indices written
       below, the code is left as is because stored pattern hashes presumably
       depend on this order - confirm which order is intended

       the array holds size * size * 12 shorts, the neighbors of board location
       #loc are stored at [ loc * 12, loc * 12 + 12 )
       a neighbor outside the board is stored as the border location size * size

       the array is allocated with malloc and is not freed in this function
       raises MemoryError when the allocation fails
    """

    # create array
    # malloc returns void*, cython needs an explicit cast to the target type
    cdef short* neighbor12d = <short*>malloc(size * size * 12 * sizeof(short))
    if not neighbor12d:
        raise MemoryError()

    cdef short location
    cdef char x, y

    # add all 12d neighbors to every board location
    for x in range(size):

        for y in range(size):

            # first of the 12 entries belonging to board location x + y * size
            location = (x + (y * size)) * 12

            # column x - 2
            neighbor12d[ location +  0 ] = calculate_board_location_or_border(x - 2, y    , size)

            # column x - 1
            neighbor12d[ location +  1 ] = calculate_board_location_or_border(x - 1, y - 1, size)
            neighbor12d[ location +  2 ] = calculate_board_location_or_border(x - 1, y    , size)
            neighbor12d[ location +  3 ] = calculate_board_location_or_border(x - 1, y + 1, size)

            # column x
            neighbor12d[ location +  4 ] = calculate_board_location_or_border(x    , y - 2, size)
            neighbor12d[ location +  5 ] = calculate_board_location_or_border(x    , y - 1, size)
            neighbor12d[ location +  6 ] = calculate_board_location_or_border(x    , y + 1, size)
            neighbor12d[ location +  7 ] = calculate_board_location_or_border(x    , y + 2, size)

            # column x + 1
            neighbor12d[ location +  8 ] = calculate_board_location_or_border(x + 1, y - 1, size)
            neighbor12d[ location +  9 ] = calculate_board_location_or_border(x + 1, y    , size)
            neighbor12d[ location + 10 ] = calculate_board_location_or_border(x + 1, y + 1, size)

            # column x + 2
            neighbor12d[ location + 11 ] = calculate_board_location_or_border(x + 2, y    , size)

    return neighbor12d
727 |
728 |
729 | ############################################################################
730 | # zobrist creation functions #
731 | # #
732 | ############################################################################
733 |
734 |
@cython.boundscheck(False)
@cython.wraparound(False)
cdef unsigned long long* get_zobrist_lookup(char size):
    """
       generate zobrist lookup array for boardsize size

       the array holds size * size * 2 random unsigned 64 bit values
       (presumably one value per board location and stone colour - confirm)

       the array is allocated with malloc and is not freed in this function
       raises MemoryError when the allocation fails
    """

    cdef unsigned long long* zobrist_lookup
    cdef int count = size * size * 2
    cdef int i

    # malloc returns void*, cython needs an explicit cast to the target type
    zobrist_lookup = <unsigned long long*>malloc(count * sizeof(unsigned long long))
    if not zobrist_lookup:
        raise MemoryError()

    # exclusive upper bound for the random values, looked up once instead of
    # once per array entry
    upper = np.iinfo(np.uint64).max

    # initialize all zobrist hash lookup values
    for i in range(count):
        zobrist_lookup[ i ] = np.random.randint(upper, dtype='uint64')

    return zobrist_lookup
--------------------------------------------------------------------------------
/RocAlphaGo/preprocessing.pxd:
--------------------------------------------------------------------------------
1 | import ast
2 | import time
3 | import numpy as np
4 | cimport numpy as np
5 | from numpy cimport ndarray
6 | from libc.stdlib cimport malloc, free
7 | from go cimport GameState
8 | from go_data cimport _BLACK, _EMPTY, _STONE, _LIBERTY, _CAPTURE, _FREE, _PASS, Group, Locations_List, locations_list_destroy, locations_list_new
9 |
# element type of the feature tensor that is created
# char works but float might be needed later
ctypedef char tensor_type

# function pointer type for the cdef feature methods of Preprocess
# arguments in order: the Preprocess instance, the GameState, the tensor
# ( 2d, contiguous in its last dimension ), a char* ( named groups_after in the
# method declarations ) and an int ( named offSet ), the result is an int
ctypedef int (*preprocess_method)(Preprocess, GameState, tensor_type[ :, ::1 ], char*, int)
16 |
17 |
cdef class Preprocess:

    # Declaration of the Preprocess extension type: its C attributes and its
    # cdef feature methods. Every feature method is declared with the same
    # argument list ( state, tensor, groups_after, offSet ) and an int result,
    # which is the signature of the preprocess_method function pointer type.

    ############################################################################
    #   variables declarations                                                 #
    #                                                                          #
    ############################################################################

    # all feature processors, stored as C array of preprocess_method pointers
    # TODO find correct type so an array can be used
    cdef preprocess_method *processors

    # list with all features used currently
    # TODO find correct type so an array can be used
    cdef list feature_list

    # output tensor size
    cdef int output_dim

    # board size
    # NOTE(review): presumably size is the board edge length and board_size
    # the number of board locations ( size * size ) - confirm in the .pyx
    cdef char size
    cdef short board_size

    # pattern dictionaries
    cdef dict pattern_nakade
    cdef dict pattern_response_12d
    cdef dict pattern_non_response_3x3

    # pattern dictionary sizes
    cdef int pattern_nakade_size
    cdef int pattern_response_12d_size
    cdef int pattern_non_response_3x3_size

    ############################################################################
    #   Tensor generating functions                                            #
    #                                                                          #
    ############################################################################

    cdef int get_board(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    """
       A feature encoding WHITE BLACK and EMPTY on separate planes.
       plane 0 always refers to the current player stones
       plane 1 to the opponent stones
       plane 2 to empty locations
    """

    cdef int get_turns_since(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    """
       A feature encoding the age of the stone at each location up to 'maximum'

       Note:
       - the [maximum-1] plane is used for any stone with age greater than or equal to maximum
       - EMPTY locations are all-zero features
    """

    cdef int get_liberties(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    """
       A feature encoding the number of liberties of the group connected to the stone at
       each location

       Note:
       - there is no zero-liberties plane; the 0th plane indicates groups in atari
       - the [maximum-1] plane is used for any stone with liberties greater than or equal to maximum
       - EMPTY locations are all-zero features
    """

    cdef int get_capture_size(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    """
       A feature encoding the number of opponent stones that would be captured by
       playing at each location, up to 'maximum'

       Note:
       - we currently *do* treat the 0th plane as "capturing zero stones"
       - the [maximum-1] plane is used for any capturable group of size
         greater than or equal to maximum-1
       - the 0th plane is used for legal moves that would not result in capture
       - illegal move locations are all-zero features
    """

    cdef int get_self_atari_size(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    """
       A feature encoding the size of the own-stone group that is put into atari by
       playing at a location

    """

    cdef int get_liberties_after(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    """
       A feature encoding what the number of liberties *would be* of the group connected to
       the stone *if* played at a location

       Note:
       - there is no zero-liberties plane; the 0th plane indicates groups in atari
       - the [maximum-1] plane is used for any stone with liberties greater than or equal to maximum
       - illegal move locations are all-zero features
    """

    cdef int get_ladder_capture(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    """
       A feature wrapping GameState.is_ladder_capture().
       check if an opponent group can be captured in a ladder
    """

    cdef int get_ladder_escape(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    """
       A feature wrapping GameState.is_ladder_escape().
       check if player_current group can escape ladder
    """

    cdef int get_sensibleness(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    """
       A move is 'sensible' if it is legal and if it does not fill the current_player's own eye
    """

    cdef int get_legal(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    """
       Zero at all illegal moves, one at all legal moves. Unlike sensibleness, no eye check is done
       not used??
    """

    cdef int zeros(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    """
       Plane filled with zeros
    """

    cdef int ones(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    """
       Plane filled with ones
    """

    cdef int colour(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    """
       Value net feature, plane with ones if active_player is black else zeros
    """

    cdef int ko(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    """
       ko feature
       NOTE(review): presumably a single plane encoding the ko location, as
       documented for ko in preprocessing_rollout.pxd - confirm
    """

    # remaining feature methods, declared here without documentation
    # NOTE(review): methods with the same names are documented in
    # preprocessing_rollout.pxd - confirm those descriptions apply here as well
    cdef int get_response(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    cdef int get_save_atari(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    cdef int get_neighbor(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    cdef int get_nakade(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    cdef int get_nakade_offset(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    cdef int get_response_12d(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    cdef int get_response_12d_offset(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    cdef int get_non_response_3x3(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    cdef int get_non_response_3x3_offset(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)

    ############################################################################
    #   public cdef function                                                   #
    #                                                                          #
    ############################################################################

    cdef np.ndarray[ tensor_type, ndim=4 ] generate_tensor(self, GameState state)
    """
       Convert a GameState to a Theano-compatible tensor
       the result is declared as 4 dimensional ndarray of tensor_type
    """
176 |
--------------------------------------------------------------------------------
/RocAlphaGo/preprocessing_rollout.pxd:
--------------------------------------------------------------------------------
1 | import ast
2 | import time
3 | import numpy as np
4 | cimport numpy as np
5 | from numpy cimport ndarray
6 | from libc.stdlib cimport malloc, free
7 | from go cimport GameState
8 | from go_data cimport _BLACK, _EMPTY, _STONE, _LIBERTY, _CAPTURE, _FREE, _PASS, _HASHVALUE, Group, Locations_List, locations_list_destroy, locations_list_new
9 |
# element type of the feature tensor that is created
# char works but float might be needed later
ctypedef char tensor_type

# function pointer type for the cdef feature methods of Preprocess
# arguments in order: the Preprocess instance, the GameState, the tensor
# ( 2d, contiguous in its last dimension ) and an int ( named offSet in the
# method declarations ), the result is an int
ctypedef int (*preprocess_method)(Preprocess, GameState, tensor_type[ :, ::1 ], int)
16 |
17 |
cdef class Preprocess:

    # Declaration of the rollout Preprocess extension type: its C attributes
    # and its cdef feature methods. Every feature method is declared with the
    # same argument list ( state, tensor, offSet ) and an int result, which is
    # the signature of the preprocess_method function pointer type.

    ############################################################################
    #   variables declarations                                                 #
    #                                                                          #
    ############################################################################

    # all feature processors, stored as C array of preprocess_method pointers
    # TODO find correct type so an array can be used
    cdef preprocess_method *processors

    # list with all features used currently
    # TODO find correct type so an array can be used
    cdef list feature_list

    # output tensor size
    cdef int output_dim

    # board size
    # board_size is the number of locations get_board loops over in the .pyx
    # NOTE(review): presumably size is the board edge length - confirm
    cdef char size
    cdef short board_size

    # pattern dictionaries
    cdef dict pattern_nakade
    cdef dict pattern_response_12d
    cdef dict pattern_non_response_3x3

    # pattern dictionary sizes
    cdef int pattern_nakade_size
    cdef int pattern_response_12d_size
    cdef int pattern_non_response_3x3_size

    ############################################################################
    #   Tensor generating functions                                            #
    #                                                                          #
    ############################################################################

    cdef int get_board(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       A feature encoding WHITE BLACK and EMPTY on separate planes.
       plane 0 always refers to the current player stones
       plane 1 to the opponent stones
       plane 2 to empty locations
    """

    cdef int get_turns_since(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       A feature encoding the age of the stone at each location up to 'maximum'

       Note:
       - the [maximum-1] plane is used for any stone with age greater than or equal to maximum
       - EMPTY locations are all-zero features
    """

    cdef int get_liberties(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       A feature encoding the number of liberties of the group connected to the stone at
       each location

       Note:
       - there is no zero-liberties plane; the 0th plane indicates groups in atari
       - the [maximum-1] plane is used for any stone with liberties greater than or equal to maximum
       - EMPTY locations are all-zero features
    """

    cdef int get_ladder_capture(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       A feature wrapping GameState.is_ladder_capture().
       check if an opponent group can be captured in a ladder
    """

    cdef int get_ladder_escape(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       A feature wrapping GameState.is_ladder_escape().
       check if player_current group can escape ladder
    """

    cdef int get_sensibleness(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       A move is 'sensible' if it is legal and if it does not fill the current_player's own eye
    """

    cdef int get_legal(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       Zero at all illegal moves, one at all legal moves. Unlike sensibleness, no eye check is done
       not used??
    """

    cdef int zeros(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       Plane filled with zeros
    """

    cdef int ones(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       Plane filled with ones
    """

    cdef int colour(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       Value net feature, plane with ones if active_player is black else zeros
    """

    cdef int ko(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       Single plane encoding ko location
    """

    cdef int get_response(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       single feature plane encoding whether this location matches any of the response
       patterns, for now it only checks the 12d response patterns as we do not use the
       3x3 response patterns.
    """

    cdef int get_save_atari(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       A feature wrapping GameState.is_ladder_escape().
       check if player_current group can escape atari for at least one turn
       NOTE(review): the first line is identical to the get_ladder_escape
       description - confirm in the .pyx which GameState call is wrapped here
    """

    cdef int get_neighbor(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       encode last move neighbor positions in two planes:
       - horizontal & vertical / direct neighbor
       - diagonal neighbor
    """

    cdef int get_nakade(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       A nakade pattern is a 12d pattern on a location a stone was captured before
       it is unclear if a max size of the captured group has to be considered and
       how recent the capture event should have been

       the 12d pattern can be encoded without stone colour and liberty count
       unclear if a border location should be considered a stone or liberty

       pattern lookup value is being set instead of 1
    """

    cdef int get_nakade_offset(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       A nakade pattern is a 12d pattern on a location a stone was captured before
       it is unclear if a max size of the captured group has to be considered and
       how recent the capture event should have been

       the 12d pattern can be encoded without stone colour and liberty count
       unclear if a border location should be considered a stone or liberty

       #pattern_id is offset
    """

    cdef int get_response_12d(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       Set 12d hash pattern for 12d shape around last move
       pattern lookup value is being set instead of 1
    """

    cdef int get_response_12d_offset(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       Set 12d hash pattern for 12d shape around last move where
       #pattern_id is offset
    """

    cdef int get_non_response_3x3(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       Set 3x3 hash pattern for every legal location where
       pattern lookup value is being set instead of 1
    """

    cdef int get_non_response_3x3_offset(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       Set 3x3 hash pattern for every legal location where
       #pattern_id is offset
    """

    ############################################################################
    #   public cdef function                                                   #
    #                                                                          #
    ############################################################################

    cdef np.ndarray[ tensor_type, ndim=4 ] generate_tensor(self, GameState state)
    """
       Convert a GameState to a Theano-compatible tensor
       the result is declared as 4 dimensional ndarray of tensor_type
    """
203 |
--------------------------------------------------------------------------------
/RocAlphaGo/preprocessing_rollout.pyx:
--------------------------------------------------------------------------------
1 | # cython: profile=True
2 | # cython: linetrace=True
3 | # cython: wraparound=False
4 | # cython: boundscheck=False
5 | # cython: initializedcheck=False
6 | cimport cython
7 | import numpy as np
8 | cimport numpy as np
9 |
10 |
11 | cdef class Preprocess:
12 |
13 | ############################################################################
14 | # all variables are declared in the .pxd file #
15 | # #
16 | ############################################################################
17 |
18 |
19 | """ -> variables, declared in preprocessing.pxd
20 |
21 | # all feature processors
22 | # TODO find correct type so an array can be used
23 | cdef list processors
24 |
25 | # list with all features used currently
26 | # TODO find correct type so an array can be used
27 | cdef list feature_list
28 |
29 | # output tensor size
30 | cdef int output_dim
31 |
32 | # board size
33 | cdef char size
34 | cdef short board_size
35 |
36 | # pattern dictionaries
37 | cdef dict pattern_nakade
38 | cdef dict pattern_response_12d
39 | cdef dict pattern_non_response_3x3
40 |
41 | # pattern dictionary sizes
42 | cdef int pattern_nakade_size
43 | cdef int pattern_response_12d_size
44 | cdef int pattern_non_response_3x3_size
45 |
46 | -> variables, declared in preprocessing.pxd
47 | """
48 |
49 |
50 | ############################################################################
51 | # Tensor generating functions #
52 | # #
53 | ############################################################################
54 |
55 |
@cython.nonecheck(False)
cdef int get_board(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
    """
       Encode the stones on the board on three consecutive planes,
       starting at plane #offSet:
       plane offSet     -> stones of the current player
       plane offSet + 1 -> stones of the opponent
       plane offSet + 2 -> empty locations

       exactly one of the three planes is set to 1 for every board location
       returns offSet + 3
    """

    cdef Group* current
    cdef char other = state.player_opponent
    cdef short position

    # visit every location on the board
    for position in range(self.board_size):

        current = state.board_groups[ position ]

        if current.colour == _EMPTY:

            # no stone at this location
            tensor[ offSet + 2, position ] = 1
        elif current.colour == other:

            # stone of the opponent
            tensor[ offSet + 1, position ] = 1
        else:

            # everything else is counted as stone of the current player
            tensor[ offSet, position ] = 1

    return offSet + 3
88 |
89 |
90 | @cython.nonecheck(False)
91 | cdef int get_turns_since(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
92 | """
93 | A feature encoding the age of the stone at each location up to 'maximum'
94 |
95 | Note:
96 | - the [maximum-1] plane is used for any stone with age greater than or equal to maximum
97 | - EMPTY locations are all-zero features
98 | """
99 | # 8 planes are used: offSet + 0..6 for ages 0..6 and offSet + 7 for every older stone
100 | cdef short location
101 | cdef Locations_List *history = state.moves_history
102 | cdef int age = offSet + 7
103 | cdef dict agesSet = {}
104 | cdef int i
105 | # agesSet is a dict used as a set of locations whose age plane has already been written
106 | # set all stones to max age
107 | for i in range(history.count):
108 |
109 | location = history.locations[ i ]
110 |
111 | if location != _PASS and state.board_groups[ location ].colour > _EMPTY:
112 |
113 | tensor[ age, location ] = 1
114 |
115 | # start with newest stone
116 | i = history.count - 1
117 | age = 0
118 |
119 | # loop over history backwards
120 | while age < 7 and i >= 0:
121 |
122 | location = history.locations[ i ]
123 |
124 | # if age has not been set yet
125 | if location != _PASS and not location in agesSet and state.board_groups[ location ].colour > _EMPTY:
126 |
127 | tensor[ offSet + age, location ] = 1
128 | tensor[ offSet + 7, location ] = 0
129 | agesSet[ location ] = location
130 | # NOTE(review): indentation is lost in this dump; the two updates below presumably run for every history entry
131 | i -= 1
132 | age += 1
133 |
134 | return offSet + 8
135 |
136 |
137 | @cython.nonecheck(False)
138 | cdef int get_liberties(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
139 | """
140 | A feature encoding the number of liberties of the group connected to the stone at
141 | each location
142 |
143 | Note:
144 | - there is no zero-liberties plane; the 0th plane indicates groups in atari
145 | - the [maximum-1] plane is used for any stone with liberties greater than or equal to maximum
146 | - EMPTY locations are all-zero features
147 | """
148 | # 8 planes are written (offSet .. offSet + 7); NOTE: `i` below is declared but never used
149 | cdef int i, groupLiberty
150 | cdef Group* group
151 | cdef short location
152 |
153 | for location in range(self.board_size):
154 |
155 | group = state.board_groups[ location ]
156 |
157 | if group.colour > _EMPTY:
158 | # relative plane index is count_liberty - 1, so a single liberty maps to plane 0
159 | groupLiberty = group.count_liberty - 1
160 |
161 | # check max liberty count
162 | if groupLiberty > 7:
163 |
164 | groupLiberty = 7
165 | # turn the relative plane index into an absolute one
166 | groupLiberty += offSet
167 |
168 | tensor[ groupLiberty, location ] = 1
169 |
170 | return offSet + 8
171 |
172 |
173 | @cython.nonecheck(False)
174 | cdef int get_ladder_capture(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
175 | """
176 | A feature wrapping GameState.get_ladder_captures().
177 | check if an opponent group can be captured in a ladder
178 | """
179 | # 80 is passed straight to get_ladder_captures; presumably a search depth limit - TODO confirm
180 | cdef int location
181 | cdef char* captures = state.get_ladder_captures(80)
182 |
183 | # loop over all locations on board
184 | for location in range(state.board_size):
185 |
186 | if captures[ location ] != _FREE:
187 |
188 | tensor[ offSet, location ] = 1
189 |
190 | # the array returned by get_ladder_captures is released here
191 | free(captures)
192 |
193 | return offSet + 1
194 |
195 |
196 | @cython.nonecheck(False)
197 | cdef int get_ladder_escape(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
198 | """
199 | A feature wrapping GameState.get_ladder_escapes().
200 | check if player_current group can escape ladder
201 | """
202 | # 80 is passed straight to get_ladder_escapes; presumably a search depth limit - TODO confirm
203 | cdef int location
204 | cdef char* escapes = state.get_ladder_escapes(80)
205 |
206 | # loop over all locations on board
207 | for location in range(state.board_size):
208 |
209 | if escapes[ location ] != _FREE:
210 |
211 | tensor[ offSet, location ] = 1
212 |
213 | # the array returned by get_ladder_escapes is released here
214 | free(escapes)
215 |
216 | return offSet + 1
217 |
218 |
219 | @cython.nonecheck(False)
220 | cdef int get_sensibleness(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
221 | """
222 | A move is 'sensible' if it is legal and if it does not fill the current_player's own eye
223 | """
224 | # writes the single plane offSet and returns offSet + 1
225 | cdef int i
226 | cdef short location
227 | cdef Group* group
228 |
229 | # set all legal moves to 1
230 | for i in range(state.moves_legal.count):
231 |
232 | tensor[ offSet, state.moves_legal.locations[ i ] ] = 1
233 |
234 | # list can increment but a big enough starting value is important
235 | cdef Locations_List* eyes = locations_list_new(15)
236 |
237 | # loop over all board groups
238 | for i in range(state.groups_list.count_groups):
239 |
240 | group = state.groups_list.board_groups[ i ]
241 |
242 | # if group is current player
243 | if group.colour == state.player_current:
244 |
245 | # loop over liberties because they are possible eyes
246 | for location in range(self.board_size):
247 |
248 | # check liberty location as possible eye
249 | if group.locations[ location ] == _LIBERTY:
250 |
251 | # check if location is an eye
252 | if state.is_true_eye(location, eyes, state.player_current):
253 | # clear the flag set by the legal-move loop above
254 | tensor[ offSet, location ] = 0
255 | # release the list created with locations_list_new above
256 | locations_list_destroy(eyes)
257 |
258 | return offSet + 1
259 |
260 |
261 | @cython.nonecheck(False)
262 | cdef int get_legal(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
263 | """
264 | Zero at all illegal moves, one at all legal moves. Unlike sensibleness, no eye check is done
265 | selected by the "legal" feature name in __init__
266 | """
267 | # NOTE: `location` is used as an index into moves_legal here, not as a board location
268 | cdef short location
269 |
270 | # loop over all legal moves and set to one
271 | for location in range(state.moves_legal.count):
272 |
273 | tensor[ offSet, state.moves_legal.locations[ location ] ] = 1
274 |
275 | return offSet + 1
276 |
277 |
278 |     @cython.nonecheck(False)
279 |     cdef int get_response(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
280 |         """
281 |         Single feature plane marking every empty location in the 12d shape around the
282 |         last move whose 12d response pattern hash is a key of self.pattern_response_12d.
283 |         The 3x3 response patterns are not checked.
284 |
285 |         Writes plane offSet and returns offSet + 1. Nothing is marked when the last
286 |         move was a pass or when no move has been played yet.
287 |         TODO decide if nakade patterns are response patterns; 12d patterns are calculated twice
288 |         """
289 |
290 |         cdef short location, location_x, location_y, last_move, last_move_x, last_move_y
291 |         cdef int i, pattern_id
292 |         cdef long hash_base, hash_pattern
293 |         cdef short *neighbor12d = state.neighbor12d
294 |         # get last move; an empty history is treated like a pass so that
295 |         # locations[ -1 ] is never read (wraparound is disabled for this file)
296 |         last_move = _PASS
297 |         if state.moves_history.count > 0:
298 |             last_move = state.moves_history.locations[ state.moves_history.count - 1 ]
299 |         if last_move != _PASS:
300 |
301 |             # get 12d pattern hash of last move location and colour
302 |             hash_base = state.get_hash_12d(last_move)
303 |
304 |             # calculate last_move x and y
305 |             last_move_x = last_move / state.size
306 |             last_move_y = last_move % state.size
307 |
308 |             # last_move location in neighbor12d array
309 |             last_move *= 12
310 |
311 |             # loop over all locations in 12d shape
312 |             for i in range(12):
313 |
314 |                 # get location
315 |                 location = neighbor12d[last_move + i]
316 |
317 |                 # check if location is empty
318 |                 if state.board_groups[ location ].colour == _EMPTY:
319 |
320 |                     # calculate location x and y
321 |                     location_x = (location / state.size) - last_move_x
322 |                     location_y = (location % state.size) - last_move_y
323 |
324 |                     # calculate 12d response pattern hash
325 |                     hash_pattern = hash_base + location_x
326 |                     hash_pattern *= _HASHVALUE
327 |                     hash_pattern += location_y
328 |
329 |                     # dictionary lookup, -1 means "no such pattern"; a bare .get()
330 |                     # returns None for a miss, which cannot be stored in a C int
331 |                     pattern_id = self.pattern_response_12d.get( hash_pattern, -1 )
332 |
333 |                     if pattern_id >= 0:
334 |                         tensor[ offSet, location ] = 1
335 |
336 |         return offSet + 1
337 |
338 |
339 | @cython.nonecheck(False)
340 | cdef int get_save_atari(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
341 | """
342 | A feature wrapping GameState.get_ladder_escapes(), called with an argument of 1.
343 | check if player_current group can escape atari for at least one turn
344 | """
345 | # writes the single plane offSet and returns offSet + 1
346 | cdef int location
347 | cdef char* escapes = state.get_ladder_escapes(1)
348 |
349 | # loop over all locations on board
350 | for location in range(state.board_size):
351 |
352 | if escapes[ location ] != _FREE:
353 |
354 | tensor[ offSet, location ] = 1
355 |
356 | # the array returned by get_ladder_escapes is released here
357 | free(escapes)
358 |
359 | return offSet + 1
360 |
361 |
362 |     @cython.nonecheck(False)
363 |     cdef int get_neighbor(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
364 |         """
365 |         encode last move neighbor positions in two planes:
366 |         - plane offSet:     horizontal & vertical / direct neighbor
367 |         - plane offSet + 1: diagonal neighbor
368 |         Only empty neighbors are marked. Returns offSet + 2.
369 |         """
370 |         cdef short location, last_move
371 |         cdef int i, plane
372 |         cdef short *neighbor3x3 = state.neighbor3x3
373 |         # get last move; an empty history is treated like a pass so that
374 |         # locations[ -1 ] is never read (wraparound is disabled for this file)
375 |         last_move = _PASS
376 |         if state.moves_history.count > 0:
377 |             last_move = state.moves_history.locations[ state.moves_history.count - 1 ]
378 |         if last_move != _PASS:
379 |
380 |             # last_move location in neighbor3x3 array
381 |             last_move *= 8
382 |
383 |             # direct neighbor plane is plane offset
384 |             plane = offSet
385 |
386 |             # loop over direct neighbor
387 |             # 0,1,2,3 are direct neighbor locations
388 |             for i in range(4):
389 |
390 |                 # get neighbor location
391 |                 location = neighbor3x3[ last_move + i ]
392 |
393 |                 # check if location is empty
394 |                 if state.board_groups[ location ].colour == _EMPTY:
395 |
396 |                     tensor[ plane, location ] = 1
397 |
398 |             # diagonal neighbor plane is plane offset + 1
399 |             plane = offSet + 1
400 |
401 |             # loop over diagonal neighbor
402 |             # 4,5,6,7 are diagonal neighbor locations
403 |             for i in range(4, 8):
404 |
405 |                 # get neighbor location
406 |                 location = neighbor3x3[ last_move + i ]
407 |
408 |                 # check if location is empty
409 |                 if state.board_groups[ location ].colour == _EMPTY:
410 |
411 |                     tensor[ plane, location ] = 1
412 |
413 |         return offSet + 2
414 |
415 |
416 | @cython.nonecheck(False)
417 | cdef int get_nakade(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
418 | """
419 | A nakade pattern is a 12d pattern on a location a stone was captured before
420 | it is unclear if a max size of the captured group has to be considered and
421 | how recent the capture event should have been
422 |
423 | the 12d pattern can be encoded without stone colour and liberty count
424 | unclear if a border location should be considered a stone or liberty
425 |
426 | pattern lookup value is being set instead of 1
427 | """
428 | # NOTE: not implemented yet - nothing is written to tensor, only the offset advances by one
429 | # TODO tensor type has to be float
430 |
431 | return offSet + 1
432 |
433 |
434 | @cython.nonecheck(False)
435 | cdef int get_nakade_offset(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
436 | """
437 | A nakade pattern is a 12d pattern on a location a stone was captured before
438 | it is unclear if a max size of the captured group has to be considered and
439 | how recent the capture event should have been
440 |
441 | the 12d pattern can be encoded without stone colour and liberty count
442 | unclear if a border location should be considered a stone or liberty
443 |
444 | #pattern_id is offset
445 | """
446 | # NOTE: not implemented yet - nothing is written, pattern_nakade_size planes are skipped
447 | return offSet + self.pattern_nakade_size
448 |
449 |
450 | @cython.nonecheck(False)
451 | cdef int get_response_12d(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
452 | """
453 | Set 12d hash pattern for 12d shape around last move
454 | pattern lookup value is being set instead of 1
455 | """
456 | # NOTE: not implemented yet - nothing is written to tensor, only the offset advances by one
457 | # get last move location
458 | # check for pass
459 |
460 | return offSet + 1
461 |
462 |
463 |     @cython.nonecheck(False)
464 |     cdef int get_response_12d_offset(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
465 |         """
466 |         Check all empty locations in a 12d shape around the last move for being a 12d response
467 |         pattern match
468 |         #pattern_id is offset
469 |
470 |         base hash is 12d pattern hash of last move location + colour
471 |         add relative position of every empty location in a 12d shape to get 12d response pattern hash
472 |
473 |               c                                  hash        x   y
474 |              ...      location a has: state.get_hash_12d(x), -1,  0
475 |             .ax..     location b has: state.get_hash_12d(x), +1, -1
476 |              ..b      location c has: state.get_hash_12d(x),  0, +2
477 |               .
478 |
479 |         12d response pattern hash value is calculated by:
480 |         ( ( hash + x ) * _HASHVALUE ) + y
481 |         """
482 |
483 |         cdef short location, location_x, location_y, last_move, last_move_x, last_move_y
484 |         cdef int i, pattern_id
485 |         cdef long hash_base, hash_pattern
486 |         cdef short *neighbor12d = state.neighbor12d
487 |         # get last move; an empty history is treated like a pass so that
488 |         # locations[ -1 ] is never read (wraparound is disabled for this file)
489 |         last_move = _PASS
490 |         if state.moves_history.count > 0:
491 |             last_move = state.moves_history.locations[ state.moves_history.count - 1 ]
492 |         if last_move != _PASS:
493 |
494 |             # get 12d pattern hash of last move location and colour
495 |             hash_base = state.get_hash_12d(last_move)
496 |
497 |             # calculate last_move x and y
498 |             last_move_x = last_move / state.size
499 |             last_move_y = last_move % state.size
500 |
501 |             # last_move location in neighbor12d array
502 |             last_move *= 12
503 |
504 |             # loop over all locations in 12d shape
505 |             for i in range(12):
506 |
507 |                 # get location
508 |                 location = neighbor12d[last_move + i]
509 |
510 |                 # check if location is empty
511 |                 if state.board_groups[ location ].colour == _EMPTY:
512 |
513 |                     # calculate location x and y
514 |                     location_x = (location / state.size) - last_move_x
515 |                     location_y = (location % state.size) - last_move_y
516 |
517 |                     # calculate 12d response pattern hash
518 |                     hash_pattern = hash_base + location_x
519 |                     hash_pattern *= _HASHVALUE
520 |                     hash_pattern += location_y
521 |
522 |                     # dictionary lookup, -1 means "no such pattern"; a bare .get()
523 |                     # returns None for a miss, which cannot be stored in a C int
524 |                     pattern_id = self.pattern_response_12d.get( hash_pattern, -1 )
525 |
526 |                     if pattern_id >= 0:
527 |                         tensor[ offSet + pattern_id, location ] = 1
528 |
529 |         return offSet + self.pattern_response_12d_size
530 |
531 |
532 | @cython.nonecheck(False)
533 | cdef int get_non_response_3x3(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
534 | """
535 | Set 3x3 hash pattern for every legal location where
536 | pattern lookup value is being set instead of 1
537 | """
538 | # NOTE: not implemented yet - nothing is written to tensor, only the offset advances by one
539 | # TODO tensor type has to be float
540 |
541 | return offSet + 1
542 |
543 |
544 |     @cython.nonecheck(False)
545 |     cdef int get_non_response_3x3_offset(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
546 |         """
547 |         For every legal location whose 3x3 hash is a key of self.pattern_non_response_3x3,
548 |         set plane offSet + pattern id. Returns offSet + self.pattern_non_response_3x3_size.
549 |         """
550 |
551 |         cdef short i, location
552 |         cdef int pattern_id
553 |
554 |         # loop over all legal moves
555 |         for i in range(state.moves_legal.count):
556 |
557 |             # get location
558 |             location = state.moves_legal.locations[ i ]
559 |             # hash lookup, -1 means "no such pattern"; a bare .get() returns None
560 |             # for a miss, which cannot be stored in a C int
561 |             pattern_id = self.pattern_non_response_3x3.get( state.get_3x3_hash( location ), -1 )
562 |
563 |             if pattern_id >= 0:
564 |                 tensor[ offSet + pattern_id, location ] = 1
565 |
566 |         return offSet + self.pattern_non_response_3x3_size
567 |
568 |
569 | @cython.nonecheck(False)
570 | cdef int zeros(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
571 | """
572 | Plane filled with zeros
573 | """
574 | # generate_tensor creates the array with np.zeros, so only one element is touched here
575 | ##########################################################
576 | # strange things happen if a function does not do anything
577 | # do not remove next line without extensive testing!!!!!!!
578 | tensor[ offSet, 0 ] = 0
579 |
580 | return offSet + 1
581 |
582 |
583 | @cython.nonecheck(False)
584 | cdef int ones(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
585 | """
586 | Plane filled with ones
587 | """
588 | # sets plane offSet to 1 at every board location and returns offSet + 1
589 | cdef short location
590 |
591 | for location in range(0, self.board_size):
592 |
593 | tensor[ offSet, location ] = 1
594 | return offSet + 1
595 |
596 |
597 | @cython.nonecheck(False)
598 | cdef int colour(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
599 | """
600 | Value net feature, plane with ones if active_player is black else zeros
601 | """
602 | # writes plane offSet only when black is to move and returns offSet + 1
603 | cdef short location
604 |
605 | # if player_current is black
606 | if state.player_current == _BLACK:
607 |
608 | for location in range(0, self.board_size):
609 |
610 | tensor[ offSet, location ] = 1
611 |
612 | return offSet + 1
613 |
614 |
615 |     @cython.nonecheck(False)
616 |     cdef int ko(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
617 |         """
618 |         Single plane encoding ko location: plane offSet is set to 1 at state.ko
619 |         unless state.ko equals _PASS. Returns offSet + 1.
620 |         """
621 |         # compare by value like every other _PASS check in this file; `is not` is an identity test
622 |         if state.ko != _PASS:
623 |             tensor[ offSet, state.ko ] = 1
624 |
625 |         return offSet + 1
626 |
627 |
628 | ############################################################################
629 | # init function #
630 | # #
631 | ############################################################################
632 |
633 |
634 |     def __init__(self, list feature_list, char size=19, dict_nakade=None, dict_3x3=None, dict_12d=None, verbose=False):
635 |         """
636 |         Load the optional pattern dictionaries and build the feature processor table.
637 |
638 |         feature_list: feature names (matched case-insensitively), applied in this order
639 |         size: board edge length; board_size is size * size
640 |         dict_nakade, dict_3x3, dict_12d: optional paths of files containing a Python
641 |         dict literal; the largest value + 1 is used as the pattern count
642 |         verbose: print how many patterns were loaded
643 |         Raises MemoryError if malloc fails and ValueError for an unknown feature name.
644 |         """
645 |         import ast  # local import: this file has no module-level "import ast"
646 |
647 |         self.size = size
648 |         self.board_size = size * size
649 |
650 |         cdef int i
651 |         cdef preprocess_method processor
652 |         # function pointer table, freed in __dealloc__; assumes the .pxd declares preprocess_method *processors
653 |         self.processors = <preprocess_method *>malloc(len(feature_list) * sizeof(preprocess_method))
654 |         if not self.processors:
655 |             raise MemoryError()
656 |
657 |         # load nakade patterns
658 |         self.pattern_nakade = {}
659 |         self.pattern_nakade_size = 0
660 |         if dict_nakade is not None:
661 |             with open(dict_nakade, 'r') as f:
662 |                 self.pattern_nakade = ast.literal_eval(f.read())
663 |                 self.pattern_nakade_size = max(self.pattern_nakade.values()) + 1
664 |
665 |         # load 12d response patterns
666 |         self.pattern_response_12d = {}
667 |         self.pattern_response_12d_size = 0
668 |         if dict_12d is not None:
669 |             with open(dict_12d, 'r') as f:
670 |                 self.pattern_response_12d = ast.literal_eval(f.read())
671 |                 self.pattern_response_12d_size = max(self.pattern_response_12d.values()) + 1
672 |
673 |         # load 3x3 non response patterns
674 |         self.pattern_non_response_3x3 = {}
675 |         self.pattern_non_response_3x3_size = 0
676 |         if dict_3x3 is not None:
677 |             with open(dict_3x3, 'r') as f:
678 |                 self.pattern_non_response_3x3 = ast.literal_eval(f.read())
679 |                 self.pattern_non_response_3x3_size = max(self.pattern_non_response_3x3.values()) + 1
680 |
681 |         if verbose:
682 |             print("loaded " + str(self.pattern_nakade_size) + " nakade patterns")
683 |             print("loaded " + str(self.pattern_response_12d_size) + " 12d patterns")
684 |             print("loaded " + str(self.pattern_non_response_3x3_size) + " 3x3 patterns")
685 |
686 |         self.feature_list = feature_list
687 |         self.output_dim = 0
688 |
689 |         # map every feature name to its processor and reserve its planes in output_dim;
690 |         # a processor has to advance the offset by exactly the number of planes reserved here
691 |         for i in range(len(feature_list)):
692 |             feat = feature_list[ i ].lower()
693 |             if feat == "board":
694 |                 processor = self.get_board
695 |                 self.output_dim += 3
696 |
697 |             elif feat == "ones":
698 |                 processor = self.ones
699 |                 self.output_dim += 1
700 |
701 |             elif feat == "turns_since":
702 |                 processor = self.get_turns_since
703 |                 self.output_dim += 8
704 |
705 |             elif feat == "liberties":
706 |                 processor = self.get_liberties
707 |                 self.output_dim += 8
708 |
709 |             elif feat == "ladder_capture":
710 |                 processor = self.get_ladder_capture
711 |                 self.output_dim += 1
712 |
713 |             elif feat == "ladder_escape":
714 |                 processor = self.get_ladder_escape
715 |                 self.output_dim += 1
716 |
717 |             elif feat == "sensibleness":
718 |                 processor = self.get_sensibleness
719 |                 self.output_dim += 1
720 |
721 |             elif feat == "zeros":
722 |                 processor = self.zeros
723 |                 self.output_dim += 1
724 |
725 |             elif feat == "legal":
726 |                 processor = self.get_legal
727 |                 self.output_dim += 1
728 |
729 |             elif feat == "response":
730 |                 processor = self.get_response
731 |                 self.output_dim += 1
732 |
733 |             elif feat == "save_atari":
734 |                 processor = self.get_save_atari
735 |                 self.output_dim += 1
736 |
737 |             elif feat == "neighbor":
738 |                 processor = self.get_neighbor
739 |                 self.output_dim += 2
740 |
741 |             elif feat == "nakade":
742 |                 processor = self.get_nakade_offset  # advances by pattern_nakade_size planes
743 |                 self.output_dim += self.pattern_nakade_size
744 |
745 |             elif feat == "response_12d":
746 |                 processor = self.get_response_12d_offset  # advances by pattern_response_12d_size planes
747 |                 self.output_dim += self.pattern_response_12d_size
748 |
749 |             elif feat == "non_response_3x3":
750 |                 processor = self.get_non_response_3x3_offset  # advances by pattern_non_response_3x3_size planes
751 |                 self.output_dim += self.pattern_non_response_3x3_size
752 |
753 |             elif feat == "color":
754 |                 processor = self.colour
755 |                 self.output_dim += 1
756 |
757 |             elif feat == "ko":
758 |                 processor = self.ko
759 |                 self.output_dim += 1
760 |             else:
761 |                 # incorrect feature input
762 |                 raise ValueError("uknown feature: %s" % feat)
763 |
764 |             self.processors[ i ] = processor
765 |
766 |
767 | def __dealloc__(self):
768 | """
769 | Prevent memory leaks by freeing all arrays created with malloc
770 | """
771 | # processors is the function pointer array allocated with malloc in __init__
772 | if self.processors is not NULL:
773 | free(self.processors)
774 |
775 | ############################################################################
776 | # public cdef function #
777 | # #
778 | ############################################################################
779 |
780 |
781 | @cython.nonecheck(False)
782 | cdef np.ndarray[ tensor_type, ndim=4 ] generate_tensor(self, GameState state):
783 | """
784 | Convert a GameState to a Theano-compatible tensor
785 | """
786 | # the result has shape (1, output_dim, size, size), see the reshape at the end
787 | cdef int i
788 | cdef preprocess_method proc
789 | # NOTE(review): dtype is fixed to np.int8 below; presumably tensor_type matches it - TODO confirm
790 | # create complete array now instead of concatenate later
791 | # TODO check if we can use a Malloc array somehow.. faster!!
792 | cdef np.ndarray[ tensor_type, ndim=2 ] np_tensor = np.zeros((self.output_dim, self.board_size), dtype=np.int8)
793 | cdef tensor_type[ :, ::1 ] tensor = np_tensor
794 |
795 | cdef int offSet = 0
796 |
797 | # loop over all processors and generate tensor
798 | for i in range(len(self.feature_list)):
799 | # every processor writes its planes starting at offSet and returns the next offset
800 | proc = self.processors[ i ]
801 | offSet = proc(self, state, tensor, offSet)
802 |
803 | # create a singleton 'batch' dimension
804 | return np_tensor.reshape((1, self.output_dim, self.size, self.size))
805 |
806 |
807 | ############################################################################
808 | # public def function (Python) #
809 | # #
810 | ############################################################################
811 |
812 |
813 | def state_to_tensor(self, GameState state):
814 | """
815 | Convert a GameState to a Theano-compatible tensor
816 | """
817 | # Python-callable wrapper around the cdef method generate_tensor
818 | return self.generate_tensor(state)
819 |
820 |
821 | def get_output_dimension(self):
822 | """
823 | return output_dim, the amount of planes an output tensor will have
824 | """
825 | # output_dim is accumulated in __init__ while the feature list is processed
826 | return self.output_dim
827 |
828 |
829 | def get_feature_list(self):
830 | """
831 | return feature list
832 | """
833 | # this is the same list object that was passed to __init__, not a copy
834 | return self.feature_list
835 |
--------------------------------------------------------------------------------
/game.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | PASS = -1
3 | EMPTY = 2
4 | WHITE = 3
5 | BLACK = 4
6 | # The table lists the planes in DEFAULT_FEATURES order; without "Player color" they add up to 48
7 | '''
8 | | Feature | # of planes | Description
9 | |-----------------|-------------|-------------------------
10 | | Stone colour | 3 | Player stone / opponent stone / empty
11 | | Ones | 1 | A constant plane filled with 1
12 | | Turns since | 8 | How many turns since a move was played
13 | | Liberties | 8 | Number of liberties (empty adjacent points)
14 | | Capture size | 8 | How many opponent stones would be captured
15 | | Self-atari size | 8 | How many of own stones would be captured
16 | | Liberties after move | 8 | Number of liberties after this move is played
17 | | Ladder capture | 1 | Whether a move at this point is a successful ladder capture
18 | | Ladder escape | 1 | Whether a move at this point is a successful ladder escape
19 | | Sensibleness | 1 | Whether a move is legal and does not fill its own eyes
20 | | Zeros | 1 | A constant plane filled with 0
21 | | Player color | 1 | Whether current player is black (only for Value Network)
22 | '''
23 | DEFAULT_FEATURES = [
24 | "board", "ones", "turns_since", "liberties", "capture_size",
25 | "self_atari_size", "liberties_after", "ladder_capture", "ladder_escape",
26 | "sensibleness", "zeros"]
27 |
28 | def UpdateGroups(model, stones, liberties, captures, data, label, player):
29 | '''
30 | stones: shape (mini_batch, 361, 361) type=BOOL
31 | | Features | # of planes |
32 | |-----------------|-------------|
33 | | Stones | 1 |
34 | | Liberties | 1 |
35 | | Captures | 1 |
36 |
37 | data: shape (mini_batch,48,19,19)
38 | label: type blob, shape (mini_batch,)
39 | player: BLACK or WHITE
40 | '''
41 | black = model.ConstantFill([], 'BLACK', shape=[1,], value=BLACK)
42 | white = model.ConstantFill([], 'WHITE', shape=[1,], value=WHITE)
43 |
44 | # group of current position = self + intersect(union of neighbor groups, self stones)
45 | g_left = momodel.Slice([stones, [label-1,0], [label-1,-1]], 'g_left') # if label-1<0 should be null
46 | g_right = momodel.Slice([stones, [label-1,0], [label-1,-1]], 'g_right') # if label+1>360 should be null
47 | g_up = model.Slice([stones, [label-19,0], [label-19,-1]], 'g_up') # if label-19<0 should be null
48 | g_down = model.Slice([stones, [label+19,0], [label+19,-1]], 'g_down') # if label+19>360 should be null
49 | # union of neighbor groups
50 | g_self = model.Or([g_left, g_right], 'g_self')
51 | g_self = model.Or([g_self, g_up], 'g_self')
52 | g_self = model.Or([g_self, g_down], 'g_self')
53 | # board0 contains all player stones
54 | board0 = model.Slice([data, [N,0,0,0], [N,0,19,19]], 'board0')
55 | g_self = model.And([g_self, board0], 'g_self')
56 | # onehot to get self
57 | onehot = model.Cast(onehot, 'onehotb', to=BOOL)
58 | # group of current position
59 | g_self = model.Or([g_self, onehotb], 'g_self')
60 |
61 | # liberties of current group = SUM(liberties of neighbor) - 4 + 2 * SUM(liberties of self)
62 | # board2 contains all empty
63 | board2 = model.Slice([data, [N,2,0,0],[N,2,19,19]], 'board2')
64 | # liberties of self can be counted from board2
65 | l_self = model.Add(board2[label-1], board2[label+1], board2[label-19], board2[label+19], 'l_self')
66 | # liberties of neighbor can be counted from liberties
67 | l_neighbor = model.Add(liberties[label-1], liberties[label+1], liberties[label-19], liberties[label+19], 'l_neighbor')
68 | l_self = 2 * l_self + l_neighbor - 4
69 |
70 | # liberties of neighbor opponent =
71 | model.Substract(neighbor,1) # only if neighbor is independant group
72 |
73 | # Captures of current move =
74 | c_self = None
75 |
76 | # all stones in current group will update
77 | indices = model.LengthsRangeFill([361,], 'indices') # [0,1,...360]
78 | indices = model.BooleanMask([indices, g_self], 'indices')
79 | #
80 | stones = model.ScatterAssign([stones, indices, slice], g_self) # update inplace
81 | liberties = model.ScatterAssign([liberties, indices, slice], l_self)
82 | captures = model.ScatterAssign([captures, indices, slice], c_self)
83 |
84 | return stones, liberties, captures
85 |
86 | def UpdateLiberties(model, groups_after, data, label, player, batch_size=64):
87 | '''
88 | groups_after: shape (mini_batch, 19x19, 19x19) type=BOOL
89 | '''
90 | neighbors = np.zeros((19,19,21,21), dtype=np.bool) # constant represents neighbors, including borders
91 | for i in range(19):
92 | for j in range(19):
93 | neighbors[i, j, i, j+1] = True # ◌ ◌ ● ◌ ◌
94 | neighbors[i, j, i+1, j] = True # ◌ ● ◌ ● ◌
95 | neighbors[i, j, i+1, j+2] = True #
96 | neighbors[i, j, i+2, j+1] = True # ◌ ◌ ● ◌ ◌
97 | # remove borders (19,19,21,21) => (19,19,19,19)
98 | neighbors = np.delete(neighbors, [0,20], axis=2)
99 | neighbors = np.delete(neighbors, [0,20], axis=3)
100 | NEIGHBORS = model.GivenTensorBoolFill([], 'neighbors', shape=[batch_size,361,361], values=neighbors) #
101 | #
102 | INDICES = model.LengthsRangeFill([361]*batch_size, 'indices') # N*[0,1,...360]
103 |
104 | current_group = model.BooleanMask([INDICES, groups_after[label]], 'current_group') # (N,361)
105 | group_neighbors = model.Or(NEIGHBORS[current_group], 'group_neighbors' ,axis=1) # (N,?)
106 | empties = model.Slice([data, [N,2,0,0],[N,2,19,19]], 'empties') # all empties on board[2]
107 | liberties_pos = model.And([group_neighbors, empties], 'liberties_pos') # (N,361)
108 | liberties_count = model.countTrue(liberties_pos, 'liberties_count', axis=1) # (N,)
109 | liberties_after = groups
110 | return liberties_after
111 |
112 | def UpdateGameStatus(model, data, predict):
113 | ''' UpdateGameStatus
114 | It does not consider symmetric, all games are treated independently.
115 | Input: data with shape (N, C, H, W)
116 | predict with shape (N, C, H, W)
117 | Output: data with shape (N, C, H, W)
118 | '''
119 | BOARD_SIZE = model.ConstantFill([], 'board_size', shape=[1,], value=361) # constant
120 | SPLIT_SIZE = model.GivenTensorIntFill([], 'split_size', shape=[15,], values=np.array([1,1,1,1,6,1,1,8,8,8,8,1,1,1,1])) # constant
121 | # the 15 split sizes add up to 48 channels and match the 15 output blobs of the Split below
122 | board0, board1, board2, ones3, \
123 | turns_since4to9, turns_since10, turns_since11, liberties12to19, \
124 | capture_size20to27, self_atari_size28to35, liberties_after36to43, \
125 | ladder_capture44, ladder_escape45, sensibleness46, zeros47 = model.Split([data, SPLIT_SIZE], \
126 | ['board0', 'board1', 'board2','ones3', \
127 | 'turns_since4to9', 'turns_since10', 'turns_since11', 'liberties12to19', \
128 | 'capture_size20to27', 'self_atari_size28to35', 'liberties_after36to43', \
129 | 'ladder_capture44', 'ladder_escape45', 'sensibleness46', 'zeros47'], \
130 | axis=1)
131 | # the top-1 entry of predict is taken as the move played in each game
132 | _topk, topk_indices = model.TopK(predict, ['_topk', 'topk_indices'], k=1) #shape=(mini_batch,1)
133 | label = model.FlattenToVec([topk_indices], ['label']) # shape=(mini_batch,)
134 | # one-hot encoding of the chosen move, reshaped into a single 19x19 plane
135 | onehot2d = model.OneHot([label, BOARD_SIZE], 'onehot2d') # shape=(mini_batch,361)
136 | onehot, _shape = model.Reshape(['onehot2d'], ['onehot', '_shape'], shape=(0,1,19,19)) #shape=(mini_batch,1,19,19)
137 |
138 | ## board
139 | # player of this turn = opponent of last turn
140 | board0n = board1
141 | # opponent of this turn = player of last turn
142 | board1n = model.Add([board0, onehot], 'board1n')
143 | # empty
144 | board2n = model.Sub([board2, onehot], 'board2n')
145 | ## ones
146 | ones3n = ones3 # all ONE
147 | ## turns since --- age the stones
148 | # for new move set age = 0
149 | turns_since4n = onehot
150 | # for age in [1..6] set age += 1
151 | turns_since5to10n = turns_since4to9
152 | # the two oldest planes are merged into the last turns-since plane
153 | turns_since11n = model.Add([ turns_since10, turns_since11], ' turns_since11n') # NOTE(review): blob name starts with a space
154 | # liberties = liberties after move of last move
155 | liberties12to19n = liberties_after36to43
156 | # TBD:
157 | capture_size20to27n = capture_size20to27
158 | # TBD:
159 | self_atari_size28to35n = self_atari_size28to35
160 | # TBD: liberties after move
161 | liberties_after36to43n = liberties_after36to43
162 | # after this move, this stone (not group) has N vacant neighbor (N=0..3)
163 | # for neighbor opponent group, minus 1 liberties
164 | # if opponent group reaches 0 liberties, remove the stones
165 | # for neighbor self group, plus N-1 liberties
166 | # TBD:
167 | ladder_capture44n = ladder_capture44
168 | ladder_escape45n = ladder_escape45
169 | sensibleness46n = board2n
170 | ## zeros
171 | zeros47n = zeros47
172 | ###
173 | data, _dim = model.Concat([board0n, board1n, board2n, ones3n, \
174 | turns_since4n, turns_since5to10n, turns_since11n, liberties12to19n, \
175 | capture_size20to27n, self_atari_size28to35n, liberties_after36to43n, \
176 | ladder_capture44n, ladder_escape45n, sensibleness46n, zeros47n], \
177 | ['data','_dim'], axis=1)
178 | return data
179 |
180 | #def InitGame(model, mini_batch=64):
181 | # ZERO = np.zeros((mini_batch,1,19,19), dtype=np.float32)
182 | # ONE = np.ones((mini_batch,1,19,19), dtype=np.float32)
183 | # init_data = np.concatenate((ZERO,ZERO,ONE,ZERO,ZERO,ZERO,ZERO,ZERO,ZERO,ZERO,ZERO,ZERO,ONE), axis=1)
184 | # workspace.FeedBlob("data", init_data)
185 | #
186 | # model = model_helper.ModelHelper(name="model", arg_scope={"order": "NCHW"}, init_params=True)
187 | # AddConvModel(model, "data", dim_in=13)
188 | # AddGamePlay(model, "data", "predict", mini_batch=mini_batch)
189 | #
190 | # workspace.RunNetOnce(model.param_init_net)
191 | # workspace.CreateNet(model.net, overwrite=True)
192 | # workspace.RunNet(model.net)
193 | #
194 | # init_move = np.reshape(workspace.FetchBlob('predict')[0], (-1)) # shape=(361,)
195 | # top_choice = np.argsort(-init_move)[0:mini_batch] # the top K step
196 | #
197 | # for i in range(mini_batch):
198 | # x = top_choice[i]/19
199 | # y = top_choice[i]%19
200 | # init_data[i,1,x,y] = 1 # opponent plus (x,y)
201 | # init_data[i,2,x,y] = 0 # empty minus (x,y)
202 | # init_data[i,4,x,y] = 1 # last 1 step plus (x,y)
203 | # init_data[i,12] = -1
204 | #
205 | # workspace.FeedBlob("data", init_data)
206 | # return data
207 |
208 | #def Symmetric(model, predict):
209 | # ''' Symmetric is optional
210 | # Input: predict with shape (N*8, C, H, W)
211 | # Output: symm_predict with shape (N*8, C, H, W)
212 | # '''
213 | # # Unify
214 | # symm0, symm1, symm2, symm3, \
215 | # symm4, symm5, symm6, symm7 = model.Split([predict], ['symm0', 'symm1', 'symm2', 'symm3',
216 | # 'symm4', 'symm5', 'symm6', 'symm7'], axis=0)
217 | # symm0u = symm0
218 | # symm1u = model.Flip(symm1, axes(3))
219 | # symm2u = model.Flip(symm2, axes=(2))
220 | # symm3u = model.Flip(symm3, axes=(2,3))
221 | # symm4u = model.Transpose(symm4, axes=(0,1,3,2))
222 | # symm5u = model.Flip(symm5, axes=(3))
223 | # symm6u = model.Flip(symm6, axes=(2))
224 | # symm7u = model.Flip(symm7, axes=(2,3))
225 | # # Average
226 | # unify_predict = model.avg(symm0r, symm1r, ... symm7r)
227 | # # Diversify
228 | # symm0d = model.Reshape(unify_predict, Nx1x19x19)
229 | # symm1d = model.Flip(symm0d, axes=(3))
230 | # symm2d = model.Flip(symm0d, axes=(2))
231 | # symm3d = model.Flip(symm0d, axes=(2,3))
232 | # symm4d = model.Transpose(symm0d, axes=(0,1,3,2))
233 | # symm5d = model.Flip(symm4d, axes=(3))
234 | # symm6d = model.Flip(symm4d, axes=(2))
235 | # symm7d = model.Flip(symm4d, axes=(2,3))
236 | # # shape(symm_predict) = [N*8,C,H,W]
237 | # symm_predict = model.concatenate(symm0, ... symm7)
238 | # return symm_predict
239 |
240 |
--------------------------------------------------------------------------------
/modeling.py:
--------------------------------------------------------------------------------
1 | from caffe2.python import core, model_helper, brew, utils
2 | from caffe2.proto import caffe2_pb2
3 |
def AddInput(model, batch_size, db, db_type):
    """Attach a database reader to `model` and return the (data, label) blobs.

    The database keeps the features as INT8 and the labels as UINT16 to save
    disk space, so both are cast after reading: features to FLOAT, labels to
    INT32 (then flattened to a vector).

    model:      ModelHelper the operators are added to
    batch_size: number of records read per mini-batch
    db:         database location handed to TensorProtosDBInput
    db_type:    database type handed to TensorProtosDBInput
    returns:    (data, label) blob references
    """
    # Read one mini-batch of raw tensors.
    raw_data, raw_label = model.TensorProtosDBInput(
        [],
        ['data_int8', 'label_uint16'],
        batch_size=batch_size,
        db=db,
        db_type=db_type)
    # Widen the compact on-disk types to what the network operators expect.
    features = model.Cast(raw_data, 'data', to=core.DataType.FLOAT)
    label_as_int32 = model.Cast(raw_label, 'label_int32', to=core.DataType.INT32)
    flat_label = model.FlattenToVec(label_as_int32, 'label')
    # Inputs take no gradient, so the backward pass is cut here (in place).
    features = model.StopGradient(features, features)
    flat_label = model.StopGradient(flat_label, flat_label)
    return features, flat_label
19 |
def AddConvModel(model, data, conv_level=13, filters=192, dim_in=48):
    """Build the convolutional policy network and return its unscaled output.

    Structure (sizes in the comments assume a 19 x 19 board, as the original
    layer comments do):
      layer 1:                  pad 2 -> 5x5 conv (dim_in -> filters) -> relu
      layers 2..conv_level-1:   pad 1 -> 3x3 conv (filters -> filters) -> relu
      last layer:               1x1 conv (filters -> 1), flattened to 'predict'

    model:      ModelHelper the operators are added to
    data:       input blob in NCHW order
    conv_level: total number of convolution layers (default 13)
    filters:    number of channels of the hidden layers (default 192)
    dim_in:     number of input channels (default 48)
    returns:    the 'predict' blob; apply a softmax to get probabilities

    NOTE: the last layer is always named 'conv13', so a conv_level above 13
    would reuse the name of a hidden layer.
    """
    # Layer 1: 48 x 19 x 19 -pad-> 48 x 23 x 23 -conv-> 192 x 19 x 19
    pad1 = model.PadImage(data, 'pad1', pad_t=2, pad_l=2, pad_b=2, pad_r=2, mode="constant", value=0.)
    conv1 = brew.conv(model, pad1, 'conv1', dim_in=dim_in, dim_out=filters, kernel=5)
    hidden = brew.relu(model, conv1, 'relu1')

    # Layer 2-12: 192 x 19 x 19 -pad-> 192 x 21 x 21 -conv-> 192 x 19 x 19
    def AddConvLevel(model, bottom, i, filters):
        pad = model.PadImage(bottom, 'pad{}'.format(i), pad_t=1, pad_l=1, pad_b=1, pad_r=1, mode="constant", value=0.)
        conv = brew.conv(model, pad, 'conv{}'.format(i), dim_in=filters, dim_out=filters, kernel=3)
        relu = brew.relu(model, conv, 'relu{}'.format(i))
        return relu

    # A single running blob is threaded through the hidden layers so that the
    # final layer still has a valid input when the loop body never runs
    # (conv_level <= 2); previously that case hit an unbound local name.
    for i in range(2, conv_level):
        hidden = AddConvLevel(model, hidden, i, filters)

    # Layer 13: 192 x 19 x 19 -conv-> 1 x 19 x 19 -softmax-> 361
    conv13 = brew.conv(model, hidden, 'conv13', dim_in=filters, dim_out=1, kernel=1)
    predict = model.Flatten(conv13, 'predict')
    return predict
38 |
def AddSoftmax(model, predict):
    """Add a softmax operator named 'softmax' on top of `predict` and return its blob."""
    return brew.softmax(model, predict, 'softmax')
42 |
def AddAccuracy(model, softmax, label, log=True):
    """Add an accuracy operator comparing `softmax` with `label`.

    When `log` is true a Print operator (to_file=1) is also added for the
    'accuracy' blob. Returns the accuracy blob.
    """
    acc_blob = brew.accuracy(model, [softmax, label], "accuracy")
    if not log:
        return acc_blob
    model.Print('accuracy', [], to_file=1)
    return acc_blob
49 |
def AddOneHot(model, label):
    """Expand integer labels into one-hot rows as part of the net itself.

    The expansion is built into the model rather than done on the fly.
    Returns the 'onehot' blob, with its gradient stopped.
    """
    cpu_option = core.DeviceOption(caffe2_pb2.CPU, 0)
    # NOTE(review): all four operators are assumed to belong to the CPU
    # device scope — confirm against the original indentation.
    with core.DeviceScope(cpu_option):
        # Constant holding the one-hot width; created once in param_init_net.
        num_classes = model.param_init_net.ConstantFill(
            [], 'BOARD_SIZE', shape=[1,], value=361)
        wide_label = model.Cast(label, 'label_int64', to=core.DataType.INT64)
        encoded = model.OneHot([wide_label, num_classes], 'onehot')  # shape=(mini_batch,361)
        encoded = model.StopGradient(encoded, encoded)
    return encoded
59 |
def AddTrainingOperators(model, predict, label, expect, base_lr, log=True):
    """Append loss, gradient and plain SGD update operators to `model`.

    predict: unscaled distribution predicted by the policy model
    label:   labelled moves; when truthy it is one-hot encoded and used as the
             target, and an accuracy operator is added as well
    expect:  expected distribution (by MCTS, or transformed from the policy
             model); used as the target only when `label` is falsy
    base_lr: base learning rate; the schedule policy is always "fixed"
    log:     when true, print the loss (and accuracy, if any) to file
    """
    # Cross-entropy against a probability target (label_prob=1) in both cases.
    if not label:
        softmax, xent = model.SoftmaxWithLoss([predict, expect], ['softmax', 'xent'], label_prob=1)
    else:
        target = AddOneHot(model, label)
        softmax, xent = model.SoftmaxWithLoss([predict, target], ['softmax', 'xent'], label_prob=1)
        AddAccuracy(model, softmax, label, log)
    loss = model.AveragedLoss(xent, "loss")

    # Back-propagate from the averaged loss.
    model.AddGradientOperators([loss])

    # Plain SGD: iteration counter, then the learning-rate blob.
    # With policy="fixed" the stepsize and gamma arguments are ignored.
    iteration = brew.iter(model, "iter")
    rate = model.LearningRate(iteration, "LR", base_lr=base_lr, policy="fixed")
    # Constant weight for the parameter term of the update; it only has to be
    # created once, hence param_init_net.
    unit = model.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)

    # In-place update of every parameter: param = param * ONE + grad * LR.
    # ModelHelper keeps the parameter -> gradient mapping in param_to_grad.
    for weight in model.params:
        model.WeightedSum([weight, unit, model.param_to_grad[weight], rate], weight)

    if log:
        model.Print('loss', [], to_file=1)
92 |
--------------------------------------------------------------------------------
/modelingZero.py:
--------------------------------------------------------------------------------
1 | from caffe2.python import core, model_helper, brew, utils, workspace
2 | from caffe2.proto import caffe2_pb2
3 |
def AddInput(model, batch_size, db, db_type):
    """Attach a database reader to `model` and return (data, label, reward).

    The database keeps the features as INT8, the labels as INT32 and the
    rewards as FLOAT to save disk space. Each blob is cast to its working
    type and detached from the backward pass.

    model:      ModelHelper the operators are added to
    batch_size: number of records read per mini-batch
    db:         database location handed to TensorProtosDBInput
    db_type:    database type handed to TensorProtosDBInput
    returns:    (data, label, reward) blob references
    """
    # Read one mini-batch of raw tensors.
    raw_data, raw_label, raw_reward = model.TensorProtosDBInput(
        [],
        ['data_int8', 'label_int32', 'reward_float'],
        batch_size=batch_size,
        db=db,
        db_type=db_type)
    # Cast every input to the type used by the network.
    features = model.Cast(raw_data, 'data', to=core.DataType.FLOAT)
    moves = model.Cast(raw_label, 'label', to=core.DataType.INT32)
    outcome = model.Cast(raw_reward, 'reward', to=core.DataType.FLOAT)
    # Inputs take no gradient, so the backward pass is cut here (in place).
    features = model.StopGradient(features, features)
    moves = model.StopGradient(moves, moves)
    outcome = model.StopGradient(outcome, outcome)
    return features, moves, outcome
19 |
def AddResNetModel(model, data, num_blocks=19, filters=256, dim_in=17, is_test=True):
    """Build the residual tower with a policy head and a value head.

    params
      model: ModelHelper the operators are added to
      data: Data Input in shape of NCHW.
      num_blocks: Number of residual blocks in the tower. Each block contains
        2 convolutions. Default 19. Zero is allowed: the heads are then
        attached directly to the first convolution layer.
      filters: Number of output Channel of NCHW. Default 256
      dim_in: Number of input Channel of NCHW. Default 17
      is_test: forwarded to every spatial batch-normalization layer
    returns
      predict: unscaled prediction, need Softmax to translate to probabilities
      value: scaled value [-1,1] (output of tanh)
    """
    # Layer 1: 17 x 19 x 19 -pad-> 17 x 21 x 21 -conv-> 256 x 19 x 19
    pad1 = model.PadImage(data, 'pad1', pad_t=1, pad_l=1, pad_b=1, pad_r=1, mode="constant", value=0.)
    conv1 = brew.conv(model, pad1, 'conv1', dim_in=dim_in, dim_out=filters, kernel=3)
    norm1 = brew.spatial_bn(model, conv1, 'norm1', filters, is_test=is_test)
    tower = brew.relu(model, norm1, 'relu1')

    # Blocks: 256 x 19 x 19 -conv-> -normalize-> -relu-> -conv-> -normalize-> +INPUT -relu-> 256 x 19 x 19
    def AddResBlock(model, block_in, i, filters, scope='res'):
        pad1 = model.PadImage(block_in, '{}/{}/pad1'.format(scope,i), pad_t=1, pad_l=1, pad_b=1, pad_r=1, mode="constant", value=0.)
        conv1 = brew.conv(model, pad1, '{}/{}/conv1'.format(scope,i), dim_in=filters, dim_out=filters, kernel=3)
        norm1 = brew.spatial_bn(model, conv1, '{}/{}/norm1'.format(scope,i), filters, is_test=is_test)
        relu1 = brew.relu(model, norm1, '{}/{}/relu1'.format(scope,i))
        pad2 = model.PadImage(relu1, '{}/{}/pad2'.format(scope,i), pad_t=1, pad_l=1, pad_b=1, pad_r=1, mode="constant", value=0.)
        conv2 = brew.conv(model, pad2, '{}/{}/conv2'.format(scope,i), dim_in=filters, dim_out=filters, kernel=3)
        norm2 = brew.spatial_bn(model, conv2, '{}/{}/norm2'.format(scope,i), filters, is_test=is_test)
        # Skip connection: add the block input back before the final relu.
        res = model.Add([norm2, block_in], '{}/{}/res'.format(scope,i))
        output = brew.relu(model, res, '{}/{}/relu2'.format(scope,i))
        return output

    # A single running blob is threaded through the tower so that the heads
    # still have a valid input when num_blocks is 0; previously that case hit
    # an unbound local name.
    for i in range(num_blocks):
        tower = AddResBlock(model, tower, i, filters)

    # Policy Head: 256 x 19 x 19 -conv-> 2 x 19 x 19 -normalize-> -relu-> -FC-> 362
    ph_conv1 = brew.conv(model, tower, 'ph/conv1', dim_in=filters, dim_out=2, kernel=1)
    ph_norm1 = brew.spatial_bn(model, ph_conv1, 'ph/norm1', 2, is_test=is_test)
    ph_relu1 = brew.relu(model, ph_norm1, 'ph/relu1')
    ph_fc = brew.fc(model, ph_relu1, 'ph/fc', dim_in=2*19*19, dim_out=362)
    predict = model.Flatten(ph_fc, 'predict')

    # Value Head: 256 x 19 x 19 -conv-> 1 x 19 x 19 -normalize-> -relu->
    #             -FC-> filters*19*19 -relu-> -FC-> 1(scalar) -tanh->
    vh_conv1 = brew.conv(model, tower, 'vh/conv1', dim_in=filters, dim_out=1, kernel=1)
    vh_norm1 = brew.spatial_bn(model, vh_conv1, 'vh/norm1', 1, is_test=is_test)
    vh_relu1 = brew.relu(model, vh_norm1, 'vh/relu1')
    vh_fc1 = brew.fc(model, vh_relu1, 'vh/fc1', dim_in=1*19*19, dim_out=filters*19*19)
    vh_relu2 = brew.relu(model, vh_fc1, 'vh/relu2')
    vh_fc2 = brew.fc(model, vh_relu2, 'vh/fc2', dim_in=filters*19*19, dim_out=1)
    vh_tanh = brew.tanh(model, vh_fc2, 'vh/tanh')
    value = model.FlattenToVec(vh_tanh, 'value')
    return predict, value
67 |
def AddSoftmax(model, predict):
    """Add a softmax operator named 'softmax' on top of `predict` and return its blob."""
    return brew.softmax(model, predict, 'softmax')
71 |
def AddAccuracy(model, softmax, label, log=True):
    """Add an accuracy operator comparing `softmax` with `label`.

    When `log` is true a Print operator (to_file=1) is also added for the
    'accuracy' blob. Returns the accuracy blob.
    """
    acc_blob = brew.accuracy(model, [softmax, label], "accuracy")
    if not log:
        return acc_blob
    model.Print('accuracy', [], to_file=1)
    return acc_blob
78 |
def AddOneHot(model, label):
    """Expand integer labels into one-hot rows as part of the net itself.

    The expansion is built into the model rather than done on the fly.
    Returns the 'onehot' blob, with its gradient stopped.
    """
    cpu_option = core.DeviceOption(caffe2_pb2.CPU, 0)
    # NOTE(review): all four operators are assumed to belong to the CPU
    # device scope — confirm against the original indentation.
    with core.DeviceScope(cpu_option):
        # Constant holding the one-hot width; created once in param_init_net.
        num_classes = model.param_init_net.ConstantFill(
            [], 'BOARD_SIZE', shape=[1,], value=362)
        wide_label = model.Cast(label, 'label_int64', to=core.DataType.INT64)
        encoded = model.OneHot([wide_label, num_classes], 'onehot')  # shape=(mini_batch,362)
        encoded = model.StopGradient(encoded, encoded)
    return encoded
88 |
def AddTrainingOperators(model, predict, expect, value, reward,
                         base_lr=-0.1, policy='fixed', stepsize=200000, gamma=0.1, log=True):
    """Append loss, gradient and plain SGD update operators to `model`.

    params
      predict:  Predicted move by Policy Model, unscaled, in shape (N,362)
      expect:   Expected distribution by MCTS, in shape (N,362)
      value:    Predicted value by Value Model, scalar value in (-1,1)
      reward:   Labelled value, scalar value in {-1,1}
      base_lr:  Base Learning Rate
      policy, stepsize, gamma: forwarded unchanged to the LearningRate operator
      log:      Whether to log 'loss', 'xent' and 'msqrl2' in file, default True

    The optimized loss is the policy cross-entropy plus the averaged squared
    L2 distance between `value` and `reward`.
    """
    # Policy term: cross-entropy against a probability target (label_prob=1).
    _, xent = model.SoftmaxWithLoss([predict, expect], ['_', 'xent'], label_prob=1)
    # Value term: averaged squared L2 distance.
    sq_dist = model.SquaredL2Distance([value, reward], None)
    msqrl2 = model.AveragedLoss(sq_dist, 'msqrl2')
    loss = model.Add([xent, msqrl2], 'loss')

    # Back-propagate from the combined loss.
    model.AddGradientOperators([loss])

    # Plain SGD: iteration counter, then the learning-rate schedule.
    iteration = brew.iter(model, "iter")
    rate = model.LearningRate(iteration, "LR", base_lr=base_lr, policy=policy, stepsize=stepsize, gamma=gamma)
    # Constant weight for the parameter term of the update; it only has to be
    # created once, hence param_init_net.
    unit = model.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)

    # In-place update of every parameter: param = param * ONE + grad * LR.
    # ModelHelper keeps the parameter -> gradient mapping in param_to_grad.
    for weight in model.params:
        model.WeightedSum([weight, unit, model.param_to_grad[weight], rate], weight)

    if log:
        for blob_name in ('loss', 'xent', 'msqrl2'):
            model.Print(blob_name, [], to_file=1)
126 |
def LoadParams(load_from):
    """Restore parameters from a file holding a serialized init NetDef.

    The file is parsed into a NetDef and that net is run once in the current
    workspace.

    load_from: path of the serialized NetDef file
    """
    init_def = caffe2_pb2.NetDef()
    # A serialized protobuf is binary data: read it as bytes so that no
    # newline translation or text decoding is applied (the matching
    # SaveParams writes the file with 'wb').
    with open(load_from, 'rb') as f:
        init_def.ParseFromString(f.read())
    # No device option is applied to the loaded net.
    workspace.RunNetOnce(init_def.SerializeToString())
133 |
def SaveParams(model, save_to) :
    """Dump every parameter of `model` into a serialized init NetDef file.

    For each parameter the current value is fetched from the workspace and
    stored as a GivenTensorFill operator that recreates it.

    model:   ModelHelper whose `params` are saved
    save_to: path of the file to write
    """
    def make_fill_op(param):
        # One operator that refills `param` with its current value and shape.
        tensor = workspace.FetchBlob(param)
        fill_args = [utils.MakeArgument("shape", tensor.shape),
                     utils.MakeArgument("values", tensor)]
        return core.CreateOperator("GivenTensorFill", [], [param], arg=fill_args)

    init_net = caffe2_pb2.NetDef()
    for param in model.params:
        init_net.op.extend([make_fill_op(param)])

    serialized = init_net.SerializeToString()
    with open(save_to, 'wb') as f:
        f.write(serialized)
146 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | import os
3 | from distutils.core import setup
4 | from Cython.Build import cythonize
5 |
# Cython sources to be compiled into extension modules.
PYX_SOURCES = [
    "RocAlphaGo/go.pyx",
    "RocAlphaGo/go_data.pyx",
    "RocAlphaGo/preprocessing.pyx",
    "RocAlphaGo/preprocessing_rollout.pyx",
]

# NumPy headers: the include root and its 'numpy' sub-directory.
NUMPY_INCLUDE = numpy.get_include()
INCLUDE_DIRS = [NUMPY_INCLUDE, os.path.join(NUMPY_INCLUDE, 'numpy')]

setup(
    name='RocAlphaGo',
    ext_modules=cythonize(PYX_SOURCES),
    include_dirs=INCLUDE_DIRS
)
16 |
17 | """
18 | install all necessary dependencies using:
19 | pip install -r requirements.txt
20 |
21 | run setup with command:
22 | python setup.py build_ext --inplace
23 |
24 | be aware that Cython uses a deprecated version of the NumPy API; this results in a lot of warnings
25 |
26 | you can run all unittests to verify everything works as it should:
27 | python -m unittest discover
28 | """
29 |
--------------------------------------------------------------------------------
/sgfutil.py:
--------------------------------------------------------------------------------
1 | import sgf, os
2 | from go import GameState, BLACK, WHITE, EMPTY
3 | from datetime import datetime
4 |
5 | # BOARD_POSITION contains SGF symbol which represents each row (or column) of the board
6 | # It can be used to convert between 0,1,2,3... and a,b,c,d...
7 | # Symbol [tt] or [] represents PASS in SGF, therefore is omitted
8 | BOARD_POSITION = 'abcdefghijklmnopqrs'
9 |
def GetWinner(game_state):
    """Translate the winner reported by `game_state` into a result string.

    Returns 'B+' when the winner is BLACK, 'W+' when it is WHITE and 'T'
    for anything else.
    """
    result = game_state.get_winner()
    if result == BLACK:
        return 'B+'
    return 'W+' if result == WHITE else 'T'
18 |
def WriteBackSGF(winner, history, filename, PB=None, PW=None, Komi='7.5'):
    """Record a finished game as '<filename>.sgf'.

    winner:   game result written to the RE property (e.g. 'B+', 'W+', 'T')
    history:  sequence of steps; step[0] is the SGF property name of the move
              ('B' or 'W') and step[1] is the flat board index, encoded as
              BOARD_POSITION[index // 19] + BOARD_POSITION[index % 19]
    filename: output path without the '.sgf' extension
    PB:       black player tag, written as 'RL-<PB>'
    PW:       white player tag, written as 'SL-<PW>'
    Komi:     komi written to the KM property, default '7.5'
    """
    parser = sgf.Parser()
    collection = sgf.Collection(parser)

    # Root node: game properties, in the order they are written.
    root_properties = [
        ('FF', '4'),                                    # SGF format version
        ('SZ', '19'),                                   # Board Size = 19
        ('KM', str(Komi)),                              # default Komi = 7.5
        ('PB', 'RL-{}'.format(PB)),                     # Black Player
        ('PW', 'SL-{}'.format(PW)),                     # White Player
        ('DT', datetime.now().strftime("%Y-%m-%d")),    # Game Date
        ('RE', str(winner)),                            # Result = B+, W+, T
    ]
    parser.start_gametree()
    parser.start_node()
    for key, text in root_properties:
        parser.start_property(key)
        parser.add_prop_value(text)
        parser.end_property()
    parser.end_node()

    # One node per move.
    for step in history:
        color, index = step[0], step[1]
        # Floor division keeps the quotient an int; true division would give
        # a float on Python 3, which cannot index BOARD_POSITION.
        coordinate = BOARD_POSITION[index // 19] + BOARD_POSITION[index % 19]
        parser.start_node()
        parser.start_property(color)
        parser.add_prop_value(coordinate)
        parser.end_property()
        parser.end_node()
    parser.end_gametree()

    # record the game in SGF
    with open('{}.sgf'.format(filename), "w") as f:
        collection.output(f)
59 |
--------------------------------------------------------------------------------