├── .gitignore ├── MNIST.ipynb ├── Mock AlphaGo (1) Preprocess Pipeline.ipynb ├── Mock AlphaGo (2) Policy Network.ipynb ├── Mock AlphaGo (3) Reinforced Learning.ipynb ├── Mock AlphaGo (3B) Policy Network - Reinforced Learning in mass production.ipynb ├── Mock AlphaGo Zero (1) Preprocess Pipeline.ipynb ├── Mock AlphaGo Zero (2) Policy and Value Network.ipynb ├── Mock AlphaGo Zero (3B) Reinforced Learning.ipynb ├── Monitoring.ipynb ├── README.md ├── RocAlphaGo ├── go.pxd ├── go.pyx ├── go_data.pxd ├── go_data.pyx ├── preprocessing.pxd ├── preprocessing.pyx ├── preprocessing_rollout.pxd └── preprocessing_rollout.pyx ├── game.py ├── modeling.py ├── modelingZero.py ├── setup.py └── sgfutil.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | ### nohup ### 104 | nohup.out 105 | 106 | ### RocAlphaGo Cython ### 107 | /RocAlphaGo/*.c 108 | 109 | -------------------------------------------------------------------------------- /Mock AlphaGo (1) Preprocess Pipeline.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Preprocess\n", 8 | " The Go game dataset are usually stored in [SGF](http://www.red-bean.com/sgf/go.html) file format. 
We need to transform SGF file into Caffe2 Tensor which are 48 feature planes of 19x19 size, according to DeepMind.\n", 9 | "\n", 10 | "| Feature | # of planes | Description\n", 11 | "|--------------|-------------|-------------------------\n", 12 | "| Stone colour | 3 | Player stone / opponent stone / empty\n", 13 | "| Ones | 1 | A constant plane filled with 1\n", 14 | "| Turns since | 8 | How many turns since a move was played\n", 15 | "| Liberties | 8 | Number of liberties (empty adjacent points)\n", 16 | "| Capture size | 8 | How many opponent stones would be captured\n", 17 | "| Self-atari size | 8 | How many of own stones would be captured\n", 18 | "| Liberties after move | 8 | Number of liberties after this move is played\n", 19 | "| Ladder capture | 1 | Whether a move at this point is a successful ladder capture\n", 20 | "| Ladder escape | 1 | Whether a move at this point is a successful ladder escape\n", 21 | "| Sensibleness | 1 | Whether a move is legal and does not fill its own eyes\n", 22 | "| Zeros | 1 | A constant plane filled with 0\n", 23 | "| Player color | 1 | Whether current player is black" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 4, 29 | "metadata": { 30 | "collapsed": true 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "from preprocessing import Preprocess\n", 35 | "from go import GameState, BLACK, WHITE, EMPTY\n", 36 | "import os, sgf\n", 37 | "import numpy as np\n", 38 | "\n", 39 | "# input SGF files\n", 40 | "FILE_FOLDER = os.path.join(os.path.expanduser('~'), 'python', 'tutorial_files','go')\n", 41 | "# output archive SGF files\n", 42 | "SUCCEED_FOLDER = os.path.join(os.path.expanduser('~'), 'python', 'tutorial_files','succeed')\n", 43 | "FAIL_FOLDER = os.path.join(os.path.expanduser('~'), 'python', 'tutorial_files','fail')\n", 44 | "# output database\n", 45 | "DATA_FOLDER = os.path.join(os.path.expanduser('~'), 'python', 'tutorial_data','go')\n", 46 | "TRAIN_DATA = os.path.join(DATA_FOLDER,'train_data')\n", 
def write_db(db_type, db_name, base_name, features, labels):
    """Write the (feature, label) samples of one game into a Caffe2 DB.

    Sample ``i`` is stored as a serialized ``TensorProtos`` holding
    ``features[i]`` followed by ``labels[i]`` under the key
    ``'<base_name>_<i>'``, with ``i`` zero-padded to at least three digits.

    Args:
        db_type: DB backend name handed to ``create_db``.
        db_name: Location of the database, opened in write mode.
        base_name: Key prefix shared by every sample written by this call.
        features: Array whose first axis enumerates the samples.
        labels: Per-sample labels, indexable by the same sample index;
            needs at least ``features.shape[0]`` entries.
    """
    db = core.C.create_db(db_type, db_name, core.C.Mode.write)
    try:
        transaction = db.new_transaction()
        try:
            for i in range(features.shape[0]):
                feature_and_label = caffe2_pb2.TensorProtos()
                feature_and_label.protos.extend([
                    utils.NumpyArrayToCaffe2Tensor(features[i]),
                    utils.NumpyArrayToCaffe2Tensor(labels[i])
                ])
                transaction.put(
                    '{}_{:0=3}'.format(base_name,i),
                    feature_and_label.SerializeToString())
        finally:
            # Close the transaction, and then close the db.
            # Done in `finally` because a raised exception keeps this frame
            # (and so these locals) alive through its traceback; dropping the
            # references here releases the handles even on failure.
            del transaction
    finally:
        del db
#%%capture output
# Convert every SGF file under FILE_FOLDER into (feature, label) samples and
# store them with write_db. Processed files are moved to SUCCEED_FOLDER or
# FAIL_FOLDER so an interrupted run can be resumed.
p = Preprocess(DEFAULT_FEATURES)
for dirname, subDirList, fileList in os.walk(FILE_FOLDER):
    for filename in fileList:
        with open(os.path.join(dirname, filename)) as f:
            collection = sgf.parse(f.read())
            for game in collection:
                root_properties = game.nodes[0].properties
                # Size of the Board should only be 19x19. A record without SZ
                # is skipped instead of aborting the whole walk with KeyError.
                if 'SZ' not in root_properties or root_properties['SZ'] != ['19']:
                    continue
                state = GameState() # Initialize GameState
                # Collected in lists and joined once at the end; growing an
                # ndarray with np.append re-copies every earlier sample.
                feature_list = []
                label_list = []
                try:
                    for node in game.nodes[1:]: # Except nodes[0] for game properties
                        if 'B' in node.properties and len(node.properties['B'][0]) == 2: # Black move
                            move, color = node.properties['B'][0], BLACK
                        elif 'W' in node.properties and len(node.properties['W'][0]) == 2: # White move
                            move, color = node.properties['W'][0], WHITE
                        else:
                            # Pass / non-move node: no board coordinate exists,
                            # so no training sample is emitted for it.
                            continue
                        # Snapshot the position BEFORE the move is played; the
                        # move played from it is the label.
                        # assumes state_to_tensor returns shape (1,48,19,19) - TODO confirm
                        feature_list.append(p.state_to_tensor(state).astype(np.int8))
                        x = BOARD_POSITION.index(move[0])
                        y = BOARD_POSITION.index(move[1])
                        state.do_move(action=(x,y),color = color)
                        label_list.append(x * 19 + y)
                    if feature_list:
                        features = np.concatenate(feature_list, axis=0)
                    else:
                        features = np.empty(shape=(0,48,19,19), dtype=np.int8)
                    labels = np.asarray(label_list, dtype=np.uint16).reshape(-1, 1)
                    write_db(
                        db_type = 'leveldb',
                        db_name = TRAIN_DATA, # replace this with TRAIN_DATA or TEST_DATA if you want to separate the dataset
                        base_name = os.path.basename(filename),
                        features = features,
                        labels = labels
                    )
                    os.rename(f.name,os.path.join(SUCCEED_FOLDER,filename)) # move the file to SUCCEED_FOLDER, so Preprocess can resume after interrupted
                    print('{} succeeded'.format(filename))
                except Exception as e:
                    os.rename(f.name,os.path.join(FAIL_FOLDER,filename)) # move the file to FAIL_FOLDER, so Preprocess can resume after interrupted
                    print('{} failed dues to {}'.format(filename, e))
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Mock AlphaGo (3B) Policy Network - Reinforced Learning in mass production\n", 8 | "In this notebook, we will train the policy network by letting networks compete against each other, according to DeepMind:\n", 9 | "\n", 10 | "> We further trained the policy network by policy gradient reinforcement learning.\n", 11 | "Each iteration consisted of a mini-batch of n games played in parallel, between\n", 12 | "the current policy network $p_\\rho$ that is being trained, and an opponent $p_{\\rho^-}$\n", 13 | "that uses parameters $\\rho^-$ from a previous iteration, randomly sampled from\n", 14 | "a pool $O$ of opponents, so as to increase the stability of training. Weights were\n", 15 | "initialized to $\\rho = \\rho^- = \\sigma$. Every 500 iterations, we added the current\n", 16 | "parameters $\\rho$ to the opponent pool. Each game $i$ in the mini-batch was played\n", 17 | "out until termination at step $T^i$, and then scored to determine the outcome\n", 18 | "$z^i_t = \\pm r(s_{T^i})$ from each player’s perspective. The games were then replayed\n", 19 | "to determine the policy gradient update, $\\Delta\\rho = \\frac{\\alpha}{n}\\sum^n_{i=1}\n", 20 | "\\sum^{T^i}_{t=1}\\frac{\\partial\\log p_\\rho(a^i_t|s^i_t)}{\\partial\\rho}(z^i_t-v(s^i_t))$, using the REINFORCE \n", 21 | "algorithm with baseline $v(s^i_t)$ for variance reduction. On the first pass \n", 22 | "through the training pipeline, the baseline was set to zero; on the second pass\n", 23 | "we used the value network $v_\\theta(s)$ as a baseline; this provided a small\n", 24 | "performance boost. The policy network was trained in this way for 10,000 \n", 25 | "mini-batches of 128 games, using 50 GPUs, for one day." 
26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "import os, numpy as np\n", 35 | "from caffe2.python import core, model_helper, workspace, brew, utils\n", 36 | "from caffe2.proto import caffe2_pb2\n", 37 | "from sgfutil import BOARD_POSITION\n", 38 | "\n", 39 | "%matplotlib inline\n", 40 | "from matplotlib import pyplot\n", 41 | "\n", 42 | "# how many games will be run in one minibatch\n", 43 | "GAMES_BATCHES = 16 # [1,infinity) depends on your hardware\n", 44 | "SEARCH_WIDE = 1600 # [1, infinity) for each step, run MCTS to obtain better distribution\n", 45 | "# how many iterations for this tournament\n", 46 | "TOURNAMENT_ITERS = 10000 # [1,infinity)\n", 47 | "\n", 48 | "if workspace.has_gpu_support:\n", 49 | " device_opts = core.DeviceOption(caffe2_pb2.CUDA, workspace.GetDefaultGPUID())\n", 50 | " print('Running in GPU mode on default device {}'.format(workspace.GetDefaultGPUID()))\n", 51 | "else :\n", 52 | " device_opts = core.DeviceOption(caffe2_pb2.CPU, 0)\n", 53 | " print('Running in CPU mode')\n", 54 | "\n", 55 | "arg_scope = {\"order\": \"NCHW\"}\n", 56 | "\n", 57 | "ROOT_FOLDER = os.path.join(os.path.expanduser('~'), 'python', 'tutorial_data','go','param') # folder stores the loss/accuracy log" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "We need to differentiate primary player and sparring partner. 
Primary player will learn from the game result" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "### Config for primary player\n", 74 | "PRIMARY_WORKSPACE = os.path.join(ROOT_FOLDER, 'primary')\n", 75 | "PRIMARY_CONV_LEVEL = 4\n", 76 | "PRIMARY_FILTERS = 128\n", 77 | "PRIMARY_PRE_TRAINED_ITERS = 1\n", 78 | "# before traning, where to load the params\n", 79 | "PRIMARY_LOAD_FOLDER = os.path.join(ROOT_FOLDER, \"RL-conv={}-k={}-iter={}\"\n", 80 | " .format(PRIMARY_CONV_LEVEL,PRIMARY_FILTERS,PRIMARY_PRE_TRAINED_ITERS))\n", 81 | "BASE_LR = -0.01 # (-0.003,0) The base Learning Rate; 0 to disable it.\n", 82 | "NEGATIVE_BASE_LR = 0.0 # [BASE_LR,0] Dues to multi-class softmax, this param is usually smaller than BASE_LR; 0 to disable it.\n", 83 | "TRAIN_BATCHES = 16 # how many samples will be trained within one mini-batch, depends on your hardware\n", 84 | "# after training, where to store the params\n", 85 | "PRIMARY_SAVE_FOLDER = os.path.join(ROOT_FOLDER, \"RL-conv={}-k={}-iter={}\"\n", 86 | " .format(PRIMARY_CONV_LEVEL,PRIMARY_FILTERS,PRIMARY_PRE_TRAINED_ITERS+TOURNAMENT_ITERS))\n", 87 | "if not os.path.exists(PRIMARY_SAVE_FOLDER):\n", 88 | " os.makedirs(PRIMARY_SAVE_FOLDER)\n", 89 | "\n", 90 | "### Config for sparring partner\n", 91 | "SPARR_WORKSPACE = os.path.join(ROOT_FOLDER, 'sparring')\n", 92 | "SPARR_LOAD_FOLDER = os.path.join(ROOT_FOLDER, \"conv={}-k={}-iter={}\".format(4,128,1))\n", 93 | "\n", 94 | "print('Training model from {} to {} iterations'.format(PRIMARY_PRE_TRAINED_ITERS,PRIMARY_PRE_TRAINED_ITERS+TOURNAMENT_ITERS))" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "## AlphaGo Neural Network Architecture" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": { 108 | "collapsed": true 109 | }, 110 | "outputs": [], 111 | "source": [ 112 | "from modeling import AddConvModel, 
AddTrainingOperators" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "## Build the actual network" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": { 126 | "collapsed": true 127 | }, 128 | "outputs": [], 129 | "source": [ 130 | "import caffe2.python.predictor.predictor_exporter as pe\n", 131 | "\n", 132 | "data = np.empty(shape=(TRAIN_BATCHES,48,19,19), dtype=np.float32)\n", 133 | "label = np.empty(shape=(TRAIN_BATCHES,), dtype=np.int32)" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "### Primary player\n", 141 | ">Train Net: Blob('data','label') ==> Predict Net ==> Loss ==> Backward Propergation" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "workspace.SwitchWorkspace(PRIMARY_WORKSPACE, True)\n", 151 | "# for learning from winner\n", 152 | "with core.DeviceScope(device_opts):\n", 153 | " primary_train_model = model_helper.ModelHelper(name=\"primary_train_model\", arg_scope=arg_scope, init_params=True)\n", 154 | " workspace.FeedBlob(\"data\", data)\n", 155 | " predict = AddConvModel(primary_train_model, \"data\", conv_level=PRIMARY_CONV_LEVEL, filters=PRIMARY_FILTERS)\n", 156 | " workspace.FeedBlob(\"label\", label)\n", 157 | " AddTrainingOperators(primary_train_model, predict, \"label\", None, base_lr=BASE_LR)\n", 158 | " workspace.RunNetOnce(primary_train_model.param_init_net)\n", 159 | " workspace.CreateNet(primary_train_model.net, overwrite=True)\n", 160 | "# for learning from negative examples\n", 161 | "with core.DeviceScope(device_opts):\n", 162 | " primary_train_neg_model = model_helper.ModelHelper(name=\"primary_train_neg_model\", arg_scope=arg_scope, init_params=True)\n", 163 | " workspace.FeedBlob(\"data\", data)\n", 164 | " predict = AddConvModel(primary_train_neg_model, \"data\", 
def _LearnFromGames(history, winner, mini_batch, color_to_learn, train_net):
    """Run `train_net` on the recorded moves selected by `color_to_learn`.

    Args:
        history: Per-game lists of steps; each step is indexed as
            step[0] = colour ('B'/'W'), step[1] = label, step[2] = board planes.
        winner: Per-game result strings, aligned with `history`.
        mini_batch: Number of samples fed to the net per training run.
        color_to_learn: Maps a game result to the colour whose moves are
            trained on; results missing from the map contribute nothing.
        train_net: Net executed once per full mini-batch.

    Samples left over when fewer than `mini_batch` remain are not trained on:
    the net only runs when the buffer is completely filled.
    """
    data = np.empty(shape=(mini_batch,48,19,19), dtype=np.float32)
    label = np.empty(shape=(mini_batch,), dtype=np.int32)
    filled = 0
    for game_steps, game_winner in zip(history, winner):
        wanted_color = color_to_learn.get(game_winner)
        for step in game_steps:
            if step[0] != wanted_color:
                continue
            data[filled] = step[2]
            label[filled] = step[1]
            filled += 1
            if filled == mini_batch:
                # Buffer full: train on it, then start refilling from slot 0.
                filled = 0
                workspace.SwitchWorkspace(PRIMARY_WORKSPACE)
                with core.DeviceScope(device_opts):
                    workspace.FeedBlob("data", data)
                    workspace.FeedBlob("label", label)
                    workspace.RunNet(train_net)


def LearnFromWinningGames(history, winner, mini_batch=TRAIN_BATCHES):
    """Train `primary_train_model` on the moves played by each game's winner.

    Black's moves are used for games whose result is 'B+', White's moves for
    'W+'; any other result is ignored.
    """
    _LearnFromGames(history, winner, mini_batch,
                    {'B+': 'B', 'W+': 'W'}, primary_train_model.net)


def LearnFromLosingGames(history, winner, mini_batch=TRAIN_BATCHES):
    """Train `primary_train_neg_model` on the moves played by each game's loser.

    Black's moves are used for games whose result is 'W+', White's moves for
    'B+'; any other result is ignored.
    """
    _LearnFromGames(history, winner, mini_batch,
                    {'W+': 'B', 'B+': 'W'}, primary_train_neg_model.net)
# Self-play tournaments: each iteration plays GAMES_BATCHES games in parallel
# between the primary network and a randomly chosen sparring checkpoint, then
# trains the primary network on the recorded moves.
for tournament in range(PRIMARY_PRE_TRAINED_ITERS, PRIMARY_PRE_TRAINED_ITERS+TOURNAMENT_ITERS):
    # Every 20 tournaments, copy current player to opponent pool, i.e. checkpoint
    # (the DeepMind description quoted in this notebook uses 500).
    if tournament > 0 and tournament % 20 == 0:
        pe.save_to_db("minidb", os.path.join(PRIMARY_SAVE_FOLDER, "policy_model.minidb"), pe_meta)
        print('Checkpoint saved to {}'.format(PRIMARY_SAVE_FOLDER))
        # NOTE(review): `tournament` already starts at PRIMARY_PRE_TRAINED_ITERS,
        # so this suffix counts the pre-trained iterations twice - confirm intent.
        pe.save_to_db("minidb", os.path.join(SPARR_LOAD_FOLDER, "policy_model_RL_{}.minidb".format(PRIMARY_PRE_TRAINED_ITERS+tournament)), pe_meta)
        print('Checkpoint saved to {}'.format(SPARR_LOAD_FOLDER))

    # Randomly change color of player
    PRIMARY_PLAYER = np.random.choice(['B','W'])
    if PRIMARY_PLAYER == 'B':
        SPARRING_PLAYER = 'W'
    else:
        SPARRING_PLAYER = 'B'

    # Randomly pickup sparring partner
    workspace.SwitchWorkspace(SPARR_WORKSPACE, True)
    sparring_param_file = np.random.choice(os.listdir(SPARR_LOAD_FOLDER))
    with core.DeviceScope(device_opts):
        sparring_predict_net = pe.prepare_prediction_net(os.path.join(SPARR_LOAD_FOLDER, sparring_param_file), "minidb")
    print('Tournament {} Primary({}) vs Sparring({}|{}) started @{}'
          .format(tournament, PRIMARY_PLAYER, SPARRING_PLAYER, sparring_param_file, datetime.now()))

    # Initialize game board and game state
    game_state = [ GameState() for i in range(GAMES_BATCHES) ]
    game_result = [0] * GAMES_BATCHES # falsy while the game is running, truthy once it has ended
    p = Preprocess(DEFAULT_FEATURES) # Singleton
    history = [ [] for i in range(GAMES_BATCHES) ] # history[n] stores tuples of (color, move index, board[n])
    board = None # The preprocessed board with shape Nx48x19x19

    # for each step in all games
    for step in range(0,722):

        # Preprocess the board
        board = np.concatenate([p.state_to_tensor(game_state[i]).astype(np.float32) for i in range(GAMES_BATCHES)])

        if step % 2 == 0:
            current_player = BLACK
            current_color = 'B'
        else:
            current_player = WHITE
            current_color = 'W'

        if step % 2 == (PRIMARY_PLAYER == 'W'): # if step %2 == 0 and Primary is Black, or vice versa.
            # primary player make move
            workspace.SwitchWorkspace(PRIMARY_WORKSPACE)
            with core.DeviceScope(device_opts):
                workspace.FeedBlob('data', board)
                workspace.RunNet(primary_predict_net)
        else:
            # sparring partner make move
            workspace.SwitchWorkspace(SPARR_WORKSPACE)
            with core.DeviceScope(device_opts):
                workspace.FeedBlob('data', board)
                workspace.RunNet(sparring_predict_net)

        predict = workspace.FetchBlob('softmax') # [0.01, 0.02, ...] in shape (N,361)

        for i in range(GAMES_BATCHES):
            if game_result[i]: # game end
                continue
            legal_moves = [ x*19+y for (x,y) in game_state[i].get_legal_moves(include_eyes=False)] # [59, 72, ...] in 1D
            if len(legal_moves) > 0: # at least 1 legal move
                probabilities = predict[i][legal_moves] # [0.02, 0.01, ...]
                # use numpy.random.choice to randomize the step,
                # otherwise use np.argmax to get best choice
                # current_choice = legal_moves[np.argmax(probabilities)]
                if np.sum(probabilities) > 0:
                    current_choice = np.random.choice(legal_moves, 1, p=probabilities/np.sum(probabilities))[0]
                else:
                    current_choice = np.random.choice(legal_moves, 1)[0]
                # divmod keeps x integral on every Python version; a plain `/`
                # only floors under Python 2 integer division.
                (x, y) = divmod(current_choice, 19)
                history[i].append((current_color, current_choice, board[i]))
                game_state[i].do_move(action = (x, y), color = current_player) # End of Game?
                #print('game({}) step({}) {} move({},{})'.format(i, step, current_color, x, y))
            else:
                game_state[i].do_move(action = PASS, color = current_player)
                #print('game({}) step({}) {} PASS'.format(i, step, current_color))
            game_result[i] = game_state[i].is_end_of_game

        if np.all(game_result):
            break

    # Get the winner
    winner = [ GetWinner(game_state[i]) for i in range(GAMES_BATCHES) ] # B+, W+, T
    print('Tournament {} Finished with Primary({}) {}:{} Sparring({}) @{}'.
          format(tournament, PRIMARY_PLAYER, sum(np.char.count(winner, PRIMARY_PLAYER)),
                 sum(np.char.count(winner, SPARRING_PLAYER)), SPARRING_PLAYER, datetime.now()))

    # Save the games(optional)
    for i in range(GAMES_BATCHES):
        filename = os.path.join(
            os.path.expanduser('~'), 'python', 'tutorial_files','selfplay',
            '({}_{}_{})vs({})_{}_{}_{}'.format(PRIMARY_CONV_LEVEL, PRIMARY_FILTERS, PRIMARY_PRE_TRAINED_ITERS+tournament,
                                               sparring_param_file, i, winner[i],
                                               datetime.now().strftime("%Y-%m-%dT%H:%M:%S%Z")))
        # NOTE(review): the whole `winner` list is passed here, not winner[i];
        # WriteBackSGF is defined in sgfutil - verify which one it expects.
        WriteBackSGF(winner, history[i], filename)

    # After each tournament, learn from the winner
    if BASE_LR != 0:
        LearnFromWinningGames(history, winner, mini_batch=TRAIN_BATCHES)

    # And learn from negative examples
    if NEGATIVE_BASE_LR != 0:
        LearnFromLosingGames(history, winner, mini_batch=TRAIN_BATCHES)
446 | "sideBar": true, 447 | "threshold": 4, 448 | "toc_cell": false, 449 | "toc_position": { 450 | "height": "544px", 451 | "left": "0px", 452 | "right": "1723px", 453 | "top": "107px", 454 | "width": "130px" 455 | }, 456 | "toc_section_display": "block", 457 | "toc_window_display": true, 458 | "widenNotebook": false 459 | } 460 | }, 461 | "nbformat": 4, 462 | "nbformat_minor": 1 463 | } 464 | -------------------------------------------------------------------------------- /Mock AlphaGo Zero (1) Preprocess Pipeline.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Preprocess\n", 8 | "Neural Network Architecture The input to the neural network is a 19 × 19 × 17 image stack\n", 9 | "comprising 17 binary feature planes. 8 feature planes $X_t$ consist of binary values indicating the\n", 10 | "presence of the current player’s stones ($X_t^i = 1$ if intersection $i$ contains a stone of the player’s\n", 11 | "colour at time-step $t$; $0$ if the intersection is empty, contains an opponent stone, or if $t < 0$). A\n", 12 | "further 8 feature planes, $Y_t$ , represent the corresponding features for the opponent’s stones. The\n", 13 | "final feature plane, $C$, represents the colour to play, and has a constant value of either 1 if black\n", 14 | "is to play or 0 if white is to play. These planes are concatenated together to give input features\n", 15 | "$s_t = [X_t , Y_t , X_{t−1} , Y_{t−1} , ..., X_{t−7} , Y_{t−7} , C]$." 
16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "from preprocessing import Preprocess\n", 25 | "from go import GameState, BLACK, WHITE, EMPTY\n", 26 | "import os, sgf\n", 27 | "import numpy as np\n", 28 | "\n", 29 | "# input SGF files\n", 30 | "FILE_FOLDER = os.path.join(os.path.expanduser('~'), 'python', 'tutorial_files','go')\n", 31 | "# output archive SGF files\n", 32 | "SUCCEED_FOLDER = os.path.join(os.path.expanduser('~'), 'python', 'tutorial_files','succeed')\n", 33 | "FAIL_FOLDER = os.path.join(os.path.expanduser('~'), 'python', 'tutorial_files','fail')\n", 34 | "# output database\n", 35 | "TRAIN_DATA = os.path.join(os.path.expanduser('~'), 'python', 'tutorial_data', 'zero', 'train_data')\n", 36 | "TEST_DATA = os.path.join(os.path.expanduser('~'), 'python', 'tutorial_data', 'zero', 'test_data')\n", 37 | "\n", 38 | "# Config this to indicate whether it's training or testing data\n", 39 | "DATA_FOLDER = TRAIN_DATA\n", 40 | "\n", 41 | "# BOARD_POSITION contains SGF symbol which represents each row (or column) of the board\n", 42 | "# It can be used to convert between 0,1,2,3... and a,b,c,d...\n", 43 | "# Symbol [tt] or [] represents PASS in SGF, therefore is omitted\n", 44 | "BOARD_POSITION = 'abcdefghijklmnopqrs'\n", 45 | "\n", 46 | "# Only 3 features are needed for AlphaGo Zero\n", 47 | "# 0 - Player Stone, 1 - Opponent Stone, 3 - Current Player Color\n", 48 | "DEFAULT_FEATURES = [\"board\", \"color\"]\n", 49 | "\n", 50 | "# reverse the index of player/opponent\n", 51 | "# 0,2,4,6... are player, 1,3,5,7... are opponent\n", 52 | "OPPONENT_INDEX = [1,0,3,2,5,4,7,6,9,8,11,10,13,12]" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "## Define DB output\n", 60 | " [LevelDB](http://leveldb.org/) is preferred database because it automatically use [Snappy](https://github.com/google/snappy) to compress the data." 
61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "from caffe2.python import core, utils\n", 70 | "from caffe2.proto import caffe2_pb2\n", 71 | "\n", 72 | "def write_db(db_type, db_name, base_name, features, labels, rewards):\n", 73 | " db = core.C.create_db(db_type, db_name, core.C.Mode.write)\n", 74 | " transaction = db.new_transaction()\n", 75 | " for i in range(features.shape[0]):\n", 76 | " feature_and_label = caffe2_pb2.TensorProtos()\n", 77 | " feature_and_label.protos.extend([\n", 78 | " utils.NumpyArrayToCaffe2Tensor(features[i]),\n", 79 | " utils.NumpyArrayToCaffe2Tensor(labels[i]),\n", 80 | " utils.NumpyArrayToCaffe2Tensor(rewards[i])\n", 81 | " ])\n", 82 | " transaction.put(\n", 83 | " '{}_{:0=3}'.format(base_name,i),\n", 84 | " feature_and_label.SerializeToString())\n", 85 | " # Close the transaction, and then close the db.\n", 86 | " del transaction\n", 87 | " del db" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "## Parse SGF game file\n", 95 | " Parse the SGF file. SGF files use the characters a to s to represent lines 1 to 19. We convert each game to Caffe2 tensors and write them to the database one game at a time."
96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "#%%capture output\n", 105 | "p = Preprocess(DEFAULT_FEATURES)\n", 106 | "for dirname, subDirList, fileList in os.walk(FILE_FOLDER):\n", 107 | " for filename in fileList:\n", 108 | " with open(os.path.join(dirname, filename)) as f:\n", 109 | " collection = sgf.parse(f.read())\n", 110 | " for game in collection:\n", 111 | " # Size of the Board should only be 19x19, Komi should be 7.5 according to Chinese rule\n", 112 | " if (game.nodes[0].properties['SZ'] == ['19']\n", 113 | "# and game.nodes[0].properties['RU'] == ['Chinese']\n", 114 | "# and game.nodes[0].properties['KM'] == ['7.50']\n", 115 | " ):\n", 116 | " try:\n", 117 | " state = GameState() # Initialize GameState\n", 118 | " features = np.empty(shape=(0,17,19,19), dtype=np.int8)\n", 119 | " feature_history = np.zeros(shape=(1,17,19,19), dtype=np.int8)\n", 120 | " labels = np.empty(shape=(0,), dtype=np.int32)\n", 121 | " rewards = np.empty(shape=(0,), dtype=np.float32)\n", 122 | " result = 'B' if game.nodes[0].properties['RE'][0:2] == ['B+'] else 'W'\n", 123 | " for node in game.nodes[1:]: # Except nodes[0] for game properties\n", 124 | " feature_current = p.state_to_tensor(state).astype(np.int8) # Player/Opponent/Empty/Color\n", 125 | " feature_history = np.concatenate((feature_current[0:1,0:2], # Xt, Yt\n", 126 | " feature_history[0:1,OPPONENT_INDEX],\n", 127 | " feature_current[0:1,3:4]), # Color\n", 128 | " axis=1)\n", 129 | " if 'B' in node.properties and len(node.properties['B'][0]) == 2: # Black move\n", 130 | " x = BOARD_POSITION.index(node.properties['B'][0][0])\n", 131 | " y = BOARD_POSITION.index(node.properties['B'][0][1])\n", 132 | " state.do_move(action=(x,y),color = BLACK)\n", 133 | " elif 'W' in node.properties and len(node.properties['W'][0]) == 2: # White move\n", 134 | " x = BOARD_POSITION.index(node.properties['W'][0][0])\n", 135 | " y = 
BOARD_POSITION.index(node.properties['W'][0][1])\n", 136 | " state.do_move(action=(x,y),color = WHITE)\n", 137 | " reward = np.asarray([1.0 if result in node.properties else -1.0], dtype=np.float32)\n", 138 | " features = np.append(features, feature_history, axis=0)\n", 139 | " labels = np.append(labels, np.asarray([x * 19 + y], dtype=np.int32), axis=0)\n", 140 | " rewards = np.append(rewards, reward, axis=0)\n", 141 | " write_db(\n", 142 | " db_type = 'leveldb',\n", 143 | " db_name = DATA_FOLDER, # replace this with TRAIN_DATA or TEST_DATA if you want to separate the dataset\n", 144 | " base_name = os.path.basename(filename),\n", 145 | " features = features,\n", 146 | " labels = labels,\n", 147 | " rewards = rewards\n", 148 | " )\n", 149 | " os.rename(f.name,os.path.join(SUCCEED_FOLDER,filename)) # move the file to SUCCEED_FOLDER, so Preprocess can resume after interrupted\n", 150 | " print('{} succeeded'.format(filename))\n", 151 | " except Exception as e:\n", 152 | " os.rename(f.name,os.path.join(FAIL_FOLDER,filename)) # move the file to FAIL_FOLDER, so Preprocess can resume after interrupted\n", 153 | " print('{} failed dues to {}'.format(filename, e))\n", 154 | " else:\n", 155 | " os.rename(f.name,os.path.join(FAIL_FOLDER,filename)) # move the file to FAIL_FOLDER, so Preprocess can resume after interrupted\n", 156 | " print('{} unqualified dues to Size, Rule or Komi'.format(filename))" 157 | ] 158 | } 159 | ], 160 | "metadata": { 161 | "kernelspec": { 162 | "display_name": "Python 2", 163 | "language": "python", 164 | "name": "python2" 165 | }, 166 | "language_info": { 167 | "codemirror_mode": { 168 | "name": "ipython", 169 | "version": 2 170 | }, 171 | "file_extension": ".py", 172 | "mimetype": "text/x-python", 173 | "name": "python", 174 | "nbconvert_exporter": "python", 175 | "pygments_lexer": "ipython2", 176 | "version": "2.7.12" 177 | }, 178 | "toc": { 179 | "colors": { 180 | "hover_highlight": "#DAA520", 181 | "navigate_num": "#000000", 182 | 
"navigate_text": "#333333", 183 | "running_highlight": "#FF0000", 184 | "selected_highlight": "#FFD700", 185 | "sidebar_border": "#EEEEEE", 186 | "wrapper_background": "#FFFFFF" 187 | }, 188 | "moveMenuLeft": true, 189 | "nav_menu": { 190 | "height": "30px", 191 | "width": "252px" 192 | }, 193 | "navigate_menu": true, 194 | "number_sections": true, 195 | "sideBar": true, 196 | "threshold": 4, 197 | "toc_cell": false, 198 | "toc_section_display": "block", 199 | "toc_window_display": false, 200 | "widenNotebook": false 201 | } 202 | }, 203 | "nbformat": 4, 204 | "nbformat_minor": 1 205 | } 206 | -------------------------------------------------------------------------------- /Mock AlphaGo Zero (2) Policy and Value Network.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Mock AlphaGo Zero (2) - Policy and Value Network\n", 8 | "In this notebook, we will build the model of AlphaGo Zero's Policy and Value Network, which is based on ResNet.\n", 9 | "\n", 10 | "Supervised Learning For comparison, we also trained neural network parameters $\\theta_{SL}$ by super-\n", 11 | "vised learning. The neural network architecture was identical to AlphaGo Zero. Mini-batches of\n", 12 | "data $(s,\\pi,z)$ were sampled at random from the KGS data-set, setting $\\pi_a = 1$ for the human expert\n", 13 | "move a. Parameters were optimised by stochastic gradient descent with momentum and learning\n", 14 | "rate annealing, using the same loss as in Equation 1, but weighting the mean-squared error com-\n", 15 | "ponent by a factor of $0.01$. The learning rate was annealed according to the standard schedule\n", 16 | "in Extended Data Table 3. The momentum parameter was set to $0.9$, and the L2 regularisation\n", 17 | "parameter was set to $c = 10^{−4}$." 
18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 1, 23 | "metadata": {}, 24 | "outputs": [ 25 | { 26 | "name": "stderr", 27 | "output_type": "stream", 28 | "text": [ 29 | "WARNING:root:This caffe2 python run does not have GPU support. Will run in CPU only mode.\n", 30 | "WARNING:root:Debug message: No module named caffe2_pybind11_state_gpu\n" 31 | ] 32 | }, 33 | { 34 | "name": "stdout", 35 | "output_type": "stream", 36 | "text": [ 37 | "Running in CPU mode\n", 38 | "Training model from 0 to 10000 iterations\n" 39 | ] 40 | } 41 | ], 42 | "source": [ 43 | "import os, numpy as np\n", 44 | "from caffe2.python import core, model_helper, workspace, brew, utils\n", 45 | "from caffe2.proto import caffe2_pb2\n", 46 | "\n", 47 | "%matplotlib inline\n", 48 | "from matplotlib import pyplot\n", 49 | "\n", 50 | "NUM_RES_BLOCKS = 2 # [19(alphago zero),39] How many Residual Blocks will be used in the model\n", 51 | "FILTERS = 128 # 128/192/256(alphago zero)/384 How many K will be used in the model\n", 52 | "BASE_LR = -0.1 # (-0.1,0) The base Learning Rate, alphago zero uses -0.1 and times 0.1 every 200K iters\n", 53 | "\n", 54 | "if workspace.has_gpu_support:\n", 55 | " device_opts = core.DeviceOption(caffe2_pb2.CUDA, workspace.GetDefaultGPUID())\n", 56 | " print('Running in GPU mode on default device {}'.format(workspace.GetDefaultGPUID()))\n", 57 | "else:\n", 58 | " device_opts = core.DeviceOption(caffe2_pb2.CPU, 0)\n", 59 | " print('Running in CPU mode')\n", 60 | "\n", 61 | "TRAIN_BATCHES = 16 # how many samples will be trained within one mini-batch, depends on your hardware\n", 62 | "PRE_TRAINED_ITERS = 0 # [0, infinity) how many batches the model has been trained before\n", 63 | "SKIP_TRAINED_DATA = 0 # [0, infinity) if this is a resumed training, how many input data will be skipped\n", 64 | "TRAIN_ITERS = 10000 # [0, infinity) how many batches the model will be trained\n", 65 | "TEST_BATCHES = 100 # how many samples will be tested within one 
mini-batch\n", 66 | "TEST_ITERS = 100 # how many batches the model will be tested\n", 67 | "\n", 68 | "ROOT_FOLDER = os.path.join(os.path.expanduser('~'), 'python', 'tutorial_data','zero','param') # folder stores the loss/accuracy log\n", 69 | "TRAIN_DATA = os.path.join(os.path.expanduser('~'), 'python', 'tutorial_data','zero','train_data') # db folder stores the preprocessed games\n", 70 | "TEST_DATA = os.path.join(os.path.expanduser('~'), 'python', 'tutorial_data','zero','test_data') # db folder stores the preprocessed test data\n", 71 | "\n", 72 | "# if this is a resumed training, where to load the init_param from\n", 73 | "LOAD_FOLDER = os.path.join(ROOT_FOLDER, \"res={}-k={}-iter={}\".format(NUM_RES_BLOCKS,FILTERS,PRE_TRAINED_ITERS))\n", 74 | "\n", 75 | "# if the model will be saved for future resume training, where to store it\n", 76 | "SAVE_FOLDER = os.path.join(ROOT_FOLDER, \"res={}-k={}-iter={}\".format(NUM_RES_BLOCKS,FILTERS,PRE_TRAINED_ITERS+TRAIN_ITERS))\n", 77 | "\n", 78 | "workspace.ResetWorkspace(ROOT_FOLDER)\n", 79 | "\n", 80 | "print('Training model from {} to {} iterations'.format(PRE_TRAINED_ITERS,PRE_TRAINED_ITERS+TRAIN_ITERS))" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "## AlphaGo Neural Network Architecture\n" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "### Data Input\n", 95 | ">The input to the neural network is a 19 × 19 × 17 image stack\n", 96 | "comprising 17 binary feature planes. 8 feature planes $X_t$ consist of binary values indicating the\n", 97 | "presence of the current player’s stones ($X_t^i = 1$ if intersection $i$ contains a stone of the player’s\n", 98 | "colour at time-step $t$; $0$ if the intersection is empty, contains an opponent stone, or if $t < 0$). A\n", 99 | "further 8 feature planes, $Y_t$ , represent the corresponding features for the opponent’s stones. 
The\n", 100 | "final feature plane, $C$, represents the colour to play, and has a constant value of either 1 if black\n", 101 | "is to play or 0 if white is to play. These planes are concatenated together to give input features\n", 102 | "$s_t = [X_t , Y_t , X_{t−1} , Y_{t−1} , ..., X_{t−7} , Y_{t−7} , C]$." 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 2, 108 | "metadata": { 109 | "collapsed": true 110 | }, 111 | "outputs": [], 112 | "source": [ 113 | "from modelingZero import AddInput, AddOneHot" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "### DCNN\n", 121 | ">The input features $S_t$ are processed by a residual tower that consists of a single convolutional block followed by either 19 or 39 residual blocks 4. The convolutional block applies the following modules:\n", 122 | "1. A convolution of 256 filters of kernel size 3 x 3 with stride 1\n", 123 | "2. Batch normalisation\n", 124 | "3. A rectifier non-linearity\n", 125 | "\n", 126 | ">Each residual block applies the following modules sequentially to its input:\n", 127 | "1. A convolution of 256 filters of kernel size 3 x 3 with stride 1\n", 128 | "2. Batch normalisation\n", 129 | "3. A rectifier non-linearity\n", 130 | "4. A convolution of 256 filters of kernel size 3 x 3 with stride 1\n", 131 | "5. Batch normalisation\n", 132 | "6. A skip connection that adds the input to the block\n", 133 | "7. A rectifier non-linearity\n", 134 | "\n", 135 | ">The output of the residual tower is passed into two separate “heads” for computing the policy and value respectively. \n", 136 | ">The policy head applies the following modules:\n", 137 | "1. A convolution of 2 filters of kernel size 1 x 1 with stride 1\n", 138 | "2. Batch normalisation\n", 139 | "3. A rectifier non-linearity\n", 140 | "4. 
A fully connected linear layer that outputs a vector of size $19^2 + 1 = 362$ corresponding to logit probabilities for all intersections and the pass move\n", 141 | "\n", 142 | ">The value head applies the following modules:\n", 143 | "1. A convolution of 1 filter of kernel size 1 x 1 with stride 1\n", 144 | "2. Batch normalisation\n", 145 | "3. A rectifier non-linearity\n", 146 | "4. A fully connected linear layer to a hidden layer of size 256\n", 147 | "5. A rectifier non-linearity\n", 148 | "6. A fully connected linear layer to a scalar\n", 149 | "7. A tanh non-linearity outputting a scalar in the range `[-1; 1]`" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 3, 155 | "metadata": { 156 | "collapsed": true 157 | }, 158 | "outputs": [], 159 | "source": [ 160 | "from modelingZero import AddResNetModel, AddSoftmax" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": {}, 166 | "source": [ 167 | "### Accuracy\n", 168 | "Please note predict is 4 dimensional tensor in shape of N x 1 x 19 x 19, and label is 2 dimensional tensor in shape of N x 1." 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 4, 174 | "metadata": { 175 | "collapsed": true 176 | }, 177 | "outputs": [], 178 | "source": [ 179 | "from modelingZero import AddAccuracy" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "### Training Operator (Backward Propagation)\n", 187 | "\n", 188 | "A game terminates at step $T$ when both players pass, when the search value drops below a \n", 189 | "resignation threshold (`10%`), or when the game exceeds a maximum length (`722 steps`); \n", 190 | "the game is then scored to give a final reward of $r_T\\in\\{-1,+1\\}$ (see\n", 191 | "Methods for details).
The data for each time-step $t$ is stored as $(s_t, \\pi_t, z_t)$ \n", 192 | "where $z_t = \\pm r_T$ is the game winner from the perspective of the current player at step $t$.\n", 193 | "In parallel (Figure 1b), new network parameters $\\theta_i$ are trained from data $(s,\\pi,z)$\n", 194 | "sampled uniformly among all time-steps of the last iteration(s) of self-play. The neural \n", 195 | "network $(p,v) = f_{\\theta _i}(s)$ is adjusted to minimise the error between the predicted \n", 196 | "value $v$ and the self-play winner $z$, and to maximise the similarity of the neural network \n", 197 | "move probabilities $p$ to the search probabilities $\\pi$. Specifically, the parameters $\\theta$\n", 198 | "are adjusted by gradient descent on a loss function $l$ that sums over mean-squared error and\n", 199 | "cross-entropy losses respectively,\n", 200 | "\n", 201 | ">$(p,v) = f_\\theta(s), l = (z - v)^2 - \\pi^T \\log p + c|| \\theta ||^2$" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 5, 207 | "metadata": { 208 | "collapsed": true 209 | }, 210 | "outputs": [], 211 | "source": [ 212 | "from modelingZero import AddTrainingOperators" 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "## Build the actual network" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 6, 225 | "metadata": { 226 | "collapsed": true 227 | }, 228 | "outputs": [], 229 | "source": [ 230 | "arg_scope = {\"order\": \"NCHW\"}" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "### Train Net" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 7, 243 | "metadata": {}, 244 | "outputs": [ 245 | { 246 | "data": { 247 | "text/plain": [ 248 | "True" 249 | ] 250 | }, 251 | "execution_count": 7, 252 | "metadata": {}, 253 | "output_type": "execute_result" 254 | } 255 | ], 256 | "source": [ 257 | "# Skip model only has DBInput to
waste the input\n", 258 | "skip_model = model_helper.ModelHelper(name=\"skip_model\", arg_scope=arg_scope, init_params=True)\n", 259 | "_d, _l, _r = AddInput(\n", 260 | " skip_model, batch_size=TRAIN_BATCHES,\n", 261 | " db=TRAIN_DATA,\n", 262 | " db_type='leveldb')\n", 263 | "# Initialize params and create network\n", 264 | "workspace.RunNetOnce(skip_model.param_init_net)\n", 265 | "workspace.CreateNet(skip_model.net, overwrite=True)" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": 8, 271 | "metadata": {}, 272 | "outputs": [ 273 | { 274 | "name": "stderr", 275 | "output_type": "stream", 276 | "text": [ 277 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: OneHot.\n", 278 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n", 279 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n", 280 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n", 281 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n", 282 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n", 283 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: OneHot.\n" 284 | ] 285 | }, 286 | { 287 | "data": { 288 | "text/plain": [ 289 | "True" 290 | ] 291 | }, 292 | "execution_count": 8, 293 | "metadata": {}, 294 | "output_type": "execute_result" 295 | } 296 | ], 297 | "source": [ 298 | "# Train Net: DBInput ==> Predict Net ==> Loss ==> Backward Propagation\n", 299 | "with core.DeviceScope(device_opts):\n", 300 | " train_model = model_helper.ModelHelper(name=\"policy_train\", arg_scope=arg_scope, init_params=True)\n", 301 | " data, label, reward = AddInput(\n", 302 | " train_model, batch_size=TRAIN_BATCHES,\n", 303 | " db=TRAIN_DATA,\n", 304 | " db_type='leveldb')\n", 305 | " onehot = AddOneHot(train_model, label)\n", 306 | "
predict, value = AddResNetModel(train_model, data, num_blocks=NUM_RES_BLOCKS, filters=FILTERS, is_test=False)\n", 307 | " softmax = AddSoftmax(train_model, predict)\n", 308 | " AddAccuracy(train_model, softmax, label)\n", 309 | " AddTrainingOperators(train_model, predict, onehot, value, reward, \n", 310 | " base_lr=BASE_LR, policy='fixed') #policy='step', stepsize=200000, gamma=0.1)\n", 311 | "# Initialize params and create network\n", 312 | "workspace.RunNetOnce(train_model.param_init_net)\n", 313 | "workspace.CreateNet(train_model.net, overwrite=True)" 314 | ] 315 | }, 316 | { 317 | "cell_type": "markdown", 318 | "metadata": {}, 319 | "source": [ 320 | "### Test Net" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": 9, 326 | "metadata": {}, 327 | "outputs": [ 328 | { 329 | "name": "stderr", 330 | "output_type": "stream", 331 | "text": [ 332 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n", 333 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n", 334 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n", 335 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n", 336 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n" 337 | ] 338 | }, 339 | { 340 | "data": { 341 | "text/plain": [ 342 | "True" 343 | ] 344 | }, 345 | "execution_count": 9, 346 | "metadata": {}, 347 | "output_type": "execute_result" 348 | } 349 | ], 350 | "source": [ 351 | "# Test Net: DBInput ==> Predict Net ==> Accuracy\n", 352 | "with core.DeviceScope(device_opts):\n", 353 | " test_model = model_helper.ModelHelper(name=\"policy_test\", arg_scope=arg_scope, init_params=False)\n", 354 | " data, label, reward = AddInput(\n", 355 | " test_model, batch_size=TEST_BATCHES,\n", 356 | " db=TEST_DATA,\n", 357 | " db_type='leveldb')\n", 358 | " predict, value = 
AddResNetModel(test_model, data, num_blocks=NUM_RES_BLOCKS, filters=FILTERS)\n", 359 | " softmax = AddSoftmax(test_model, predict)\n", 360 | " AddAccuracy(test_model, softmax, label)\n", 361 | "# Initialize params and create network\n", 362 | "workspace.RunNetOnce(test_model.param_init_net)\n", 363 | "workspace.CreateNet(test_model.net, overwrite=True)" 364 | ] 365 | }, 366 | { 367 | "cell_type": "markdown", 368 | "metadata": {}, 369 | "source": [ 370 | "### Deploy Net" 371 | ] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": 10, 376 | "metadata": {}, 377 | "outputs": [ 378 | { 379 | "name": "stderr", 380 | "output_type": "stream", 381 | "text": [ 382 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n", 383 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n", 384 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n", 385 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n", 386 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n" 387 | ] 388 | }, 389 | { 390 | "data": { 391 | "text/plain": [ 392 | "True" 393 | ] 394 | }, 395 | "execution_count": 10, 396 | "metadata": {}, 397 | "output_type": "execute_result" 398 | } 399 | ], 400 | "source": [ 401 | "# Deploy Net: Blob('data') ==> Predict Net ==> Blob('predict')\n", 402 | "with core.DeviceScope(device_opts):\n", 403 | " deploy_model = model_helper.ModelHelper(name=\"policy_deploy\", arg_scope=arg_scope, init_params=False)\n", 404 | " predict, value = AddResNetModel(deploy_model, 'data', num_blocks=NUM_RES_BLOCKS, filters=FILTERS)\n", 405 | " AddSoftmax(deploy_model, predict)\n", 406 | "# Initialize params and create network\n", 407 | "workspace.RunNetOnce(deploy_model.param_init_net)\n", 408 | "workspace.CreateNet(deploy_model.net, overwrite=True)" 409 | ] 410 | }, 411 | { 412 | "cell_type":
"markdown", 413 | "metadata": {}, 414 | "source": [ 415 | "## Run the training and testing\n", 416 | "### resume from last training\n", 417 | " Training a dCNN takes quite a long time. To pause and resume the training, set PRE_TRAINED_ITERS to the number of iterations already trained so the program resumes from where it last stopped." 418 | ] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "execution_count": null, 423 | "metadata": {}, 424 | "outputs": [], 425 | "source": [ 426 | "#import caffe2.python.predictor.predictor_exporter as pe\n", 427 | "from modelingZero import LoadParams, SaveParams\n", 428 | "# construct the model to be exported\n", 429 | "#pe_meta = pe.PredictorExportMeta(\n", 430 | "# predict_net=deploy_model.net.Proto(),\n", 431 | "# parameters=[str(b) for b in deploy_model.params], \n", 432 | "# inputs=[\"data\"],\n", 433 | "# outputs=[\"softmax\", \"value\"],\n", 434 | "#)\n", 435 | "\n", 436 | "if PRE_TRAINED_ITERS > 0:\n", 437 | " # load the predict net\n", 438 | " with core.DeviceScope(device_opts):\n", 439 | " #deploy_model.net = pe.prepare_prediction_net(os.path.join(LOAD_FOLDER, \"policy_model.minidb\"), \"minidb\")\n", 440 | " LoadParams(os.path.join(LOAD_FOLDER, \"policy_model.pb\"))\n", 441 | " print('Params loaded from {}'.format(LOAD_FOLDER))" 442 | ] 443 | }, 444 | { 445 | "cell_type": "markdown", 446 | "metadata": {}, 447 | "source": [ 448 | "### Train the model" 449 | ] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": null, 454 | "metadata": { 455 | "scrolled": true 456 | }, 457 | "outputs": [], 458 | "source": [ 459 | "#%%capture output # Jupyter magic command to capture the output\n", 460 | "\n", 461 | "# set the number of iterations and track the accuracy & loss\n", 462 | "accuracy = np.zeros(TRAIN_ITERS)\n", 463 | "loss = np.zeros(TRAIN_ITERS)\n", 464 | "loss1 = np.zeros(TRAIN_ITERS)\n", 465 | "loss2 = np.zeros(TRAIN_ITERS)\n", 466 | "\n", 467 | "if TRAIN_ITERS > 0:\n", 468 | " # skip the data which should not be trained again\n", 469 | " for
i in range(SKIP_TRAINED_DATA):\n", 470 | " workspace.RunNet(skip_model.net)\n", 471 | " \n", 472 | " # Now, run the network \n", 473 | " for i in range(0, TRAIN_ITERS):\n", 474 | " workspace.RunNet(train_model.net)\n", 475 | " accuracy[i] = workspace.FetchBlob('accuracy')\n", 476 | " loss[i] = workspace.FetchBlob('loss')\n", 477 | " loss1[i] = workspace.FetchBlob('xent')\n", 478 | " loss2[i] = workspace.FetchBlob('msqrl2')\n", 479 | " # checkpoint every 10000 iterations\n", 480 | " if i > 0 and i % 10000 == 0:\n", 481 | " if not os.path.exists(SAVE_FOLDER):\n", 482 | " os.makedirs(SAVE_FOLDER)\n", 483 | " #pe.save_to_db(\"minidb\", os.path.join(SAVE_FOLDER, \"policy_model_checkpoint_{}.minidb\".format(PRE_TRAINED_ITERS+i)), pe_meta)\n", 484 | " SaveParams(deploy_model, os.path.join(SAVE_FOLDER, \"policy_model_checkpoint_{}.pb\".format(PRE_TRAINED_ITERS+i)))\n", 485 | " print('Checkpoint {} saved to {}'.format(PRE_TRAINED_ITERS+i,SAVE_FOLDER))" 486 | ] 487 | }, 488 | { 489 | "cell_type": "code", 490 | "execution_count": null, 491 | "metadata": {}, 492 | "outputs": [], 493 | "source": [ 494 | "if TRAIN_ITERS > 0:\n", 495 | " # After the execution is done, plot the values.\n", 496 | " pyplot.plot(loss, 'b')\n", 497 | " pyplot.plot(loss1, 'darkgreen')\n", 498 | " pyplot.plot(loss2, 'lightgreen')\n", 499 | " pyplot.plot(accuracy, 'r')\n", 500 | " pyplot.legend(('Loss', 'Accuracy'), loc='upper right')" 501 | ] 502 | }, 503 | { 504 | "cell_type": "markdown", 505 | "metadata": {}, 506 | "source": [ 507 | "### Test the model" 508 | ] 509 | }, 510 | { 511 | "cell_type": "code", 512 | "execution_count": null, 513 | "metadata": {}, 514 | "outputs": [], 515 | "source": [ 516 | "test_accuracy = np.zeros(TEST_ITERS)\n", 517 | "for i in range(TEST_ITERS):\n", 518 | " workspace.RunNet(test_model.net)\n", 519 | " test_accuracy[i] = workspace.FetchBlob('accuracy')\n", 520 | "# After the execution is done, let's plot the values.\n", 521 | "pyplot.plot(test_accuracy, 'r')\n", 522 | 
"pyplot.title('Acuracy over test batches.')\n", 523 | "print('test_accuracy: %f' % test_accuracy.mean())" 524 | ] 525 | }, 526 | { 527 | "cell_type": "markdown", 528 | "metadata": {}, 529 | "source": [ 530 | "### Save the work for future use" 531 | ] 532 | }, 533 | { 534 | "cell_type": "code", 535 | "execution_count": null, 536 | "metadata": {}, 537 | "outputs": [], 538 | "source": [ 539 | "if TRAIN_ITERS > 0:\n", 540 | " if not os.path.exists(SAVE_FOLDER):\n", 541 | " os.makedirs(SAVE_FOLDER)\n", 542 | " # save the model to a file. Use minidb as the file format\n", 543 | " #pe.save_to_db(\"minidb\", os.path.join(SAVE_FOLDER, \"policy_model.minidb\"), pe_meta)\n", 544 | " SaveParams(deploy_model, os.path.join(SAVE_FOLDER, \"policy_model.pb\"))\n", 545 | " print('Params saved to {}'.format(SAVE_FOLDER))" 546 | ] 547 | }, 548 | { 549 | "cell_type": "code", 550 | "execution_count": null, 551 | "metadata": { 552 | "collapsed": true 553 | }, 554 | "outputs": [], 555 | "source": [] 556 | }, 557 | { 558 | "cell_type": "code", 559 | "execution_count": null, 560 | "metadata": { 561 | "collapsed": true 562 | }, 563 | "outputs": [], 564 | "source": [] 565 | } 566 | ], 567 | "metadata": { 568 | "kernelspec": { 569 | "display_name": "Python 2", 570 | "language": "python", 571 | "name": "python2" 572 | }, 573 | "language_info": { 574 | "codemirror_mode": { 575 | "name": "ipython", 576 | "version": 2 577 | }, 578 | "file_extension": ".py", 579 | "mimetype": "text/x-python", 580 | "name": "python", 581 | "nbconvert_exporter": "python", 582 | "pygments_lexer": "ipython2", 583 | "version": "2.7.12" 584 | }, 585 | "toc": { 586 | "colors": { 587 | "hover_highlight": "#DAA520", 588 | "navigate_num": "#000000", 589 | "navigate_text": "#333333", 590 | "running_highlight": "#FF0000", 591 | "selected_highlight": "#FFD700", 592 | "sidebar_border": "#EEEEEE", 593 | "wrapper_background": "#FFFFFF" 594 | }, 595 | "moveMenuLeft": true, 596 | "nav_menu": { 597 | "height": "30px", 598 | "width": 
"252px" 599 | }, 600 | "navigate_menu": true, 601 | "number_sections": true, 602 | "sideBar": true, 603 | "threshold": 4, 604 | "toc_cell": false, 605 | "toc_position": { 606 | "height": "856px", 607 | "left": "0px", 608 | "right": "20px", 609 | "top": "107px", 610 | "width": "179px" 611 | }, 612 | "toc_section_display": "block", 613 | "toc_window_display": true, 614 | "widenNotebook": false 615 | } 616 | }, 617 | "nbformat": 4, 618 | "nbformat_minor": 1 619 | } 620 | -------------------------------------------------------------------------------- /Mock AlphaGo Zero (3B) Reinforced Learning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Mock AlphaGo Zero (3B) Reinforced Learning\n", 8 | "In this notebook, we will train the policy network by letting them compete each other according to DeepMind:" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": {}, 15 | "outputs": [ 16 | { 17 | "name": "stderr", 18 | "output_type": "stream", 19 | "text": [ 20 | "WARNING:root:This caffe2 python run does not have GPU support. 
Will run in CPU only mode.\n", 21 | "WARNING:root:Debug message: No module named caffe2_pybind11_state_gpu\n" 22 | ] 23 | }, 24 | { 25 | "name": "stdout", 26 | "output_type": "stream", 27 | "text": [ 28 | "Running in CPU mode\n" 29 | ] 30 | } 31 | ], 32 | "source": [ 33 | "import os, numpy as np\n", 34 | "from caffe2.python import core, model_helper, workspace, brew, utils\n", 35 | "from caffe2.proto import caffe2_pb2\n", 36 | "from sgfutil import BOARD_POSITION\n", 37 | "\n", 38 | "%matplotlib inline\n", 39 | "from matplotlib import pyplot\n", 40 | "\n", 41 | "# how many games will be run in one minibatch\n", 42 | "GAMES_BATCHES = 16 # [1,infinity) depends on your hardware\n", 43 | "SEARCH_WIDE = 1600 # [1, infinity) for each step, run MCTS to obtain better distribution\n", 44 | "# how many iterations for this tournament\n", 45 | "TOURNAMENT_ITERS = 1 # [1,infinity)\n", 46 | "\n", 47 | "if workspace.has_gpu_support:\n", 48 | " device_opts = core.DeviceOption(caffe2_pb2.CUDA, workspace.GetDefaultGPUID())\n", 49 | " print('Running in GPU mode on default device {}'.format(workspace.GetDefaultGPUID()))\n", 50 | "else :\n", 51 | " device_opts = core.DeviceOption(caffe2_pb2.CPU, 0)\n", 52 | " print('Running in CPU mode')\n", 53 | "\n", 54 | "arg_scope = {\"order\": \"NCHW\"}\n", 55 | "\n", 56 | "ROOT_FOLDER = os.path.join(os.path.expanduser('~'), 'python', 'tutorial_data','zero','param') # folder stores the loss/accuracy log" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 2, 62 | "metadata": { 63 | "collapsed": true 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "# Only 3 features are needed for AlphaGo Zero\n", 68 | "# 0 - Player Stone, 1 - Opponent Stone, 3 - Current Player Color\n", 69 | "DEFAULT_FEATURES = [\"board\", \"color\"]\n", 70 | "\n", 71 | "# reverse the index of player/opponent\n", 72 | "# 0,2,4,6... are player, 1,3,5,7... 
are opponent\n", 73 | "OPPONENT_INDEX = [1,0,3,2,5,4,7,6,9,8,11,10,13,12]" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "We need to differentiate between the primary player and the sparring partner. The primary player will learn from the game results." 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 3, 86 | "metadata": {}, 87 | "outputs": [ 88 | { 89 | "name": "stdout", 90 | "output_type": "stream", 91 | "text": [ 92 | "Training model from 0 to 1 iterations\n" 93 | ] 94 | } 95 | ], 96 | "source": [ 97 | "### Config for primary player\n", 98 | "PRIMARY_WORKSPACE = os.path.join(ROOT_FOLDER, 'primary')\n", 99 | "PRIMARY_RES_BLOCKS = 1 # [1,19(AlphaGo Zero),39]\n", 100 | "PRIMARY_FILTERS = 128 # [128, 192, 256(AlphaGo Zero), 384]\n", 101 | "PRIMARY_PRE_TRAINED_ITERS = 0\n", 102 | "# before training, where to load the params\n", 103 | "PRIMARY_LOAD_FOLDER = os.path.join(ROOT_FOLDER, \"RL-res={}-k={}-iter={}\"\n", 104 | " .format(PRIMARY_RES_BLOCKS,PRIMARY_FILTERS,PRIMARY_PRE_TRAINED_ITERS))\n", 105 | "BASE_LR = -0.01 # (-0.01,0) The base Learning Rate; 0 to disable it.\n", 106 | "TRAIN_BATCHES = 16 # how many samples will be trained within one mini-batch, depends on your hardware\n", 107 | "# after training, where to store the params\n", 108 | "PRIMARY_SAVE_FOLDER = os.path.join(ROOT_FOLDER, \"RL-res={}-k={}-iter={}\"\n", 109 | " .format(PRIMARY_RES_BLOCKS,PRIMARY_FILTERS,PRIMARY_PRE_TRAINED_ITERS+TOURNAMENT_ITERS))\n", 110 | "if not os.path.exists(PRIMARY_SAVE_FOLDER):\n", 111 | " os.makedirs(PRIMARY_SAVE_FOLDER)\n", 112 | "\n", 113 | "### Config for sparring partner\n", 114 | "SPARR_WORKSPACE = os.path.join(ROOT_FOLDER, 'sparring')\n", 115 | "SPARR_LOAD_FOLDER = os.path.join(ROOT_FOLDER, \"res={}-k={}-iter={}\".format(1,128,1))\n", 116 | "\n", 117 | "print('Training model from {} to {} iterations'.format(PRIMARY_PRE_TRAINED_ITERS,PRIMARY_PRE_TRAINED_ITERS+TOURNAMENT_ITERS))" 118 | ] 119 | }, 120 | { 121 | "cell_type":
"markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "## AlphaGo Neural Network Architecture" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 4, 130 | "metadata": { 131 | "collapsed": true 132 | }, 133 | "outputs": [], 134 | "source": [ 135 | "from modelingZero import AddResNetModel, AddSoftmax, AddTrainingOperators" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "## Build the actual network" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 5, 148 | "metadata": { 149 | "collapsed": true 150 | }, 151 | "outputs": [], 152 | "source": [ 153 | "import caffe2.python.predictor.predictor_exporter as pe\n", 154 | "\n", 155 | "data = np.empty(shape=(TRAIN_BATCHES,17,19,19), dtype=np.float32)\n", 156 | "expect = np.empty(shape=(TRAIN_BATCHES,362), dtype=np.float32) # expected distribution of probability\n", 157 | "reward = np.empty(shape=(TRAIN_BATCHES,), dtype=np.float32) # scalar values between [-1,1]" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "### Primary player\n", 165 | ">Train Net: Blob('data','label') ==> Predict Net ==> Loss ==> Backward Propagation" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 6, 171 | "metadata": {}, 172 | "outputs": [ 173 | { 174 | "name": "stderr", 175 | "output_type": "stream", 176 | "text": [ 177 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n", 178 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: Normalize.\n", 179 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n", 180 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: Normalize.\n", 181 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: PadImage.\n", 182 | "WARNING:root:You are creating an op that the ModelHelper
does not recognize: Normalize.\n", 183 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: Normalize.\n", 184 | "WARNING:root:You are creating an op that the ModelHelper does not recognize: Normalize.\n" 185 | ] 186 | } 187 | ], 188 | "source": [ 189 | "workspace.SwitchWorkspace(PRIMARY_WORKSPACE, True)\n", 190 | "\n", 191 | "with core.DeviceScope(device_opts):\n", 192 | " workspace.FeedBlob(\"data\", data)\n", 193 | " workspace.FeedBlob('expect', expect)\n", 194 | " workspace.FeedBlob('reward', reward)\n", 195 | " # for learning from winner\n", 196 | " primary_train_model = model_helper.ModelHelper(name=\"primary_train_model\", arg_scope=arg_scope, init_params=True)\n", 197 | " predict, value = AddResNetModel(primary_train_model, 'data', num_blocks=PRIMARY_RES_BLOCKS, filters=PRIMARY_FILTERS)\n", 198 | " AddTrainingOperators(primary_train_model, predict, None, 'expect', value, 'reward', base_lr=BASE_LR)\n", 199 | " workspace.RunNetOnce(primary_train_model.param_init_net)\n", 200 | " workspace.CreateNet(primary_train_model.net, overwrite=True)\n", 201 | " # \n", 202 | " primary_predict_net = pe.prepare_prediction_net(os.path.join(PRIMARY_LOAD_FOLDER, \"policy_model.minidb\"), \"minidb\")" 203 | ] 204 | }, 205 | { 206 | "cell_type": "markdown", 207 | "metadata": {}, 208 | "source": [ 209 | "Function `LearnFromWinningGames` takes the results of a tournament and trains the primary player on the moves played by the winning side."
210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 7, 215 | "metadata": { 216 | "collapsed": true 217 | }, 218 | "outputs": [], 219 | "source": [ 220 | "def LearnFromWinningGames(history, winner, mini_batch=TRAIN_BATCHES):\n", 221 | " data = np.empty(shape=(mini_batch,17,19,19), dtype=np.float32)\n", 222 | " label = np.empty(shape=(mini_batch,), dtype=np.int32)\n", 223 | " #iter = 0\n", 224 | " k = 0\n", 225 | " for i in range(len(winner)):\n", 226 | " #print('Learning {} steps in {} of {} games'.format(iter * TRAIN_BATCHES, i, GAMES_BATCHES))\n", 227 | " for step in history[i]:\n", 228 | " if (step[0] == 'B' and winner[i] == 'B+') or (step[0] == 'W' and winner[i] == 'W+'):\n", 229 | " data[k] = step[2]\n", 230 | " label[k] = step[1]\n", 231 | " k += 1\n", 232 | " #iter += 1\n", 233 | " if k == mini_batch:\n", 234 | " k = 0\n", 235 | " workspace.SwitchWorkspace(PRIMARY_WORKSPACE)\n", 236 | " with core.DeviceScope(device_opts):\n", 237 | " workspace.FeedBlob(\"data\", data)\n", 238 | " workspace.FeedBlob(\"label\", label)\n", 239 | " workspace.RunNet(primary_train_model.net)" 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": {}, 245 | "source": [ 246 | "### Sparring partner\n", 247 | " Load on the fly" 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "metadata": {}, 253 | "source": [ 254 | "## Run the tournament and training\n", 255 | ">We use a reward function $r(s)$ that is zero for all non-terminal time-steps $t < T$.\n", 256 | "The outcome $z_t = \\pm r(s_T)$ is the terminal reward at the end of the game from the perspective of the\n", 257 | "current player at time-step $t$: $+1$ for winning and $-1$ for losing. Weights are then updated at each\n", 258 | "time-step $t$ by stochastic gradient ascent in the direction that maximizes expected outcome." 
259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": 8, 264 | "metadata": { 265 | "collapsed": true 266 | }, 267 | "outputs": [], 268 | "source": [ 269 | "from go import GameState, BLACK, WHITE, EMPTY, PASS\n", 270 | "from preprocessing import Preprocess\n", 271 | "from datetime import datetime\n", 272 | "from sgfutil import GetWinner, WriteBackSGF\n", 273 | "import sgf\n", 274 | "\n", 275 | "np.random.seed(datetime.now().microsecond)\n", 276 | "\n", 277 | "# construct the model to be exported\n", 278 | "pe_meta = pe.PredictorExportMeta(\n", 279 | " predict_net=primary_predict_net.Proto(),\n", 280 | " parameters=[str(b) for b in primary_train_model.params],\n", 281 | " inputs=[\"data\"],\n", 282 | " outputs=[\"softmax\"],\n", 283 | ")" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": 9, 289 | "metadata": { 290 | "scrolled": true 291 | }, 292 | "outputs": [ 293 | { 294 | "name": "stdout", 295 | "output_type": "stream", 296 | "text": [ 297 | "Tournament 0 Primary(B) vs Sparring(W|policy_model.minidb) started @2017-11-01 18:30:04.795610\n", 298 | "Traceback for operator 1 in network policy_deploy_1\n" 299 | ] 300 | }, 301 | { 302 | "ename": "RuntimeError", 303 | "evalue": "[enforce fail at conv_op_impl.h:46] C == filter.dim32(1) * group_. 
Convolution op: input channels does not match: # of input channels 4 is not equal to kernel channels * group:17*1 Error from operator: \ninput: \"pad1\" input: \"conv1_w\" input: \"conv1_b\" output: \"conv1\" name: \"\" type: \"Conv\" arg { name: \"kernel\" i: 3 } arg { name: \"exhaustive_search\" i: 0 } arg { name: \"order\" s: \"NCHW\" } device_option { device_type: 0 cuda_gpu_id: 0 } engine: \"CUDNN\"", 304 | "output_type": "error", 305 | "traceback": [ 306 | "\u001b[0;31m\u001b[0m", 307 | "\u001b[0;31mRuntimeError\u001b[0mTraceback (most recent call last)", 308 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 48\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mcore\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDeviceScope\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdevice_opts\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 49\u001b[0m \u001b[0mworkspace\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mFeedBlob\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'data'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mboard\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 50\u001b[0;31m \u001b[0mworkspace\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mRunNet\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprimary_predict_net\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 51\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 52\u001b[0m \u001b[0;31m# sparring partner make move\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 309 | "\u001b[0;32m/usr/local/caffe2/python/workspace.py\u001b[0m in \u001b[0;36mRunNet\u001b[0;34m(name, num_iter, allow_fail)\u001b[0m\n\u001b[1;32m 222\u001b[0m 
\u001b[0mC\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mWorkspace\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcurrent\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_last_failed_op_net_position\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 223\u001b[0m \u001b[0mGetNetName\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 224\u001b[0;31m \u001b[0mStringifyNetName\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnum_iter\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mallow_fail\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 225\u001b[0m )\n\u001b[1;32m 226\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 310 | "\u001b[0;32m/usr/local/caffe2/python/workspace.py\u001b[0m in \u001b[0;36mCallWithExceptionIntercept\u001b[0;34m(func, op_id_fetcher, net_name, *args, **kwargs)\u001b[0m\n\u001b[1;32m 187\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mCallWithExceptionIntercept\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mop_id_fetcher\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnet_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 188\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 189\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 190\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 191\u001b[0m \u001b[0mop_id\u001b[0m 
\u001b[0;34m=\u001b[0m \u001b[0mop_id_fetcher\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 311 | "\u001b[0;31mRuntimeError\u001b[0m: [enforce fail at conv_op_impl.h:46] C == filter.dim32(1) * group_. Convolution op: input channels does not match: # of input channels 4 is not equal to kernel channels * group:17*1 Error from operator: \ninput: \"pad1\" input: \"conv1_w\" input: \"conv1_b\" output: \"conv1\" name: \"\" type: \"Conv\" arg { name: \"kernel\" i: 3 } arg { name: \"exhaustive_search\" i: 0 } arg { name: \"order\" s: \"NCHW\" } device_option { device_type: 0 cuda_gpu_id: 0 } engine: \"CUDNN\"" 312 | ] 313 | } 314 | ], 315 | "source": [ 316 | "for tournament in range(PRIMARY_PRE_TRAINED_ITERS, PRIMARY_PRE_TRAINED_ITERS+TOURNAMENT_ITERS):\n", 317 | " # Every 500 tournament, copy current player to opponent. i.e. checkpoint\n", 318 | " if tournament > 0 and tournament % 20 == 0:\n", 319 | " pe.save_to_db(\"minidb\", os.path.join(PRIMARY_SAVE_FOLDER, \"policy_model.minidb\"), pe_meta)\n", 320 | " print('Checkpoint saved to {}'.format(PRIMARY_SAVE_FOLDER))\n", 321 | " pe.save_to_db(\"minidb\", os.path.join(SPARR_LOAD_FOLDER, \"policy_model_RL_{}.minidb\".format(PRIMARY_PRE_TRAINED_ITERS+tournament)), pe_meta)\n", 322 | " print('Checkpoint saved to {}'.format(SPARR_LOAD_FOLDER))\n", 323 | " \n", 324 | " # Randomly change color of player\n", 325 | " PRIMARY_PLAYER = np.random.choice(['B','W'])\n", 326 | " if PRIMARY_PLAYER == 'B':\n", 327 | " SPARRING_PLAYER = 'W'\n", 328 | " else:\n", 329 | " SPARRING_PLAYER = 'B'\n", 330 | " \n", 331 | " # Randomly pickup sparring partner\n", 332 | " workspace.SwitchWorkspace(SPARR_WORKSPACE, True)\n", 333 | " sparring_param_file = np.random.choice(os.listdir(SPARR_LOAD_FOLDER))\n", 334 | " with core.DeviceScope(device_opts):\n", 335 | " sparring_predict_net = pe.prepare_prediction_net(os.path.join(SPARR_LOAD_FOLDER, sparring_param_file), \"minidb\")\n", 336 | " print('Tournament {} 
Primary({}) vs Sparring({}|{}) started @{}'\n", 337 | " .format(tournament, PRIMARY_PLAYER, SPARRING_PLAYER, sparring_param_file, datetime.now()))\n", 338 | "\n", 339 | " \n", 340 | " # Initialize game board and game state\n", 341 | " game_state = [ GameState() for i in range(GAMES_BATCHES) ]\n", 342 | " game_result = [0] * GAMES_BATCHES # 0 - Not Ended; BLACK - Black Wins; WHITE - White Wins\n", 343 | " p = Preprocess(DEFAULT_FEATURES) # Singleton\n", 344 | " history = [ [] for i in range(GAMES_BATCHES) ] # history[n][step] stores tuple of (player, x, y, board[n])\n", 345 | " board = None # The preprocessed board with shape Nx17x19x19\n", 346 | " \n", 347 | " # for each step in all games\n", 348 | " for step in range(0,722):\n", 349 | " \n", 350 | " # Preprocess the board\n", 351 | " board = np.concatenate([p.state_to_tensor(game_state[i]).astype(np.float32) for i in range(GAMES_BATCHES)])\n", 352 | "\n", 353 | " if step % 2 == 0:\n", 354 | " current_player = BLACK\n", 355 | " current_color = 'B'\n", 356 | " else:\n", 357 | " current_player = WHITE\n", 358 | " current_color = 'W'\n", 359 | "\n", 360 | " if step % 2 == (PRIMARY_PLAYER == 'W'): # if step %2 == 0 and Primary is Black, or vice versa.\n", 361 | " # primary player make move\n", 362 | " workspace.SwitchWorkspace(PRIMARY_WORKSPACE)\n", 363 | " with core.DeviceScope(device_opts):\n", 364 | " workspace.FeedBlob('data', board)\n", 365 | " workspace.RunNet(primary_predict_net)\n", 366 | " else:\n", 367 | " # sparring partner make move\n", 368 | " workspace.SwitchWorkspace(SPARR_WORKSPACE)\n", 369 | " with core.DeviceScope(device_opts):\n", 370 | " workspace.FeedBlob('data', board)\n", 371 | " workspace.RunNet(sparring_predict_net)\n", 372 | "\n", 373 | " predict = workspace.FetchBlob('softmax') # [0.01, 0.02, ...] 
in shape (N,361)\n", 374 | "\n", 375 | " for i in range(GAMES_BATCHES):\n", 376 | " if game_result[i]: # game end\n", 377 | " continue\n", 378 | " else: # game not end\n", 379 | " legal_moves = [ x*19+y for (x,y) in game_state[i].get_legal_moves(include_eyes=False)] # [59, 72, ...] in 1D\n", 380 | " if len(legal_moves) > 0: # at least 1 legal move\n", 381 | " probabilities = predict[i][legal_moves] # [0.02, 0.01, ...]\n", 382 | " # use numpy.random.choice to randomize the step,\n", 383 | " # otherwise use np.argmax to get best choice\n", 384 | " # current_choice = legal_moves[np.argmax(probabilities)]\n", 385 | " if np.sum(probabilities) > 0:\n", 386 | " current_choice = np.random.choice(legal_moves, 1, p=probabilities/np.sum(probabilities))[0]\n", 387 | " else:\n", 388 | " current_choice = np.random.choice(legal_moves, 1)[0]\n", 389 | " (x, y) = (current_choice/19, current_choice%19)\n", 390 | " history[i].append((current_color, current_choice, board[i]))\n", 391 | " game_state[i].do_move(action = (x, y), color = current_player) # End of Game?\n", 392 | " #print('game({}) step({}) {} move({},{})'.format(i, step, current_color, x, y))\n", 393 | " else:\n", 394 | " game_state[i].do_move(action = PASS, color = current_player)\n", 395 | " #print('game({}) step({}) {} PASS'.format(i, step, current_color))\n", 396 | " game_result[i] = game_state[i].is_end_of_game\n", 397 | "\n", 398 | " if np.all(game_result):\n", 399 | " break\n", 400 | " \n", 401 | " # Get the winner\n", 402 | " winner = [ GetWinner(game_state[i]) for i in range(GAMES_BATCHES) ] # B+, W+, T\n", 403 | " print('Tournament {} Finished with Primary({}) {}:{} Sparring({}) @{}'.\n", 404 | " format(tournament, PRIMARY_PLAYER, sum(np.char.count(winner, PRIMARY_PLAYER)),\n", 405 | " sum(np.char.count(winner, SPARRING_PLAYER)), SPARRING_PLAYER, datetime.now()))\n", 406 | " \n", 407 | " # Save the games(optional)\n", 408 | " for i in range(GAMES_BATCHES):\n", 409 | " filename = os.path.join(\n", 410 | " 
os.path.expanduser('~'), 'python', 'tutorial_files','selfplay',\n", 411 | " '({}_{}_{})vs({})_{}_{}_{}'.format(PRIMARY_CONV_LEVEL, PRIMARY_FILTERS, PRIMARY_PRE_TRAINED_ITERS+tournament,\n", 412 | " sparring_param_file, i, winner[i],\n", 413 | " datetime.now().strftime(\"%Y-%m-%dT%H:%M:%S%Z\")))\n", 414 | " WriteBackSGF(winner, history[i], filename)\n", 415 | " \n", 416 | " # After each tournament, learn from the winner\n", 417 | " if BASE_LR != 0:\n", 418 | " LearnFromWinningGames(history, winner, mini_batch=TRAIN_BATCHES)" 419 | ] 420 | }, 421 | { 422 | "cell_type": "code", 423 | "execution_count": null, 424 | "metadata": { 425 | "collapsed": true 426 | }, 427 | "outputs": [], 428 | "source": [ 429 | "if TOURNAMENT_ITERS>0 :\n", 430 | " pe.save_to_db(\"minidb\", os.path.join(PRIMARY_SAVE_FOLDER, \"policy_model.minidb\"), pe_meta)\n", 431 | " print('Results saved to {}'.format(PRIMARY_SAVE_FOLDER))\n", 432 | " pe.save_to_db(\"minidb\", os.path.join(SPARR_LOAD_FOLDER, \"policy_model_RL_{}.minidb\".format(PRIMARY_PRE_TRAINED_ITERS+TOURNAMENT_ITERS)), pe_meta)\n", 433 | " print('Results saved to {}'.format(SPARR_LOAD_FOLDER))" 434 | ] 435 | } 436 | ], 437 | "metadata": { 438 | "kernelspec": { 439 | "display_name": "Python 2", 440 | "language": "python", 441 | "name": "python2" 442 | }, 443 | "language_info": { 444 | "codemirror_mode": { 445 | "name": "ipython", 446 | "version": 2 447 | }, 448 | "file_extension": ".py", 449 | "mimetype": "text/x-python", 450 | "name": "python", 451 | "nbconvert_exporter": "python", 452 | "pygments_lexer": "ipython2", 453 | "version": "2.7.12" 454 | }, 455 | "toc": { 456 | "colors": { 457 | "hover_highlight": "#DAA520", 458 | "navigate_num": "#000000", 459 | "navigate_text": "#333333", 460 | "running_highlight": "#FF0000", 461 | "selected_highlight": "#FFD700", 462 | "sidebar_border": "#EEEEEE", 463 | "wrapper_background": "#FFFFFF" 464 | }, 465 | "moveMenuLeft": true, 466 | "nav_menu": { 467 | "height": "315px", 468 | "width": "367px" 
469 | }, 470 | "navigate_menu": true, 471 | "number_sections": true, 472 | "sideBar": true, 473 | "threshold": 4, 474 | "toc_cell": false, 475 | "toc_position": { 476 | "height": "544px", 477 | "left": "0px", 478 | "right": "1723px", 479 | "top": "107px", 480 | "width": "130px" 481 | }, 482 | "toc_section_display": "block", 483 | "toc_window_display": true, 484 | "widenNotebook": false 485 | } 486 | }, 487 | "nbformat": 4, 488 | "nbformat_minor": 1 489 | } 490 | -------------------------------------------------------------------------------- /Monitoring.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Monitoring Caffe2 Learning Status\n", 8 | " This notebook will help you monitoring Caffe2 learning status." 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "metadata": { 15 | "collapsed": true 16 | }, 17 | "outputs": [], 18 | "source": [ 19 | "from matplotlib import pyplot\n", 20 | "import numpy as np\n", 21 | "import os\n", 22 | "from StringIO import StringIO\n", 23 | "\n", 24 | "# Let's show all plots inline.\n", 25 | "%matplotlib inline\n", 26 | "\n", 27 | "BASE_FOLDER = os.path.join(os.path.expanduser('~'), 'python', 'tutorial_data', 'go', 'param')" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "\n", 35 | "## Loss\n", 36 | " This program will first load Loss from log file and then plot them." 
37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": { 43 | "scrolled": false 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "loss_t = open(os.path.join(BASE_FOLDER, 'loss.log'), 'r').read()\n", 48 | "\n", 49 | "loss = np.genfromtxt(StringIO(loss_t), usecols=(7))\n", 50 | "\n", 51 | "weight = np.ones(100)/100\n", 52 | "sma = np.convolve(weight, loss)[100:-100]\n", 53 | "\n", 54 | "pyplot.plot(loss, 'b')\n", 55 | "pyplot.plot(sma, 'g')\n", 56 | "pyplot.legend(('Loss', 'SMA'), loc='upper right')\n", 57 | "\n", 58 | "nighty = len(loss)*9/10 # monitor the trend: last 10% of the data\n", 59 | "loss[nighty:].mean()" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "## Training Accuracy\n", 67 | " This program will first load Accuracy from log file, and then plot them." 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": { 74 | "scrolled": false 75 | }, 76 | "outputs": [], 77 | "source": [ 78 | "accuracy_t = open(os.path.join(BASE_FOLDER, 'accuracy.log'), 'r').read()\n", 79 | "#loss_t = open(os.path.join(BASE_FOLDER, 'param', 'loss.log'), 'r').read()\n", 80 | "\n", 81 | "accuracy = np.genfromtxt(StringIO(accuracy_t), usecols=(7))\n", 82 | "#loss = np.genfromtxt(StringIO(loss_t), usecols=(7))\n", 83 | "\n", 84 | "weight = np.ones(100)/100\n", 85 | "\n", 86 | "sma = np.convolve(weight, accuracy)[100:-100]\n", 87 | "\n", 88 | "#pyplot.plot(loss, 'b')\n", 89 | "pyplot.plot(accuracy, 'red')\n", 90 | "pyplot.plot(sma, 'g')\n", 91 | "pyplot.legend(('Accuracy', 'SMA'), loc='upper right')\n", 92 | "\n", 93 | "nighty = len(accuracy)*9/10 # monitor the trend: last 10% of the data\n", 94 | "accuracy[nighty:].mean()" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "# Monitoring Hardware\n", 102 | "## CPU and Memory\n", 103 | " This program will display top processes" 104 | ] 105 | }, 106 | { 107 | 
"cell_type": "code", 108 | "execution_count": null, 109 | "metadata": { 110 | "scrolled": true 111 | }, 112 | "outputs": [], 113 | "source": [ 114 | "!top -n 1" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": { 120 | "collapsed": true 121 | }, 122 | "source": [ 123 | "## GPU" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "!nvidia-smi" 133 | ] 134 | } 135 | ], 136 | "metadata": { 137 | "kernelspec": { 138 | "display_name": "Python 2", 139 | "language": "python", 140 | "name": "python2" 141 | }, 142 | "language_info": { 143 | "codemirror_mode": { 144 | "name": "ipython", 145 | "version": 2 146 | }, 147 | "file_extension": ".py", 148 | "mimetype": "text/x-python", 149 | "name": "python", 150 | "nbconvert_exporter": "python", 151 | "pygments_lexer": "ipython2", 152 | "version": "2.7.12" 153 | }, 154 | "toc": { 155 | "colors": { 156 | "hover_highlight": "#DAA520", 157 | "navigate_num": "#000000", 158 | "navigate_text": "#333333", 159 | "running_highlight": "#FF0000", 160 | "selected_highlight": "#FFD700", 161 | "sidebar_border": "#EEEEEE", 162 | "wrapper_background": "#FFFFFF" 163 | }, 164 | "moveMenuLeft": true, 165 | "nav_menu": { 166 | "height": "105px", 167 | "width": "252px" 168 | }, 169 | "navigate_menu": true, 170 | "number_sections": true, 171 | "sideBar": true, 172 | "threshold": 4, 173 | "toc_cell": false, 174 | "toc_position": { 175 | "height": "576px", 176 | "left": "0px", 177 | "right": "auto", 178 | "top": "107px", 179 | "width": "212px" 180 | }, 181 | "toc_section_display": "block", 182 | "toc_window_display": false, 183 | "widenNotebook": false 184 | } 185 | }, 186 | "nbformat": 4, 187 | "nbformat_minor": 2 188 | } 189 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # C2TutorialsGo 2 | This is a 
tutorial written for Caffe2 which mocks Google AlphaGo Fan and AlphaGo Zero. 3 | v0.2.0 is released, with ResNet based AlphaGo Zero model. 4 | 5 | ## Installation 6 | This program so far relies on the [RocAlphaGo](https://github.com/Rochester-NRT/RocAlphaGo) Cython implementation for feature preprocessing and Go rules. Cython compilation can be done by running the shell command `python setup.py build_ext --inplace`. 7 | 8 | # New updates from AlphaGo Zero 9 | ## Preprocess 10 |    Go game datasets are usually stored in [SGF](http://www.red-bean.com/sgf/go.html) file format. We need to transform SGF files into Caffe2 Tensors. AlphaGo Zero requires 17 feature planes of 19x19 size, which do not include 'human knowledge' like Liberties or Escape. 11 |    [This preprocess program](http://nbviewer.jupyter.org/github/davinwang/C2TutorialsGo/blob/master/Mock%20AlphaGo%20Zero%20%281%29%20Preprocess%20Pipeline.ipynb) still relies on [RocAlphaGo](https://github.com/Rochester-NRT/RocAlphaGo) for Go rules, but no longer depends on it for feature generation. I'm looking for a better (more accurate) Go rule implementation which can support Chinese/Korean/Japanese Go rules and different Komi; please feel free to recommend one. 12 | 13 | ## Dual Policy and Value network with ResNet 14 |    The Supervised Learning program is used to evaluate whether the network architecture is correct. Due to a bug in the Caffe2 spatial_BN op, the program cannot resume from a previous run. Since each epoch requires 200~250 GPU hours, it is not viable to run it on a personal computer.
15 | 16 | | epochs | LR | loss | train/test accu | epochs | LR | loss | train/test accu | 17 | |--------|--------|--------|-----------------|--------|--------|--------|-----------------| 18 | | 0.2 | 0.1 | - | - / 0.1698 | 11 | | | / | 19 | | 0.4 | | | / | 12 | | | / | 20 | | 0.6 | | | / | 13 | | | / | 21 | | 0.8 | | | / | 14 | | | / | 22 | | 1 | | | / | 15 | | | / | 23 | | 6 | | | / | 16 | | | / | 24 | | 7 | | | / | 17 | | | / | 25 | | 8     | | | / | 18     | |       | / | 26 | | 9 | | | / | 19 | | | / | 27 | | 10 | | | / | * | | | 0.60/0.57(alphago zero)| 28 | 29 | ## Reinforced Learning pipeline 30 | On going. This will be different from AlphaGo Fan in many ways: 31 | 1. Always use the best primary player to generate data. 32 | 2. Before each move, do wide search to obtain better distribution than Policy predict. 33 | 3. MCTS only relies on Policy and Value network, no more Rollout. 34 | 4. more detail will be added during implementation 35 | 36 | # About AlphaGo Fan 37 | ## Preprocess 38 | Go game datasets are usually stored in [SGF](http://www.red-bean.com/sgf/go.html) file format. We need to transform SGF files into Caffe2 Tensors which are 48 feature planes of 19x19 size, according to [DeepMind](http://www.nature.com/nature/journal/v529/n7587/full/nature16961.html?foxtrotcallback=true). 39 |    [The preprocess program](http://nbviewer.jupyter.org/github/davinwang/C2TutorialsGo/blob/master/Mock%20AlphaGo%20%281%29%20Preprocess%20Pipeline.ipynb) relies on `Cython` implementation of [RocAlphaGo](https://github.com/Rochester-NRT/RocAlphaGo) project for Go rules and feature plane generation. It is estimated to take 60 CPU hours to preprocess the complete KGS data set. 40 | 41 | ## Supervised Learning - Policy Network 42 | According to [DeepMind](http://www.nature.com/nature/journal/v529/n7587/full/nature16961.html?foxtrotcallback=true), AlphaGo can achieve 55.4% test accuracy after 20 epochs training. Test set is the first 1 million steps. i.e. KGS2004.
The speed of each prediction is 4.8ms (on Kepler K40 GPU). 43 | [This program](http://nbviewer.jupyter.org/github/davinwang/C2TutorialsGo/blob/master/Mock%20AlphaGo%20%282%29%20Policy%20Network.ipynb) achieves 52.83% by 11 epochs so far. Test set is the latest 1M steps. i.e.KGS201705-KGS201709. It also achieved speed of around 4.5ms for each single prediction (on Maxwell GTX980m GPU). Therefore each epochs takes ~40 GPU hours. Running on GPU mode is around 100x faster than CPU mode. 44 | 45 | | epochs | LR | loss | train/test accu | epochs | LR | loss | train/test accu | 46 | |--------|--------|--------|-----------------|--------|--------|--------|-----------------| 47 | | 1 | 0.003 | 1.895 | 0.4800 / 0.4724 | 11 | 0.0002 | 1.5680 | 0.5416 / 0.5283 | 48 | | 2 | 0.003 | 1.7782 | 0.5024 / 0.4912 | 12 | 0.0001 | 1.5639 | 0.5424 / 0.5291 | 49 | | 3 | 0.002 | 1.7110 | 0.5157 / 0.5029 | 13 | | | / | 50 | | 4 | 0.002 | 1.6803 | 0.5217 / 0.5079 | 14 | | | / | 51 | | 5 | 0.002 | 1.6567 | - / 0.5119 | 15 | | | / | 52 | | 6 | 0.002 | 1.6376 | 0.5302 / 0.5146 | 16 | | | / | 53 | | 7 | 0.001 | 1.6022 | 0.5377 / 0.5202 | 17 | | | / | 54 | | 8     | 0.0005 | 1.5782 | - / 0.5273 | 18     | |       | / | 55 | | 9 | 0.0005 | 1.6039 | 0.5450 / 0.5261 | 19 | | | / | 56 | | 10 | 0.0002 | 1.5697 | 0.5447 / 0.5281 | 20 | | | 0.569/0.554(alphago)| 57 | 58 | > The training accuracy record of epoch 5/8 were lost. 59 | > Intel Broadwell CPU can provide around 30 GFlops compute power per core. Nvidia Kepler K40 and Maxwell GTX980m GPU can provide around 3 TFlops compute power. 60 | 61 | ## Reinforced Learning - Policy Network 62 | [The RL program](http://nbviewer.jupyter.org/github/davinwang/C2TutorialsGo/blob/master/Mock%20AlphaGo%20%283B%29%20Policy%20Network%20-%20Reinforced%20Learning%20in%20mass%20production.ipynb) is runnable now but still under evaluation. It also relies on RocAlphaGo project for Go rules by now. 
A new program is under construction to implement the first 12 features in GPU mode to replace RocAlphaGo. It is believed to be at least 10x faster than RocAlphaGo(python implementation). 63 | 64 | ## Supervised Learning - Value Network 65 | tbd. Depends on Reinforced Learning to generate 30 million games, then pick 1 state from each game. 66 | 67 | ## Supervised Learning - Fast Rollout 68 | tbd. AlphaGo achieved 24.2% of accuracy and 2us of speed. 69 | 70 | ## MCTS 71 | tbd. Depends on Fast Rollout. 72 | -------------------------------------------------------------------------------- /RocAlphaGo/go.pxd: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | from go_data cimport * 4 | 5 | 6 | cdef class GameState: 7 | 8 | ############################################################################ 9 | # variables declarations # 10 | # # 11 | ############################################################################ 12 | 13 | # amount of locations on one side 14 | cdef char size 15 | # amount of locations on board, size * size 16 | cdef short board_size 17 | 18 | # possible ko location 19 | cdef short ko 20 | 21 | # list with all groups 22 | cdef Groups_List *groups_list 23 | # pointer to empty group 24 | cdef Group *group_empty 25 | 26 | # list representing board locations as groups 27 | # a Group contains all group stone locations and group liberty locations 28 | cdef Group **board_groups 29 | 30 | cdef char player_current 31 | cdef char player_opponent 32 | 33 | # amount of black stones captured 34 | cdef short capture_black 35 | # amount of white stones captured 36 | cdef short capture_white 37 | 38 | # amount of passes by black 39 | cdef short passes_black 40 | # amount of passes by white 41 | cdef short passes_white 42 | 43 | # list with move history 44 | cdef Locations_List *moves_history 45 | 46 | # list with legal moves 47 | cdef Locations_List *moves_legal 48 | 49 | # arrays, neighbor
arrays pointers 50 | cdef short *neighbor 51 | cdef short *neighbor3x3 52 | cdef short *neighbor12d 53 | 54 | # zobrist 55 | cdef unsigned long long zobrist_current 56 | cdef unsigned long long *zobrist_lookup 57 | 58 | cdef bint enforce_superko 59 | cdef set previous_hashes 60 | 61 | ############################################################################ 62 | # init functions # 63 | # # 64 | ############################################################################ 65 | 66 | cdef void initialize_new(self, char size) 67 | """ 68 | initialize this state as empty state 69 | """ 70 | 71 | cdef void initialize_duplicate(self, GameState copyState) 72 | """ 73 | Initialize all variables as a copy of copy_state 74 | """ 75 | 76 | 77 | ############################################################################ 78 | # private cdef functions used for game-play # 79 | # # 80 | ############################################################################ 81 | 82 | cdef void update_hash(self, short location, char colour) 83 | """ 84 | xor current hash with location + colour action value 85 | """ 86 | 87 | cdef bint is_positional_superko(self, short location, Group **board) 88 | """ 89 | Find all actions that the current_player has done in the past, taking into 90 | account the fact that history starts with BLACK when there are no 91 | handicaps or with WHITE when there are. 
92 | """ 93 | 94 | cdef bint is_legal_move(self, short location, Group **board, short ko) 95 | """ 96 | check if playing at location is a legal move to make 97 | """ 98 | 99 | cdef bint is_legal_move_superko(self, short location, Group **board, short ko) 100 | """ 101 | check if playing at location is a legal move to make 102 | """ 103 | 104 | cdef bint has_liberty_after(self, short location, Group **board) 105 | """ 106 | check if a play at location results in an alive group 107 | - has liberty 108 | - connects to group with >= 2 liberty 109 | - captures enemy group 110 | """ 111 | 112 | cdef short calculate_board_location(self, char x, char y) 113 | """ 114 | return location on board 115 | no checks on outside board 116 | x = columns 117 | y = rows 118 | """ 119 | 120 | cdef tuple calculate_tuple_location(self, short location) 121 | """ 122 | return location on board as a tuple 123 | no checks on outside board 124 | """ 125 | 126 | cdef void set_moves_legal_list(self, Locations_List *moves_legal) 127 | """ 128 | generate moves_legal list 129 | """ 130 | 131 | cdef void combine_groups(self, Group* group_keep, Group* group_remove, Group **board) 132 | """ 133 | combine group_keep and group_remove and replace group_remove on the board 134 | """ 135 | 136 | cdef void remove_group(self, Group* group_remove, Group **board, short* ko) 137 | """ 138 | remove group from board -> set all locations to group_empty 139 | """ 140 | 141 | cdef void add_to_group(self, short location, Group **board, short* ko, short* count_captures) 142 | """ 143 | check if a stone on location is connected to a group, kills a group 144 | or is a new group on the board 145 | """ 146 | 147 | ############################################################################ 148 | # private cdef functions used for feature generation # 149 | # # 150 | ############################################################################ 151 | 152 | cdef long generate_12d_hash(self, short centre) 153 | """ 154 |
generate 12d hash around centre location 155 | """ 156 | 157 | cdef long generate_3x3_hash(self, short centre) 158 | """ 159 | generate 3x3 hash around centre location 160 | """ 161 | 162 | cdef void get_group_after_pointer(self, short* stones, short* liberty, short* capture, char* locations, char* captures, short location) 163 | cdef void get_group_after(self, char* groups_after, char* locations, char* captures, short location) 164 | """ 165 | groups_after is a board_size * 3 array representing STONES, LIBERTY, CAPTURE for every location 166 | 167 | calculate group after a play on location and set 168 | groups_after[ location * 3 + 0 ] to stone count 169 | groups_after[ location * 3 + 1 ] to liberty count 170 | groups_after[ location * 3 + 2 ] to capture count 171 | """ 172 | 173 | cdef bint is_true_eye(self, short location, Locations_List* eyes, char owner) 174 | """ 175 | check if location is a real eye 176 | """ 177 | 178 | ############################################################################ 179 | # private cdef Ladder functions # 180 | # # 181 | ############################################################################ 182 | 183 | """ 184 | Ladder evaluation consumes a lot of time duplicating data, the original 185 | version (still can be found in go_python.py) made a copy of the whole 186 | GameState for every move played. 187 | 188 | This version only duplicates self.board_groups (so the list with pointers to groups) 189 | the add_ladder_move plays a move like the add_to_group function but it 190 | does not change the original groups and creates a list with groups removed 191 | 192 | with this groups removed list undo_ladder_move will return the board state to 193 | be the same as before add_ladder_move was called 194 | 195 | get_removed_groups and unremove_group are being used by add/undo_ladder_move 196 | 197 | nb. 198 | duplicating self.board_groups is not strictly necessary but 199 | it is safer to do so in a threaded environment.
as soon as mcts is 200 | implemented this duplication could be removed if the mcts ensures a 201 | GameState is not accesed while preforming a ladder evaluation 202 | 203 | TODO validate no changes are being made! 204 | 205 | TODO self.player colour is used, should become a pointer 206 | """ 207 | 208 | cdef Groups_List* add_ladder_move(self, short location, Group **board, short* ko) 209 | """ 210 | create a new group for location move and add all connected groups to it 211 | 212 | similar to add_to_group except no groups are changed or killed and a list 213 | with groups removed is returned so the board can be restored to original 214 | position 215 | """ 216 | 217 | cdef void remove_ladder_group(self, Group* group_remove, Group **board, short* ko) 218 | """ 219 | remove group from board -> set all locations to group_empty 220 | does not update zobrist hash 221 | """ 222 | 223 | cdef void undo_ladder_move(self, short location, Groups_List* removed_groups, short ko, Group **board, short* ko) 224 | """ 225 | Use removed_groups list to return board state to be the same as before 226 | add_ladder_move was used 227 | """ 228 | 229 | cdef void unremove_group(self, Group* group_remove, Group **board) 230 | """ 231 | unremove group from board 232 | loop over all stones in this group and set board to group_unremove 233 | remove liberty from neigbor locations 234 | """ 235 | 236 | cdef dict get_capture_moves(self, Group* group, char color, Group **board) 237 | """ 238 | create a dict with al moves that capture a group surrounding group 239 | """ 240 | 241 | cdef void get_removed_groups(self, short location, Groups_List* removed_groups, Group **board, short* ko) 242 | """ 243 | create a new group for location move and add all connected groups to it 244 | 245 | similar to add_to_group except no groups are changed or killed 246 | all changes to the board are stored in removed_groups 247 | """ 248 | 249 | cdef bint is_ladder_escape_move(self, Group **board, short* ko, 
Locations_List *list_ko, short location_group, dict capture, short location, int maxDepth, char colour_group, char colour_chase) 250 | """ 251 | play a ladder move on location, check if group has escaped, 252 | if the group has 2 liberty it is undetermined -> 253 | try to capture it by playing at both liberty 254 | """ 255 | 256 | cdef bint is_ladder_capture_move(self, Group **board, short* ko, Locations_List *list_ko, short location_group, dict capture, short location, int maxDepth, char colour_group, char colour_chase) 257 | """ 258 | play a ladder move on location, try capture and escape moves 259 | and see if the group is able to escape ladder 260 | """ 261 | 262 | ############################################################################ 263 | # public cdef functions used by preprocessing # 264 | # # 265 | ############################################################################ 266 | 267 | cdef char* get_groups_after(self) 268 | """ 269 | return a short array of size board_size * 3 representing 270 | STONES, LIBERTY, CAPTURE for every board location 271 | 272 | max count values are 100 273 | 274 | loop over all legal moves and determine stone count, liberty count and 275 | capture count of a play on that location 276 | """ 277 | 278 | cdef long get_hash_12d(self, short centre) 279 | """ 280 | return hash for 12d star pattern around location 281 | """ 282 | 283 | cdef long get_hash_3x3(self, short location) 284 | """ 285 | return 3x3 pattern hash + current player 286 | """ 287 | 288 | cdef char* get_ladder_escapes(self, int maxDepth) 289 | """ 290 | return char array with size board_size 291 | every location represents a location on the board where: 292 | _FREE = no ladder escape 293 | _STONE = ladder escape 294 | """ 295 | 296 | cdef char* get_ladder_captures(self, int maxDepth) 297 | """ 298 | return char array with size board_size 299 | every location represents a location on the board where: 300 | _FREE = no ladder capture 301 | _STONE = ladder 
capture 302 | """ 303 | 304 | ############################################################################ 305 | # public cdef functions used for game play # 306 | # # 307 | ############################################################################ 308 | 309 | cdef void add_move(self, short location) 310 | """ 311 | !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 312 | Move should be legal! 313 | !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 314 | 315 | play move on location, move should be legal! 316 | 317 | update player_current, history and moves_legal 318 | """ 319 | 320 | cdef GameState new_state_add_move(self, short location) 321 | """ 322 | copy this gamestate and play move at location 323 | """ 324 | 325 | cdef float get_score(self, float komi) 326 | """ 327 | Calculate score of board state. Uses 'Area scoring'. 328 | 329 | http://senseis.xmp.net/?Passing#1 330 | 331 | negative value indicates black win 332 | positive value indicates white win 333 | """ 334 | 335 | cdef char get_winner_colour(self, float komi) 336 | """ 337 | Calculate score of board state and return player ID (1, -1, or 0 for tie) 338 | corresponding to winner. Uses 'Area scoring'. 
339 | 340 | http://senseis.xmp.net/?Passing#1 341 | """ 342 | 343 | ############################################################################ 344 | # public def functions used for game play (Python) # 345 | # # 346 | ############################################################################ 347 | 348 | cdef Locations_List* get_sensible_moves(self) 349 | """ 350 | only used for def get_legal_moves 351 | """ 352 | -------------------------------------------------------------------------------- /RocAlphaGo/go_data.pxd: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | 4 | ############################################################################ 5 | # constants # 6 | # # 7 | ############################################################################ 8 | 9 | # TODO find out if these are really used as compile time-constants 10 | 11 | # value for PASS move 12 | cdef char _PASS 13 | 14 | # observe: stones > EMPTY 15 | # border < EMPTY 16 | # be aware you should NOT use != EMPTY as this includes border locations 17 | cdef char _BORDER 18 | cdef char _EMPTY 19 | cdef char _WHITE 20 | cdef char _BLACK 21 | 22 | # used for group stone, liberty locations, legal move and eye locations 23 | cdef char _FREE 24 | cdef char _STONE 25 | cdef char _LIBERTY 26 | cdef char _CAPTURE 27 | cdef char _LEGAL 28 | cdef char _EYE 29 | 30 | # value used to generate pattern hashes 31 | cdef char _HASHVALUE 32 | 33 | 34 | ############################################################################ 35 | # Structs # 36 | # # 37 | ############################################################################ 38 | 39 | """ 40 | a struct has the advantage of being completely C, no python wrapper so 41 | no python overhead. 
42 | 43 | compared to a cdef class a struct has some advantages: 44 | - C only, no python overhead 45 | - able to get a pointer to it 46 | - smaller in size 47 | 48 | drawbacks 49 | - have to be Malloc created and freed after use -> memory leak 50 | - no convenient functions available 51 | - no boundchecks 52 | """ 53 | 54 | """ 55 | struct to store group stone and liberty locations 56 | 57 | locations is a char pointer array of size board_size and initialized 58 | to _FREE. after adding a stone/liberty that location is set to 59 | _STONE/_LIBERTY and count_stones/count_liberty is incremented 60 | 61 | note that a stone location can never be a liberty location, 62 | if a stone is placed on a liberty location liberty_count is decremented 63 | 64 | it works as a dictionary so lookup time for a location is O(1) 65 | looping over all stone/liberty location could be optimized by adding 66 | two lists containing stone/liberty locations 67 | 68 | TODO check if this dictionary implementation is faster on average 69 | use as a two list implementation 70 | """ 71 | cdef struct Group: 72 | char *locations 73 | short count_stones 74 | short count_liberty 75 | char colour 76 | 77 | """ 78 | struct to store a list of Group 79 | 80 | board_groups is a Group pointer array of size #size and containing 81 | #count_groups groups 82 | 83 | TODO convert to c++ list? 
84 | """ 85 | cdef struct Groups_List: 86 | Group **board_groups 87 | short count_groups 88 | short size 89 | 90 | """ 91 | struct to store a list of short (board locations) 92 | 93 | locations is a short pointer array of size #size and containing 94 | #count locations 95 | 96 | TODO convert to c++ list and/or set 97 | """ 98 | cdef struct Locations_List: 99 | short *locations 100 | short count 101 | short size 102 | 103 | 104 | ############################################################################ 105 | # group functions # 106 | # # 107 | ############################################################################ 108 | 109 | cdef Group* group_new(char colour, short size) 110 | """ 111 | create new struct Group 112 | with locations #size char long initialized to FREE 113 | """ 114 | 115 | cdef Group* group_duplicate(Group* group, short size) 116 | """ 117 | create new struct Group initialized as a duplicate of group 118 | """ 119 | 120 | cdef void group_destroy(Group* group) 121 | """ 122 | free memory location of group and locations 123 | """ 124 | 125 | cdef void group_add_stone(Group* group, short location) 126 | """ 127 | update location as STONE 128 | update liberty count if it was a liberty location 129 | 130 | n.b. stone count is not incremented if a stone was present already 131 | """ 132 | 133 | cdef void group_remove_stone(Group* group, short location) 134 | """ 135 | update location as FREE 136 | update stone count if it was a stone location 137 | """ 138 | 139 | cdef short group_location_stone(Group* group, short size) 140 | """ 141 | return first location where a STONE is located 142 | """ 143 | 144 | cdef void group_add_liberty(Group* group, short location) 145 | """ 146 | update location as LIBERTY 147 | update liberty count if it was a FREE location 148 | 149 | n.b. 
liberty count is not incremented if a stone was present already 150 | """ 151 | 152 | cdef void group_remove_liberty(Group* group, short location) 153 | """ 154 | update location as FREE 155 | update liberty count if it was a LIBERTY location 156 | 157 | n.b. liberty count is not decremented if location is a FREE location 158 | """ 159 | 160 | cdef short group_location_liberty(Group* group, short size) 161 | """ 162 | return location where a LIBERTY is located 163 | """ 164 | 165 | ############################################################################ 166 | # Groups_List functions # 167 | # # 168 | ############################################################################ 169 | 170 | cdef Groups_List* groups_list_new(short size) 171 | """ 172 | create new struct Groups_List 173 | with locations #size Group* long and count_groups set to 0 174 | """ 175 | 176 | cdef void groups_list_add(Group* group, Groups_List* groups_list) 177 | """ 178 | add group to list and increment groups count 179 | """ 180 | 181 | cdef void groups_list_add_unique(Group* group, Groups_List* groups_list) 182 | """ 183 | check if a group is already in the list, return if so 184 | add group to list if not 185 | """ 186 | 187 | cdef void groups_list_remove(Group* group, Groups_List* groups_list) 188 | """ 189 | remove group from list and decrement groups count 190 | """ 191 | 192 | ############################################################################ 193 | # Locations_List functions # 194 | # # 195 | ############################################################################ 196 | 197 | cdef Locations_List* locations_list_new(short size) 198 | """ 199 | create new struct Locations_List 200 | with locations #size short long and count set to 0 201 | """ 202 | 203 | cdef void locations_list_destroy(Locations_List* locations_list) 204 | """ 205 | free memory location of locations_list and locations 206 | """ 207 | 208 | cdef void locations_list_remove_location(Locations_List* 
locations_list, short location) 209 | """ 210 | remove location from list 211 | """ 212 | 213 | cdef void locations_list_add_location(Locations_List* locations_list, short location) 214 | """ 215 | add location to list and increment count 216 | """ 217 | 218 | cdef void locations_list_add_location_increment(Locations_List* locations_list, short location) 219 | """ 220 | check if list can hold one more location, resize list if not 221 | add location to list and increment count 222 | """ 223 | 224 | cdef void locations_list_add_location_unique(Locations_List* locations_list, short location) 225 | """ 226 | check if location is present in list, return if so 227 | add location to list if not 228 | """ 229 | 230 | ############################################################################ 231 | # neighbor creation functions # 232 | # # 233 | ############################################################################ 234 | 235 | cdef short calculate_board_location(char x, char y, char size) 236 | """ 237 | return location on board 238 | no checks on outside board 239 | x = columns 240 | y = rows 241 | """ 242 | 243 | cdef short calculate_board_location_or_border(char x, char y, char size) 244 | """ 245 | return location on board or borderlocation 246 | board locations = [ 0, size * size) 247 | border location = size * size 248 | x = columns 249 | y = rows 250 | """ 251 | 252 | cdef short* get_neighbors(char size) 253 | """ 254 | create array for every board location with all 4 direct neighbour locations 255 | neighbor order: left - right - above - below 256 | 257 | -1 x 258 | x x 259 | +1 x 260 | 261 | order: 262 | -1 2 263 | 0 1 264 | +1 3 265 | 266 | TODO neighbors is obsolete as neighbor3x3 contains the same values 267 | """ 268 | 269 | cdef short* get_3x3_neighbors(char size) 270 | """ 271 | create for every board location array with all 8 surrounding neighbour locations 272 | neighbor order: above middle - middle left - middle right - below middle 273 | above left 
- above right - below left - below right 274 | this order is more useful as it separates neighbors and then diagonals 275 | -1 xxx 276 | x x 277 | +1 xxx 278 | 279 | order: 280 | -1 405 281 | 1 2 282 | +1 637 283 | 284 | 0-3 contains neighbors 285 | 4-7 contains diagonals 286 | """ 287 | 288 | cdef short* get_12d_neighbors(char size) 289 | """ 290 | create array for every board location with 12d star neighbour locations 291 | neighbor order: top star tip 292 | above left - above middle - above right 293 | left star tip - left - right - right star tip 294 | below left - below middle - below right 295 | below star tip 296 | 297 | -2 x 298 | -1 xxx 299 | xx xx 300 | +1 xxx 301 | +2 x 302 | 303 | order: 304 | -2 0 305 | -1 123 306 | 45 67 307 | +1 89a 308 | +2 b 309 | """ 310 | 311 | ############################################################################ 312 | # zobrist creation functions # 313 | # # 314 | ############################################################################ 315 | 316 | 317 | cdef unsigned long long* get_zobrist_lookup(char size) 318 | """ 319 | 320 | """ 321 | -------------------------------------------------------------------------------- /RocAlphaGo/go_data.pyx: -------------------------------------------------------------------------------- 1 | cimport cython 2 | import numpy as np 3 | cimport numpy as np 4 | from libc.stdlib cimport malloc, free, realloc 5 | from libc.string cimport memcpy, memset, memchr 6 | 7 | """ 8 | Future speedups, right now the usage of C dicts and List is copied from original 9 | Java implementation. not all usages have been tested for max performance. 10 | 11 | possible speedups could be swapping certain dicts for lists and vice versa. 12 | more testing should be done where this might apply. 13 | 14 | some notes: 15 | - using list for Group stone&liberty locations? 16 | - do we need to consider 25*25 boards? 17 | - dict for moves_legal instead of list? 
18 | - create mixed short+char arrays to store location+value in one array? 19 | - implement dict+list struct to get fast lookup and fast looping over all elements 20 | - store one liberty&stone location in group for fast lookup of group location/liberty 21 | - implement faster loop over all elements for dict using memchr and offset pointer 22 | """ 23 | 24 | ############################################################################ 25 | # constants # 26 | # # 27 | ############################################################################ 28 | 29 | 30 | # value for PASS move 31 | _PASS = -1 32 | 33 | # observe: stones > EMPTY 34 | # border < EMPTY 35 | # be aware you should NOT use != EMPTY as this includes border locations 36 | _BORDER = 1 37 | _EMPTY = 2 38 | _WHITE = 3 39 | _BLACK = 4 40 | 41 | # used for group stone, liberty locations, legal move and sensible move 42 | _FREE = 3 43 | _STONE = 0 44 | _LIBERTY = 1 45 | _CAPTURE = 2 46 | _LEGAL = 4 47 | _EYE = 5 48 | 49 | # value used to generate pattern hashes 50 | _HASHVALUE = 33 51 | 52 | 53 | ############################################################################ 54 | # Structs # 55 | # # 56 | ############################################################################ 57 | 58 | """ -> structs, declared in go_data.pxd 59 | 60 | # a struct has the advantage of being completely C, no python wrapper so 61 | # no python overhead. 62 | # 63 | # compared to a cdef class a struct has some advantages: 64 | # - C only, no python overhead 65 | # - able to get a pointer to it 66 | # - smaller in size 67 | # 68 | # drawbacks 69 | # - have to be Malloc created and freed after use -> memory leak 70 | # - no convenient functions available 71 | # - no boundchecks 72 | 73 | 74 | # struct to store group stone and liberty locations 75 | # 76 | # locations is a char pointer array of size board_size and initialized 77 | # to _FREE. 
after adding a stone/liberty that location is set to 78 | # _STONE/_LIBERTY and count_stones/count_liberty is incremented 79 | # 80 | # note that a stone location can never be a liberty location, 81 | # if a stone is placed on a liberty location liberty_count is decremented 82 | # 83 | # it works as a dictionary so lookup time for a location is O(1) 84 | # looping over all stone/liberty location could be optimized by adding 85 | # two lists containing stone/liberty locations 86 | # 87 | # TODO check if this dictionary implementation is faster on average 88 | # use as a two list implementation 89 | 90 | cdef struct Group: 91 | char *locations 92 | short count_stones 93 | short count_liberty 94 | char colour 95 | 96 | 97 | # struct to store a list of Group 98 | # 99 | # board_groups is a Group pointer array of size #size and containing 100 | # #count_groups groups 101 | # 102 | # TODO convert to c++ list? 103 | 104 | cdef struct Groups_List: 105 | Group **board_groups 106 | short count_groups 107 | short size 108 | 109 | 110 | # struct to store a list of short (board locations) 111 | # 112 | # locations is a short pointer array of size #size and containing 113 | # #count locations 114 | 115 | TODO convert to c++ list and/or set 116 | 117 | cdef struct Locations_List: 118 | short *locations 119 | short count 120 | short size 121 | """ 122 | 123 | ############################################################################ 124 | # group functions # 125 | # # 126 | ############################################################################ 127 | 128 | 129 | @cython.boundscheck(False) 130 | @cython.wraparound(False) 131 | cdef Group* group_new(char colour, short size): 132 | """ 133 | create new struct Group 134 | with locations #size char long initialized to FREE 135 | """ 136 | 137 | cdef int i 138 | 139 | # allocate memory for Group 140 | cdef Group *group = malloc(sizeof(Group)) 141 | if not group: 142 | raise MemoryError() 143 | 144 | # allocate memory for array 
locations 145 | group.locations = malloc(size) 146 | if not group.locations: 147 | raise MemoryError() 148 | 149 | # set counts to 0 and colour to colour 150 | group.count_stones = 0 151 | group.count_liberty = 0 152 | group.colour = colour 153 | 154 | # initialize locations with FREE 155 | memset(group.locations, _FREE, size) 156 | 157 | return group 158 | 159 | 160 | @cython.boundscheck(False) 161 | @cython.wraparound(False) 162 | cdef Group* group_duplicate(Group* group, short size): 163 | """ 164 | create new struct Group initialized as a duplicate of group 165 | """ 166 | 167 | cdef int i 168 | 169 | # allocate memory for Group 170 | cdef Group *duplicate = malloc(sizeof(Group)) 171 | if not duplicate: 172 | raise MemoryError() 173 | 174 | # allocate memory for array locations 175 | duplicate.locations = malloc(size) 176 | if not duplicate.locations: 177 | raise MemoryError() 178 | 179 | # set counts and colour values 180 | duplicate.count_stones = group.count_stones 181 | duplicate.count_liberty = group.count_liberty 182 | duplicate.colour = group.colour 183 | 184 | # duplicate locations array in memory 185 | # memcpy is optimized to do this quickly 186 | memcpy(duplicate.locations, group.locations, size) 187 | 188 | return duplicate 189 | 190 | 191 | @cython.boundscheck(False) 192 | @cython.wraparound(False) 193 | cdef void group_destroy(Group* group): 194 | """ 195 | free memory location of group and locations 196 | """ 197 | 198 | # check if group exists 199 | if group is not NULL: 200 | 201 | # check if locations exists 202 | if group.locations is not NULL: 203 | 204 | # free locations 205 | free(group.locations) 206 | 207 | # free group 208 | free(group) 209 | 210 | 211 | @cython.boundscheck(False) 212 | @cython.wraparound(False) 213 | cdef void group_add_stone(Group* group, short location): 214 | """ 215 | update location as STONE 216 | update liberty count if it was a liberty location 217 | 218 | n.b. 
stone count is not incremented if a stone was present already 219 | """ 220 | 221 | # check whether location is FREE or a LIBERTY 222 | if group.locations[ location ] == _FREE: 223 | 224 | # location is FREE, increment stone count 225 | group.count_stones += 1 226 | elif group.locations[ location ] == _LIBERTY: 227 | 228 | # location is LIBERTY, increment stone count and decrement liberty count 229 | group.count_stones += 1 230 | group.count_liberty -= 1 231 | 232 | # set STONE 233 | group.locations[ location ] = _STONE 234 | 235 | 236 | @cython.boundscheck(False) 237 | @cython.wraparound(False) 238 | cdef void group_remove_stone(Group* group, short location): 239 | """ 240 | update location as FREE 241 | update stone count if it was a stone location 242 | """ 243 | 244 | # check if a stone is present 245 | if group.locations[ location ] == _STONE: 246 | 247 | # stone present, decrement stone count and set location to FREE 248 | group.count_stones -= 1 249 | group.locations[ location ] = _FREE 250 | 251 | 252 | @cython.boundscheck(False) 253 | @cython.wraparound(False) 254 | cdef short group_location_stone(Group* group, short size): 255 | """ 256 | return first location where a STONE is located 257 | """ 258 | 259 | # memchr is an in-memory search function, it scans at most size contiguous bytes 260 | # starting at pointer #group.locations until a byte with value _STONE is found 261 | # and returns a pointer to that byte; subtracting pointer #group.locations 262 | # from it gives the location where a stone is 263 | # NOTE: memchr returns NULL if no _STONE byte exists; that case is not checked here 264 | return (memchr(group.locations, _STONE, size) - group.locations) 265 | 266 | 267 | @cython.boundscheck(False) 268 | @cython.wraparound(False) 269 | cdef void group_add_liberty(Group* group, short location): 270 | """ 271 | update location as LIBERTY 272 | update liberty count if it was a FREE location 273 | 274 | n.b.
liberty count is not incremented if a stone was present already 275 | """ 276 | 277 | # check if location is FREE 278 | if group.locations[ location ] == _FREE: 279 | 280 | # increment liberty count, set location to LIBERTY 281 | group.count_liberty += 1 282 | group.locations[ location ] = _LIBERTY 283 | 284 | 285 | @cython.boundscheck(False) 286 | @cython.wraparound(False) 287 | cdef void group_remove_liberty(Group* group, short location): 288 | """ 289 | update location as FREE 290 | update liberty count if it was a LIBERTY location 291 | 292 | n.b. liberty count is not decremented if location is a FREE location 293 | """ 294 | 295 | # check if location is LIBERTY 296 | if group.locations[ location ] == _LIBERTY: 297 | 298 | # decrement liberty count, set location to FREE 299 | group.count_liberty -= 1 300 | group.locations[ location ] = _FREE 301 | 302 | 303 | @cython.boundscheck(False) 304 | @cython.wraparound(False) 305 | cdef short group_location_liberty(Group* group, short size): 306 | """ 307 | return first location where a LIBERTY is located 308 | """ 309 | 310 | # memchr is an in-memory search function, it scans at most size contiguous bytes 311 | # starting at pointer #group.locations until a byte with value _LIBERTY is found 312 | # and returns a pointer to that byte; subtracting pointer #group.locations 313 | # from it gives the location where a liberty is 314 | # NOTE: memchr returns NULL if no _LIBERTY byte exists; that case is not checked here 315 | return (memchr(group.locations, _LIBERTY, size) - group.locations) 316 | 317 | 318 | ############################################################################ 319 | # Groups_List functions # 320 | # # 321 | ############################################################################ 322 | 323 | 324 | @cython.boundscheck(False) 325 | @cython.wraparound(False) 326 | cdef Groups_List* groups_list_new(short size): 327 | """ 328 | create new struct Groups_List 329 | with board_groups #size Group* long and count_groups set to 0 330 | """ 331 |
332 | cdef Groups_List* list_new 333 | # allocate memory for the list header 334 | list_new = <Groups_List*>malloc(sizeof(Groups_List)) 335 | if not list_new: 336 | raise MemoryError() 337 | # allocate memory for #size group pointers 338 | list_new.board_groups = <Group**>malloc(size * sizeof(Group*)) 339 | if not list_new.board_groups: 340 | free(list_new) # do not leak the header when the array allocation fails 341 | raise MemoryError() 342 | list_new.count_groups = 0 343 | list_new.size = size # record capacity, same as locations_list_new does 344 | return list_new 345 | 346 | 347 | @cython.boundscheck(False) 348 | @cython.wraparound(False) 349 | cdef void groups_list_add(Group* group, Groups_List* groups_list): 350 | """ 351 | add group to list and increment groups count 352 | """ 353 | 354 | groups_list.board_groups[ groups_list.count_groups ] = group 355 | groups_list.count_groups += 1 356 | 357 | 358 | @cython.boundscheck(False) 359 | @cython.wraparound(False) 360 | cdef void groups_list_add_unique(Group* group, Groups_List* groups_list): 361 | """ 362 | check if a group is already in the list, return if so 363 | add group to list if not 364 | """ 365 | 366 | cdef int i 367 | 368 | # loop over array 369 | for i in range(groups_list.count_groups): 370 | 371 | # check if group is present 372 | if group == groups_list.board_groups[ i ]: 373 | 374 | # group is present, return 375 | return 376 | 377 | # group is not present, add to list 378 | groups_list.board_groups[ groups_list.count_groups ] = group 379 | groups_list.count_groups += 1 380 | 381 | 382 | @cython.boundscheck(False) 383 | @cython.wraparound(False) 384 | cdef void groups_list_remove(Group* group, Groups_List* groups_list): 385 | """ 386 | remove group from list and decrement groups count 387 | """ 388 | 389 | cdef int i 390 | 391 | # loop over array 392 | for i in range(groups_list.count_groups): 393 | 394 | # check if group is present 395 | if groups_list.board_groups[ i ] == group: 396 | 397 | # group is present, move last group to this location 398 | # and decrement groups count 399 | groups_list.count_groups -= 1 400 | groups_list.board_groups[ i ] = groups_list.board_groups[ groups_list.count_groups ] 401 | return 402 | 403 | # TODO
# this should not happen, create error for this??
    print("Group not found!!!!!!!!!!!!!!")


############################################################################
#   Locations_List functions                                               #
#                                                                          #
############################################################################


@cython.boundscheck(False)
@cython.wraparound(False)
cdef Locations_List* locations_list_new(short size):
    """
       allocate a new Locations_List able to hold `size` locations
       count is set to 0 and size is set to `size`

       raises MemoryError when one of the two allocations fails, the struct
       is released again when the locations array cannot be allocated
    """

    cdef Locations_List* list_new

    # allocate memory for the Locations_List struct itself
    # malloc returns void*, Cython needs an explicit cast to a typed pointer
    list_new = <Locations_List*>malloc(sizeof(Locations_List))
    if not list_new:
        raise MemoryError()

    # allocate memory for locations
    list_new.locations = <short*>malloc(size * sizeof(short))
    if not list_new.locations:
        # do not leak the struct allocated above
        free(list_new)
        raise MemoryError()

    # set count to 0
    list_new.count = 0

    # set size
    list_new.size = size

    return list_new

@cython.boundscheck(False)
@cython.wraparound(False)
cdef void locations_list_destroy(Locations_List* locations_list):
    """
       free the locations array and the Locations_List struct
       a NULL locations_list (or NULL locations array) is skipped
    """

    # check if locations_list exists
    if locations_list is not NULL:

        # check if locations exists
        if locations_list.locations is not NULL:

            # free locations
            free(locations_list.locations)

        # free locations_list
        free(locations_list)

@cython.boundscheck(False)
@cython.wraparound(False)
cdef void locations_list_remove_location(Locations_List* locations_list, short location):
    """
       remove the first occurrence of location from the list
       the last entry is moved into the freed slot, so the order of the
       remaining locations is not preserved
       a message is printed when location is not in the list
    """

    cdef int i

    # loop over array
    for i in range(locations_list.count):

        # check if [ i ] == location
        if locations_list.locations[ i ] == location:

            # location found, move last value to this location
            # and decrement count
            locations_list.count -= 1
            locations_list.locations[ i ] = locations_list.locations[ locations_list.count ]
            return

    # TODO this should not happen, create error for this??
    print("location not found!!!!!!!!!!!!!!")


@cython.boundscheck(False)
@cython.wraparound(False)
cdef void locations_list_add_location(Locations_List* locations_list, short location):
    """
       add location to list and increment count
       no capacity check is done, the caller has to guarantee count < size
    """

    locations_list.locations[ locations_list.count ] = location
    locations_list.count += 1


@cython.boundscheck(False)
@cython.wraparound(False)
cdef void locations_list_add_location_increment(Locations_List* locations_list, short location):
    """
       check if list can hold one more location, grow the list by 10 if not
       add location to list and increment count

       when growing fails the list is left untouched (old array and old size
       stay valid) and MemoryError is raised
    """

    cdef short* locations_new

    if locations_list.count == locations_list.size:

        # grow through a temporary pointer: when realloc fails the original
        # array is still allocated and must not be overwritten with NULL
        locations_new = <short*>realloc(locations_list.locations, (locations_list.size + 10) * sizeof(short))
        if not locations_new:
            print("MEM ERROR")
            raise MemoryError()

        # only publish the new capacity after the allocation succeeded
        locations_list.locations = locations_new
        locations_list.size += 10

    locations_list.locations[ locations_list.count ] = location
    locations_list.count += 1


@cython.boundscheck(False)
@cython.wraparound(False)
@cython.nonecheck(False)
cdef void locations_list_add_location_unique(Locations_List* locations_list, short location):
    """
       check if location is present in list, return if so
       add location to list if not
       no capacity check is done, the caller has to guarantee count < size
    """

    cdef int i

    # loop over array
    for i in range(locations_list.count):

        # check if location is present
        if location == locations_list.locations[ i ]:

            # location found, do nothing -> return
            return

    # add location to list and increment count
    locations_list.locations[ locations_list.count ] = location
    locations_list.count += 1


############################################################################
#   neighbor creation functions                                            #
#                                                                          #
############################################################################


@cython.boundscheck(False)
@cython.wraparound(False)
cdef short calculate_board_location(char x, char y, char size):
    """
       return location on board as x + y * size
       no checks on outside board
       x = columns
       y = rows
    """

    # return board location
    return x + (y * size)


@cython.boundscheck(False)
@cython.wraparound(False)
cdef short calculate_board_location_or_border(char x, char y, char size):
    """
       return location on board or borderlocation
       board locations = [ 0, size * size)
       border location = size * size
       x = columns
       y = rows
    """

    # check if x or y are outside board
    if x < 0 or y < 0 or x >= size or y >= size:

        # return border location
        return size * size

    # return board location
    return calculate_board_location(x, y, size)


@cython.boundscheck(False)
@cython.wraparound(False)
cdef short* get_neighbors(char size):
    """
       create array for every board location with all 4 direct neighbor locations
       the 4 entries of board location l start at index l * 4
       locations outside the board are stored as border location size * size
       the caller owns the returned array
       neighbor order: left - right - above - below

        -1     x
              x x
        +1     x

        order:
        -1     2
              0 1
        +1     3

        TODO neighbors is obsolete as neighbor3x3 contains the same values
    """

    # create array
    # malloc returns void*, Cython needs an explicit cast to a typed pointer
    cdef short* neighbor = <short*>malloc(size * size * 4 * sizeof(short))
    if not neighbor:
        raise MemoryError()

    cdef short location
    cdef char x, y

    # add all direct neighbors to every board location
    for y in range(size):

        for x in range(size):

            location = (x + (y * size)) * 4
            neighbor[ location + 0 ] = calculate_board_location_or_border(x - 1, y    , size)
            neighbor[ location + 1 ] = calculate_board_location_or_border(x + 1, y    , size)
            neighbor[ location + 2 ] = calculate_board_location_or_border(x    , y - 1, size)
            neighbor[ location + 3 ] = calculate_board_location_or_border(x    , y + 1, size)

    return neighbor

@cython.boundscheck(False)
@cython.wraparound(False)
cdef short* get_3x3_neighbors(char size):
    """
       create for every board location array with all 8 surrounding neighbor locations
       the 8 entries of board location l start at index l * 8
       locations outside the board are stored as border location size * size
       the caller owns the returned array
       neighbor order: above middle - middle left - middle right - below middle
                       above left - above right - below left - below right
                       this order is more useful as it separates neighbors and then diagonals
        -1    xxx
              x x
        +1    xxx

        order:
        -1    405
              1 2
        +1    637

        0-3 contains neighbors
        4-7 contains diagonals
    """

    # create array
    # malloc returns void*, Cython needs an explicit cast to a typed pointer
    cdef short* neighbor3x3 = <short*>malloc(size * size * 8 * sizeof(short))
    if not neighbor3x3:
        raise MemoryError()

    cdef short location
    cdef char x, y

    # add all surrounding neighbors to every board location
    for x in range(size):

        for y in range(size):

            location = (x + (y * size)) * 8
            neighbor3x3[ location + 0 ] = calculate_board_location_or_border(x    , y - 1, size)
            neighbor3x3[ location + 1 ] = calculate_board_location_or_border(x - 1, y    , size)
            neighbor3x3[ location + 2 ] = calculate_board_location_or_border(x + 1, y    , size)
            neighbor3x3[ location + 3 ] = calculate_board_location_or_border(x    , y + 1, size)

            neighbor3x3[ location + 4 ] = calculate_board_location_or_border(x - 1, y - 1, size)
            neighbor3x3[ location + 5 ] = calculate_board_location_or_border(x + 1, y - 1, size)
            neighbor3x3[ location + 6 ] = calculate_board_location_or_border(x - 1, y + 1, size)
            neighbor3x3[ location + 7 ] = calculate_board_location_or_border(x + 1, y + 1, size)

    return neighbor3x3

@cython.boundscheck(False)
@cython.wraparound(False)
cdef short* get_12d_neighbors(char size):
    """
       create array for every board location with 12d star neighbor locations
       the 12 entries of board location l start at index l * 12
       locations outside the board are stored as border location size * size
       the caller owns the returned array

       the entries are stored column by column, left to right and
       top to bottom inside a column:
                       left star tip
                       above left - left - below left
                       top star tip - above middle - below middle - below star tip
                       above right - right - below right
                       right star tip

        -2     x
        -1    xxx
             xx xx
        +1    xxx
        +2     x

        order (a = 10, b = 11):
        -2     4
        -1    158
             02 9b
        +1    36a
        +2     7
    """

    # create array
    # malloc returns void*, Cython needs an explicit cast to a typed pointer
    cdef short* neighbor12d = <short*>malloc(size * size * 12 * sizeof(short))
    if not neighbor12d:
        raise MemoryError()

    cdef short location
    cdef char x, y

    # add all 12d neighbors to every board location
    for x in range(size):

        for y in range(size):

            location = (x + (y * size)) * 12
            neighbor12d[ location +  4 ] = calculate_board_location_or_border(x    , y - 2, size)

            neighbor12d[ location +  1 ] = calculate_board_location_or_border(x - 1, y - 1, size)
            neighbor12d[ location +  5 ] = calculate_board_location_or_border(x    , y - 1, size)
            neighbor12d[ location +  8 ] = calculate_board_location_or_border(x + 1, y - 1, size)

            neighbor12d[ location +  0 ] = calculate_board_location_or_border(x - 2, y    , size)
            neighbor12d[ location +  2 ] = calculate_board_location_or_border(x - 1, y    , size)
            neighbor12d[ location +  9 ] = calculate_board_location_or_border(x + 1, y    , size)
            neighbor12d[ location + 11 ] = calculate_board_location_or_border(x + 2, y    , size)

            neighbor12d[ location +  3 ] = calculate_board_location_or_border(x - 1, y + 1, size)
            neighbor12d[ location +  6 ] = calculate_board_location_or_border(x    , y + 1, size)
            neighbor12d[ location + 10 ] = calculate_board_location_or_border(x + 1, y + 1, size)

            neighbor12d[ location +  7 ] = calculate_board_location_or_border(x    , y + 2, size)

    return neighbor12d


############################################################################
#   zobrist creation functions                                             #
#                                                                          #
############################################################################


@cython.boundscheck(False)
@cython.wraparound(False)
cdef unsigned long long* get_zobrist_lookup(char size):
    """
       generate zobrist lookup array for boardsize size
       the array holds size * size * 2 random unsigned 64 bit values
       the caller owns the returned array
    """

    cdef unsigned long long* zobrist_lookup
    cdef int i

    # malloc returns void*, Cython needs an explicit cast to a typed pointer
    zobrist_lookup = <unsigned long long*>malloc((size * size * 2) * sizeof(unsigned long long))
    if not zobrist_lookup:
        raise MemoryError()

    # initialize all zobrist hash lookup values
    for i in range(size * size * 2):
        zobrist_lookup[i] = np.random.randint(np.iinfo(np.uint64).max, dtype='uint64')

    return zobrist_lookup
--------------------------------------------------------------------------------
/RocAlphaGo/preprocessing.pxd:
--------------------------------------------------------------------------------
import ast
import time
import numpy as np
cimport numpy as np
from numpy cimport ndarray
from libc.stdlib cimport malloc, free
from go cimport GameState
from go_data cimport _BLACK, _EMPTY, _STONE, _LIBERTY, _CAPTURE, _FREE, _PASS, Group, Locations_List, locations_list_destroy, locations_list_new

# type of tensor created
# char works but float might be needed later
ctypedef char tensor_type

# type defining cdef function
ctypedef int (*preprocess_method)(Preprocess, GameState, tensor_type[ :, ::1 ], char*, int)


cdef class Preprocess:

    ############################################################################
    #   variable declarations                                                  #
    #                                                                          #
############################################################################

    # all feature processors
    # C array of function pointers, one preprocess_method per active feature
    # NOTE(review): allocation and release are not visible in this file - confirm
    # TODO find correct type so an array can be used
    cdef preprocess_method *processors

    # list with all features used currently
    # TODO find correct type so an array can be used
    cdef list feature_list

    # output tensor size
    # presumably the total amount of feature planes - TODO confirm
    cdef int output_dim

    # board size
    # presumably size is the side length and board_size the amount of
    # board locations (size * size) - TODO confirm against the .pyx
    cdef char  size
    cdef short board_size

    # pattern dictionaries
    cdef dict pattern_nakade
    cdef dict pattern_response_12d
    cdef dict pattern_non_response_3x3

    # pattern dictionary sizes
    cdef int pattern_nakade_size
    cdef int pattern_response_12d_size
    cdef int pattern_non_response_3x3_size

    ############################################################################
    #   Tensor generating functions                                            #
    #                                                                          #
    ############################################################################

    # every feature function below has the preprocess_method signature:
    #   state         game state to encode
    #   tensor        2d C-contiguous output buffer, indexed [ plane, location ]
    #   groups_after  extra char buffer handed to every feature
    #                 NOTE(review): its content is not visible here - confirm
    #   offSet        first plane this feature may write to
    # presumably the return value is the first plane after this feature,
    # verify against the implementation

    cdef int get_board(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    """
       A feature encoding WHITE BLACK and EMPTY on separate planes.
       plane 0 always refers to the current player stones
       plane 1 to the opponent stones
       plane 2 to empty locations
    """

    cdef int get_turns_since(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    """
       A feature encoding the age of the stone at each location up to 'maximum'

       Note:
       - the [maximum-1] plane is used for any stone with age greater than or equal to maximum
       - EMPTY locations are all-zero features
    """

    cdef int get_liberties(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    """
       A feature encoding the number of liberties of the group connected to the stone at
       each location

       Note:
       - there is no zero-liberties plane; the 0th plane indicates groups in atari
       - the [maximum-1] plane is used for any stone with liberties greater than or equal to maximum
       - EMPTY locations are all-zero features
    """

    cdef int get_capture_size(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    """
       A feature encoding the number of opponent stones that would be captured by
       playing at each location, up to 'maximum'

       Note:
       - we currently *do* treat the 0th plane as "capturing zero stones"
       - the [maximum-1] plane is used for any capturable group of size
         greater than or equal to maximum-1
       - the 0th plane is used for legal moves that would not result in capture
       - illegal move locations are all-zero features
    """

    cdef int get_self_atari_size(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    """
       A feature encoding the size of the own-stone group that is put into atari by
       playing at a location

    """

    cdef int get_liberties_after(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    """
       A feature encoding what the number of liberties *would be* of the group connected to
       the stone *if* played at a location

       Note:
       - there is no zero-liberties plane; the 0th plane indicates groups in atari
       - the [maximum-1] plane is used for any stone with liberties greater than or equal to maximum
       - illegal move locations are all-zero features
    """

    cdef int get_ladder_capture(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    """
       A feature wrapping GameState.is_ladder_capture().
       check if an opponent group can be captured in a ladder
    """

    cdef int get_ladder_escape(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    """
       A feature wrapping GameState.is_ladder_escape().
       check if player_current group can escape ladder
    """

    cdef int get_sensibleness(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    """
       A move is 'sensible' if it is legal and if it does not fill the current_player's own eye
    """

    cdef int get_legal(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    """
       Zero at all illegal moves, one at all legal moves. Unlike sensibleness, no eye check is done
       not used??
    """

    cdef int zeros(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    """
       Plane filled with zeros
    """

    cdef int ones(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    """
       Plane filled with ones
    """

    cdef int colour(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    """
       Value net feature, plane with ones if active_player is black else zeros
    """

    # presumably a single plane encoding the ko location, as documented for
    # the rollout variant of this class - TODO confirm
    cdef int ko(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    """
       ko
    """

    # pattern and response based features, declared without description here
    # the declarations with the same names in preprocessing_rollout.pxd are
    # documented, presumably the same contracts apply - verify before relying on it
    cdef int get_response(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    cdef int get_save_atari(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    cdef int get_neighbor(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    cdef int get_nakade(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    cdef int get_nakade_offset(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    cdef int get_response_12d(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    cdef int get_response_12d_offset(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    cdef int get_non_response_3x3(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)
    cdef int get_non_response_3x3_offset(self, GameState state, tensor_type[ :, ::1 ] tensor, char *groups_after, int offSet)

    ############################################################################
    #   public cdef function                                                   #
    #                                                                          #
    ############################################################################

    # returns a 4 dimensional ndarray of tensor_type
    cdef np.ndarray[ tensor_type, ndim=4 ] generate_tensor(self, GameState state)
    """
       Convert a GameState to a Theano-compatible tensor
    """
--------------------------------------------------------------------------------
/RocAlphaGo/preprocessing_rollout.pxd:
--------------------------------------------------------------------------------
import ast
import time
import numpy as np
cimport numpy as np
from numpy cimport ndarray
from libc.stdlib cimport malloc, free
from go cimport GameState
from go_data cimport _BLACK, _EMPTY, _STONE, _LIBERTY, _CAPTURE, _FREE, _PASS, _HASHVALUE, Group, Locations_List, locations_list_destroy, locations_list_new

# type of tensor created
# char works but float might be needed later
ctypedef char tensor_type

# type defining cdef function
# the rollout variant takes no char* buffer: (self, state, tensor, offSet)
ctypedef int (*preprocess_method)(Preprocess, GameState, tensor_type[ :, ::1 ], int)


cdef class Preprocess:

    ############################################################################
    #   variable declarations                                                  #
    #                                                                          #
    ############################################################################

    # all feature processors
    # TODO find correct type so an array can be used
    cdef preprocess_method *processors

    # list with all features used currently
    # TODO find correct type so an array can be used
    cdef list feature_list

    # output tensor size
    cdef int output_dim

    # board size
    cdef char  size
    cdef short board_size

    # pattern dictionaries
    cdef dict pattern_nakade
    cdef dict pattern_response_12d
    cdef dict pattern_non_response_3x3

    # pattern dictionary sizes
    cdef int pattern_nakade_size
    cdef int pattern_response_12d_size
    cdef int pattern_non_response_3x3_size

    ############################################################################
    #   Tensor generating functions                                            #
    #                                                                          #
############################################################################

    # every feature function below has the preprocess_method signature:
    #   state   game state to encode
    #   tensor  2d C-contiguous output buffer, indexed [ plane, location ]
    #   offSet  first plane this feature may write to
    # the implementations visible in preprocessing_rollout.pyx return offSet
    # plus the amount of planes the feature occupies

    cdef int get_board(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       A feature encoding WHITE BLACK and EMPTY on separate planes.
       plane 0 always refers to the current player stones
       plane 1 to the opponent stones
       plane 2 to empty locations
    """

    # the implementation uses 8 planes, 'maximum' is fixed to 8
    cdef int get_turns_since(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       A feature encoding the age of the stone at each location up to 'maximum'

       Note:
       - the [maximum-1] plane is used for any stone with age greater than or equal to maximum
       - EMPTY locations are all-zero features
    """

    # the implementation uses 8 planes, 'maximum' is fixed to 8
    cdef int get_liberties(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       A feature encoding the number of liberties of the group connected to the stone at
       each location

       Note:
       - there is no zero-liberties plane; the 0th plane indicates groups in atari
       - the [maximum-1] plane is used for any stone with liberties greater than or equal to maximum
       - EMPTY locations are all-zero features
    """

    # NOTE(review): the implementation calls state.get_ladder_captures(80),
    # not is_ladder_capture() as the text below says
    cdef int get_ladder_capture(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       A feature wrapping GameState.is_ladder_capture().
       check if an opponent group can be captured in a ladder
    """

    # NOTE(review): the implementation calls state.get_ladder_escapes(80),
    # not is_ladder_escape() as the text below says
    cdef int get_ladder_escape(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       A feature wrapping GameState.is_ladder_escape().
       check if player_current group can escape ladder
    """

    cdef int get_sensibleness(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       A move is 'sensible' if it is legal and if it does not fill the current_player's own eye
    """

    cdef int get_legal(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       Zero at all illegal moves, one at all legal moves. Unlike sensibleness, no eye check is done
       not used??
    """

    cdef int zeros(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       Plane filled with zeros
    """

    cdef int ones(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       Plane filled with ones
    """

    cdef int colour(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       Value net feature, plane with ones if active_player is black else zeros
    """

    cdef int ko(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       Single plane encoding ko location
    """

    cdef int get_response(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       single feature plane encoding whether this location matches any of the response
       patterns, for now it only checks the 12d response patterns as we do not use the
       3x3 response patterns.
    """

    # NOTE(review): the implementation calls state.get_ladder_escapes(1),
    # not is_ladder_escape() as the text below says
    cdef int get_save_atari(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       A feature wrapping GameState.is_ladder_escape().
       check if player_current group can escape atari for at least one turn
    """

    # only empty neighbor locations of the last move are set by the implementation
    cdef int get_neighbor(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       encode last move neighbor positions in two planes:
       - horizontal & vertical / direct neighbor
       - diagonal neighbor
    """

    cdef int get_nakade(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       A nakade pattern is a 12d pattern on a location a stone was captured before
       it is unclear if a max size of the captured group has to be considered and
       how recent the capture event should have been

       the 12d pattern can be encoded without stone colour and liberty count
       unclear if a border location should be considered a stone or liberty

       pattern lookup value is being set instead of 1
    """

    cdef int get_nakade_offset(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       A nakade pattern is a 12d pattern on a location a stone was captured before
       it is unclear if a max size of the captured group has to be considered and
       how recent the capture event should have been

       the 12d pattern can be encoded without stone colour and liberty count
       unclear if a border location should be considered a stone or liberty

       #pattern_id is offset
    """

    cdef int get_response_12d(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       Set 12d hash pattern for 12d shape around last move
       pattern lookup value is being set instead of 1
    """

    cdef int get_response_12d_offset(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       Set 12d hash pattern for 12d shape around last move where
       #pattern_id is offset
    """

    cdef int get_non_response_3x3(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       Set 3x3 hash pattern for every legal location where
       pattern lookup value is being set instead of 1
    """

    cdef int get_non_response_3x3_offset(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet)
    """
       Set 3x3 hash pattern for every legal location where
       #pattern_id is offset
    """

    ############################################################################
    #   public cdef function                                                   #
    #                                                                          #
    ############################################################################

    # returns a 4 dimensional ndarray of tensor_type
    cdef np.ndarray[ tensor_type, ndim=4 ] generate_tensor(self, GameState state)
    """
       Convert a GameState to a Theano-compatible tensor
    """
--------------------------------------------------------------------------------
/RocAlphaGo/preprocessing_rollout.pyx:
--------------------------------------------------------------------------------
# cython: profile=True
# cython: linetrace=True
# cython: wraparound=False
# cython: boundscheck=False
# cython: initializedcheck=False
# NOTE(review): profile and linetrace instrument every function and are
# normally only enabled while profiling - confirm they are wanted in
# regular builds
cimport cython
import numpy as np
cimport numpy as np


cdef class Preprocess:

    ############################################################################
    #   all variables are declared in the .pxd file                            #
    #                                                                          #
    ############################################################################

    # NOTE(review): the text below names preprocessing.pxd and a list typed
    # processors, the declarations of this class live in
    # preprocessing_rollout.pxd where processors is a preprocess_method pointer

    """ -> variables, declared in preprocessing.pxd

    # all feature processors
    # TODO find correct type so an array can be used
    cdef list processors

    # list with all features used currently
    # TODO find correct type so an array can be used
    cdef list feature_list

    # output tensor size
    cdef int output_dim

    # board size
    cdef char size
    cdef short board_size

    # pattern dictionaries
    cdef dict pattern_nakade
    cdef dict pattern_response_12d
    cdef dict pattern_non_response_3x3

    # pattern
dictionary sizes
    cdef int pattern_nakade_size
    cdef int pattern_response_12d_size
    cdef int pattern_non_response_3x3_size

    -> variables, declared in preprocessing.pxd
    """


    ############################################################################
    #   Tensor generating functions                                            #
    #                                                                          #
    ############################################################################


    @cython.nonecheck(False)
    cdef int get_board(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
        """
           three planes encoding the stones on the board
           plane offSet     current player stones
           plane offSet + 1 opponent stones
           plane offSet + 2 empty locations

           returns offSet + 3
        """

        cdef short position
        cdef Group* stone_group
        cdef char other_player = state.player_opponent
        cdef int plane_own = offSet
        cdef int plane_other = offSet + 1
        cdef int plane_empty = offSet + 2

        # every board location ends up on exactly one of the three planes
        for position in range(self.board_size):

            stone_group = state.board_groups[ position ]

            if stone_group.colour == _EMPTY:

                tensor[ plane_empty, position ] = 1
            elif stone_group.colour == other_player:

                tensor[ plane_other, position ] = 1
            else:

                tensor[ plane_own, position ] = 1

        return offSet + 3


    @cython.nonecheck(False)
    cdef int get_turns_since(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
        """
           eight planes encoding how many moves ago a stone was played
           plane offSet + n holds the stone played n moves ago ( n = 0..6 )
           plane offSet + 7 holds every older stone from the move history
           pass moves and locations without a stone set nothing

           returns offSet + 8
        """

        cdef Locations_List *history = state.moves_history
        cdef int oldest_plane = offSet + 7
        cdef set handled = set()
        cdef short location
        cdef int index, age

        # first mark every played location that still holds a stone as oldest
        for index in range(history.count):

            location = history.locations[ index ]

            # a pass has no board location
            if location == _PASS:

                continue

            if state.board_groups[ location ].colour > _EMPTY:

                tensor[ oldest_plane, location ] = 1

        # then walk the history from newest to oldest move, age counts
        # moves including passes and stops at 7
        age = 0
        index = history.count - 1

        while age < 7 and index >= 0:

            location = history.locations[ index ]

            # only the newest move on a location decides its age
            if location != _PASS and location not in handled and state.board_groups[ location ].colour > _EMPTY:

                tensor[ offSet + age, location ] = 1
                tensor[ oldest_plane, location ] = 0
                handled.add( location )

            index -= 1
            age += 1

        return offSet + 8


    @cython.nonecheck(False)
    cdef int get_liberties(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
        """
           eight planes encoding the liberty count of the group on a location
           plane offSet + n holds stones of groups with n + 1 liberties
           plane offSet + 7 holds stones of groups with 8 or more liberties
           empty locations set nothing

           returns offSet + 8
        """

        cdef Group* stone_group
        cdef short position
        cdef int plane

        for position in range(self.board_size):

            stone_group = state.board_groups[ position ]

            # skip locations without a stone
            if stone_group.colour <= _EMPTY:

                continue

            # one liberty maps to plane 0, everything above 8 shares plane 7
            plane = stone_group.count_liberty - 1

            if plane > 7:

                plane = 7

            tensor[ offSet + plane, position ] = 1

        return offSet + 8


    @cython.nonecheck(False)
    cdef int get_ladder_capture(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
        """
           single plane, set for every location state.get_ladder_captures(80)
           does not report as _FREE

           returns offSet + 1
        """

        cdef char* ladder_moves = state.get_ladder_captures(80)
        cdef int position

        # loop over all board locations
        for position in range(state.board_size):

            if ladder_moves[ position ] == _FREE:

                continue

            tensor[ offSet, position ] = 1

        # the result array is owned by this function
        free(ladder_moves)

        return offSet + 1


    @cython.nonecheck(False)
    cdef int get_ladder_escape(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
        """
           single plane, set for every location state.get_ladder_escapes(80)
           does not report as _FREE

           returns offSet + 1
        """

        cdef char* ladder_moves = state.get_ladder_escapes(80)
        cdef int position

        # loop over all board locations
        for position in range(state.board_size):

            if ladder_moves[ position ] == _FREE:

                continue

            tensor[ offSet, position ] = 1

        # the result array is owned by this function
        free(ladder_moves)

        return offSet + 1


    @cython.nonecheck(False)
    cdef int get_sensibleness(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
        """
           single plane, set for every legal move that does not fill a
           true eye of the current player

           returns offSet + 1
        """

        cdef int index
        cdef short position
        cdef Group* stone_group
        cdef char player = state.player_current

        # start with every legal move marked
        for index in range(state.moves_legal.count):

            tensor[ offSet, state.moves_legal.locations[ index ] ] = 1

        # list can increment but a big enough starting value is important
        cdef Locations_List* eyes = locations_list_new(15)

        # clear the liberties of own groups that turn out to be true eyes
        for index in range(state.groups_list.count_groups):

            stone_group = state.groups_list.board_groups[ index ]

            # only groups of the current player can own an eye
            if stone_group.colour != player:

                continue

            for position in range(self.board_size):

                # only liberties are possible eyes
                if stone_group.locations[ position ] == _LIBERTY and state.is_true_eye(position, eyes, player):

                    tensor[ offSet, position ] = 0

        locations_list_destroy(eyes)

        return offSet + 1


    @cython.nonecheck(False)
    cdef int get_legal(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
        """
           single plane, set for every legal move
           no eye check is done, see get_sensibleness for that
           not used??

           returns offSet + 1
        """

        cdef short index

        # mark every entry of the legal move list
        for index in range(state.moves_legal.count):

            tensor[ offSet, state.moves_legal.locations[ index ] ] = 1

        return offSet + 1


    @cython.nonecheck(False)
    cdef int get_response(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
        """
           single feature plane encoding whether this location matches any of the response
           patterns, for now it only checks the 12d response patterns as we do not use the
           3x3 response patterns.

           TODO
           - decide if we consider nakade patterns response patterns as well
           - optimization? 12d response patterns are calculated twice..
288 | """ 289 | 290 | cdef short location, location_x, location_y, last_move, last_move_x, last_move_y 291 | cdef int i, plane, id 292 | cdef long hash_base, hash_pattern 293 | cdef short *neighbor12d = state.neighbor12d 294 | 295 | # get last move 296 | last_move = state.moves_history.locations[ state.moves_history.count - 1 ] 297 | 298 | # check if last move is not _PASS 299 | if last_move != _PASS: 300 | 301 | # get 12d pattern hash of last move location and colour 302 | hash_base = state.get_hash_12d(last_move) 303 | 304 | # calculate last_move x and y 305 | last_move_x = last_move / state.size 306 | last_move_y = last_move % state.size 307 | 308 | # last_move location in neighbor12d array 309 | last_move *= 12 310 | 311 | # loop over all locations in 12d shape 312 | for i in range(12): 313 | 314 | # get location 315 | location = neighbor12d[last_move + i] 316 | 317 | # check if location is empty 318 | if state.board_groups[ location ].colour == _EMPTY: 319 | 320 | # calculate location x and y 321 | location_x = (location / state.size) - last_move_x 322 | location_y = (location % state.size) - last_move_y 323 | 324 | # calculate 12d response pattern hash 325 | hash_pattern = hash_base + location_x 326 | hash_pattern *= _HASHVALUE 327 | hash_pattern += location_y 328 | 329 | # dictionary lookup 330 | id = self.pattern_response_12d.get( hash_pattern ) 331 | 332 | if id >= 0: 333 | 334 | tensor[ offSet, location ] = 1 335 | 336 | return offSet + 1 337 | 338 | 339 | @cython.nonecheck(False) 340 | cdef int get_save_atari(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet): 341 | """ 342 | A feature wrapping GameState.is_ladder_escape(). 
343 | check if player_current group can escape atari for at least one turn 344 | """ 345 | 346 | cdef int location 347 | cdef char* escapes = state.get_ladder_escapes(1) 348 | 349 | # loop over all groups on board 350 | for location in range(state.board_size): 351 | 352 | if escapes[ location ] != _FREE: 353 | 354 | tensor[ offSet, location ] = 1 355 | 356 | # free escapes 357 | free(escapes) 358 | 359 | return offSet + 1 360 | 361 | 362 | @cython.nonecheck(False) 363 | cdef int get_neighbor(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet): 364 | """ 365 | encode last move neighbor positions in two planes: 366 | - horizontal & vertical / direct neighbor 367 | - diagonal neighbor 368 | """ 369 | 370 | cdef short location, last_move 371 | cdef int i, plane 372 | cdef short *neighbor3x3 = state.neighbor3x3 373 | 374 | # get last move 375 | last_move = state.moves_history.locations[ state.moves_history.count - 1 ] 376 | 377 | # check if last move is not _PASS 378 | if last_move != _PASS: 379 | 380 | # last_move location in neighbor3x3 array 381 | last_move *= 8 382 | 383 | # direct neighbor plane is plane offset 384 | plane = offSet 385 | 386 | # loop over direct neighbor 387 | # 0,1,2,3 are direct neighbor locations 388 | for i in range(4): 389 | 390 | # get neighbor location 391 | location = neighbor3x3[ last_move + i ] 392 | 393 | # check if location is empty 394 | if state.board_groups[ location ].colour == _EMPTY: 395 | 396 | tensor[ plane, location ] = 1 397 | 398 | # diagonal neighbor plane is plane offset + 1 399 | plane = offSet + 1 400 | 401 | # loop over diagonal neighbor 402 | # 4,5,6,7 are diagonal neighbor locations 403 | for i in range(4, 8): 404 | 405 | # get neighbor location 406 | location = neighbor3x3[ last_move + i ] 407 | 408 | # check if location is empty 409 | if state.board_groups[ location ].colour == _EMPTY: 410 | 411 | tensor[ plane, location ] = 1 412 | 413 | return offSet + 2 414 | 415 | 416 | @cython.nonecheck(False) 417 | 
cdef int get_nakade(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
    """
       A nakade pattern is a 12d pattern on a location a stone was captured before
       it is unclear if a max size of the captured group has to be considered and
       how recent the capture event should have been

       the 12d pattern can be encoded without stone colour and liberty count
       unclear if a border location should be considered a stone or liberty

       pattern lookup value is being set instead of 1

       Not implemented yet: writes nothing and reserves a single plane.
    """

    # TODO tensor type has to be float

    return offSet + 1


@cython.nonecheck(False)
cdef int get_nakade_offset(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
    """
       A nakade pattern is a 12d pattern on a location a stone was captured before
       it is unclear if a max size of the captured group has to be considered and
       how recent the capture event should have been

       the 12d pattern can be encoded without stone colour and liberty count
       unclear if a border location should be considered a stone or liberty

       #pattern_id is offset

       Not implemented yet: writes nothing and reserves pattern_nakade_size planes.
    """

    return offSet + self.pattern_nakade_size


@cython.nonecheck(False)
cdef int get_response_12d(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
    """
       Set 12d hash pattern for 12d shape around last move
       pattern lookup value is being set instead of 1

       Not implemented yet: writes nothing and reserves a single plane.
    """

    # get last move location
    # check for pass

    return offSet + 1


@cython.nonecheck(False)
cdef int get_response_12d_offset(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
    """
       Check all empty locations in a 12d shape around the last move for being a 12d response
       pattern match
       #pattern_id is offset

       base hash is 12d pattern hash of last move location + colour
       add relative position of every empty location in a 12d shape to get 12d response pattern hash

         c        hash                                      x   y
        ...       location a has: state.get_hash_12d(x),   -1,  0
       .ax..      location b has: state.get_hash_12d(x),   +1, -1
        ..b       location c has: state.get_hash_12d(x),    0, +2
         .

       12d response pattern hash value is calculated by:
       ( ( hash + x ) * _HASHVALUE ) + y

       All planes stay zero when there is no previous move or the previous
       move was a pass.

       returns the next free plane index: offSet + pattern_response_12d_size
    """

    cdef short location, location_x, location_y, last_move, last_move_x, last_move_y
    cdef int i, pattern_id
    cdef long hash_base, hash_pattern
    cdef short *neighbor12d = state.neighbor12d

    # no move played yet: locations[ -1 ] would read outside the array
    if state.moves_history.count <= 0:

        return offSet + self.pattern_response_12d_size

    # get last move
    last_move = state.moves_history.locations[ state.moves_history.count - 1 ]

    # check if last move is not _PASS
    if last_move != _PASS:

        # get 12d pattern hash of last move location and colour
        hash_base = state.get_hash_12d(last_move)

        # calculate last_move x and y
        last_move_x = last_move / state.size
        last_move_y = last_move % state.size

        # last_move location in neighbor12d array
        last_move *= 12

        # loop over all locations in 12d shape
        for i in range(12):

            # get location
            location = neighbor12d[last_move + i]

            # check if location is empty
            if state.board_groups[ location ].colour == _EMPTY:

                # calculate location x and y relative to the last move
                location_x = (location / state.size) - last_move_x
                location_y = (location % state.size) - last_move_y

                # calculate 12d response pattern hash
                hash_pattern = hash_base + location_x
                hash_pattern *= _HASHVALUE
                hash_pattern += location_y

                # dictionary lookup, -1 when the pattern is unknown
                # (a bare .get() would return None, which is not a valid C int)
                pattern_id = self.pattern_response_12d.get( hash_pattern, -1 )

                if pattern_id >= 0:

                    tensor[ offSet + pattern_id, location ] = 1

    return offSet + self.pattern_response_12d_size


@cython.nonecheck(False)
cdef int get_non_response_3x3(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
    """
       Set 3x3 hash pattern for every legal location where
       pattern lookup value is being set instead of 1

       Not implemented yet: writes nothing and reserves a single plane.
    """

    # TODO tensor type has to be float

    return offSet + 1


@cython.nonecheck(False)
cdef int get_non_response_3x3_offset(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
    """
       Set 3x3 hash pattern for every legal location where
       #pattern_id is offset

       returns the next free plane index: offSet + pattern_non_response_3x3_size
    """

    cdef short i, location
    cdef int pattern_id

    # loop over all legal moves and set to one
    for i in range(state.moves_legal.count):

        # get location
        location = state.moves_legal.locations[ i ]

        # get location hash and dict lookup, -1 when the pattern is unknown
        # (a bare .get() would return None, which is not a valid C int)
        pattern_id = self.pattern_non_response_3x3.get( state.get_3x3_hash( location ), -1 )

        if pattern_id >= 0:

            tensor[ offSet + pattern_id, location ] = 1

    return offSet + self.pattern_non_response_3x3_size


@cython.nonecheck(False)
cdef int zeros(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
    """
       Plane filled with zeros

       returns the next free plane index: offSet + 1
    """

    ##########################################################
    # strange things happen if a function does not do anything
    # do not remove next line without extensive testing!!!!!!!
    tensor[ offSet, 0 ] = 0

    return offSet + 1


@cython.nonecheck(False)
cdef int ones(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
    """
       Plane filled with ones

       returns the next free plane index: offSet + 1
    """

    cdef short location

    for location in range(0, self.board_size):

        tensor[ offSet, location ] = 1

    return offSet + 1


@cython.nonecheck(False)
cdef int colour(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
    """
       Value net feature, plane with ones if active_player is black else zeros

       returns the next free plane index: offSet + 1
    """

    cdef short location

    # if player_current is black
    if state.player_current == _BLACK:

        for location in range(0, self.board_size):

            tensor[ offSet, location ] = 1

    return offSet + 1


@cython.nonecheck(False)
cdef int ko(self, GameState state, tensor_type[ :, ::1 ] tensor, int offSet):
    """
       Single plane encoding ko location

       returns the next free plane index: offSet + 1
    """

    # value comparison: `is not` is an identity test and is not meant for C integers
    if state.ko != _PASS:

        tensor[ offSet, state.ko ] = 1

    return offSet + 1


############################################################################
#   init function                                                          #
#                                                                          #
############################################################################


def __init__(self, list feature_list, char size=19, dict_nakade=None, dict_3x3=None, dict_12d=None, verbose=False):
    """
       Build the list of feature processors and compute output_dim.

       feature_list: feature names (case-insensitive), processed in order
       size:         board edge length, board_size is size * size
       dict_nakade, dict_3x3, dict_12d:
                     optional paths to files holding a python dict literal
                     (read with ast.literal_eval); the pattern count is
                     max(dict value) + 1
       verbose:      print the amount of loaded patterns

       raises MemoryError if the processor array can not be allocated
       raises ValueError  on an unknown feature name
    """

    self.size = size
    self.board_size = size * size

    cdef int i

    # preprocess_method is a function pointer:
    # ctypedef int (*preprocess_method)(Preprocess, GameState, tensor_type[ :, ::1 ], char*, int)
    cdef preprocess_method processor

    # create a list with function pointers
    self.processors = malloc(len(feature_list) * sizeof(preprocess_method))

    if not self.processors:
        raise MemoryError()

    # load nakade patterns
    self.pattern_nakade = {}
    self.pattern_nakade_size = 0
    if dict_nakade is not None:
        with open(dict_nakade, 'r') as f:
            s = f.read()
            self.pattern_nakade = ast.literal_eval(s)
            # max() of an empty sequence raises, keep size 0 for an empty dict
            if self.pattern_nakade:
                self.pattern_nakade_size = max(self.pattern_nakade.values()) + 1

    # load 12d response patterns
    self.pattern_response_12d = {}
    self.pattern_response_12d_size = 0
    if dict_12d is not None:
        with open(dict_12d, 'r') as f:
            s = f.read()
            self.pattern_response_12d = ast.literal_eval(s)
            if self.pattern_response_12d:
                self.pattern_response_12d_size = max(self.pattern_response_12d.values()) + 1

    # load 3x3 non response patterns
    self.pattern_non_response_3x3 = {}
    self.pattern_non_response_3x3_size = 0
    if dict_3x3 is not None:
        with open(dict_3x3, 'r') as f:
            s = f.read()
            self.pattern_non_response_3x3 = ast.literal_eval(s)
            if self.pattern_non_response_3x3:
                self.pattern_non_response_3x3_size = max(self.pattern_non_response_3x3.values()) + 1

    if verbose:
        print("loaded " + str(self.pattern_nakade_size) + " nakade patterns")
        print("loaded " + str(self.pattern_response_12d_size) + " 12d patterns")
        print("loaded " + str(self.pattern_non_response_3x3_size) + " 3x3 patterns")

    self.feature_list = feature_list
    self.output_dim = 0

    # loop over feature_list add the corresponding function
    # and increment output_dim accordingly
    # the increment has to equal the amount of planes the processor
    # advances offSet with, generate_tensor relies on this
    for i in range(len(feature_list)):
        feat = feature_list[ i ].lower()
        if feat == "board":
            processor = self.get_board
            self.output_dim += 3

        elif feat == "ones":
            processor = self.ones
            self.output_dim += 1

        elif feat == "turns_since":
            processor = self.get_turns_since
            self.output_dim += 8

        elif feat == "liberties":
            processor = self.get_liberties
            self.output_dim += 8

        elif feat == "ladder_capture":
            processor = self.get_ladder_capture
            self.output_dim += 1

        elif feat == "ladder_escape":
            processor = self.get_ladder_escape
            self.output_dim += 1

        elif feat == "sensibleness":
            processor = self.get_sensibleness
            self.output_dim += 1

        elif feat == "zeros":
            processor = self.zeros
            self.output_dim += 1

        elif feat == "legal":
            processor = self.get_legal
            self.output_dim += 1

        elif feat == "response":
            processor = self.get_response
            self.output_dim += 1

        elif feat == "save_atari":
            processor = self.get_save_atari
            self.output_dim += 1

        elif feat == "neighbor":
            processor = self.get_neighbor
            self.output_dim += 2

        elif feat == "nakade":
            # one plane per pattern id -> the *_offset variant, which
            # advances offSet with pattern_nakade_size
            processor = self.get_nakade_offset
            self.output_dim += self.pattern_nakade_size

        elif feat == "response_12d":
            # one plane per pattern id -> the *_offset variant
            processor = self.get_response_12d_offset
            self.output_dim += self.pattern_response_12d_size

        elif feat == "non_response_3x3":
            # one plane per pattern id -> the *_offset variant
            processor = self.get_non_response_3x3_offset
            self.output_dim += self.pattern_non_response_3x3_size

        elif feat == "color":
            processor = self.colour
            self.output_dim += 1

        elif feat == "ko":
            processor = self.ko
            self.output_dim += 1
        else:

            # incorrect feature input
            raise ValueError("uknown feature: %s" % feat)

        self.processors[ i ] = processor


def __dealloc__(self):
    """
       Prevent memory leaks by freeing all arrays created with malloc
    """

    if self.processors is not NULL:
        free(self.processors)

############################################################################
#   public cdef function                                                   #
#                                                                          #
############################################################################


@cython.nonecheck(False)
cdef np.ndarray[ tensor_type, ndim=4 ] generate_tensor(self, GameState state):
    """
       Convert a GameState to a Theano-compatible tensor

       Every processor writes its planes into one (output_dim, board_size)
       array and returns the index of the next free plane.

       returns an array of shape (1, output_dim, size, size)
    """

    cdef int i
    cdef preprocess_method proc

    # create complete array now instead of concatenate later
    # TODO check if we can use a Malloc array somehow.. faster!!
    cdef np.ndarray[ tensor_type, ndim=2 ] np_tensor = np.zeros((self.output_dim, self.board_size), dtype=np.int8)
    cdef tensor_type[ :, ::1 ] tensor = np_tensor

    cdef int offSet = 0

    # loop over all processors and generate tensor
    for i in range(len(self.feature_list)):

        proc = self.processors[ i ]
        offSet = proc(self, state, tensor, offSet)

    # create a singleton 'batch' dimension
    return np_tensor.reshape((1, self.output_dim, self.size, self.size))


############################################################################
#   public def function (Python)                                           #
#                                                                          #
############################################################################


def state_to_tensor(self, GameState state):
    """
       Convert a GameState to a Theano-compatible tensor
       (python-callable wrapper around generate_tensor)
    """

    return self.generate_tensor(state)


def get_output_dimension(self):
    """
       return output_dim, the amount of planes an output tensor will have
    """

    return self.output_dim


def get_feature_list(self):
    """
       return feature list
    """

    return self.feature_list

--------------------------------------------------------------------------------
/game.py:
--------------------------------------------------------------------------------
import numpy as np
PASS = -1
EMPTY = 2
WHITE = 3
BLACK = 4

'''
| Feature         | # of planes | Description
|-----------------|-------------|-------------------------
| Stone colour    |      3      | Player stone / opponent stone / empty
| Ones
| 1 | A constant plane filled with 1 12 | | Turns since | 8 | How many turns since a move was played 13 | | Liberties | 8 | Number of liberties (empty adjacent points) 14 | | Capture size | 8 | How many opponent stones would be captured 15 | | Self-atari size | 8 | How many of own stones would be captured 16 | | Liberties after move | 8 | Number of liberties after this move is played 17 | | Ladder capture | 1 | Whether a move at this point is a successful ladder capture 18 | | Ladder escape | 1 | Whether a move at this point is a successful ladder escape 19 | | Sensibleness | 1 | Whether a move is legal and does not fill its own eyes 20 | | Zeros | 1 | A constant plane filled with 0 21 | | Player color | 1 | Whether current player is black (only for Value Network) 22 | ''' 23 | DEFAULT_FEATURES = [ 24 | "board", "ones", "turns_since", "liberties", "capture_size", 25 | "self_atari_size", "liberties_after", "ladder_capture", "ladder_escape", 26 | "sensibleness", "zeros"] 27 | 28 | def UpdateGroups(model, stones, liberties, captures, data, label, player): 29 | ''' 30 | stones: shape (mini_batch, 361, 361) type=BOOL 31 | | Features | # of planes | 32 | |-----------------|-------------| 33 | | Stones | 1 | 34 | | Liberties | 1 | 35 | | Captures | 1 | 36 | 37 | data: shape (mini_batch,48,19,19) 38 | label: type blob, shape (mini_batch,) 39 | player: BLACK or WHITE 40 | ''' 41 | black = model.ConstantFill([], 'BLACK', shape=[1,], value=BLACK) 42 | white = model.ConstantFill([], 'WHITE', shape=[1,], value=WHITE) 43 | 44 | # group of current position = self + intersect(union of neighbor groups, self stones) 45 | g_left = momodel.Slice([stones, [label-1,0], [label-1,-1]], 'g_left') # if label-1<0 should be null 46 | g_right = momodel.Slice([stones, [label-1,0], [label-1,-1]], 'g_right') # if label+1>360 should be null 47 | g_up = model.Slice([stones, [label-19,0], [label-19,-1]], 'g_up') # if label-19<0 should be null 48 | g_down = model.Slice([stones, [label+19,0], 
[label+19,-1]], 'g_down') # if label+19>360 should be null 49 | # union of neighbor groups 50 | g_self = model.Or([g_left, g_right], 'g_self') 51 | g_self = model.Or([g_self, g_up], 'g_self') 52 | g_self = model.Or([g_self, g_down], 'g_self') 53 | # board0 contains all player stones 54 | board0 = model.Slice([data, [N,0,0,0], [N,0,19,19]], 'board0') 55 | g_self = model.And([g_self, board0], 'g_self') 56 | # onehot to get self 57 | onehot = model.Cast(onehot, 'onehotb', to=BOOL) 58 | # group of current position 59 | g_self = model.Or([g_self, onehotb], 'g_self') 60 | 61 | # liberties of current group = SUM(liberties of neighbor) - 4 + 2 * SUM(liberties of self) 62 | # board2 contains all empty 63 | board2 = model.Slice([data, [N,2,0,0],[N,2,19,19]], 'board2') 64 | # liberties of self can be counted from board2 65 | l_self = model.Add(board2[label-1], board2[label+1], board2[label-19], board2[label+19], 'l_self') 66 | # liberties of neighbor can be counted from liberties 67 | l_neighbor = model.Add(liberties[label-1], liberties[label+1], liberties[label-19], liberties[label+19], 'l_neighbor') 68 | l_self = 2 * l_self + l_neighbor - 4 69 | 70 | # liberties of neighbor opponent = 71 | model.Substract(neighbor,1) # only if neighbor is independant group 72 | 73 | # Captures of current move = 74 | c_self = None 75 | 76 | # all stones in current group will update 77 | indices = model.LengthsRangeFill([361,], 'indices') # [0,1,...360] 78 | indices = model.BooleanMask([indices, g_self], 'indices') 79 | # 80 | stones = model.ScatterAssign([stones, indices, slice], g_self) # update inplace 81 | liberties = model.ScatterAssign([liberties, indices, slice], l_self) 82 | captures = model.ScatterAssign([captures, indices, slice], c_self) 83 | 84 | return stones, liberties, captures 85 | 86 | def UpdateLiberties(model, groups_after, data, label, player, batch_size=64): 87 | ''' 88 | groups_after: shape (mini_batch, 19x19, 19x19) type=BOOL 89 | ''' 90 | neighbors = 
np.zeros((19,19,21,21), dtype=np.bool) # constant represents neighbors, including borders 91 | for i in range(19): 92 | for j in range(19): 93 | neighbors[i, j, i, j+1] = True # ◌ ◌ ● ◌ ◌ 94 | neighbors[i, j, i+1, j] = True # ◌ ● ◌ ● ◌ 95 | neighbors[i, j, i+1, j+2] = True # 96 | neighbors[i, j, i+2, j+1] = True # ◌ ◌ ● ◌ ◌ 97 | # remove borders (19,19,21,21) => (19,19,19,19) 98 | neighbors = np.delete(neighbors, [0,20], axis=2) 99 | neighbors = np.delete(neighbors, [0,20], axis=3) 100 | NEIGHBORS = model.GivenTensorBoolFill([], 'neighbors', shape=[batch_size,361,361], values=neighbors) # 101 | # 102 | INDICES = model.LengthsRangeFill([361]*batch_size, 'indices') # N*[0,1,...360] 103 | 104 | current_group = model.BooleanMask([INDICES, groups_after[label]], 'current_group') # (N,361) 105 | group_neighbors = model.Or(NEIGHBORS[current_group], 'group_neighbors' ,axis=1) # (N,?) 106 | empties = model.Slice([data, [N,2,0,0],[N,2,19,19]], 'empties') # all empties on board[2] 107 | liberties_pos = model.And([group_neighbors, empties], 'liberties_pos') # (N,361) 108 | liberties_count = model.countTrue(liberties_pos, 'liberties_count', axis=1) # (N,) 109 | liberties_after = groups 110 | return liberties_after 111 | 112 | def UpdateGameStatus(model, data, predict): 113 | ''' UpdateGameStatus 114 | It does not consider symmetric, all games are treated independantly. 
115 | Input: data with shape (N, C, H, W) 116 | predict with shape (N, C, H, W) 117 | Output: data with shape (N, C, H, W) 118 | ''' 119 | BOARD_SIZE = model.ConstantFill([], 'board_size', shape=[1,], value=361) # constant 120 | SPLIT_SIZE = model.GivenTensorIntFill([], 'split_size', shape=[15,], values=np.array([1,1,1,1,6,1,1,8,8,8,8,1,1,1,1])) # constant 121 | 122 | board0, board1, board2, ones3, \ 123 | turns_since4to9, turns_since10, turns_since11, liberties12to19, \ 124 | capture_size20to27, self_atari_size28to35, liberties_after36to43, \ 125 | ladder_capture44, ladder_escape45, sensibleness46, zeros47 = model.Split([data, SPLIT_SIZE], \ 126 | ['board0', 'board1', 'board2','ones3', \ 127 | 'turns_since4to9', 'turns_since10', 'turns_since11', 'liberties12to19', \ 128 | 'capture_size20to27', 'self_atari_size28to35', 'liberties_after36to43', \ 129 | 'ladder_capture44', 'ladder_escape45', 'sensibleness46', 'zeros47'], \ 130 | axis=1) 131 | 132 | _topk, topk_indices = model.TopK(predict, ['_topk', 'topk_indices'], k=1) #shape=(mini_batch,1) 133 | label = model.FlattenToVec([topk_indices], ['label']) # shape=(mini_batch,) 134 | 135 | onehot2d = model.OneHot([label, BOARD_SIZE], 'onehot2d') # shape=(mini_batch,361) 136 | onehot, _shape = model.Reshape(['onehot2d'], ['onehot', '_shape'], shape=(0,1,19,19)) #shape=(mini_batch,1,19,19) 137 | 138 | ## board 139 | # player of this turn = opponent of last turn 140 | board0n = board1 141 | # opponent of this turn = player of last turn 142 | board1n = model.Add([board0, onehot], 'board1n') 143 | # empty 144 | board2n = model.Sub([board2, onehot], 'board2n') 145 | ## ones 146 | ones3n = ones3 # all ONE 147 | ## turns since --- age the stones 148 | # for new move set age = 0 149 | turns_since4n = onehot 150 | # for age in [1..6] set age += 1 151 | turns_since5to10n = turns_since4to9 152 | # for age >= 7 set age = 8 153 | turns_since11n = model.Add([ turns_since10, turns_since11], ' turns_since11n') 154 | # liberties = 
liberties after move of last move 155 | liberties12to19n = liberties_after36to43 156 | # TBD: 157 | capture_size20to27n = capture_size20to27 158 | # TBD: 159 | self_atari_size28to35n = self_atari_size28to35 160 | # TBD: liberties after move 161 | liberties_after36to43n = liberties_after36to43 162 | # after this move, this stone (not group) has N vacant neighbor (N=0..3) 163 | # for neighbor opponent group, minus 1 liberties 164 | # if opponent group reaches 0 liberties, remove the stones 165 | # for neighbor self group, plus N-1 liberties 166 | # TBD: 167 | ladder_capture44n = ladder_capture44 168 | ladder_escape45n = ladder_escape45 169 | sensibleness46n = board2n 170 | ## zeros 171 | zeros47n = zeros47 172 | ### 173 | data, _dim = model.Concat([board0n, board1n, board2n, ones3n, \ 174 | turns_since4n, turns_since5to10n, turns_since11n, liberties12to19n, \ 175 | capture_size20to27n, self_atari_size28to35n, liberties_after36to43n, \ 176 | ladder_capture44n, ladder_escape45n, sensibleness46n, zeros47n], \ 177 | ['data','_dim'], axis=1) 178 | return data 179 | 180 | #def InitGame(model, mini_batch=64): 181 | # ZERO = np.zeros((mini_batch,1,19,19), dtype=np.float32) 182 | # ONE = np.ones((mini_batch,1,19,19), dtype=np.float32) 183 | # init_data = np.concatenate((ZERO,ZERO,ONE,ZERO,ZERO,ZERO,ZERO,ZERO,ZERO,ZERO,ZERO,ZERO,ONE), axis=1) 184 | # workspace.FeedBlob("data", init_data) 185 | # 186 | # model = model_helper.ModelHelper(name="model", arg_scope={"order": "NCHW"}, init_params=True) 187 | # AddConvModel(model, "data", dim_in=13) 188 | # AddGamePlay(model, "data", "predict", mini_batch=mini_batch) 189 | # 190 | # workspace.RunNetOnce(model.param_init_net) 191 | # workspace.CreateNet(model.net, overwrite=True) 192 | # workspace.RunNet(model.net) 193 | # 194 | # init_move = np.reshape(workspace.FetchBlob('predict')[0], (-1)) # shape=(361,) 195 | # top_choice = np.argsort(-init_move)[0:mini_batch] # the top K step 196 | # 197 | # for i in range(mini_batch): 198 | # x 
= top_choice[i]/19 199 | # y = top_choice[i]%19 200 | # init_data[i,1,x,y] = 1 # opponent plus (x,y) 201 | # init_data[i,2,x,y] = 0 # empty minus (x,y) 202 | # init_data[i,4,x,y] = 1 # last 1 step plus (x,y) 203 | # init_data[i,12] = -1 204 | # 205 | # workspace.FeedBlob("data", init_data) 206 | # return data 207 | 208 | #def Symmetric(model, predict): 209 | # ''' Symmetric is optional 210 | # Input: predict with shape (N*8, C, H, W) 211 | # Output: symm_predict with shape (N*8, C, H, W) 212 | # ''' 213 | # # Unify 214 | # symm0, symm1, symm2, symm3, \ 215 | # symm4, symm5, symm6, symm7 = model.Split([predict], ['symm0', 'symm1', 'symm2', 'symm3', 216 | # 'symm4', 'symm5', 'symm6', 'symm7'], axis=0) 217 | # symm0u = symm0 218 | # symm1u = model.Flip(symm1, axes(3)) 219 | # symm2u = model.Flip(symm2, axes=(2)) 220 | # symm3u = model.Flip(symm3, axes=(2,3)) 221 | # symm4u = model.Transpose(symm4, axes=(0,1,3,2)) 222 | # symm5u = model.Flip(symm5, axes=(3)) 223 | # symm6u = model.Flip(symm6, axes=(2)) 224 | # symm7u = model.Flip(symm7, axes=(2,3)) 225 | # # Average 226 | # unify_predict = model.avg(symm0r, symm1r, ... symm7r) 227 | # # Diversify 228 | # symm0d = model.Reshape(unify_predict, Nx1x19x19) 229 | # symm1d = model.Flip(symm0d, axes=(3)) 230 | # symm2d = model.Flip(symm0d, axes=(2)) 231 | # symm3d = model.Flip(symm0d, axes=(2,3)) 232 | # symm4d = model.Transpose(symm0d, axes=(0,1,3,2)) 233 | # symm5d = model.Flip(symm4d, axes=(3)) 234 | # symm6d = model.Flip(symm4d, axes=(2)) 235 | # symm7d = model.Flip(symm4d, axes=(2,3)) 236 | # # shape(symm_predict) = [N*8,C,H,W] 237 | # symm_predict = model.concatenate(symm0, ... 
symm7) 238 | # return symm_predict 239 | 240 | -------------------------------------------------------------------------------- /modeling.py: -------------------------------------------------------------------------------- 1 | from caffe2.python import core, model_helper, brew, utils 2 | from caffe2.proto import caffe2_pb2 3 | 4 | def AddInput(model, batch_size, db, db_type): 5 | # Data is stored in INT8 while label is stored in UINT16 6 | # This will save disk storage 7 | data_int8, label_uint16 = model.TensorProtosDBInput( 8 | [], ['data_int8', 'label_uint16'], batch_size=batch_size, 9 | db=db, db_type=db_type) 10 | # cast data to float 11 | data = model.Cast(data_int8, 'data', to=core.DataType.FLOAT) 12 | # cast to int 13 | label_int32 = model.Cast(label_uint16, 'label_int32', to=core.DataType.INT32) 14 | label = model.FlattenToVec(label_int32, 'label') 15 | # don't need the gradient for the backward pass 16 | data = model.StopGradient(data, data) 17 | label = model.StopGradient(label, label) 18 | return data, label 19 | 20 | def AddConvModel(model, data, conv_level=13, filters=192, dim_in=48): 21 | # Layer 1: 48 x 19 x 19 -pad-> 48 x 23 x 23 -conv-> 192 x 19 x 19 22 | pad1 = model.PadImage(data, 'pad1', pad_t=2, pad_l=2, pad_b=2, pad_r=2, mode="constant", value=0.) 23 | conv1 = brew.conv(model, pad1, 'conv1', dim_in=dim_in, dim_out=filters, kernel=5) 24 | input = brew.relu(model, conv1, 'relu1') 25 | # Layer 2-12: 192 x 19 x 19 -pad-> 192 x 21 x 21 -conv-> 192 x 19 x 19 26 | def AddConvLevel(model, input, i, filters): 27 | pad = model.PadImage(input, 'pad{}'.format(i), pad_t=1, pad_l=1, pad_b=1, pad_r=1, mode="constant", value=0.) 
28 | conv = brew.conv(model, pad, 'conv{}'.format(i), dim_in=filters, dim_out=filters, kernel=3) 29 | relu = brew.relu(model, conv, 'relu{}'.format(i)) 30 | return relu 31 | for i in range(2, conv_level): 32 | output = AddConvLevel(model, input, i, filters) 33 | input = output 34 | # Layer 13: 192 x 19 x 19 -conv-> 1 x 19 x 19 -softmax-> 361 35 | conv13 = brew.conv(model, output, 'conv13', dim_in=filters, dim_out=1, kernel=1) 36 | predict = model.Flatten(conv13, 'predict') 37 | return predict 38 | 39 | def AddSoftmax(model, predict): 40 | softmax = brew.softmax(model, predict, 'softmax') 41 | return softmax 42 | 43 | def AddAccuracy(model, softmax, label, log=True): 44 | """Adds an accuracy op to the model""" 45 | accuracy = brew.accuracy(model, [softmax, label], "accuracy") 46 | if log: 47 | model.Print('accuracy', [], to_file=1) 48 | return accuracy 49 | 50 | def AddOneHot(model, label): 51 | """Decode onehot at modelling, not on the fly 52 | """ 53 | with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU, 0)): 54 | BOARD_SIZE = model.param_init_net.ConstantFill([], 'BOARD_SIZE', shape=[1,], value=361) # constant 55 | label_int64 = model.Cast(label, 'label_int64', to=core.DataType.INT64) 56 | onehot = model.OneHot([label_int64, BOARD_SIZE], 'onehot') # shape=(mini_batch,361) 57 | onehot = model.StopGradient(onehot, onehot) 58 | return onehot 59 | 60 | def AddTrainingOperators(model, predict, label, expect, base_lr, log=True): 61 | """Adds training operators to the model. 62 | predict: Predicted distribution by Policy Model 63 | expect: Expected distribution by MCTS, or transformed from Policy Model 64 | base_lr: Base Learning Rate. 
Always fixed 65 | """ 66 | # compute the expected loss 67 | if label: 68 | onehot = AddOneHot(model, label) 69 | softmax, xent = model.SoftmaxWithLoss([predict, onehot], ['softmax', 'xent'], label_prob=1) 70 | AddAccuracy(model, softmax, label, log) 71 | else: 72 | softmax, xent = model.SoftmaxWithLoss([predict, expect], ['softmax', 'xent'], label_prob=1) 73 | loss = model.AveragedLoss(xent, "loss") 74 | # use the average loss we just computed to add gradient operators to the model 75 | model.AddGradientOperators([loss]) 76 | # do a simple stochastic gradient descent 77 | ITER = brew.iter(model, "iter") 78 | # set the learning rate schedule 79 | LR = model.LearningRate(ITER, "LR", base_lr=base_lr, policy="fixed") # when policy=fixed, stepsize and gamma are ignored 80 | # ONE is a constant value that is used in the gradient update. We only need 81 | # to create it once, so it is explicitly placed in param_init_net. 82 | ONE = model.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0) 83 | # Now, for each parameter, we do the gradient updates. 84 | for param in model.params: 85 | # Note how we get the gradient of each parameter - ModelHelper keeps 86 | # track of that. 
def AddInput(model, batch_size, db, db_type):
    """Add the DB reader plus cast / stop-gradient ops for one mini-batch.

    params
      batch_size: passed through to TensorProtosDBInput
      db, db_type: database location and type, passed through unchanged
    returns
      (data, label, reward) blobs; gradients are stopped on all three
    """
    # Data is stored in INT8 while label is stored in INT32 and reward is stored in FLOAT
    # This will save disk storage
    data_int8, label_int32, reward_float = model.TensorProtosDBInput(
        [], ['data_int8', 'label_int32', 'reward_float'], batch_size=batch_size,
        db=db, db_type=db_type)
    # cast data to float
    data = model.Cast(data_int8, 'data', to=core.DataType.FLOAT)
    label = model.Cast(label_int32, 'label', to=core.DataType.INT32)
    reward = model.Cast(reward_float, 'reward', to=core.DataType.FLOAT)
    # don't need the gradient for the backward pass
    data = model.StopGradient(data, data)
    label = model.StopGradient(label, label)
    reward = model.StopGradient(reward, reward)
    return data, label, reward


def AddResNetModel(model, data, num_blocks=19, filters=256, dim_in=17, is_test=True):
    """Add the residual tower with its policy head and value head.

    params
      data: Data Input in shape of NCHW.
      num_blocks: Number of residual blocks in the tower. Each block contains
                  2 convolutions. Default 19. 0 is accepted and attaches the
                  heads directly to the first convolution layer.
      filters: Number of output Channel of NCHW. Default 256
      dim_in: Number of input Channel of NCHW. Default 17
      is_test: forwarded to every spatial_bn layer. Default True
    returns
      predict: unscaled prediction, need Softmax to translate to probabilities
      value: tanh output of the value head, flattened to a vector
    """
    # Layer 1: 17 x 19 x 19 -pad-> 17 x 21 x 21 -conv-> 256 x 19 x 19
    pad1 = model.PadImage(data, 'pad1', pad_t=1, pad_l=1, pad_b=1, pad_r=1, mode="constant", value=0.)
    conv1 = brew.conv(model, pad1, 'conv1', dim_in=dim_in, dim_out=filters, kernel=3)
    norm1 = brew.spatial_bn(model, conv1, 'norm1', filters, is_test=is_test)
    tower = brew.relu(model, norm1, 'relu1')

    # Blocks: 256 x 19 x 19 -conv-> -normalize-> -relu-> -conv-> -normalize-> +INPUT -relu-> 256 x 19 x 19
    def AddResBlock(model, block_in, i, filters, scope='res'):
        pad1 = model.PadImage(block_in, '{}/{}/pad1'.format(scope,i), pad_t=1, pad_l=1, pad_b=1, pad_r=1, mode="constant", value=0.)
        conv1 = brew.conv(model, pad1, '{}/{}/conv1'.format(scope,i), dim_in=filters, dim_out=filters, kernel=3)
        norm1 = brew.spatial_bn(model, conv1, '{}/{}/norm1'.format(scope,i), filters, is_test=is_test)
        relu1 = brew.relu(model, norm1, '{}/{}/relu1'.format(scope,i))
        pad2 = model.PadImage(relu1, '{}/{}/pad2'.format(scope,i), pad_t=1, pad_l=1, pad_b=1, pad_r=1, mode="constant", value=0.)
        conv2 = brew.conv(model, pad2, '{}/{}/conv2'.format(scope,i), dim_in=filters, dim_out=filters, kernel=3)
        norm2 = brew.spatial_bn(model, conv2, '{}/{}/norm2'.format(scope,i), filters, is_test=is_test)
        # skip connection: add the block input back before the final relu
        res = model.Add([norm2, block_in], '{}/{}/res'.format(scope,i))
        return brew.relu(model, res, '{}/{}/relu2'.format(scope,i))

    # A single running blob is threaded through the tower, so the heads below
    # always have a defined input even when num_blocks is 0.
    for i in range(num_blocks):
        tower = AddResBlock(model, tower, i, filters)

    # Policy Head: 256 x 19 x 19 -conv-> 2 x 19 x 19 -normalize-> -relu-> -FC-> 362
    ph_conv1 = brew.conv(model, tower, 'ph/conv1', dim_in=filters, dim_out=2, kernel=1)
    ph_norm1 = brew.spatial_bn(model, ph_conv1, 'ph/norm1', 2, is_test=is_test)
    ph_relu1 = brew.relu(model, ph_norm1, 'ph/relu1')
    ph_fc = brew.fc(model, ph_relu1, 'ph/fc', dim_in=2*19*19, dim_out=362)
    predict = model.Flatten(ph_fc, 'predict')
    # Value Head: 256 x 19 x 19 -conv-> 1 x 19 x 19 -> -normalize-> -relu-> -FC-> 256 x 19 x19 -relu-> -FC-> 1(scalar) -tanh->
    vh_conv1 = brew.conv(model, tower, 'vh/conv1', dim_in=filters, dim_out=1, kernel=1)
    vh_norm1 = brew.spatial_bn(model, vh_conv1, 'vh/norm1', 1, is_test=is_test)
    vh_relu1 = brew.relu(model, vh_norm1, 'vh/relu1')
    vh_fc1 = brew.fc(model, vh_relu1, 'vh/fc1', dim_in=1*19*19, dim_out=filters*19*19)
    vh_relu2 = brew.relu(model, vh_fc1, 'vh/relu2')
    vh_fc2 = brew.fc(model, vh_relu2, 'vh/fc2', dim_in=filters*19*19, dim_out=1)
    vh_tanh = brew.tanh(model, vh_fc2, 'vh/tanh')
    value = model.FlattenToVec(vh_tanh, 'value')
    return predict, value


def AddSoftmax(model, predict):
    """Append a softmax op that reads `predict` and writes the 'softmax' blob."""
    softmax = brew.softmax(model, predict, 'softmax')
    return softmax


def AddAccuracy(model, softmax, label, log=True):
    """Adds an accuracy op to the model.

    When `log` is true, a Print op for the 'accuracy' blob is also added
    (with to_file=1). Returns the accuracy blob.
    """
    accuracy = brew.accuracy(model, [softmax, label], "accuracy")
    if log:
        model.Print('accuracy', [], to_file=1)
    return accuracy
def AddOneHot(model, label):
    """Decode onehot at modelling, not on the fly.

    The label is cast to INT64, expanded by the OneHot op against the
    constant 'BOARD_SIZE' blob (value 362), and wrapped in StopGradient.
    Returns the 'onehot' blob.
    """
    # NOTE(review): the collapsed source does not show how far the DeviceScope
    # block extends; it is assumed here to cover only the constant -- confirm.
    with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU, 0)):
        BOARD_SIZE = model.param_init_net.ConstantFill([], 'BOARD_SIZE', shape=[1,], value=362)  # constant
    label_int64 = model.Cast(label, 'label_int64', to=core.DataType.INT64)
    onehot = model.OneHot([label_int64, BOARD_SIZE], 'onehot')  # shape=(mini_batch,362)
    onehot = model.StopGradient(onehot, onehot)
    return onehot


def AddTrainingOperators(model, predict, expect, value, reward,
                         base_lr=-0.1, policy='fixed', stepsize=200000, gamma=0.1, log=True):
    """Adds training operators to the model.

    params
      predict: Predicted move by Policy Model, unscaled, in shape (N,362)
      expect: Expected distribution by MCTS, in shape (N,362)
      value: Predicted value by Value Model, scalar value in (-1,1)
      reward: Labelled value, scalar value in {-1,1}
      base_lr: Base Learning Rate. It is negative by default because the
               update below ADDS LR * gradient to each parameter.
      policy, stepsize, gamma: learning rate schedule, forwarded unchanged
               to the LearningRate op
      log: Whether to add Print ops (to_file=1) for 'loss', 'xent' and
           'msqrl2', default True
    """
    # policy loss: softmax cross entropy against the expected distribution
    _, xent = model.SoftmaxWithLoss([predict, expect], ['_', 'xent'], label_prob=1)
    # value loss: averaged squared L2 distance between value and reward
    msqrl2 = model.AveragedLoss(model.SquaredL2Distance([value, reward], None), 'msqrl2')
    loss = model.Add([xent, msqrl2], 'loss')
    # use the combined loss we just computed to add gradient operators to the model
    model.AddGradientOperators([loss])
    # do a simple stochastic gradient descent
    ITER = brew.iter(model, "iter")
    # set the learning rate schedule
    LR = model.LearningRate(ITER, "LR", base_lr=base_lr, policy=policy, stepsize=stepsize, gamma=gamma)
    # ONE is a constant value that is used in the gradient update. We only need
    # to create it once, so it is explicitly placed in param_init_net.
    ONE = model.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)
    # Now, for each parameter, we do the gradient updates.
    for param in model.params:
        # Note how we get the gradient of each parameter - ModelHelper keeps
        # track of that.
        param_grad = model.param_to_grad[param]
        # The update is a simple weighted sum: param = param + param_grad * LR
        model.WeightedSum([param, ONE, param_grad, LR], param)
    if log:
        model.Print('loss', [], to_file=1)
        model.Print('xent', [], to_file=1)
        model.Print('msqrl2', [], to_file=1)


def LoadParams(load_from):
    """Read a serialized NetDef from `load_from` and run it once in the workspace.

    The file is opened in binary mode: it holds serialized protobuf bytes
    (SaveParams writes it with 'wb'), and ParseFromString must receive the
    bytes exactly as they were written.
    """
    init_def = caffe2_pb2.NetDef()
    with open(load_from, 'rb') as f:
        init_def.ParseFromString(f.read())
    workspace.RunNetOnce(init_def.SerializeToString())


def SaveParams(model, save_to):
    """Write the current value of every blob in model.params to `save_to`.

    Each parameter is fetched from the workspace and recorded as a
    GivenTensorFill op (with its shape and values) in a fresh NetDef, which
    is then serialized to the file in binary mode.
    """
    init_net = caffe2_pb2.NetDef()
    for param in model.params:
        blob = workspace.FetchBlob(param)
        shape = blob.shape
        op = core.CreateOperator("GivenTensorFill",
                                 [],
                                 [param],
                                 arg=[utils.MakeArgument("shape", shape), utils.MakeArgument("values", blob)])
        init_net.op.extend([op])
    with open(save_to, 'wb') as f:
        f.write(init_net.SerializeToString())
# BOARD_POSITION contains the SGF symbol which represents each row (or column) of the board
# It can be used to convert between 0,1,2,3... and a,b,c,d...
# Symbol [tt] or [] represents PASS in SGF, therefore it is omitted
BOARD_POSITION = 'abcdefghijklmnopqrs'


def GetWinner(game_state):
    """Return the SGF result prefix for `game_state.get_winner()`.

    'B+' when the winner equals BLACK, 'W+' when it equals WHITE, and 'T'
    for anything else.
    """
    winner = game_state.get_winner()
    if winner == BLACK:
        return 'B+'
    elif winner == WHITE:
        return 'W+'
    else:
        return 'T'


def WriteBackSGF(winner, history, filename, PB=None, PW=None, Komi='7.5'):
    """Record a finished game as '<filename>.sgf'.

    params
      winner: written (via str) as the RE property, e.g. 'B+', 'W+' or 'T'
      history: iterable of steps; step[0] is used as the property name of the
               move node and step[1] as an integer board index that is split
               by 19 into two BOARD_POSITION symbols
      filename: output path without the '.sgf' extension
      PB, PW: formatted into the player names as 'RL-<PB>' and 'SL-<PW>'
      Komi: written (via str) as the KM property, default '7.5'
    """
    parser = sgf.Parser()
    # NOTE(review): presumably the Collection hooks itself onto the parser so
    # that the parser calls below populate it -- confirm against the sgf package.
    collection = sgf.Collection(parser)

    def add_property(name, value):
        # one SGF property with a single value
        parser.start_property(name)
        parser.add_prop_value(value)
        parser.end_property()

    # game properties
    parser.start_gametree()
    parser.start_node()
    add_property('FF', '4')           # SGF format version
    add_property('SZ', '19')          # Board Size = 19
    add_property('KM', str(Komi))     # default Komi = 7.5
    add_property('PB', 'RL-{}'.format(PB))  # Black Player = Supervised Learning / Reinforced Learning
    add_property('PW', 'SL-{}'.format(PW))  # White Player = Supervised Learning / Reinforced Learning
    add_property('DT', datetime.now().strftime("%Y-%m-%d"))  # Game Date
    add_property('RE', str(winner))   # Result = B+, W+, T
    parser.end_node()
    # start of game
    for step in history:
        # Integer division is required: the quotient is used as a string index,
        # and true division would produce a float under Python 3.
        first, second = divmod(step[1], 19)
        parser.start_node()
        add_property(step[0], BOARD_POSITION[first] + BOARD_POSITION[second])
        parser.end_node()
    # end of game
    parser.end_gametree()
    # record the game in SGF
    with open('{}.sgf'.format(filename), "w") as f:
        collection.output(f)