├── .gitignore ├── C0uci.bat ├── LICENSE.txt ├── data └── model │ ├── model_best_config.json │ └── model_best_weight.h5 ├── model.png ├── readme.md ├── requirements.txt └── src └── chess_zero ├── agent ├── api_chess.py ├── model_chess.py └── player_chess.py ├── config.py ├── configs ├── distributed.py ├── mini.py └── normal.py ├── env └── chess_env.py ├── lib ├── data_helper.py ├── logger.py ├── model_helper.py └── tf_util.py ├── manager.py ├── play_game └── uci.py ├── run.py ├── stacktracer.py └── worker ├── evaluate.py ├── optimize.py ├── self_play.py └── sl.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .python-version 3 | *.mdl 4 | *.png 5 | *.pyc 6 | __pycache__ 7 | .DS_Store 8 | *.pkl 9 | *.csv 10 | *.gz 11 | /data/ 12 | .ipynb_checkpoints/ 13 | tmp/ 14 | tmp.* 15 | .env 16 | *.bin 17 | /bin/ 18 | /keys/* 19 | video/* 20 | *.h5 21 | logs/ 22 | *.sh -------------------------------------------------------------------------------- /C0uci.bat: -------------------------------------------------------------------------------- 1 | python src/chess_zero/run.py uci -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 Samuel Gravan (part of the code is due to Ken Morishita) 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | -------------------------------------------------------------------------------- /data/model/model_best_config.json: -------------------------------------------------------------------------------- 1 | {"name": "chess_model", "layers": [{"name": "input_1", "class_name": "InputLayer", "config": {"batch_input_shape": [null, 18, 8, 8], "dtype": "float32", "sparse": false, "name": "input_1"}, "inbound_nodes": []}, {"name": "input_conv-5-256", "class_name": "Conv2D", "config": {"name": "input_conv-5-256", "trainable": true, "filters": 256, "kernel_size": [5, 5], "strides": [1, 1], "padding": "same", "data_format": "channels_first", "dilation_rate": [1, 1], "activation": "linear", "use_bias": false, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": {"class_name": "L1L2", "config": {"l1": 0.0, "l2": 9.999999747378752e-05}}, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["input_1", 0, 0, {}]]]}, {"name": "input_batchnorm", "class_name": "BatchNormalization", "config": {"name": "input_batchnorm", "trainable": true, "axis": 1, "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}}, "gamma_initializer": {"class_name": "Ones", "config": {}}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}}, "moving_variance_initializer": {"class_name": "Ones", "config": {}}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "inbound_nodes": [[["input_conv-5-256", 0, 0, {}]]]}, {"name": "input_relu", "class_name": "Activation", "config": {"name": "input_relu", "trainable": true, "activation": "relu"}, "inbound_nodes": [[["input_batchnorm", 0, 0, {}]]]}, {"name": "res1_conv1-3-256", "class_name": "Conv2D", "config": {"name": "res1_conv1-3-256", "trainable": true, "filters": 256, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_first", "dilation_rate": [1, 1], "activation": "linear", "use_bias": false, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": {"class_name": "L1L2", "config": {"l1": 0.0, "l2": 9.999999747378752e-05}}, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["input_relu", 0, 0, {}]]]}, {"name": "res1_batchnorm1", "class_name": "BatchNormalization", "config": {"name": "res1_batchnorm1", "trainable": true, "axis": 1, "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}}, "gamma_initializer": {"class_name": "Ones", "config": {}}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}}, "moving_variance_initializer": {"class_name": "Ones", "config": {}}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "inbound_nodes": [[["res1_conv1-3-256", 0, 0, {}]]]}, {"name": "res1_relu1", "class_name": "Activation", "config": {"name": "res1_relu1", "trainable": true, "activation": "relu"}, "inbound_nodes": [[["res1_batchnorm1", 0, 0, {}]]]}, {"name": "res1_conv2-3-256", "class_name": "Conv2D", "config": 
{"name": "res1_conv2-3-256", "trainable": true, "filters": 256, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_first", "dilation_rate": [1, 1], "activation": "linear", "use_bias": false, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": {"class_name": "L1L2", "config": {"l1": 0.0, "l2": 9.999999747378752e-05}}, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["res1_relu1", 0, 0, {}]]]}, {"name": "res1_batchnorm2", "class_name": "BatchNormalization", "config": {"name": "res1_batchnorm2", "trainable": true, "axis": 1, "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}}, "gamma_initializer": {"class_name": "Ones", "config": {}}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}}, "moving_variance_initializer": {"class_name": "Ones", "config": {}}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "inbound_nodes": [[["res1_conv2-3-256", 0, 0, {}]]]}, {"name": "res1_add", "class_name": "Add", "config": {"name": "res1_add", "trainable": true}, "inbound_nodes": [[["input_relu", 0, 0, {}], ["res1_batchnorm2", 0, 0, {}]]]}, {"name": "res1_relu2", "class_name": "Activation", "config": {"name": "res1_relu2", "trainable": true, "activation": "relu"}, "inbound_nodes": [[["res1_add", 0, 0, {}]]]}, {"name": "res2_conv1-3-256", "class_name": "Conv2D", "config": {"name": "res2_conv1-3-256", "trainable": true, "filters": 256, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_first", "dilation_rate": [1, 1], "activation": "linear", "use_bias": false, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": {"class_name": "L1L2", "config": {"l1": 0.0, "l2": 9.999999747378752e-05}}, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["res1_relu2", 0, 0, {}]]]}, {"name": "res2_batchnorm1", "class_name": "BatchNormalization", "config": {"name": "res2_batchnorm1", "trainable": true, "axis": 1, "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}}, "gamma_initializer": {"class_name": "Ones", "config": {}}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}}, "moving_variance_initializer": {"class_name": "Ones", "config": {}}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "inbound_nodes": [[["res2_conv1-3-256", 0, 0, {}]]]}, {"name": "res2_relu1", "class_name": "Activation", "config": {"name": "res2_relu1", "trainable": true, "activation": "relu"}, "inbound_nodes": [[["res2_batchnorm1", 0, 0, {}]]]}, {"name": "res2_conv2-3-256", "class_name": "Conv2D", "config": {"name": "res2_conv2-3-256", "trainable": true, "filters": 256, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_first", "dilation_rate": [1, 1], "activation": "linear", "use_bias": false, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": 
"fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": {"class_name": "L1L2", "config": {"l1": 0.0, "l2": 9.999999747378752e-05}}, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["res2_relu1", 0, 0, {}]]]}, {"name": "res2_batchnorm2", "class_name": "BatchNormalization", "config": {"name": "res2_batchnorm2", "trainable": true, "axis": 1, "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}}, "gamma_initializer": {"class_name": "Ones", "config": {}}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}}, "moving_variance_initializer": {"class_name": "Ones", "config": {}}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "inbound_nodes": [[["res2_conv2-3-256", 0, 0, {}]]]}, {"name": "res2_add", "class_name": "Add", "config": {"name": "res2_add", "trainable": true}, "inbound_nodes": [[["res1_relu2", 0, 0, {}], ["res2_batchnorm2", 0, 0, {}]]]}, {"name": "res2_relu2", "class_name": "Activation", "config": {"name": "res2_relu2", "trainable": true, "activation": "relu"}, "inbound_nodes": [[["res2_add", 0, 0, {}]]]}, {"name": "res3_conv1-3-256", "class_name": "Conv2D", "config": {"name": "res3_conv1-3-256", "trainable": true, "filters": 256, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_first", "dilation_rate": [1, 1], "activation": "linear", "use_bias": false, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": {"class_name": "L1L2", "config": {"l1": 0.0, "l2": 9.999999747378752e-05}}, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["res2_relu2", 0, 0, {}]]]}, {"name": "res3_batchnorm1", "class_name": "BatchNormalization", "config": {"name": "res3_batchnorm1", "trainable": true, "axis": 1, "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}}, "gamma_initializer": {"class_name": "Ones", "config": {}}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}}, "moving_variance_initializer": {"class_name": "Ones", "config": {}}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "inbound_nodes": [[["res3_conv1-3-256", 0, 0, {}]]]}, {"name": "res3_relu1", "class_name": "Activation", "config": {"name": "res3_relu1", "trainable": true, "activation": "relu"}, "inbound_nodes": [[["res3_batchnorm1", 0, 0, {}]]]}, {"name": "res3_conv2-3-256", "class_name": "Conv2D", "config": {"name": "res3_conv2-3-256", "trainable": true, "filters": 256, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_first", "dilation_rate": [1, 1], "activation": "linear", "use_bias": false, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": {"class_name": "L1L2", "config": {"l1": 0.0, "l2": 9.999999747378752e-05}}, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": 
null}, "inbound_nodes": [[["res3_relu1", 0, 0, {}]]]}, {"name": "res3_batchnorm2", "class_name": "BatchNormalization", "config": {"name": "res3_batchnorm2", "trainable": true, "axis": 1, "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}}, "gamma_initializer": {"class_name": "Ones", "config": {}}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}}, "moving_variance_initializer": {"class_name": "Ones", "config": {}}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "inbound_nodes": [[["res3_conv2-3-256", 0, 0, {}]]]}, {"name": "res3_add", "class_name": "Add", "config": {"name": "res3_add", "trainable": true}, "inbound_nodes": [[["res2_relu2", 0, 0, {}], ["res3_batchnorm2", 0, 0, {}]]]}, {"name": "res3_relu2", "class_name": "Activation", "config": {"name": "res3_relu2", "trainable": true, "activation": "relu"}, "inbound_nodes": [[["res3_add", 0, 0, {}]]]}, {"name": "res4_conv1-3-256", "class_name": "Conv2D", "config": {"name": "res4_conv1-3-256", "trainable": true, "filters": 256, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_first", "dilation_rate": [1, 1], "activation": "linear", "use_bias": false, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": {"class_name": "L1L2", "config": {"l1": 0.0, "l2": 9.999999747378752e-05}}, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["res3_relu2", 0, 0, {}]]]}, {"name": "res4_batchnorm1", "class_name": "BatchNormalization", "config": {"name": "res4_batchnorm1", "trainable": true, "axis": 1, "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}}, "gamma_initializer": {"class_name": "Ones", "config": {}}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}}, "moving_variance_initializer": {"class_name": "Ones", "config": {}}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "inbound_nodes": [[["res4_conv1-3-256", 0, 0, {}]]]}, {"name": "res4_relu1", "class_name": "Activation", "config": {"name": "res4_relu1", "trainable": true, "activation": "relu"}, "inbound_nodes": [[["res4_batchnorm1", 0, 0, {}]]]}, {"name": "res4_conv2-3-256", "class_name": "Conv2D", "config": {"name": "res4_conv2-3-256", "trainable": true, "filters": 256, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_first", "dilation_rate": [1, 1], "activation": "linear", "use_bias": false, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": {"class_name": "L1L2", "config": {"l1": 0.0, "l2": 9.999999747378752e-05}}, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["res4_relu1", 0, 0, {}]]]}, {"name": "res4_batchnorm2", "class_name": "BatchNormalization", "config": {"name": "res4_batchnorm2", "trainable": true, "axis": 1, "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}}, 
"gamma_initializer": {"class_name": "Ones", "config": {}}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}}, "moving_variance_initializer": {"class_name": "Ones", "config": {}}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "inbound_nodes": [[["res4_conv2-3-256", 0, 0, {}]]]}, {"name": "res4_add", "class_name": "Add", "config": {"name": "res4_add", "trainable": true}, "inbound_nodes": [[["res3_relu2", 0, 0, {}], ["res4_batchnorm2", 0, 0, {}]]]}, {"name": "res4_relu2", "class_name": "Activation", "config": {"name": "res4_relu2", "trainable": true, "activation": "relu"}, "inbound_nodes": [[["res4_add", 0, 0, {}]]]}, {"name": "res5_conv1-3-256", "class_name": "Conv2D", "config": {"name": "res5_conv1-3-256", "trainable": true, "filters": 256, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_first", "dilation_rate": [1, 1], "activation": "linear", "use_bias": false, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": {"class_name": "L1L2", "config": {"l1": 0.0, "l2": 9.999999747378752e-05}}, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["res4_relu2", 0, 0, {}]]]}, {"name": "res5_batchnorm1", "class_name": "BatchNormalization", "config": {"name": "res5_batchnorm1", "trainable": true, "axis": 1, "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}}, "gamma_initializer": {"class_name": "Ones", "config": {}}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}}, "moving_variance_initializer": {"class_name": "Ones", "config": {}}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "inbound_nodes": [[["res5_conv1-3-256", 0, 0, {}]]]}, {"name": "res5_relu1", "class_name": "Activation", "config": {"name": "res5_relu1", "trainable": true, "activation": "relu"}, "inbound_nodes": [[["res5_batchnorm1", 0, 0, {}]]]}, {"name": "res5_conv2-3-256", "class_name": "Conv2D", "config": {"name": "res5_conv2-3-256", "trainable": true, "filters": 256, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_first", "dilation_rate": [1, 1], "activation": "linear", "use_bias": false, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": {"class_name": "L1L2", "config": {"l1": 0.0, "l2": 9.999999747378752e-05}}, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["res5_relu1", 0, 0, {}]]]}, {"name": "res5_batchnorm2", "class_name": "BatchNormalization", "config": {"name": "res5_batchnorm2", "trainable": true, "axis": 1, "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}}, "gamma_initializer": {"class_name": "Ones", "config": {}}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}}, "moving_variance_initializer": {"class_name": "Ones", "config": {}}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "inbound_nodes": 
[[["res5_conv2-3-256", 0, 0, {}]]]}, {"name": "res5_add", "class_name": "Add", "config": {"name": "res5_add", "trainable": true}, "inbound_nodes": [[["res4_relu2", 0, 0, {}], ["res5_batchnorm2", 0, 0, {}]]]}, {"name": "res5_relu2", "class_name": "Activation", "config": {"name": "res5_relu2", "trainable": true, "activation": "relu"}, "inbound_nodes": [[["res5_add", 0, 0, {}]]]}, {"name": "res6_conv1-3-256", "class_name": "Conv2D", "config": {"name": "res6_conv1-3-256", "trainable": true, "filters": 256, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_first", "dilation_rate": [1, 1], "activation": "linear", "use_bias": false, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": {"class_name": "L1L2", "config": {"l1": 0.0, "l2": 9.999999747378752e-05}}, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["res5_relu2", 0, 0, {}]]]}, {"name": "res6_batchnorm1", "class_name": "BatchNormalization", "config": {"name": "res6_batchnorm1", "trainable": true, "axis": 1, "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}}, "gamma_initializer": {"class_name": "Ones", "config": {}}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}}, "moving_variance_initializer": {"class_name": "Ones", "config": {}}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "inbound_nodes": [[["res6_conv1-3-256", 0, 0, {}]]]}, {"name": "res6_relu1", "class_name": "Activation", "config": {"name": "res6_relu1", "trainable": true, "activation": "relu"}, "inbound_nodes": [[["res6_batchnorm1", 0, 0, {}]]]}, {"name": "res6_conv2-3-256", "class_name": "Conv2D", "config": {"name": "res6_conv2-3-256", "trainable": true, "filters": 256, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_first", "dilation_rate": [1, 1], "activation": "linear", "use_bias": false, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": {"class_name": "L1L2", "config": {"l1": 0.0, "l2": 9.999999747378752e-05}}, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["res6_relu1", 0, 0, {}]]]}, {"name": "res6_batchnorm2", "class_name": "BatchNormalization", "config": {"name": "res6_batchnorm2", "trainable": true, "axis": 1, "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}}, "gamma_initializer": {"class_name": "Ones", "config": {}}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}}, "moving_variance_initializer": {"class_name": "Ones", "config": {}}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "inbound_nodes": [[["res6_conv2-3-256", 0, 0, {}]]]}, {"name": "res6_add", "class_name": "Add", "config": {"name": "res6_add", "trainable": true}, "inbound_nodes": [[["res5_relu2", 0, 0, {}], ["res6_batchnorm2", 0, 0, {}]]]}, {"name": "res6_relu2", "class_name": "Activation", "config": {"name": "res6_relu2", "trainable": true, 
"activation": "relu"}, "inbound_nodes": [[["res6_add", 0, 0, {}]]]}, {"name": "res7_conv1-3-256", "class_name": "Conv2D", "config": {"name": "res7_conv1-3-256", "trainable": true, "filters": 256, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_first", "dilation_rate": [1, 1], "activation": "linear", "use_bias": false, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": {"class_name": "L1L2", "config": {"l1": 0.0, "l2": 9.999999747378752e-05}}, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["res6_relu2", 0, 0, {}]]]}, {"name": "res7_batchnorm1", "class_name": "BatchNormalization", "config": {"name": "res7_batchnorm1", "trainable": true, "axis": 1, "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}}, "gamma_initializer": {"class_name": "Ones", "config": {}}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}}, "moving_variance_initializer": {"class_name": "Ones", "config": {}}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "inbound_nodes": [[["res7_conv1-3-256", 0, 0, {}]]]}, {"name": "res7_relu1", "class_name": "Activation", "config": {"name": "res7_relu1", "trainable": true, "activation": "relu"}, "inbound_nodes": [[["res7_batchnorm1", 0, 0, {}]]]}, {"name": "res7_conv2-3-256", "class_name": "Conv2D", "config": {"name": "res7_conv2-3-256", "trainable": true, "filters": 256, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_first", "dilation_rate": [1, 1], "activation": "linear", "use_bias": false, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": {"class_name": "L1L2", "config": {"l1": 0.0, "l2": 9.999999747378752e-05}}, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["res7_relu1", 0, 0, {}]]]}, {"name": "res7_batchnorm2", "class_name": "BatchNormalization", "config": {"name": "res7_batchnorm2", "trainable": true, "axis": 1, "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}}, "gamma_initializer": {"class_name": "Ones", "config": {}}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}}, "moving_variance_initializer": {"class_name": "Ones", "config": {}}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "inbound_nodes": [[["res7_conv2-3-256", 0, 0, {}]]]}, {"name": "res7_add", "class_name": "Add", "config": {"name": "res7_add", "trainable": true}, "inbound_nodes": [[["res6_relu2", 0, 0, {}], ["res7_batchnorm2", 0, 0, {}]]]}, {"name": "res7_relu2", "class_name": "Activation", "config": {"name": "res7_relu2", "trainable": true, "activation": "relu"}, "inbound_nodes": [[["res7_add", 0, 0, {}]]]}, {"name": "value_conv-1-4", "class_name": "Conv2D", "config": {"name": "value_conv-1-4", "trainable": true, "filters": 4, "kernel_size": [1, 1], "strides": [1, 1], "padding": "valid", "data_format": "channels_first", "dilation_rate": [1, 1], 
"activation": "linear", "use_bias": false, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": {"class_name": "L1L2", "config": {"l1": 0.0, "l2": 9.999999747378752e-05}}, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["res7_relu2", 0, 0, {}]]]}, {"name": "policy_conv-1-2", "class_name": "Conv2D", "config": {"name": "policy_conv-1-2", "trainable": true, "filters": 2, "kernel_size": [1, 1], "strides": [1, 1], "padding": "valid", "data_format": "channels_first", "dilation_rate": [1, 1], "activation": "linear", "use_bias": false, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": {"class_name": "L1L2", "config": {"l1": 0.0, "l2": 9.999999747378752e-05}}, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["res7_relu2", 0, 0, {}]]]}, {"name": "value_batchnorm", "class_name": "BatchNormalization", "config": {"name": "value_batchnorm", "trainable": true, "axis": 1, "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}}, "gamma_initializer": {"class_name": "Ones", "config": {}}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}}, "moving_variance_initializer": {"class_name": "Ones", "config": {}}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "inbound_nodes": [[["value_conv-1-4", 0, 0, {}]]]}, {"name": "policy_batchnorm", "class_name": "BatchNormalization", "config": {"name": "policy_batchnorm", "trainable": true, "axis": 1, "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}}, "gamma_initializer": {"class_name": "Ones", "config": {}}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}}, "moving_variance_initializer": {"class_name": "Ones", "config": {}}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "inbound_nodes": [[["policy_conv-1-2", 0, 0, {}]]]}, {"name": "value_relu", "class_name": "Activation", "config": {"name": "value_relu", "trainable": true, "activation": "relu"}, "inbound_nodes": [[["value_batchnorm", 0, 0, {}]]]}, {"name": "policy_relu", "class_name": "Activation", "config": {"name": "policy_relu", "trainable": true, "activation": "relu"}, "inbound_nodes": [[["policy_batchnorm", 0, 0, {}]]]}, {"name": "value_flatten", "class_name": "Flatten", "config": {"name": "value_flatten", "trainable": true}, "inbound_nodes": [[["value_relu", 0, 0, {}]]]}, {"name": "policy_flatten", "class_name": "Flatten", "config": {"name": "policy_flatten", "trainable": true}, "inbound_nodes": [[["policy_relu", 0, 0, {}]]]}, {"name": "value_dense", "class_name": "Dense", "config": {"name": "value_dense", "trainable": true, "units": 256, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": {"class_name": "L1L2", "config": 
{"l1": 0.0, "l2": 9.999999747378752e-05}}, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["value_flatten", 0, 0, {}]]]}, {"name": "policy_out", "class_name": "Dense", "config": {"name": "policy_out", "trainable": true, "units": 1968, "activation": "softmax", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": {"class_name": "L1L2", "config": {"l1": 0.0, "l2": 9.999999747378752e-05}}, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["policy_flatten", 0, 0, {}]]]}, {"name": "value_out", "class_name": "Dense", "config": {"name": "value_out", "trainable": true, "units": 1, "activation": "tanh", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": {"class_name": "L1L2", "config": {"l1": 0.0, "l2": 9.999999747378752e-05}}, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["value_dense", 0, 0, {}]]]}], "input_layers": [["input_1", 0, 0]], "output_layers": [["policy_out", 0, 0], ["value_out", 0, 0]]} -------------------------------------------------------------------------------- /data/model/model_best_weight.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Akababa/Chess-Zero/90a5aad05656131506239388557b9f60d16235a3/data/model/model_best_weight.h5 -------------------------------------------------------------------------------- /model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Akababa/Chess-Zero/90a5aad05656131506239388557b9f60d16235a3/model.png -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | About 2 | ===== 3 | 4 | Chess reinforcement learning by [AlphaGo Zero](https://deepmind.com/blog/alphago-zero-learning-scratch/) methods. 5 | 6 | This project is based on these main resources: 7 | 1) DeepMind's Oct 19th publication: [Mastering the Game of Go without Human Knowledge](https://www.nature.com/articles/nature24270.epdf?author_access_token=VJXbVjaSHxFoctQQ4p2k4tRgN0jAjWel9jnR3ZoTv0PVW4gB86EEpGqTRDtpIz-2rmo8-KG06gqVobU5NSCFeHILHcVFUeMsbvwS-lxjqQGg98faovwjxeTUgZAUMnRQ). 8 | 2) The great Reversi development of the DeepMind ideas that @mokemokechicken did in his repo: https://github.com/mokemokechicken/reversi-alpha-zero 9 | 3) DeepMind just released a new version of AlphaGo Zero (named now AlphaZero) where they master chess from scratch: 10 | https://arxiv.org/pdf/1712.01815.pdf. In fact, in chess AlphaZero outperformed Stockfish after just 4 hours (300k steps) Wow! 11 | 12 | See the [wiki](https://github.com/Akababa/Chess-Zero/wiki) for more details. 13 | 14 | Note: This project is still under construction!! 
15 | 
16 | Environment
17 | -----------
18 | 
19 | * Python 3.6.3
20 | * tensorflow-gpu: 1.3.0
21 | * Keras: 2.0.8
22 | 
23 | ### Results so far
24 | 
25 | Using supervised learning on about 10k games, I trained a model (7 residual blocks of 256 filters) to a guesstimate of 1200 elo with 1200 sims/move. One of the strengths of MCTS is that it scales quite well with computing power.
26 | 
27 | Here you can see an example of a game I (white, ~2000 elo) played against the model in this repo (black):
28 | 
29 | ![img](https://user-images.githubusercontent.com/4205182/34323276-ecd2a7b6-e806-11e7-856a-4e2394bd75df.gif)
30 | 
31 | Modules
32 | -------
33 | 
34 | ### Supervised Learning
35 | 
36 | I've added a new supervised learning (SL) step to the pipeline, which uses human game files ("PGN" files found on the internet) as a play-data generator.
37 | This SL step was also used in the first, original version of AlphaGo; chess may be a complex enough game that the policy model has to be pre-trained before starting the self-play process (i.e., chess may be too complicated to learn from self-play alone).
38 | 
39 | Using the new SL process is as simple as starting with the new worker "sl" instead of the worker "self".
40 | Once the model has converged enough on the SL play-data, we just stop the worker "sl" and start the worker "self", so the model keeps improving from self-play data.
41 | 
42 | ```bash
43 | python src/chess_zero/run.py sl
44 | ```
45 | If you want to use this new SL step, you will have to download big PGN files (chess game collections) and place them in the `data/play_data` folder ([FICS](http://ficsgames.org/download.html) is a good source of data). You can also use the [SCID program](http://scid.sourceforge.net/) to filter by headers like player ELO, game result and more.
46 | 
47 | **To avoid overfitting, I recommend using data sets of at least 3000 games and running at most 3-4 epochs.**
48 | 
49 | ### Reinforcement Learning
50 | 
51 | This AlphaGo Zero implementation consists of three workers: `self`, `opt` and `eval`.
52 | 
53 | * `self` is the Self-Play worker: it generates training data by self-play using BestModel.
54 | * `opt` is the Trainer worker: it trains the model and produces next-generation models.
55 | * `eval` is the Evaluator worker: it checks whether the latest next-generation model is better than BestModel and, if so, replaces BestModel.
56 | 
57 | 
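Conceptually, these three workers form one closed improvement loop. The sketch below is illustrative pseudocode only, not this project's API (the helper names are hypothetical); the 0.55 replacement threshold mirrors the `replace_rate` used in the evaluator config.

```python
# Illustrative sketch of the self -> opt -> eval loop (hypothetical helper names, not project code).
def rl_loop(best_model, self_play, train, win_rate, replace_rate=0.55, generations=10):
    for _ in range(generations):
        games = self_play(best_model)             # "self": generate play data with BestModel
        candidate = train(best_model, games)      # "opt": train a next-generation model
        if win_rate(candidate, best_model) >= replace_rate:
            best_model = candidate                # "eval": promote the challenger to BestModel
    return best_model
```
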
58 | ### Distributed Training
59 | 
60 | The model can now be trained in a distributed way. The only thing needed is to use the new parameter:
61 | 
62 | * `--type distributed`: use the distributed config (see `src/chess_zero/configs/distributed.py`)
63 | 
64 | So, in order to contribute to the distributed team, you just need to run the three workers locally like this:
65 | 
66 | ```bash
67 | python src/chess_zero/run.py self --type distributed (or python src/chess_zero/run.py sl --type distributed)
68 | python src/chess_zero/run.py opt --type distributed
69 | python src/chess_zero/run.py eval --type distributed
70 | ```
71 | 
72 | ### GUI
73 | * `uci` launches the Universal Chess Interface, for use in a GUI.
74 | 
75 | To set up ChessZero with a GUI, point it to `C0uci.bat` (or rename it to .sh).
76 | For example, this is a screenshot of the random model playing via Arena's self-play feature:
77 | ![capture](https://user-images.githubusercontent.com/4205182/34057277-e9c99118-e19b-11e7-91ee-dd717f7efe9d.PNG)
78 | 
79 | Data
80 | -----
81 | 
82 | * `data/model/model_best_*`: BestModel.
83 | * `data/model/next_generation/*`: next-generation models.
84 | * `data/play_data/play_*.json`: generated training data.
85 | * `logs/main.log`: log file.
86 | 
87 | If you want to train the model from the beginning, delete the above directories.
88 | 
89 | How to use
90 | ==========
91 | 
92 | Setup
93 | -------
94 | ### Install libraries
95 | ```bash
96 | pip install -r requirements.txt
97 | ```
98 | 
99 | If you want to use a GPU:
100 | 
101 | ```bash
102 | pip install tensorflow-gpu
103 | ```
104 | 
105 | Make sure Keras is using TensorFlow and that you have Python 3.6.3+.
106 | 
107 | 
108 | Basic Usage
109 | ------------
110 | 
111 | To train a model, run `Self-Play`, `Trainer` and `Evaluator`.
112 | 
113 | 
114 | Self-Play
115 | --------
116 | 
117 | ```bash
118 | python src/chess_zero/run.py self
119 | ```
120 | 
121 | When executed, Self-Play starts using BestModel.
122 | If BestModel does not exist, a new random model is created and becomes BestModel.
123 | 
124 | ### options
125 | * `--new`: create a new BestModel
126 | * `--type mini`: use the mini config for testing (see `src/chess_zero/configs/mini.py`)
127 | 
128 | Trainer
129 | -------
130 | 
131 | ```bash
132 | python src/chess_zero/run.py opt
133 | ```
134 | 
135 | When executed, training starts.
136 | The base model is loaded from the latest saved next-generation model; if none exists, BestModel is used.
137 | The trained model is saved every epoch.
138 | 
139 | ### options
140 | * `--type mini`: use the mini config for testing (see `src/chess_zero/configs/mini.py`)
141 | * `--total-step`: specify the total number of steps (mini-batches); the total step count affects the learning rate used for training.
142 | 
143 | Evaluator
144 | ---------
145 | 
146 | ```bash
147 | python src/chess_zero/run.py eval
148 | ```
149 | 
150 | When executed, evaluation starts.
151 | It compares BestModel and the latest next-generation model by playing about 200 games.
152 | If the next-generation model wins, it becomes BestModel.
153 | 
154 | ### options
155 | * `--type mini`: use the mini config for testing (see `src/chess_zero/configs/mini.py`)
156 | 
157 | 
158 | Tips and Memory
159 | ====
160 | 
161 | GPU Memory
162 | ----------
163 | 
164 | Usually a lack of memory causes warnings, not errors.
165 | If an error occurs, try changing `vram_frac` in `src/chess_zero/configs/mini.py`:
166 | 
167 | ```python
168 | self.vram_frac = 1.0
169 | ```
170 | 
171 | A smaller batch_size will reduce the memory usage of `opt`.
172 | Try changing `TrainerConfig#batch_size` in `MiniConfig`.
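
For reference, a fraction like `vram_frac` is usually applied by capping TensorFlow's per-process GPU memory before Keras creates its session. The snippet below is a generic TensorFlow 1.x sketch and may differ from what `src/chess_zero/lib/tf_util.py` actually does.

```python
# Generic TensorFlow 1.x / Keras sketch for capping GPU memory (may differ from lib/tf_util.py).
import tensorflow as tf
from keras import backend as K

def limit_gpu_memory(vram_frac=1.0):
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = vram_frac  # share of total VRAM TensorFlow may allocate
    K.set_session(tf.Session(config=config))
```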
173 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow-gpu 2 | keras 3 | profilehooks 4 | numpy 5 | pyperclip 6 | python-chess 7 | ujson 8 | h5py -------------------------------------------------------------------------------- /src/chess_zero/agent/api_chess.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import connection, Pipe 2 | import time 3 | from threading import Thread 4 | 5 | import numpy as np 6 | 7 | from chess_zero.config import Config 8 | 9 | 10 | class ChessModelAPI: 11 | # noinspection PyUnusedLocal 12 | def __init__(self, config: Config, agent_model): # ChessModel 13 | self.agent_model = agent_model 14 | self.pipes = [] 15 | 16 | def start(self): 17 | prediction_worker = Thread(target=self.predict_batch_worker, name="prediction_worker") 18 | prediction_worker.daemon = True 19 | prediction_worker.start() 20 | 21 | def get_pipe(self): 22 | me, you = Pipe() 23 | self.pipes.append(me) 24 | return you 25 | 26 | def predict_batch_worker(self): 27 | while True: 28 | ready = connection.wait(self.pipes,timeout=0.001) 29 | if not ready: 30 | continue 31 | data, result_pipes = [], [] 32 | for pipe in ready: 33 | while pipe.poll(): 34 | data.append(pipe.recv()) 35 | result_pipes.append(pipe) 36 | # print(f"predicting {len(result_pipes)} items") 37 | data = np.asarray(data, dtype=np.float32) 38 | policy_ary, value_ary = self.agent_model.model.predict_on_batch(data) 39 | for pipe, p, v in zip(result_pipes, policy_ary, value_ary): 40 | pipe.send((p, float(v))) 41 | -------------------------------------------------------------------------------- /src/chess_zero/agent/model_chess.py: -------------------------------------------------------------------------------- 1 | import ftplib 2 | import hashlib 3 | import json 4 | import os 5 | from logging import getLogger 6 | 7 | from keras.engine.topology import Input 8 | from keras.engine.training import Model 9 | from keras.layers.convolutional import Conv2D 10 | from keras.layers.core import Activation, Dense, Flatten 11 | from keras.layers.merge import Add 12 | from keras.layers.normalization import BatchNormalization 13 | from keras.regularizers import l2 14 | 15 | from chess_zero.agent.api_chess import ChessModelAPI 16 | from chess_zero.config import Config 17 | 18 | # noinspection PyPep8Naming 19 | 20 | logger = getLogger(__name__) 21 | 22 | 23 | class ChessModel: 24 | def __init__(self, config: Config): 25 | self.config = config 26 | self.model = None # type: Model 27 | self.digest = None 28 | self.api = None 29 | 30 | def get_pipes(self, num = 1): 31 | if self.api is None: 32 | self.api = ChessModelAPI(self.config, self) 33 | self.api.start() 34 | return [self.api.get_pipe() for _ in range(num)] 35 | 36 | def build(self): 37 | mc = self.config.model 38 | in_x = x = Input((18, 8, 8)) 39 | 40 | # (batch, channels, height, width) 41 | x = Conv2D(filters=mc.cnn_filter_num, kernel_size=mc.cnn_first_filter_size, padding="same", 42 | data_format="channels_first", use_bias=False, kernel_regularizer=l2(mc.l2_reg), 43 | name="input_conv-"+str(mc.cnn_first_filter_size)+"-"+str(mc.cnn_filter_num))(x) 44 | x = BatchNormalization(axis=1, name="input_batchnorm")(x) 45 | x = Activation("relu", name="input_relu")(x) 46 | 47 | for i in range(mc.res_layer_num): 48 | x = self._build_residual_block(x, i + 1) 49 | 50 | res_out = x 51 | 52 | # for policy 
output 53 | x = Conv2D(filters=2, kernel_size=1, data_format="channels_first", use_bias=False, kernel_regularizer=l2(mc.l2_reg), 54 | name="policy_conv-1-2")(res_out) 55 | x = BatchNormalization(axis=1, name="policy_batchnorm")(x) 56 | x = Activation("relu", name="policy_relu")(x) 57 | x = Flatten(name="policy_flatten")(x) 58 | # no output for 'pass' 59 | policy_out = Dense(self.config.n_labels, kernel_regularizer=l2(mc.l2_reg), activation="softmax", name="policy_out")(x) 60 | 61 | 62 | # for value output 63 | x = Conv2D(filters=4, kernel_size=1, data_format="channels_first", use_bias=False, kernel_regularizer=l2(mc.l2_reg), 64 | name="value_conv-1-4")(res_out) 65 | x = BatchNormalization(axis=1, name="value_batchnorm")(x) 66 | x = Activation("relu",name="value_relu")(x) 67 | x = Flatten(name="value_flatten")(x) 68 | x = Dense(mc.value_fc_size, kernel_regularizer=l2(mc.l2_reg), activation="relu", name="value_dense")(x) 69 | value_out = Dense(1, kernel_regularizer=l2(mc.l2_reg), activation="tanh", name="value_out")(x) 70 | 71 | self.model = Model(in_x, [policy_out, value_out], name="chess_model") 72 | 73 | def _build_residual_block(self, x, index): 74 | mc = self.config.model 75 | in_x = x 76 | res_name = "res"+str(index) 77 | x = Conv2D(filters=mc.cnn_filter_num, kernel_size=mc.cnn_filter_size, padding="same", 78 | data_format="channels_first", use_bias=False, kernel_regularizer=l2(mc.l2_reg), 79 | name=res_name+"_conv1-"+str(mc.cnn_filter_size)+"-"+str(mc.cnn_filter_num))(x) 80 | x = BatchNormalization(axis=1, name=res_name+"_batchnorm1")(x) 81 | x = Activation("relu",name=res_name+"_relu1")(x) 82 | x = Conv2D(filters=mc.cnn_filter_num, kernel_size=mc.cnn_filter_size, padding="same", 83 | data_format="channels_first", use_bias=False, kernel_regularizer=l2(mc.l2_reg), 84 | name=res_name+"_conv2-"+str(mc.cnn_filter_size)+"-"+str(mc.cnn_filter_num))(x) 85 | x = BatchNormalization(axis=1, name="res"+str(index)+"_batchnorm2")(x) 86 | x = Add(name=res_name+"_add")([in_x, x]) 87 | x = Activation("relu", name=res_name+"_relu2")(x) 88 | return x 89 | 90 | @staticmethod 91 | def fetch_digest(weight_path): 92 | if os.path.exists(weight_path): 93 | m = hashlib.sha256() 94 | with open(weight_path, "rb") as f: 95 | m.update(f.read()) 96 | return m.hexdigest() 97 | 98 | def load(self, config_path, weight_path): 99 | mc = self.config.model 100 | resources = self.config.resource 101 | if mc.distributed and config_path == resources.model_best_config_path: 102 | try: 103 | logger.debug("loading model from server") 104 | ftp_connection = ftplib.FTP(resources.model_best_distributed_ftp_server, 105 | resources.model_best_distributed_ftp_user, 106 | resources.model_best_distributed_ftp_password) 107 | ftp_connection.cwd(resources.model_best_distributed_ftp_remote_path) 108 | ftp_connection.retrbinary("RETR model_best_config.json", open(config_path, 'wb').write) 109 | ftp_connection.retrbinary("RETR model_best_weight.h5", open(weight_path, 'wb').write) 110 | ftp_connection.quit() 111 | except: 112 | pass 113 | if os.path.exists(config_path) and os.path.exists(weight_path): 114 | logger.debug(f"loading model from {config_path}") 115 | with open(config_path, "rt") as f: 116 | self.model = Model.from_config(json.load(f)) 117 | self.model.load_weights(weight_path) 118 | self.model._make_predict_function() 119 | self.digest = self.fetch_digest(weight_path) 120 | logger.debug(f"loaded model digest = {self.digest}") 121 | #print(self.model.summary) 122 | return True 123 | else: 124 | logger.debug(f"model files does not 
exist at {config_path} and {weight_path}") 125 | return False 126 | 127 | def save(self, config_path, weight_path): 128 | logger.debug(f"save model to {config_path}") 129 | with open(config_path, "wt") as f: 130 | json.dump(self.model.get_config(), f) 131 | self.model.save_weights(weight_path) 132 | self.digest = self.fetch_digest(weight_path) 133 | logger.debug(f"saved model digest {self.digest}") 134 | 135 | mc = self.config.model 136 | resources = self.config.resource 137 | if mc.distributed and config_path == resources.model_best_config_path: 138 | try: 139 | logger.debug("saving model to server") 140 | ftp_connection = ftplib.FTP(resources.model_best_distributed_ftp_server, 141 | resources.model_best_distributed_ftp_user, 142 | resources.model_best_distributed_ftp_password) 143 | ftp_connection.cwd(resources.model_best_distributed_ftp_remote_path) 144 | fh = open(config_path, 'rb') 145 | ftp_connection.storbinary('STOR model_best_config.json', fh) 146 | fh.close() 147 | 148 | fh = open(weight_path, 'rb') 149 | ftp_connection.storbinary('STOR model_best_weight.h5', fh) 150 | fh.close() 151 | ftp_connection.quit() 152 | except: 153 | pass 154 | -------------------------------------------------------------------------------- /src/chess_zero/agent/player_chess.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from concurrent.futures import ThreadPoolExecutor 3 | from logging import getLogger 4 | from threading import Lock 5 | 6 | import chess 7 | import numpy as np 8 | 9 | from chess_zero.config import Config 10 | from chess_zero.env.chess_env import ChessEnv, Winner 11 | 12 | #from chess_zero.play_game.uci import info 13 | 14 | logger = getLogger(__name__) 15 | 16 | # these are from AGZ nature paper 17 | class VisitStats: 18 | def __init__(self): 19 | self.a = defaultdict(ActionStats) 20 | self.sum_n = 0 21 | 22 | class ActionStats: 23 | def __init__(self): 24 | self.n = 0 25 | self.w = 0 26 | self.q = 0 27 | 28 | class ChessPlayer: 29 | # dot = False 30 | def __init__(self, config: Config, pipes=None, play_config=None, dummy=False): 31 | self.moves = [] 32 | 33 | self.config = config 34 | self.play_config = play_config or self.config.play 35 | self.labels_n = config.n_labels 36 | self.labels = config.labels 37 | self.move_lookup = {chess.Move.from_uci(move): i for move, i in zip(self.labels, range(self.labels_n))} 38 | if dummy: 39 | return 40 | 41 | self.pipe_pool = pipes 42 | self.node_lock = defaultdict(Lock) 43 | 44 | def reset(self): 45 | self.tree = defaultdict(VisitStats) 46 | 47 | def deboog(self, env): 48 | print(env.testeval()) 49 | 50 | state = state_key(env) 51 | my_visit_stats = self.tree[state] 52 | stats = [] 53 | for action, a_s in my_visit_stats.a.items(): 54 | moi = self.move_lookup[action] 55 | stats.append(np.asarray([a_s.n, a_s.w, a_s.q, a_s.p, moi])) 56 | stats = np.asarray(stats) 57 | a = stats[stats[:,0].argsort()[::-1]] 58 | 59 | for s in a: 60 | print(f'{self.labels[int(s[4])]:5}: ' 61 | f'n: {s[0]:3.0f} ' 62 | f'w: {s[1]:7.3f} ' 63 | f'q: {s[2]:7.3f} ' 64 | f'p: {s[3]:7.5f}') 65 | 66 | def action(self, env, can_stop = True) -> str: 67 | self.reset() 68 | 69 | # for tl in range(self.play_config.thinking_loop): 70 | root_value, naked_value = self.search_moves(env) 71 | policy = self.calc_policy(env) 72 | my_action = int(np.random.choice(range(self.labels_n), p = self.apply_temperature(policy, env.num_halfmoves))) 73 | #print(naked_value) 74 | #self.deboog(env) 75 | if can_stop and 
self.play_config.resign_threshold is not None and \ 76 | root_value <= self.play_config.resign_threshold \ 77 | and env.num_halfmoves > self.play_config.min_resign_turn: 78 | # noinspection PyTypeChecker 79 | return None 80 | else: 81 | self.moves.append([env.observation, list(policy)]) 82 | return self.config.labels[my_action] 83 | 84 | def search_moves(self, env) -> (float, float): 85 | # if ChessPlayer.dot == False: 86 | # import stacktracer 87 | # stacktracer.trace_start("trace.html") 88 | # ChessPlayer.dot = True 89 | 90 | futures = [] 91 | with ThreadPoolExecutor(max_workers=self.play_config.search_threads) as executor: 92 | for _ in range(self.play_config.simulation_num_per_move): 93 | futures.append(executor.submit(self.search_my_move,env=env.copy(),is_root_node=True)) 94 | 95 | vals = [f.result() for f in futures] 96 | #vals=[self.search_my_move(env.copy(),True) for _ in range(self.play_config.simulation_num_per_move)] 97 | 98 | return np.max(vals), vals[0] # vals[0] is kind of racy 99 | 100 | def search_my_move(self, env: ChessEnv, is_root_node=False) -> float: 101 | """ 102 | Q, V is value for this Player(always white). 103 | P is value for the player of next_player (black or white) 104 | :return: leaf value 105 | """ 106 | if env.done: 107 | if env.winner == Winner.draw: 108 | return 0 109 | # assert env.whitewon != env.white_to_move # side to move can't be winner! 110 | return -1 111 | 112 | state = state_key(env) 113 | 114 | with self.node_lock[state]: 115 | if state not in self.tree: 116 | leaf_p, leaf_v = self.expand_and_evaluate(env) 117 | self.tree[state].p = leaf_p 118 | return leaf_v # I'm returning everything from the POV of side to move 119 | #assert state in self.tree 120 | 121 | # SELECT STEP 122 | action_t = self.select_action_q_and_u(env, is_root_node) 123 | 124 | virtual_loss = self.play_config.virtual_loss 125 | 126 | my_visit_stats = self.tree[state] 127 | my_stats = my_visit_stats.a[action_t] 128 | 129 | my_visit_stats.sum_n += virtual_loss 130 | my_stats.n += virtual_loss 131 | my_stats.w += -virtual_loss 132 | my_stats.q = my_stats.w / my_stats.n 133 | 134 | env.step(action_t.uci()) 135 | leaf_v = self.search_my_move(env) # next move from enemy POV 136 | leaf_v = -leaf_v 137 | 138 | # BACKUP STEP 139 | # on returning search path 140 | # update: N, W, Q 141 | with self.node_lock[state]: 142 | my_visit_stats.sum_n += -virtual_loss + 1 143 | my_stats.n += -virtual_loss + 1 144 | my_stats.w += virtual_loss + leaf_v 145 | my_stats.q = my_stats.w / my_stats.n 146 | 147 | return leaf_v 148 | 149 | def expand_and_evaluate(self, env) -> (np.ndarray, float): 150 | """ expand new leaf, this is called only once per state 151 | this is called with state locked 152 | insert P(a|s), return leaf_v 153 | """ 154 | state_planes = env.canonical_input_planes() 155 | 156 | leaf_p, leaf_v = self.predict(state_planes) 157 | # these are canonical policy and value (i.e. 
side to move is "white") 158 | 159 | if not env.white_to_move: 160 | leaf_p = Config.flip_policy(leaf_p) # get it back to python-chess form 161 | #np.testing.assert_array_equal(Config.flip_policy(Config.flip_policy(leaf_p)), leaf_p) 162 | 163 | return leaf_p, leaf_v 164 | 165 | def predict(self, state_planes): 166 | pipe = self.pipe_pool.pop() 167 | pipe.send(state_planes) 168 | ret = pipe.recv() 169 | self.pipe_pool.append(pipe) 170 | return ret 171 | 172 | #@profile 173 | def select_action_q_and_u(self, env, is_root_node) -> chess.Move: 174 | # this method is called with state locked 175 | state = state_key(env) 176 | 177 | my_visitstats = self.tree[state] 178 | 179 | if my_visitstats.p is not None: #push p to edges 180 | tot_p = 1e-8 181 | for mov in env.board.legal_moves: 182 | mov_p = my_visitstats.p[self.move_lookup[mov]] 183 | my_visitstats.a[mov].p = mov_p 184 | tot_p += mov_p 185 | for a_s in my_visitstats.a.values(): 186 | a_s.p /= tot_p 187 | my_visitstats.p = None 188 | 189 | xx_ = np.sqrt(my_visitstats.sum_n + 1) # sqrt of sum(N(s, b); for all b) 190 | 191 | e = self.play_config.noise_eps 192 | c_puct = self.play_config.c_puct 193 | dir_alpha = self.play_config.dirichlet_alpha 194 | 195 | best_s = -999 196 | best_a = None 197 | 198 | for action, a_s in my_visitstats.a.items(): 199 | p_ = a_s.p 200 | if is_root_node: 201 | p_ = (1-e) * p_ + e * np.random.dirichlet([dir_alpha]) 202 | b = a_s.q + c_puct * p_ * xx_ / (1 + a_s.n) 203 | if b > best_s: 204 | best_s = b 205 | best_a = action 206 | 207 | return best_a 208 | 209 | def apply_temperature(self, policy, turn): 210 | tau = np.power(self.play_config.tau_decay_rate, turn + 1) 211 | if tau < 0.1: 212 | tau = 0 213 | if tau == 0: 214 | action = np.argmax(policy) 215 | ret = np.zeros(self.labels_n) 216 | ret[action] = 1.0 217 | return ret 218 | else: 219 | ret = np.power(policy, 1/tau) 220 | ret /= np.sum(ret) 221 | return ret 222 | 223 | def calc_policy(self, env): 224 | """calc π(a|s0) 225 | :return: 226 | """ 227 | state = state_key(env) 228 | my_visitstats = self.tree[state] 229 | policy = np.zeros(self.labels_n) 230 | for action, a_s in my_visitstats.a.items(): 231 | policy[self.move_lookup[action]] = a_s.n 232 | 233 | policy /= np.sum(policy) 234 | return policy 235 | 236 | def sl_action(self, observation, my_action, weight=1): 237 | policy = np.zeros(self.labels_n) 238 | 239 | k = self.move_lookup[chess.Move.from_uci(my_action)] 240 | policy[k] = weight 241 | 242 | self.moves.append([observation, list(policy)]) 243 | return my_action 244 | 245 | def finish_game(self, z): 246 | """ 247 | :param self: 248 | :param z: win=1, lose=-1, draw=0 249 | :return: 250 | """ 251 | for move in self.moves: # add this game winner result to all past moves. 
252 | move += [z] 253 | 254 | def state_key(env: ChessEnv) -> str: 255 | fen = env.board.fen().rsplit(' ', 1) # drop the move clock 256 | return fen[0] -------------------------------------------------------------------------------- /src/chess_zero/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | 5 | class PlayWithHumanConfig: 6 | def __init__(self): 7 | self.simulation_num_per_move = 1200 8 | self.threads_multiplier = 2 9 | self.c_puct = 1 # lower = prefer mean action value 10 | self.noise_eps = 0 11 | self.tau_decay_rate = 0 # start deterministic mode 12 | self.resign_threshold = None 13 | 14 | def update_play_config(self, pc): 15 | """ 16 | :param PlayConfig pc: 17 | :return: 18 | """ 19 | pc.simulation_num_per_move = self.simulation_num_per_move 20 | pc.search_threads *= self.threads_multiplier 21 | pc.c_puct = self.c_puct 22 | pc.noise_eps = self.noise_eps 23 | pc.tau_decay_rate = self.tau_decay_rate 24 | pc.resign_threshold = self.resign_threshold 25 | pc.max_game_length = 999999 26 | 27 | 28 | class Options: 29 | new = False 30 | 31 | 32 | class ResourceConfig: 33 | def __init__(self): 34 | self.project_dir = os.environ.get("PROJECT_DIR", _project_dir()) 35 | self.data_dir = os.environ.get("DATA_DIR", _data_dir()) 36 | 37 | self.model_dir = os.environ.get("MODEL_DIR", os.path.join(self.data_dir, "model")) 38 | self.model_best_config_path = os.path.join(self.model_dir, "model_best_config.json") 39 | self.model_best_weight_path = os.path.join(self.model_dir, "model_best_weight.h5") 40 | 41 | self.model_best_distributed_ftp_server = "alpha-chess-zero.mygamesonline.org" 42 | self.model_best_distributed_ftp_user = "2537576_chess" 43 | self.model_best_distributed_ftp_password = "alpha-chess-zero-2" 44 | self.model_best_distributed_ftp_remote_path = "/alpha-chess-zero.mygamesonline.org/" 45 | 46 | self.next_generation_model_dir = os.path.join(self.model_dir, "next_generation") 47 | self.next_generation_model_dirname_tmpl = "model_%s" 48 | self.next_generation_model_config_filename = "model_config.json" 49 | self.next_generation_model_weight_filename = "model_weight.h5" 50 | 51 | self.play_data_dir = os.path.join(self.data_dir, "play_data") 52 | self.play_data_filename_tmpl = "play_%s.json" 53 | 54 | self.log_dir = os.path.join(self.project_dir, "logs") 55 | self.main_log_path = os.path.join(self.log_dir, "main.log") 56 | 57 | def create_directories(self): 58 | dirs = [self.project_dir, self.data_dir, self.model_dir, self.play_data_dir, self.log_dir, 59 | self.next_generation_model_dir] 60 | for d in dirs: 61 | if not os.path.exists(d): 62 | os.makedirs(d) 63 | 64 | def flipped_uci_labels(): 65 | def repl(x): 66 | return "".join([(str(9 - int(a)) if a.isdigit() else a) for a in x]) 67 | 68 | return [repl(x) for x in create_uci_labels()] 69 | 70 | 71 | def create_uci_labels(): 72 | labels_array = [] 73 | letters = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'] 74 | numbers = ['1', '2', '3', '4', '5', '6', '7', '8'] 75 | promoted_to = ['q', 'r', 'b', 'n'] 76 | 77 | for l1 in range(8): 78 | for n1 in range(8): 79 | destinations = [(t, n1) for t in range(8)] + \ 80 | [(l1, t) for t in range(8)] + \ 81 | [(l1 + t, n1 + t) for t in range(-7, 8)] + \ 82 | [(l1 + t, n1 - t) for t in range(-7, 8)] + \ 83 | [(l1 + a, n1 + b) for (a, b) in 84 | [(-2, -1), (-1, -2), (-2, 1), (1, -2), (2, -1), (-1, 2), (2, 1), (1, 2)]] 85 | for (l2, n2) in destinations: 86 | if (l1, n1) != (l2, n2) and l2 in range(8) and n2 in range(8): 87 
| move = letters[l1] + numbers[n1] + letters[l2] + numbers[n2] 88 | labels_array.append(move) 89 | for l1 in range(8): 90 | l = letters[l1] 91 | for p in promoted_to: 92 | labels_array.append(l + '2' + l + '1' + p) 93 | labels_array.append(l + '7' + l + '8' + p) 94 | if l1 > 0: 95 | l_l = letters[l1 - 1] 96 | labels_array.append(l + '2' + l_l + '1' + p) 97 | labels_array.append(l + '7' + l_l + '8' + p) 98 | if l1 < 7: 99 | l_r = letters[l1 + 1] 100 | labels_array.append(l + '2' + l_r + '1' + p) 101 | labels_array.append(l + '7' + l_r + '8' + p) 102 | return labels_array 103 | 104 | 105 | class Config: 106 | labels = create_uci_labels() 107 | n_labels = int(len(labels)) 108 | flipped_labels = flipped_uci_labels() 109 | unflipped_index = None 110 | 111 | def __init__(self, config_type="mini"): 112 | self.opts = Options() 113 | self.resource = ResourceConfig() 114 | 115 | if config_type == "mini": 116 | import chess_zero.configs.mini as c 117 | elif config_type == "normal": 118 | import chess_zero.configs.normal as c 119 | elif config_type == "distributed": 120 | import chess_zero.configs.distributed as c 121 | else: 122 | raise RuntimeError(f"unknown config_type: {config_type}") 123 | self.model = c.ModelConfig() 124 | self.play = c.PlayConfig() 125 | self.play_data = c.PlayDataConfig() 126 | self.trainer = c.TrainerConfig() 127 | self.eval = c.EvaluateConfig() 128 | self.labels = Config.labels 129 | self.n_labels = Config.n_labels 130 | self.flipped_labels = Config.flipped_labels 131 | 132 | @staticmethod 133 | def flip_policy(pol): 134 | return np.asarray([pol[ind] for ind in Config.unflipped_index]) 135 | 136 | 137 | Config.unflipped_index = [Config.labels.index(x) for x in Config.flipped_labels] 138 | 139 | 140 | # print(Config.labels) 141 | # print(Config.flipped_labels) 142 | 143 | 144 | def _project_dir(): 145 | d = os.path.dirname 146 | return d(d(d(os.path.abspath(__file__)))) 147 | 148 | 149 | def _data_dir(): 150 | return os.path.join(_project_dir(), "data") -------------------------------------------------------------------------------- /src/chess_zero/configs/distributed.py: -------------------------------------------------------------------------------- 1 | class EvaluateConfig: 2 | def __init__(self): 3 | self.game_num = 400 4 | self.replace_rate = 0.55 5 | self.play_config = PlayConfig() 6 | self.play_config.simulation_num_per_move = 200 7 | self.play_config.thinking_loop = 1 8 | self.play_config.c_puct = 1 9 | self.play_config.change_tau_turn = 0 10 | self.play_config.noise_eps = 0 11 | self.evaluate_latest_first = True 12 | 13 | 14 | class PlayDataConfig: 15 | def __init__(self): 16 | self.sl_nb_game_in_file = 100 17 | self.nb_game_in_file = 100 18 | self.max_file_num = 200 19 | 20 | 21 | class PlayConfig: 22 | def __init__(self): 23 | self.simulation_num_per_move = 200 24 | self.thinking_loop = 1 25 | self.logging_thinking = False 26 | self.c_puct = 1.5 27 | self.noise_eps = 0.25 28 | self.dirichlet_alpha = 0.3 29 | self.change_tau_turn = 10 30 | self.virtual_loss = 3 31 | self.prediction_queue_size = 16 32 | self.search_threads = 16 33 | self.prediction_worker_sleep_sec = 0.00001 34 | self.wait_for_expanding_sleep_sec = 0.000001 35 | self.resign_threshold = -0.8 36 | self.min_resign_turn = 5 37 | self.average_chess_movements = 50 38 | 39 | 40 | class TrainerConfig: 41 | def __init__(self): 42 | self.batch_size = 2048 43 | self.epoch_to_checkpoint = 1 44 | self.start_total_steps = 0 45 | self.save_model_steps = 2000 46 | self.load_data_steps = 1000 47 | self.loss_weights = 
[1.0, 1.0] # prevent value overfit in SL 48 | 49 | 50 | class ModelConfig: 51 | cnn_filter_num = 256 52 | cnn_filter_size = 3 53 | res_layer_num = 7 54 | l2_reg = 1e-4 55 | value_fc_size = 256 56 | distributed = True 57 | -------------------------------------------------------------------------------- /src/chess_zero/configs/mini.py: -------------------------------------------------------------------------------- 1 | class EvaluateConfig: 2 | def __init__(self): 3 | self.vram_frac = 1.0 4 | self.game_num = 50 5 | self.replace_rate = 0.55 6 | self.play_config = PlayConfig() 7 | self.play_config.simulation_num_per_move = 200 8 | self.play_config.thinking_loop = 1 9 | self.play_config.c_puct = 1 # lower = prefer mean action value 10 | self.play_config.tau_decay_rate = 0.6 # I need a better distribution... 11 | self.play_config.noise_eps = 0 12 | self.evaluate_latest_first = True 13 | self.max_game_length = 1000 14 | 15 | 16 | class PlayDataConfig: 17 | def __init__(self): 18 | self.min_elo_policy = 500 # 0 weight 19 | self.max_elo_policy = 1800 # 1 weight 20 | self.sl_nb_game_in_file = 250 21 | self.nb_game_in_file = 50 22 | self.max_file_num = 150 23 | 24 | 25 | class PlayConfig: 26 | def __init__(self): 27 | self.max_processes = 3 28 | self.search_threads = 16 29 | self.vram_frac = 1.0 30 | self.simulation_num_per_move = 100 31 | self.thinking_loop = 1 32 | self.logging_thinking = False 33 | self.c_puct = 1.5 34 | self.noise_eps = 0.25 35 | self.dirichlet_alpha = 0.3 36 | self.tau_decay_rate = 0.99 37 | self.virtual_loss = 3 38 | self.resign_threshold = -0.8 39 | self.min_resign_turn = 5 40 | self.max_game_length = 1000 41 | 42 | 43 | class TrainerConfig: 44 | def __init__(self): 45 | self.min_data_size_to_learn = 0 46 | self.cleaning_processes = 5 # RAM explosion... 47 | self.vram_frac = 1.0 48 | self.batch_size = 384 # tune this to your gpu memory 49 | self.epoch_to_checkpoint = 1 50 | self.dataset_size = 100000 51 | self.start_total_steps = 0 52 | self.save_model_steps = 25 53 | self.load_data_steps = 100 54 | self.loss_weights = [1.25, 1.0] # [policy, value] prevent value overfit in SL 55 | 56 | 57 | class ModelConfig: 58 | cnn_filter_num = 256 59 | cnn_first_filter_size = 5 60 | cnn_filter_size = 3 61 | res_layer_num = 7 62 | l2_reg = 1e-4 # GO TO JSON TO SET THIS!!!! 
I don't have much worry for overfitting with only 1-2 epochs/dataset 63 | value_fc_size = 256 64 | distributed = False 65 | input_depth = 18 -------------------------------------------------------------------------------- /src/chess_zero/configs/normal.py: -------------------------------------------------------------------------------- 1 | class EvaluateConfig: 2 | def __init__(self): 3 | self.game_num = 400 4 | self.replace_rate = 0.55 5 | self.play_config = PlayConfig() 6 | self.play_config.simulation_num_per_move = 200 7 | self.play_config.thinking_loop = 1 8 | self.play_config.c_puct = 1 9 | self.play_config.change_tau_turn = 0 10 | self.play_config.noise_eps = 0 11 | self.evaluate_latest_first = True 12 | 13 | 14 | class PlayDataConfig: 15 | def __init__(self): 16 | self.sl_nb_game_in_file = 100 17 | self.nb_game_in_file = 100 18 | self.max_file_num = 200 19 | 20 | 21 | class PlayConfig: 22 | def __init__(self): 23 | self.simulation_num_per_move = 200 24 | self.thinking_loop = 1 25 | self.logging_thinking = False 26 | self.c_puct = 1.5 27 | self.noise_eps = 0.25 28 | self.dirichlet_alpha = 0.3 29 | self.change_tau_turn = 10 30 | self.virtual_loss = 3 31 | self.prediction_queue_size = 16 32 | self.search_threads = 16 33 | self.prediction_worker_sleep_sec = 0.00001 34 | self.wait_for_expanding_sleep_sec = 0.000001 35 | self.resign_threshold = -0.8 36 | self.min_resign_turn = 5 37 | self.average_chess_movements = 50 38 | 39 | 40 | class TrainerConfig: 41 | def __init__(self): 42 | self.batch_size = 2048 43 | self.epoch_to_checkpoint = 1 44 | self.start_total_steps = 0 45 | self.save_model_steps = 2000 46 | self.load_data_steps = 1000 47 | self.loss_weights = [1.0, 1.0] # prevent value overfit in SL 48 | 49 | 50 | class ModelConfig: 51 | cnn_filter_num = 256 52 | cnn_filter_size = 3 53 | res_layer_num = 7 54 | l2_reg = 1e-4 55 | value_fc_size = 256 56 | distributed = False 57 | -------------------------------------------------------------------------------- /src/chess_zero/env/chess_env.py: -------------------------------------------------------------------------------- 1 | import enum 2 | import chess.pgn 3 | import numpy as np 4 | import copy 5 | 6 | from logging import getLogger 7 | 8 | logger = getLogger(__name__) 9 | 10 | # noinspection PyArgumentList 11 | Winner = enum.Enum("Winner", "black white draw") 12 | 13 | # input planes 14 | # noinspection SpellCheckingInspection 15 | pieces_order = 'KQRBNPkqrbnp' # 12x8x8 16 | castling_order = 'KQkq' # 4x8x8 17 | # fifty-move-rule # 1x8x8 18 | # en en_passant # 1x8x8 19 | 20 | ind = {pieces_order[i]: i for i in range(12)} 21 | 22 | class ChessEnv: 23 | 24 | def __init__(self): 25 | self.board = None 26 | self.num_halfmoves = 0 27 | self.winner = None # type: Winner 28 | self.resigned = False 29 | self.result = None 30 | 31 | def reset(self): 32 | self.board = chess.Board() 33 | self.num_halfmoves = 0 34 | self.winner = None 35 | self.resigned = False 36 | return self 37 | 38 | def update(self, board): 39 | self.board = chess.Board(board) 40 | self.winner = None 41 | self.resigned = False 42 | return self 43 | 44 | @property 45 | def done(self): 46 | return self.winner is not None 47 | 48 | @property 49 | def white_won(self): 50 | return self.winner == Winner.white 51 | 52 | @property 53 | def white_to_move(self): 54 | return self.board.turn == chess.WHITE 55 | 56 | def step(self, action: str, check_over = True): 57 | """ 58 | :param action: 59 | :param check_over: 60 | :return: 61 | """ 62 | if check_over and action is None: 63 | 
self._resign() 64 | return 65 | 66 | self.board.push_uci(action) 67 | 68 | self.num_halfmoves += 1 69 | 70 | if check_over and self.board.result(claim_draw=True) != "*": 71 | self._game_over() 72 | 73 | def _game_over(self): 74 | if self.winner is None: 75 | self.result = self.board.result(claim_draw = True) 76 | if self.result == '1-0': 77 | self.winner = Winner.white 78 | elif self.result == '0-1': 79 | self.winner = Winner.black 80 | else: 81 | self.winner = Winner.draw 82 | 83 | def _resign(self): 84 | self.resigned = True 85 | if self.white_to_move: # WHITE RESIGNED! 86 | self.winner = Winner.black 87 | self.result = "0-1" 88 | else: 89 | self.winner = Winner.white 90 | self.result = "1-0" 91 | 92 | def adjudicate(self): 93 | score = self.testeval(absolute = True) 94 | if abs(score) < 0.01: 95 | self.winner = Winner.draw 96 | self.result = "1/2-1/2" 97 | elif score > 0: 98 | self.winner = Winner.white 99 | self.result = "1-0" 100 | else: 101 | self.winner = Winner.black 102 | self.result = "0-1" 103 | 104 | def ending_average_game(self): 105 | self.winner = Winner.draw 106 | self.result = "1/2-1/2" 107 | 108 | def copy(self): 109 | env = copy.copy(self) 110 | env.board = copy.copy(self.board) 111 | return env 112 | 113 | def render(self): 114 | print("\n") 115 | print(self.board) 116 | print("\n") 117 | 118 | @property 119 | def observation(self): 120 | return self.board.fen() 121 | 122 | def deltamove(self, fen_next): 123 | moves = list(self.board.legal_moves) 124 | for mov in moves: 125 | self.board.push(mov) 126 | fee = self.board.fen() 127 | self.board.pop() 128 | if fee == fen_next: 129 | return mov.uci() 130 | return None 131 | 132 | def replace_tags(self): 133 | return replace_tags_board(self.board.fen()) 134 | 135 | def canonical_input_planes(self): 136 | return canon_input_planes(self.board.fen()) 137 | 138 | def testeval(self, absolute=False) -> float: 139 | return testeval(self.board.fen(), absolute) 140 | 141 | def testeval(fen, absolute = False) -> float: 142 | piece_vals = {'K': 3, 'Q': 14, 'R': 5,'B': 3.25,'N': 3,'P': 1} # somehow it doesn't know how to keep its queen 143 | ans = 0.0 144 | tot = 0 145 | for c in fen.split(' ')[0]: 146 | if not c.isalpha(): 147 | continue 148 | #assert c.upper() in piece_vals 149 | if c.isupper(): 150 | ans += piece_vals[c] 151 | tot += piece_vals[c] 152 | else: 153 | ans -= piece_vals[c.upper()] 154 | tot += piece_vals[c.upper()] 155 | v = ans/tot 156 | if not absolute and is_black_turn(fen): 157 | v = -v 158 | assert abs(v) < 1 159 | return np.tanh(v * 3) # arbitrary 160 | 161 | def check_current_planes(realfen, planes): 162 | cur = planes[0:12] 163 | assert cur.shape == (12, 8, 8) 164 | fakefen = ["1"] * 64 165 | for i in range(12): 166 | for rank in range(8): 167 | for file in range(8): 168 | if cur[i][rank][file] == 1: 169 | assert fakefen[rank * 8 + file] == '1' 170 | fakefen[rank * 8 + file] = pieces_order[i] 171 | 172 | castling = planes[12:16] 173 | fiftymove = planes[16][0][0] 174 | ep = planes[17] 175 | 176 | castlingstring = "" 177 | for i in range(4): 178 | if castling[i][0][0] == 1: 179 | castlingstring += castling_order[i] 180 | 181 | if len(castlingstring) == 0: 182 | castlingstring = '-' 183 | 184 | epstr = "-" 185 | for rank in range(8): 186 | for file in range(8): 187 | if ep[rank][file] == 1: 188 | epstr = coord_to_alg((rank, file)) 189 | 190 | realfen = maybe_flip_fen(realfen, flip=is_black_turn(realfen)) 191 | realparts = realfen.split(' ') 192 | assert realparts[1] == 'w' 193 | assert realparts[2] == castlingstring 
194 | assert realparts[3] == epstr 195 | assert int(realparts[4]) == fiftymove 196 | # realparts[5] is the fullmove number; it is not encoded in the planes, so skip it 197 | return "".join(fakefen) == replace_tags_board(realfen) 198 | 199 | def canon_input_planes(fen): 200 | fen = maybe_flip_fen(fen, is_black_turn(fen)) 201 | return all_input_planes(fen) 202 | 203 | def all_input_planes(fen): 204 | current_aux_planes = aux_planes(fen) 205 | 206 | history_both = to_planes(fen) 207 | 208 | ret = np.vstack((history_both, current_aux_planes)) 209 | assert ret.shape == (18, 8, 8) 210 | return ret 211 | 212 | def maybe_flip_fen(fen, flip = False): 213 | if not flip: 214 | return fen 215 | foo = fen.split(' ') 216 | rows = foo[0].split('/') 217 | def swapcase(a): 218 | if a.isalpha(): 219 | return a.lower() if a.isupper() else a.upper() 220 | return a 221 | def swapall(aa): 222 | return "".join([swapcase(a) for a in aa]) 223 | return "/".join( [swapall(row) for row in reversed(rows)] ) \ 224 | + " " + ('w' if foo[1]=='b' else 'b') \ 225 | + " " + "".join( sorted( swapall(foo[2]) ) ) \ 226 | + " " + foo[3] + " " + foo[4] + " " + foo[5] 227 | 228 | def aux_planes(fen): 229 | foo = fen.split(' ') 230 | 231 | en_passant = np.zeros((8, 8), dtype=np.float32) 232 | if foo[3] != '-': 233 | eps = alg_to_coord(foo[3]) 234 | en_passant[eps[0]][eps[1]] = 1 235 | 236 | fifty_move_count = int(foo[4]) 237 | fifty_move = np.full((8,8), fifty_move_count, dtype=np.float32) 238 | 239 | castling = foo[2] 240 | auxiliary_planes = [np.full((8,8), int('K' in castling), dtype=np.float32), 241 | np.full((8,8), int('Q' in castling), dtype=np.float32), 242 | np.full((8,8), int('k' in castling), dtype=np.float32), 243 | np.full((8,8), int('q' in castling), dtype=np.float32), 244 | fifty_move, 245 | en_passant] 246 | 247 | ret = np.asarray(auxiliary_planes, dtype=np.float32) 248 | assert ret.shape == (6,8,8) 249 | return ret 250 | 251 | # FEN board is like this: 252 | # a8 b8 .. h8 253 | # a7 b7 .. h7 254 | # .. .. .. .. 255 | # a1 b1 .. h1 256 | # 257 | # FEN string is like this: 258 | # 0 1 .. 7 259 | # 8 9 .. 15 260 | # .. .. .. .. 261 | # 56 57 .. 63 262 | 263 | # my planes are like this: 264 | # 00 01 .. 07 265 | # 10 11 .. 17 266 | # .. .. .. .. 267 | # 70 71 ..
77 268 | # 269 | 270 | def alg_to_coord(alg): 271 | rank = 8 - int(alg[1]) # 0-7 272 | file = ord(alg[0]) - ord('a') # 0-7 273 | return rank, file 274 | 275 | def coord_to_alg(coord): 276 | letter = chr(ord('a') + coord[1]) 277 | number = str(8 - coord[0]) 278 | return letter + number 279 | 280 | def to_planes(fen): 281 | board_state = replace_tags_board(fen) 282 | pieces_both = np.zeros(shape = (12, 8, 8), dtype=np.float32) 283 | for rank in range(8): 284 | for file in range(8): 285 | v = board_state[rank * 8 + file] 286 | if v.isalpha(): 287 | pieces_both[ind[v]][rank][file] = 1 288 | assert pieces_both.shape == (12, 8, 8) 289 | return pieces_both 290 | 291 | def replace_tags_board(board_san): 292 | board_san = board_san.split(" ")[0] 293 | board_san = board_san.replace("2", "11") 294 | board_san = board_san.replace("3", "111") 295 | board_san = board_san.replace("4", "1111") 296 | board_san = board_san.replace("5", "11111") 297 | board_san = board_san.replace("6", "111111") 298 | board_san = board_san.replace("7", "1111111") 299 | board_san = board_san.replace("8", "11111111") 300 | return board_san.replace("/", "") 301 | 302 | def is_black_turn(fen): 303 | return fen.split(" ")[1] == 'b' -------------------------------------------------------------------------------- /src/chess_zero/lib/data_helper.py: -------------------------------------------------------------------------------- 1 | import os 2 | import ujson 3 | from datetime import datetime 4 | from glob import glob 5 | from logging import getLogger 6 | 7 | import chess 8 | import pyperclip 9 | from chess_zero.config import ResourceConfig 10 | 11 | logger = getLogger(__name__) 12 | 13 | 14 | def pretty_print(env, colors): 15 | new_pgn = open("test3.pgn", "at") 16 | game = chess.pgn.Game.from_board(env.board) 17 | game.headers["Result"] = env.result 18 | game.headers["White"], game.headers["Black"] = colors 19 | game.headers["Date"] = datetime.now().strftime("%Y.%m.%d") 20 | new_pgn.write(str(game) + "\n\n") 21 | new_pgn.close() 22 | pyperclip.copy(env.board.fen()) 23 | 24 | 25 | def find_pgn_files(directory, pattern='*.pgn'): 26 | dir_pattern = os.path.join(directory, pattern) 27 | files = list(sorted(glob(dir_pattern))) 28 | return files 29 | 30 | 31 | def get_game_data_filenames(rc: ResourceConfig): 32 | pattern = os.path.join(rc.play_data_dir, rc.play_data_filename_tmpl % "*") 33 | files = list(sorted(glob(pattern))) 34 | return files 35 | 36 | 37 | def get_next_generation_model_dirs(rc: ResourceConfig): 38 | dir_pattern = os.path.join(rc.next_generation_model_dir, rc.next_generation_model_dirname_tmpl % "*") 39 | dirs = list(sorted(glob(dir_pattern))) 40 | return dirs 41 | 42 | 43 | def write_game_data_to_file(path, data): 44 | try: 45 | with open(path, "wt") as f: 46 | ujson.dump(data, f) 47 | except Exception as e: 48 | print(e) 49 | 50 | 51 | def read_game_data_from_file(path): 52 | try: 53 | with open(path, "rt") as f: 54 | return ujson.load(f) 55 | except Exception as e: 56 | print(e) 57 | 58 | # def conv_helper(path): 59 | # with open(path, "rt") as f: 60 | # data = json.load(f) 61 | # with open(path, "wb") as f: 62 | # pickle.dump(data, f) 63 | 64 | # def convert_json_to_pickle(): 65 | # import os 66 | # files = [x for x in os.listdir() if x.endswith(".json")] 67 | # from concurrent.futures import ProcessPoolExecutor 68 | # with ProcessPoolExecutor(max_workers=6) as executor: 69 | # executor.map(conv_helper,files) 70 | -------------------------------------------------------------------------------- 
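
For orientation only (this snippet is not a file in the repository), here is a minimal round-trip through the play-data helpers above. It assumes the chess_zero package is importable, as run.py arranges, along with its dependencies (ujson, python-chess, pyperclip), and it fabricates a single (fen, policy, value) record in the same shape the self-play worker buffers:

    import os
    from datetime import datetime

    from chess_zero.config import Config
    from chess_zero.lib.data_helper import (get_game_data_filenames,
                                            read_game_data_from_file,
                                            write_game_data_to_file)

    config = Config(config_type="mini")
    config.resource.create_directories()
    rc = config.resource

    # One fabricated record: starting-position FEN, a uniform policy over all UCI labels, and a drawn value.
    start_fen = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1"
    record = (start_fen, [1.0 / config.n_labels] * config.n_labels, 0.0)

    game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
    path = os.path.join(rc.play_data_dir, rc.play_data_filename_tmpl % game_id)
    write_game_data_to_file(path, [record])        # serialized with ujson

    print(get_game_data_filenames(rc)[-1])         # the file just written appears in the sorted listing
    print(len(read_game_data_from_file(path)))     # -> 1

This is the same (fen, policy, value) layout that convert_to_cheating_data in optimize.py consumes when it builds training batches.
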
/src/chess_zero/lib/logger.py: -------------------------------------------------------------------------------- 1 | from logging import StreamHandler, basicConfig, DEBUG, getLogger, Formatter 2 | 3 | 4 | def setup_logger(log_filename): 5 | format_str = '%(asctime)s@%(name)s %(levelname)s # %(message)s' 6 | basicConfig(filename=log_filename, level=DEBUG, format=format_str) 7 | stream_handler = StreamHandler() 8 | stream_handler.setFormatter(Formatter(format_str)) 9 | getLogger().addHandler(stream_handler) 10 | 11 | 12 | if __name__ == '__main__': 13 | setup_logger("aa.log") 14 | logger = getLogger("test") 15 | logger.info("OK") 16 | -------------------------------------------------------------------------------- /src/chess_zero/lib/model_helper.py: -------------------------------------------------------------------------------- 1 | from logging import getLogger 2 | 3 | logger = getLogger(__name__) 4 | 5 | 6 | def load_best_model_weight(model): 7 | """ 8 | :param chess_zero.agent.model.ChessModel model: 9 | :return: 10 | """ 11 | return model.load(model.config.resource.model_best_config_path, model.config.resource.model_best_weight_path) 12 | 13 | 14 | def save_as_best_model(model): 15 | """ 16 | 17 | :param chess_zero.agent.model.ChessModel model: 18 | :return: 19 | """ 20 | return model.save(model.config.resource.model_best_config_path, model.config.resource.model_best_weight_path) 21 | 22 | 23 | def reload_best_model_weight_if_changed(model): 24 | """ 25 | 26 | :param chess_zero.agent.model.ChessModel model: 27 | :return: 28 | """ 29 | if model.config.model.distributed: 30 | return load_best_model_weight(model) 31 | else: 32 | logger.debug("start reload the best model if changed") 33 | digest = model.fetch_digest(model.config.resource.model_best_weight_path) 34 | if digest != model.digest: 35 | return load_best_model_weight(model) 36 | 37 | logger.debug("the best model is not changed") 38 | return False 39 | -------------------------------------------------------------------------------- /src/chess_zero/lib/tf_util.py: -------------------------------------------------------------------------------- 1 | def set_session_config(per_process_gpu_memory_fraction=None, allow_growth=None): 2 | """ 3 | 4 | :param allow_growth: When necessary, reserve memory 5 | :param float per_process_gpu_memory_fraction: specify GPU memory usage as 0 to 1 6 | 7 | :return: 8 | """ 9 | import tensorflow as tf 10 | import keras.backend as k 11 | 12 | config = tf.ConfigProto( 13 | gpu_options=tf.GPUOptions( 14 | per_process_gpu_memory_fraction=per_process_gpu_memory_fraction, 15 | allow_growth=allow_growth, 16 | ) 17 | ) 18 | sess = tf.Session(config=config) 19 | k.set_session(sess) 20 | -------------------------------------------------------------------------------- /src/chess_zero/manager.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from logging import getLogger,disable 4 | 5 | from .lib.logger import setup_logger 6 | from .config import Config 7 | 8 | logger = getLogger(__name__) 9 | 10 | CMD_LIST = ['self', 'opt', 'eval', 'sl', 'uci'] 11 | 12 | 13 | def create_parser(): 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument("cmd", help="what to do", choices=CMD_LIST) 16 | parser.add_argument("--new", help="run from new best model", action="store_true") 17 | parser.add_argument("--type", help="use normal setting", default="mini") 18 | parser.add_argument("--total-step", help="set TrainerConfig.start_total_steps", type=int) 19 | return 
parser 20 | 21 | 22 | def setup(config: Config, args): 23 | config.opts.new = args.new 24 | if args.total_step is not None: 25 | config.trainer.start_total_steps = args.total_step 26 | config.resource.create_directories() 27 | setup_logger(config.resource.main_log_path) 28 | 29 | 30 | def start(): 31 | parser = create_parser() 32 | args = parser.parse_args() 33 | config_type = args.type 34 | 35 | if args.cmd == 'uci': 36 | disable(999999) # plz don't interfere with uci 37 | 38 | config = Config(config_type=config_type) 39 | setup(config, args) 40 | 41 | logger.info(f"config type: {config_type}") 42 | 43 | if args.cmd == 'self': 44 | from .worker import self_play 45 | return self_play.start(config) 46 | elif args.cmd == 'opt': 47 | from .worker import optimize 48 | return optimize.start(config) 49 | elif args.cmd == 'eval': 50 | from .worker import evaluate 51 | return evaluate.start(config) 52 | elif args.cmd == 'sl': 53 | from .worker import sl 54 | return sl.start(config) 55 | elif args.cmd == 'uci': 56 | from .play_game import uci 57 | return uci.start(config) 58 | -------------------------------------------------------------------------------- /src/chess_zero/play_game/uci.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from logging import getLogger 3 | 4 | from chess_zero.agent.player_chess import ChessPlayer 5 | from chess_zero.config import Config, PlayWithHumanConfig 6 | from chess_zero.env.chess_env import ChessEnv 7 | 8 | logger = getLogger(__name__) 9 | 10 | 11 | # noinspection SpellCheckingInspection,SpellCheckingInspection,SpellCheckingInspection,SpellCheckingInspection,SpellCheckingInspection,SpellCheckingInspection 12 | def start(config: Config): 13 | 14 | PlayWithHumanConfig().update_play_config(config.play) 15 | 16 | me_player = None 17 | env = ChessEnv().reset() 18 | 19 | while True: 20 | line=input() 21 | words=line.rstrip().split(" ",1) 22 | if words[0] == "uci": 23 | print("id name ChessZero") 24 | print("id author ChessZero") 25 | print("uciok") 26 | elif words[0]=="isready": 27 | if not me_player: 28 | me_player = get_player(config) 29 | print("readyok") 30 | elif words[0]=="ucinewgame": 31 | env.reset() 32 | elif words[0]=="position": 33 | words=words[1].split(" ",1) 34 | if words[0]=="startpos": 35 | env.reset() 36 | else: 37 | fen = words[0] 38 | for _ in range(5): 39 | words = words[1].split(' ',1) 40 | fen += " " + words[0] 41 | env.update(fen) 42 | #print(maybe_flip_fen(fen,True)) 43 | if len(words) > 1: 44 | words = words[1].split(" ",1) 45 | if words[0]=="moves": 46 | for w in words[1].split(" "): 47 | env.step(w, False) 48 | elif words[0]=="go": 49 | if not me_player: 50 | me_player = get_player(config) 51 | action = me_player.action(env, False) 52 | print(f"bestmove {action}") 53 | elif words[0]=="stop": 54 | pass #lol 55 | elif words[0]=="quit": 56 | break 57 | 58 | def get_player(config): 59 | from chess_zero.agent.model_chess import ChessModel 60 | from chess_zero.lib.model_helper import load_best_model_weight 61 | model = ChessModel(config) 62 | if not load_best_model_weight(model): 63 | raise RuntimeError("Best model not found!") 64 | return ChessPlayer(config, model.get_pipes(config.play.search_threads)) 65 | 66 | def info(depth,move, score): 67 | print(f"info score cp {int(score*100)} depth {depth} pv {move}") 68 | sys.stdout.flush() -------------------------------------------------------------------------------- /src/chess_zero/run.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import multiprocessing as mp 4 | 5 | _PATH_ = os.path.dirname(os.path.dirname(__file__)) 6 | 7 | 8 | if _PATH_ not in sys.path: 9 | sys.path.append(_PATH_) 10 | 11 | 12 | if __name__ == "__main__": 13 | mp.set_start_method('spawn') 14 | sys.setrecursionlimit(10000) 15 | from chess_zero import manager 16 | manager.start() -------------------------------------------------------------------------------- /src/chess_zero/stacktracer.py: -------------------------------------------------------------------------------- 1 | """Stack tracer for multi-threaded applications. 2 | 3 | 4 | Usage: 5 | 6 | import stacktracer 7 | stacktracer.start_trace("trace.html",interval=5,auto=True) # Set auto flag to always update file! 8 | .... 9 | stacktracer.stop_trace() 10 | """ 11 | 12 | 13 | 14 | import sys 15 | import traceback 16 | from pygments import highlight 17 | from pygments.formatters import HtmlFormatter 18 | from pygments.lexers import PythonLexer 19 | 20 | 21 | # Taken from http://bzimmer.ziclix.com/2008/12/17/python-thread-dumps/ 22 | 23 | def stacktraces(): 24 | code = [] 25 | for threadId, stack in sys._current_frames().items(): 26 | code.append("\n# ThreadID: %s" % threadId) 27 | for filename, lineno, name, line in traceback.extract_stack(stack): 28 | code.append('File: "%s", line %d, in %s' % (filename, lineno, name)) 29 | if line: 30 | code.append(" %s" % (line.strip())) 31 | 32 | return highlight("\n".join(code), PythonLexer(), HtmlFormatter( 33 | full=False, 34 | # style="native", 35 | noclasses=True, 36 | )) 37 | 38 | 39 | # This part was made by nagylzs 40 | import os 41 | import time 42 | import threading 43 | 44 | class TraceDumper(threading.Thread): 45 | """Dump stack traces into a given file periodically.""" 46 | def __init__(self,fpath,interval,auto): 47 | """ 48 | @param fpath: File path to output HTML (stack trace file) 49 | @param auto: Set flag (True) to update trace continuously. 50 | Clear flag (False) to update only if file not exists. 51 | (Then delete the file to force update.) 52 | @param interval: In seconds: how often to update the trace file. 
53 | """ 54 | assert(interval>0.1) 55 | self.auto = auto 56 | self.interval = interval 57 | self.fpath = os.path.abspath(fpath) 58 | self.stop_requested = threading.Event() 59 | threading.Thread.__init__(self) 60 | 61 | def run(self): 62 | while not self.stop_requested.isSet(): 63 | time.sleep(self.interval) 64 | if self.auto or not os.path.isfile(self.fpath): 65 | self.stacktraces() 66 | 67 | def stop(self): 68 | self.stop_requested.set() 69 | self.join() 70 | try: 71 | if os.path.isfile(self.fpath): 72 | os.unlink(self.fpath) 73 | except: 74 | pass 75 | 76 | def stacktraces(self): 77 | fout = open(self.fpath,"w") 78 | try: 79 | fout.write(stacktraces()) 80 | finally: 81 | fout.close() 82 | 83 | 84 | _tracer = None 85 | def trace_start(fpath,interval=5,auto=True): 86 | """Start tracing into the given file.""" 87 | global _tracer 88 | if _tracer is None: 89 | _tracer = TraceDumper(fpath,interval,auto) 90 | _tracer.setDaemon(True) 91 | _tracer.start() 92 | else: 93 | raise Exception("Already tracing to %s"%_tracer.fpath) 94 | 95 | def trace_stop(): 96 | """Stop tracing.""" 97 | global _tracer 98 | if _tracer is None: 99 | raise Exception("Not tracing, cannot stop.") 100 | else: 101 | _trace.stop() 102 | _trace = None -------------------------------------------------------------------------------- /src/chess_zero/worker/evaluate.py: -------------------------------------------------------------------------------- 1 | import os 2 | from concurrent.futures import ProcessPoolExecutor, as_completed 3 | from logging import getLogger 4 | from multiprocessing import Manager 5 | from time import sleep 6 | 7 | from chess_zero.agent.model_chess import ChessModel 8 | from chess_zero.agent.player_chess import ChessPlayer 9 | from chess_zero.config import Config 10 | from chess_zero.env.chess_env import ChessEnv, Winner 11 | from chess_zero.lib.data_helper import get_next_generation_model_dirs, pretty_print 12 | from chess_zero.lib.model_helper import save_as_best_model, load_best_model_weight 13 | 14 | logger = getLogger(__name__) 15 | 16 | def start(config: Config): 17 | #tf_util.set_session_config(config.play.vram_frac) 18 | return EvaluateWorker(config).start() 19 | 20 | class EvaluateWorker: 21 | def __init__(self, config: Config): 22 | """ 23 | :param config: 24 | """ 25 | self.config = config 26 | self.play_config = config.eval.play_config 27 | self.current_model = self.load_current_model() 28 | self.m = Manager() 29 | self.cur_pipes = self.m.list([self.current_model.get_pipes(self.play_config.search_threads) for _ in range(self.play_config.max_processes)]) 30 | 31 | def start(self): 32 | while True: 33 | ng_model, model_dir = self.load_next_generation_model() 34 | logger.debug(f"start evaluate model {model_dir}") 35 | ng_is_great = self.evaluate_model(ng_model) 36 | if ng_is_great: 37 | logger.debug(f"New Model become best model: {model_dir}") 38 | save_as_best_model(ng_model) 39 | self.current_model = ng_model 40 | self.move_model(model_dir) # i lost my models because of this :( 41 | 42 | def evaluate_model(self, ng_model): 43 | ng_pipes = self.m.list([ng_model.get_pipes(self.play_config.search_threads) for _ in range(self.play_config.max_processes)]) 44 | 45 | futures = [] 46 | with ProcessPoolExecutor(max_workers=self.play_config.max_processes) as executor: 47 | for game_idx in range(self.config.eval.game_num): 48 | fut = executor.submit(play_game, self.config, cur=self.cur_pipes, ng=ng_pipes, current_white=(game_idx % 2 == 0)) 49 | futures.append(fut) 50 | 51 | results = [] 52 | for fut in 
as_completed(futures): 53 | # ng_score := if ng_model win -> 1, lose -> 0, draw -> 0.5 54 | ng_score, env, current_white = fut.result() 55 | results.append(ng_score) 56 | win_rate = sum(results) / len(results) 57 | game_idx = len(results) 58 | logger.debug(f"game {game_idx:3}: ng_score={ng_score:.1f} as {'black' if current_white else 'white'} " 59 | f"{'by resign ' if env.resigned else ' '}" 60 | f"win_rate={win_rate*100:5.1f}% " 61 | f"{env.board.fen().split(' ')[0]}") 62 | 63 | colors = ("current_model", "ng_model") 64 | if not current_white: 65 | colors = reversed(colors) 66 | pretty_print(env, colors) 67 | 68 | if len(results)-sum(results) >= self.config.eval.game_num * (1-self.config.eval.replace_rate): 69 | logger.debug(f"lose count reach {results.count(0)} so give up challenge") 70 | return False 71 | if sum(results) >= self.config.eval.game_num * self.config.eval.replace_rate: 72 | logger.debug(f"win count reach {results.count(1)} so change best model") 73 | return True 74 | 75 | win_rate = sum(results) / len(results) 76 | logger.debug(f"winning rate {win_rate*100:.1f}%") 77 | return win_rate >= self.config.eval.replace_rate 78 | 79 | def move_model(self, model_dir): 80 | rc = self.config.resource 81 | # config_path = os.path.join(model_dir, rc.next_generation_model_config_filename) 82 | # weight_path = os.path.join(model_dir, rc.next_generation_model_weight_filename) 83 | # os.remove(config_path) 84 | # os.remove(weight_path) 85 | new_dir = os.path.join(rc.next_generation_model_dir, "copies", os.path.basename(model_dir)) 86 | os.rename(model_dir, new_dir) 87 | 88 | def load_current_model(self): 89 | model = ChessModel(self.config) 90 | load_best_model_weight(model) 91 | return model 92 | 93 | def load_next_generation_model(self): 94 | rc = self.config.resource 95 | while True: 96 | dirs = get_next_generation_model_dirs(self.config.resource) 97 | if dirs: 98 | break 99 | logger.info("There is no next generation model to evaluate") 100 | sleep(60) 101 | model_dir = dirs[-1] if self.config.eval.evaluate_latest_first else dirs[0] 102 | config_path = os.path.join(model_dir, rc.next_generation_model_config_filename) 103 | weight_path = os.path.join(model_dir, rc.next_generation_model_weight_filename) 104 | model = ChessModel(self.config) 105 | model.load(config_path, weight_path) 106 | return model, model_dir 107 | 108 | def play_game(config, cur, ng, current_white: bool) -> (float, ChessEnv, bool): 109 | cur_pipes = cur.pop() 110 | ng_pipes = ng.pop() 111 | env = ChessEnv().reset() 112 | 113 | current_player = ChessPlayer(config, pipes=cur_pipes, play_config=config.eval.play_config) 114 | ng_player = ChessPlayer(config, pipes=ng_pipes, play_config=config.eval.play_config) 115 | if current_white: 116 | white, black = current_player, ng_player 117 | else: 118 | white, black = ng_player, current_player 119 | 120 | while not env.done: 121 | if env.white_to_move: 122 | action = white.action(env) 123 | else: 124 | action = black.action(env) 125 | env.step(action) 126 | if env.num_halfmoves >= config.eval.max_game_length: 127 | env.adjudicate() 128 | 129 | if env.winner == Winner.draw: 130 | ng_score = 0.5 131 | elif env.white_won == current_white: 132 | ng_score = 0 133 | else: 134 | ng_score = 1 135 | cur.append(cur_pipes) 136 | ng.append(ng_pipes) 137 | return ng_score, env, current_white -------------------------------------------------------------------------------- /src/chess_zero/worker/optimize.py: -------------------------------------------------------------------------------- 1 | import os 2 |
from collections import deque 3 | from concurrent.futures import ProcessPoolExecutor 4 | from datetime import datetime 5 | from logging import getLogger 6 | from time import sleep 7 | from random import shuffle 8 | 9 | import numpy as np 10 | 11 | from chess_zero.agent.model_chess import ChessModel 12 | from chess_zero.config import Config 13 | from chess_zero.env.chess_env import canon_input_planes, is_black_turn, testeval 14 | from chess_zero.lib.data_helper import get_game_data_filenames, read_game_data_from_file, get_next_generation_model_dirs 15 | from chess_zero.lib.model_helper import load_best_model_weight 16 | 17 | from keras.optimizers import Adam 18 | from keras.callbacks import TensorBoard 19 | logger = getLogger(__name__) 20 | 21 | 22 | def start(config: Config): 23 | #tf_util.set_session_config(config.trainer.vram_frac) 24 | return OptimizeWorker(config).start() 25 | 26 | 27 | class OptimizeWorker: 28 | def __init__(self, config: Config): 29 | self.config = config 30 | self.model = None # type: ChessModel 31 | self.loaded_filenames = set() 32 | self.loaded_data = deque(maxlen=self.config.trainer.dataset_size) # this should just be a ring buffer i.e. queue of length 500,000 in AZ 33 | self.dataset = deque(),deque(),deque() 34 | self.executor = ProcessPoolExecutor(max_workers=config.trainer.cleaning_processes) 35 | 36 | def start(self): 37 | self.model = self.load_model() 38 | self.training() 39 | 40 | def training(self): 41 | self.compile_model() 42 | self.filenames = deque(get_game_data_filenames(self.config.resource)) 43 | shuffle(self.filenames) 44 | last_load_data_step = last_save_step = total_steps = self.config.trainer.start_total_steps 45 | 46 | while True: 47 | self.fill_queue() 48 | # if self.dataset_size < self.config.trainer.min_data_size_to_learn: 49 | # logger.info(f"dataset_size={self.dataset_size} is less than {self.config.trainer.min_data_size_to_learn}") 50 | # sleep(60) 51 | # self.fill_queue() 52 | # continue 53 | #self.update_learning_rate(total_steps) 54 | steps = self.train_epoch(self.config.trainer.epoch_to_checkpoint) 55 | total_steps += steps 56 | #if last_save_step + self.config.trainer.save_model_steps < total_steps: 57 | self.save_current_model() 58 | last_save_step = total_steps 59 | a,b,c=self.dataset 60 | while len(a) > self.config.trainer.dataset_size/2: 61 | a.popleft() 62 | b.popleft() 63 | c.popleft() 64 | # if last_load_data_step + self.config.trainer.load_data_steps < total_steps: 65 | # self.fill_queue() 66 | # last_load_data_step = total_steps 67 | 68 | def train_epoch(self, epochs): 69 | tc = self.config.trainer 70 | state_ary, policy_ary, value_ary = self.collect_all_loaded_data() 71 | tensorboard_cb = TensorBoard(log_dir="./logs", batch_size=tc.batch_size, histogram_freq=1) 72 | self.model.model.fit(state_ary, [policy_ary, value_ary], 73 | batch_size=tc.batch_size, 74 | epochs=epochs, 75 | shuffle=True, 76 | validation_split=0.02, 77 | callbacks=[tensorboard_cb]) 78 | steps = (state_ary.shape[0] // tc.batch_size) * epochs 79 | return steps 80 | 81 | def compile_model(self): 82 | opt = Adam() #SGD(lr=2e-1, momentum=0.9) # Adam better? 
83 | losses = ['categorical_crossentropy', 'mean_squared_error'] # avoid overfit for supervised 84 | self.model.model.compile(optimizer=opt, loss=losses, loss_weights=self.config.trainer.loss_weights) 85 | 86 | def save_current_model(self): 87 | rc = self.config.resource 88 | model_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f") 89 | model_dir = os.path.join(rc.next_generation_model_dir, rc.next_generation_model_dirname_tmpl % model_id) 90 | os.makedirs(model_dir, exist_ok=True) 91 | config_path = os.path.join(model_dir, rc.next_generation_model_config_filename) 92 | weight_path = os.path.join(model_dir, rc.next_generation_model_weight_filename) 93 | self.model.save(config_path, weight_path) 94 | 95 | def fill_queue(self): 96 | futures = deque() 97 | with ProcessPoolExecutor(max_workers=self.config.trainer.cleaning_processes) as executor: 98 | for _ in range(self.config.trainer.cleaning_processes): 99 | if len(self.filenames) == 0: 100 | break 101 | filename = self.filenames.popleft() 102 | logger.debug(f"loading data from {filename}") 103 | futures.append(executor.submit(load_data_from_file,filename)) 104 | while futures and len(self.dataset[0]) < self.config.trainer.dataset_size: 105 | for x,y in zip(self.dataset,futures.popleft().result()): 106 | x.extend(y) 107 | if len(self.filenames) > 0: 108 | filename = self.filenames.popleft() 109 | logger.debug(f"loading data from {filename}") 110 | futures.append(executor.submit(load_data_from_file,filename)) 111 | 112 | def collect_all_loaded_data(self): 113 | state_ary,policy_ary,value_ary=self.dataset 114 | 115 | state_ary1 = np.asarray(state_ary, dtype=np.float32) 116 | policy_ary1 = np.asarray(policy_ary, dtype=np.float32) 117 | value_ary1 = np.asarray(value_ary, dtype=np.float32) 118 | return state_ary1, policy_ary1, value_ary1 119 | 120 | 121 | def load_model(self): 122 | model = ChessModel(self.config) 123 | rc = self.config.resource 124 | 125 | dirs = get_next_generation_model_dirs(rc) 126 | if not dirs: 127 | logger.debug("loading best model") 128 | if not load_best_model_weight(model): 129 | raise RuntimeError("Best model can not loaded!") 130 | else: 131 | latest_dir = dirs[-1] 132 | logger.debug("loading latest model") 133 | config_path = os.path.join(latest_dir, rc.next_generation_model_config_filename) 134 | weight_path = os.path.join(latest_dir, rc.next_generation_model_weight_filename) 135 | model.load(config_path, weight_path) 136 | return model 137 | # def unload_data_of_file(self, filename): 138 | # logger.debug(f"removing data about {filename} from training set") 139 | # self.loaded_filenames.remove(filename) 140 | # if filename in self.loaded_data: 141 | # del self.loaded_data[filename] 142 | 143 | def load_data_from_file(filename): 144 | data = read_game_data_from_file(filename) 145 | return convert_to_cheating_data(data) ### HERE, use with SL 146 | 147 | 148 | def convert_to_cheating_data(data): 149 | """ 150 | :param data: format is SelfPlayWorker.buffer 151 | :return: 152 | """ 153 | state_list = [] 154 | policy_list = [] 155 | value_list = [] 156 | for state_fen, policy, value in data: 157 | 158 | state_planes = canon_input_planes(state_fen) 159 | #assert check_current_planes(state_fen, state_planes) 160 | 161 | if is_black_turn(state_fen): 162 | policy = Config.flip_policy(policy) 163 | 164 | # assert len(policy) == 1968 165 | # assert state_planes.dtype == np.float32 166 | # assert state_planes.shape == (18, 8, 8) #print(state_planes.shape) 167 | 168 | move_number = int(state_fen.split(' ')[5]) 169 | value_certainty 
= min(5, move_number)/5 # reduces the noise of the opening... plz train faster 170 | sl_value = value*value_certainty + testeval(state_fen, False)*(1-value_certainty) 171 | 172 | state_list.append(state_planes) 173 | policy_list.append(policy) 174 | value_list.append(sl_value) 175 | 176 | return np.asarray(state_list, dtype=np.float32), np.asarray(policy_list, dtype=np.float32), np.asarray(value_list, dtype=np.float32) -------------------------------------------------------------------------------- /src/chess_zero/worker/self_play.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import deque 3 | from concurrent.futures import ProcessPoolExecutor 4 | from datetime import datetime 5 | from logging import getLogger 6 | from multiprocessing import Manager 7 | from threading import Thread 8 | from time import time 9 | 10 | from chess_zero.agent.model_chess import ChessModel 11 | from chess_zero.agent.player_chess import ChessPlayer 12 | from chess_zero.config import Config 13 | from chess_zero.env.chess_env import ChessEnv, Winner 14 | from chess_zero.lib.data_helper import get_game_data_filenames, write_game_data_to_file, pretty_print 15 | from chess_zero.lib.model_helper import load_best_model_weight, save_as_best_model, \ 16 | reload_best_model_weight_if_changed 17 | 18 | logger = getLogger(__name__) 19 | 20 | def start(config: Config): 21 | return SelfPlayWorker(config).start() 22 | 23 | 24 | # noinspection PyAttributeOutsideInit 25 | class SelfPlayWorker: 26 | def __init__(self, config: Config): 27 | """ 28 | :param config: 29 | """ 30 | self.config = config 31 | self.current_model = self.load_model() 32 | self.m = Manager() 33 | self.cur_pipes = self.m.list([self.current_model.get_pipes(self.config.play.search_threads) for _ in range(self.config.play.max_processes)]) 34 | 35 | def start(self): 36 | self.buffer = [] 37 | 38 | futures = deque() 39 | with ProcessPoolExecutor(max_workers=self.config.play.max_processes) as executor: 40 | for game_idx in range(self.config.play.max_processes): 41 | futures.append(executor.submit(self_play_buffer, self.config, cur=self.cur_pipes)) 42 | game_idx = 0 43 | while True: 44 | game_idx += 1 45 | start_time = time() 46 | env, data = futures.popleft().result() 47 | print(f"game {game_idx:3} time={time() - start_time:5.1f}s " 48 | f"halfmoves={env.num_halfmoves:3} {env.winner:12} " 49 | f"{'by resign ' if env.resigned else ' '}") 50 | 51 | pretty_print(env, ("current_model", "current_model")) 52 | self.buffer += data 53 | if (game_idx % self.config.play_data.nb_game_in_file) == 0: 54 | self.flush_buffer() 55 | reload_best_model_weight_if_changed(self.current_model) 56 | futures.append(executor.submit(self_play_buffer, self.config, cur=self.cur_pipes)) # Keep it going 57 | 58 | if len(data) > 0: 59 | self.flush_buffer() 60 | 61 | def load_model(self): 62 | model = ChessModel(self.config) 63 | if self.config.opts.new or not load_best_model_weight(model): 64 | model.build() 65 | save_as_best_model(model) 66 | return model 67 | 68 | def flush_buffer(self): 69 | rc = self.config.resource 70 | game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f") 71 | path = os.path.join(rc.play_data_dir, rc.play_data_filename_tmpl % game_id) 72 | logger.info(f"save play data to {path}") 73 | thread = Thread(target = write_game_data_to_file, args=(path, self.buffer)) 74 | thread.start() 75 | self.buffer = [] 76 | 77 | def remove_play_data(self): 78 | return 79 | files = 
get_game_data_filenames(self.config.resource) 80 | if len(files) < self.config.play_data.max_file_num: 81 | return 82 | for i in range(len(files) - self.config.play_data.max_file_num): 83 | os.remove(files[i]) 84 | 85 | 86 | def self_play_buffer(config, cur) -> (ChessEnv, list): 87 | pipes = cur.pop() # borrow 88 | env = ChessEnv().reset() 89 | 90 | white = ChessPlayer(config, pipes=pipes) 91 | black = ChessPlayer(config, pipes=pipes) 92 | 93 | while not env.done: 94 | if env.white_to_move: 95 | action = white.action(env) 96 | else: 97 | action = black.action(env) 98 | env.step(action) 99 | if env.num_halfmoves >= config.play.max_game_length: 100 | env.adjudicate() 101 | 102 | if env.winner == Winner.white: 103 | black_win = -1 104 | elif env.winner == Winner.black: 105 | black_win = 1 106 | else: 107 | black_win = 0 108 | 109 | black.finish_game(black_win) 110 | white.finish_game(-black_win) 111 | 112 | data = [] 113 | for i in range(len(white.moves)): 114 | data.append(white.moves[i]) 115 | if i < len(black.moves): 116 | data.append(black.moves[i]) 117 | 118 | cur.append(pipes) 119 | return env, data -------------------------------------------------------------------------------- /src/chess_zero/worker/sl.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from concurrent.futures import ProcessPoolExecutor, as_completed 4 | from datetime import datetime 5 | from logging import getLogger 6 | from threading import Thread 7 | from time import time 8 | 9 | import chess.pgn 10 | 11 | from chess_zero.agent.player_chess import ChessPlayer 12 | from chess_zero.config import Config 13 | from chess_zero.env.chess_env import ChessEnv, Winner 14 | from chess_zero.lib.data_helper import write_game_data_to_file, find_pgn_files 15 | 16 | logger = getLogger(__name__) 17 | 18 | TAG_REGEX = re.compile(r"^\[([A-Za-z0-9_]+)\s+\"(.*)\"\]\s*$") 19 | 20 | 21 | def start(config: Config): 22 | return SupervisedLearningWorker(config).start() 23 | 24 | 25 | class SupervisedLearningWorker: 26 | def __init__(self, config: Config): 27 | """ 28 | :param config: 29 | """ 30 | self.config = config 31 | self.buffer = [] 32 | 33 | def start(self): 34 | self.buffer = [] 35 | # noinspection PyAttributeOutsideInit 36 | self.idx = 0 37 | start_time = time() 38 | with ProcessPoolExecutor(max_workers=7) as executor: 39 | games = self.get_games_from_all_files() 40 | for res in as_completed([executor.submit(get_buffer, self.config, game) for game in games]): #poisoned reference (memleak) 41 | self.idx += 1 42 | env, data = res.result() 43 | self.save_data(data) 44 | end_time = time() 45 | logger.debug(f"game {self.idx:4} time={(end_time - start_time):.3f}s " 46 | f"halfmoves={env.num_halfmoves:3} {env.winner:12}" 47 | f"{' by resign ' if env.resigned else ' '}" 48 | f"{env.observation.split(' ')[0]}") 49 | start_time = end_time 50 | 51 | if len(self.buffer) > 0: 52 | self.flush_buffer() 53 | 54 | def get_games_from_all_files(self): 55 | files = find_pgn_files(self.config.resource.play_data_dir) 56 | print (files) 57 | games = [] 58 | for filename in files: 59 | games.extend(get_games_from_file(filename)) 60 | print("done reading") 61 | return games 62 | 63 | def save_data(self, data): 64 | self.buffer += data 65 | if self.idx % self.config.play_data.sl_nb_game_in_file == 0: 66 | self.flush_buffer() 67 | 68 | def flush_buffer(self): 69 | rc = self.config.resource 70 | game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f") 71 | path = os.path.join(rc.play_data_dir, 
rc.play_data_filename_tmpl % game_id) 72 | logger.info(f"save play data to {path}") 73 | thread = Thread(target = write_game_data_to_file, args=(path, self.buffer)) 74 | thread.start() 75 | self.buffer = [] 76 | 77 | def get_games_from_file(filename): 78 | pgn = open(filename, errors='ignore') 79 | offsets = list(chess.pgn.scan_offsets(pgn)) 80 | n = len(offsets) 81 | print(f"found {n} games") 82 | games = [] 83 | for offset in offsets: 84 | pgn.seek(offset) 85 | games.append(chess.pgn.read_game(pgn)) 86 | return games 87 | 88 | def clip_elo_policy(config, elo): 89 | return min(1, max(0, elo - config.play_data.min_elo_policy) / config.play_data.max_elo_policy) 90 | # 0 until min_elo, 1 after max_elo, linear in between 91 | 92 | def get_buffer(config, game) -> (ChessEnv, list): 93 | env = ChessEnv().reset() 94 | white = ChessPlayer(config, dummy = True) 95 | black = ChessPlayer(config, dummy = True) 96 | result = game.headers["Result"] 97 | white_elo, black_elo = int(game.headers["WhiteElo"]), int(game.headers["BlackElo"]) 98 | white_weight = clip_elo_policy(config, white_elo) 99 | black_weight = clip_elo_policy(config, black_elo) 100 | 101 | actions = [] 102 | while not game.is_end(): 103 | game = game.variation(0) 104 | actions.append(game.move.uci()) 105 | k = 0 106 | while not env.done and k < len(actions): 107 | if env.white_to_move: 108 | action = white.sl_action(env.observation, actions[k], weight= white_weight) #ignore=True 109 | else: 110 | action = black.sl_action(env.observation, actions[k], weight= black_weight) #ignore=True 111 | env.step(action, False) 112 | k += 1 113 | 114 | if not env.board.is_game_over() and result != '1/2-1/2': 115 | env.resigned = True 116 | if result == '1-0': 117 | env.winner = Winner.white 118 | black_win = -1 119 | elif result == '0-1': 120 | env.winner = Winner.black 121 | black_win = 1 122 | else: 123 | env.winner = Winner.draw 124 | black_win = 0 125 | 126 | black.finish_game(black_win) 127 | white.finish_game(-black_win) 128 | 129 | data = [] 130 | for i in range(len(white.moves)): 131 | data.append(white.moves[i]) 132 | if i < len(black.moves): 133 | data.append(black.moves[i]) 134 | 135 | return env, data --------------------------------------------------------------------------------
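
To close, a short sketch (again not a file in the repository) of how a single stored position becomes network input, mirroring what convert_to_cheating_data in optimize.py does. The FEN literal is an arbitrary example chosen for illustration, the zero vector stands in for a real MCTS visit-count distribution, and the snippet assumes chess_zero, numpy and python-chess are importable:

    import numpy as np

    from chess_zero.config import Config
    from chess_zero.env.chess_env import canon_input_planes, is_black_turn, testeval

    fen = "rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR b KQkq e3 0 1"  # Black to move after 1.e4

    planes = canon_input_planes(fen)        # (18, 8, 8) float32; the board is mirrored so the side to move plays as White
    policy = np.zeros(Config.n_labels)      # stand-in for the visit-count distribution over the 1968 UCI labels
    if is_black_turn(fen):
        policy = Config.flip_policy(policy) # remap the policy indices to match the mirrored board

    print(planes.shape, policy.shape, testeval(fen))

Because every training example is presented from the side to move, the policy has to be re-indexed through flipped_uci_labels whenever the original position had Black on move; that is exactly the flip the optimize worker applies before fitting the model.
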