├── .circleci
├── Dockerfile
└── config.yml
├── .clang-format
├── .gitignore
├── .gitmodules
├── CONTRIBUTING.md
├── COPYING
├── FLAGS.md
├── README.md
├── appveyor.yml
├── build-cl.cmd
├── build-cuda.cmd
├── build.sh
├── changelog.txt
├── checkdir.py
├── dist
└── README-cuda.txt
├── meson.build
├── meson_options.txt
├── scripts
└── bumpversion.py
├── src
├── benchmark
│ ├── benchmark.cc
│ └── benchmark.h
├── chess
│ ├── bitboard.cc
│ ├── bitboard.h
│ ├── board.cc
│ ├── board.h
│ ├── board_test.cc
│ ├── callbacks.h
│ ├── position.cc
│ ├── position.h
│ ├── position_test.cc
│ ├── uciloop.cc
│ └── uciloop.h
├── engine.cc
├── engine.h
├── main.cc
├── mcts
│ ├── auxengine.cc
│ ├── node.cc
│ ├── node.h
│ ├── params.cc
│ ├── params.h
│ ├── search.cc
│ └── search.h
├── neural
│ ├── blas
│ │ ├── README.md
│ │ ├── blas.h
│ │ ├── convolution1.cc
│ │ ├── convolution1.h
│ │ ├── fully_connected_layer.cc
│ │ ├── fully_connected_layer.h
│ │ ├── network_blas.cc
│ │ ├── se_unit.cc
│ │ ├── se_unit.h
│ │ ├── winograd_convolution3.cc
│ │ ├── winograd_convolution3.h
│ │ └── winograd_transform.ispc
│ ├── cache.cc
│ ├── cache.h
│ ├── cuda
│ │ ├── common_kernels.cu
│ │ ├── cuda_common.h
│ │ ├── fp16_kernels.cu
│ │ ├── kernels.h
│ │ ├── layers.cc
│ │ ├── layers.h
│ │ ├── network_cudnn.cc
│ │ └── readme.txt
│ ├── encoder.cc
│ ├── encoder.h
│ ├── encoder_test.cc
│ ├── factory.cc
│ ├── factory.h
│ ├── loader.cc
│ ├── loader.h
│ ├── network.h
│ ├── network_check.cc
│ ├── network_demux.cc
│ ├── network_legacy.cc
│ ├── network_legacy.h
│ ├── network_mux.cc
│ ├── network_random.cc
│ ├── network_rr.cc
│ ├── network_st_batch.cc
│ ├── network_st_batch.h
│ ├── network_tf.cc
│ ├── opencl
│ │ ├── OpenCL.cc
│ │ ├── OpenCL.h
│ │ ├── OpenCLBuffers.cc
│ │ ├── OpenCLBuffers.h
│ │ ├── OpenCLParams.h
│ │ ├── OpenCLTuner.cc
│ │ ├── OpenCLTuner.h
│ │ ├── README.md
│ │ ├── clblast_level3
│ │ │ ├── common.opencl
│ │ │ ├── xgemm_batched.opencl
│ │ │ ├── xgemm_part1.opencl
│ │ │ ├── xgemm_part2.opencl
│ │ │ ├── xgemm_part3.opencl
│ │ │ └── xgemv.opencl
│ │ ├── clsource
│ │ │ ├── config.opencl
│ │ │ ├── convolve1.opencl
│ │ │ ├── convolve3.opencl
│ │ │ ├── policymap.opencl
│ │ │ └── se.opencl
│ │ └── network_opencl.cc
│ ├── shared
│ │ ├── activation.cc
│ │ ├── activation.h
│ │ ├── policy_map.h
│ │ ├── winograd_filter.cc
│ │ └── winograd_filter.h
│ ├── writer.cc
│ └── writer.h
├── selfplay
│ ├── game.cc
│ ├── game.h
│ ├── loop.cc
│ ├── loop.h
│ ├── tournament.cc
│ └── tournament.h
├── syzygy
│ ├── syzygy.cc
│ ├── syzygy.h
│ └── syzygy_test.cc
├── utils
│ ├── bititer.h
│ ├── cache-old.h
│ ├── cache.h
│ ├── commandline.cc
│ ├── commandline.h
│ ├── configfile.cc
│ ├── configfile.h
│ ├── cppattributes.h
│ ├── exception.h
│ ├── fastmath.h
│ ├── filesystem.h
│ ├── filesystem.posix.cc
│ ├── filesystem.win32.cc
│ ├── hashcat.h
│ ├── hashcat_test.cc
│ ├── histogram.cc
│ ├── histogram.h
│ ├── logging.cc
│ ├── logging.h
│ ├── mutex.h
│ ├── optional.h
│ ├── optionsdict.cc
│ ├── optionsdict.h
│ ├── optionsparser.cc
│ ├── optionsparser.h
│ ├── optionsparser_test.cc
│ ├── random.cc
│ ├── random.h
│ ├── smallarray.h
│ ├── string.cc
│ ├── string.h
│ ├── transpose.cc
│ ├── transpose.h
│ ├── weights_adapter.cc
│ └── weights_adapter.h
├── version.cc
├── version.h
└── version.inc
├── subprojects
├── gtest.wrap
├── protobuf-3.6.0.wrap
├── protobuf.wrap
└── zlib.wrap
├── tensorflow.md
├── third_party
└── cl2.hpp
└── windows_build.md
/.circleci/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM floopcz/tensorflow_cc:ubuntu-shared-cuda
2 |
3 | RUN wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB && apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB && sh -c 'echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list' && apt-get update && apt-get install -y intel-mkl-64bit-2018.2-046
4 | RUN apt-get install -y clang-6.0 ninja-build python3-pip nvidia-opencl-dev libopenblas-dev libboost-dev nvidia-cuda-dev nvidia-cuda-toolkit libgtest-dev git ssh tar gzip ca-certificates sudo
5 | RUN pip3 install meson
6 | RUN ln -s /usr/include/ /usr/include/openblas
7 |
8 | RUN curl -OL https://github.com/google/protobuf/releases/download/v3.5.1/protoc-3.5.1-linux-x86_64.zip
9 | RUN unzip protoc-3.5.1-linux-x86_64.zip -d protoc3
10 | RUN sudo mv protoc3/bin/* /usr/local/bin/
11 | RUN sudo mv protoc3/include/* /usr/local/include/
12 |
--------------------------------------------------------------------------------
/.circleci/config.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | jobs:
3 | build:
4 | docker:
5 | - image: danieluranga/leela_chess_zero-lc0_ubuntu_builder:0.0.4
6 | steps:
7 | - checkout
8 | - run:
9 | name: "Pull Submodules"
10 | command: |
11 | git submodule init
12 | git submodule update --remote
13 | - run:
14 | name: Build clang version
15 | command: CC=clang-6.0 CXX=clang++-6.0 ./build.sh
16 | - run:
17 | command: cp build/release/lc0 /tmp/lc0-clang
18 | - run:
19 | name: Build g++ version
20 | command: ./build.sh
21 | - run:
22 | command: cp build/release/lc0 /tmp/lc0-g++
23 | - store_artifacts:
24 | path: /tmp/lc0-clang
25 | destination: lc0-ubuntu-18-04-clang
26 | - store_artifacts:
27 | path: /tmp/lc0-g++
28 | destination: lc0-ubuntu-18-04-g++
29 |
--------------------------------------------------------------------------------
/.clang-format:
--------------------------------------------------------------------------------
1 | ---
2 | Language: Cpp
3 | BasedOnStyle: Google
4 | DerivePointerAlignment: false
5 | ...
6 |
7 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | build/
2 | testdata/
3 | LC0VSProj/
4 | CUDA_NN/
5 | .DS_Store
6 | xcuserdata
7 | subprojects/*
8 | !subprojects/*.wrap
9 | lc0.xcodeproj/
10 | *.swp
11 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "libs/lczero-common"]
2 | path = libs/lczero-common
3 | url = https://github.com/LeelaChessZero/lczero-common.git
4 |
--------------------------------------------------------------------------------
/build-cl.cmd:
--------------------------------------------------------------------------------
1 | rd /s build
2 |
3 | rem set MSBuild="C:\Program Files (x86)\MSBuild\14.0\Bin\MSBuild.exe"
4 | set MSBuild="C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\MSBuild\15.0\Bin\MSBuild.exe"
5 |
6 | rem call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64
7 | call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64
8 |
9 | meson.py build --backend vs2017 --buildtype release ^
10 | -Dmkl_include="C:\Program Files (x86)\IntelSWTools\compilers_and_libraries\windows\mkl\include" ^
11 | -Dmkl_libdirs="C:\Program Files (x86)\IntelSWTools\compilers_and_libraries\windows\mkl\lib\intel64" ^
12 | -Dopencl_libdirs="C:\Program Files (x86)\AMD APP SDK\3.0\lib\x86_64" ^
13 | -Dopencl_include="C:\Program Files (x86)\AMD APP SDK\3.0\include" ^
14 | -Ddefault_library=static
15 |
16 | pause
17 |
18 | cd build
19 |
20 | %MSBuild% /p:Configuration=Release /p:Platform=x64 ^
21 | /p:PreferredToolArchitecture=x64 "subprojects\zlib-1.2.11\Windows resource for file 'win32_zlib1.rc'@cus.vcxproj" ^
22 | /filelogger
23 |
24 | %MSBuild% /p:Configuration=Release /p:Platform=x64 ^
25 | /p:PreferredToolArchitecture=x64 subprojects\zlib-1.2.11\subprojects@zlib-1.2.11@@z@sta.vcxproj ^
26 | /filelogger
27 |
28 | %MSBuild% /p:Configuration=Release /p:Platform=x64 ^
29 | /p:PreferredToolArchitecture=x64 lc0@exe.vcxproj ^
30 | /filelogger
31 |
32 |
--------------------------------------------------------------------------------
/build-cuda.cmd:
--------------------------------------------------------------------------------
1 | rd /s build
2 |
3 | rem set MSBuild="C:\Program Files (x86)\MSBuild\14.0\Bin\MSBuild.exe"
4 | set MSBuild="C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\MSBuild\15.0\Bin\MSBuild.exe"
5 | rem call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64
6 | call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64
7 | meson.py build --backend vs2017 --buildtype release ^
8 | -Dcudnn_libdirs="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0\lib\x64","C:\dev\cuDNN\cuda\lib\x64" ^
9 | -Dcudnn_include="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0\include","C:\dev\cuDNN\cuda\include" ^
10 | -Ddefault_library=static
11 |
12 | pause
13 |
14 |
15 | cd build
16 |
17 | %MSBuild% ^
18 | /p:Configuration=Release ^
19 | /p:Platform=x64 ^
20 | /p:PreferredToolArchitecture=x64 lc0.sln ^
21 | /filelogger
22 |
23 |
--------------------------------------------------------------------------------
/build.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | set -e
4 |
5 | case $1 in
6 | plain|debug|debugoptimized|release|minsize)
7 | BUILDTYPE=$1
8 | shift
9 | ;;
10 | *)
11 | BUILDTYPE=release
12 | ;;
13 | esac
14 |
15 | BUILDDIR=build/${BUILDTYPE}
16 |
17 | if [ -f ${BUILDDIR}/build.ninja ]
18 | then
19 | meson configure ${BUILDDIR} -Dbuildtype=${BUILDTYPE} -Dprefix=${INSTALL_PREFIX:-/usr/local} "$@"
20 | else
21 | meson ${BUILDDIR} --buildtype ${BUILDTYPE} --prefix ${INSTALL_PREFIX:-/usr/local} "$@"
22 | fi
23 |
24 | pushd ${BUILDDIR}
25 |
26 | NINJA=$(awk '/ninja/ {ninja=$4} END {print ninja}' meson-logs/meson-log.txt)
27 |
28 | if [ -n "${INSTALL_PREFIX}" ]
29 | then
30 | ${NINJA} install
31 | else
32 | ${NINJA}
33 | fi
34 |
35 | popd
36 |
--------------------------------------------------------------------------------
/checkdir.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | import sys
4 | import os
5 | if len(sys.argv) > 1 and os.path.isdir(sys.argv[1]):
6 | exit(0)
7 | exit(1)
8 |
--------------------------------------------------------------------------------
/dist/README-cuda.txt:
--------------------------------------------------------------------------------
1 | Lc0
2 |
3 | Lc0 is a UCI-compliant chess engine designed to play chess via
4 | neural network, specifically those of the LeelaChessZero project
5 | (https://lczero.org).
6 |
7 | This binary uses CUDA and cuDNN dynamic link libraries copyrighted
8 | by Nvidia corporation (http://www.nvidia.com), and redistributed as
9 | permitted by the respective license file (see CUDA.txt section 2.2
10 | and CUDNN.txt section "CUDNN DISTRIBUTION" for details). You are
11 | authorized to redistribute these libraries together with this
12 | package as a whole but not individually.
13 |
14 |
15 | License
16 |
17 | Leela Chess is free software: you can redistribute it and/or modify
18 | it under the terms of the GNU General Public License as published by
19 | the Free Software Foundation, either version 3 of the License, or
20 | (at your option) any later version.
21 |
22 | Leela Chess is distributed in the hope that it will be useful,
23 | but WITHOUT ANY WARRANTY; without even the implied warranty of
24 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 | GNU General Public License for more details.
26 |
27 | You should have received a copy of the GNU General Public License
28 | along with Leela Chess. If not, see <https://www.gnu.org/licenses/>.
29 |
30 | Additional permission under GNU GPL version 3 section 7
31 |
32 | If you modify this Program, or any covered work, by linking or
33 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
34 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
35 | modified version of those libraries), containing parts covered by the
36 | terms of the respective license agreement, the licensors of this
37 | Program grant you additional permission to convey the resulting work.
38 |
39 |
--------------------------------------------------------------------------------
/meson_options.txt:
--------------------------------------------------------------------------------
1 | option('tensorflow_include',
2 | type: 'array',
3 | value: ['/usr/local/include/tensorflow/'],
4 | description: 'Paths to tensorflow include directories')
5 |
6 | option('protobuf_include',
7 | type: 'array',
8 | value: ['/usr/local/include/'],
9 | description: 'Paths to protobuf include directories')
10 |
11 | option('openblas_include',
12 | type: 'array',
13 | value: ['/usr/include/openblas/'],
14 | description: 'Paths to openblas include directories')
15 |
16 | option('opencl_include',
17 | type: 'array',
18 | value: ['/usr/include/'],
19 | description: 'Paths to OpenCL include directories')
20 |
21 | option('tensorflow_libdir',
22 | type: 'array',
23 | value: ['/usr/local/lib/tensorflow_cc/'],
24 | description: 'Paths to tensorflow libraries')
25 |
26 | option('protobuf_libdir',
27 | type: 'array',
28 | value: ['/usr/lib/x86_64-linux-gnu/'],
29 | description: 'Paths to protobuf libraries')
30 |
31 | option('openblas_libdirs',
32 | type: 'array',
33 | value: ['/usr/lib/'],
34 | description: 'Paths to OpenBLAS libraries')
35 |
36 | option('opencl_libdirs',
37 | type: 'array',
38 | value: ['/opt/cuda/lib64/', '/usr/local/cuda/lib64/'],
39 | description: 'Paths to OpenCL libraries')
40 |
41 | option('cudnn_libdirs',
42 | type: 'array',
43 | value: ['/opt/cuda/lib64/', '/usr/local/cuda/lib64/'],
44 | description: 'Paths to Cuda/cudnn libraries')
45 |
46 | option('mkl_libdirs',
47 | type: 'array',
48 | value: ['/opt/intel/lib/intel64', '/opt/intel/mkl/lib/intel64', '/opt/intel/mkl/lib'],
49 | description: 'Paths to MKL libraries')
50 |
51 | option('mkl_include',
52 | type: 'array',
53 | value: ['/opt/intel/mkl/include'],
54 |        description: 'Paths to MKL include directories')
55 |
56 | option('cudnn_include',
57 | type: 'array',
58 | value: ['/opt/cuda/include/', '/usr/local/cuda/include/'],
59 | description: 'Paths to cudnn include directory')
60 |
61 | option('build_backends',
62 | type: 'boolean',
63 | value: true,
64 | description: 'Build backends for NN computation')
65 |
66 | option('blas',
67 | type: 'boolean',
68 | value: true,
69 | description: 'Enable BLAS backend')
70 |
71 | option('ispc',
72 | type: 'boolean',
73 | value: true,
74 | description: 'use ispc')
75 |
76 | option('ispc_native_only',
77 | type: 'boolean',
78 | value: true,
79 | description: 'use ispc and enable native arch only')
80 |
81 | option('cudnn',
82 | type: 'boolean',
83 | value: true,
84 | description: 'Enable cuDNN backend')
85 |
86 | option('opencl',
87 | type: 'boolean',
88 | value: true,
89 | description: 'Enable OpenCL backend')
90 |
91 | option('tensorflow',
92 | type: 'boolean',
93 | value: false,
94 | description: 'Enable TensorFlow backend')
95 |
96 | option('openblas',
97 | type: 'boolean',
98 | value: true,
99 | description: 'Enable OpenBLAS support')
100 |
101 | option('mkl',
102 | type: 'boolean',
103 | value: true,
104 | description: 'Enable MKL BLAS support')
105 |
106 | option('accelerate',
107 | type: 'boolean',
108 | value: true,
109 | description: 'Enable Accelerate BLAS support')
110 |
111 | option('popcnt',
112 | type: 'boolean',
113 | value: true,
114 | description: 'Use the popcnt instruction')
115 |
116 | option('pext',
117 | type: 'boolean',
118 | value: false,
119 | description: 'Use the pext instruction')
120 |
121 | option('gtest',
122 | type: 'boolean',
123 | value: true,
124 | description: 'Build gtest tests')
125 |
126 | option('protobuf-3-6-0',
127 | type: 'boolean',
128 | value: false,
129 | description: 'Use the protobuf 3.6.0 subproject')
130 |
--------------------------------------------------------------------------------
/scripts/bumpversion.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import argparse
4 | import os
5 |
6 |
7 | VERSION_FILE = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../src/version.inc")
8 | VERSION_CONTENT = """
9 | #define LC0_VERSION_MAJOR {}
10 | #define LC0_VERSION_MINOR {}
11 | #define LC0_VERSION_PATCH {}
12 | #define LC0_VERSION_POSTFIX "{}"
13 | """
14 | VERSION_CONTENT = VERSION_CONTENT.strip()
15 |
16 |
17 | def get_version():
18 | with open(VERSION_FILE, 'r') as f:
19 | major = int(f.readline().split()[2])
20 | minor = int(f.readline().split()[2])
21 | patch = int(f.readline().split()[2])
22 | postfix = f.readline().split()[2]
23 |
24 | postfix = postfix.replace('"', '')
25 | return major, minor, patch, postfix
26 |
27 |
28 | def set_version(major, minor, patch, postfix=""):
29 | version_inc = VERSION_CONTENT.format(major, minor, patch, postfix)
30 |
31 | with open(VERSION_FILE, 'w') as f:
32 | f.write(version_inc)
33 |
34 |
35 | def update(major, minor, patch, postfix=""):
36 | set_version(major, minor, patch, postfix)
37 |
38 |
39 | def main(argv):
40 | major, minor, patch, postfix = get_version()
41 |
42 | if argv.major:
43 | major += 1
44 | minor = 0
45 | patch = 0
46 | postfix = ""
47 | update(major, minor, patch)
48 | if argv.minor:
49 | minor += 1
50 | patch = 0
51 | postfix = ""
52 | update(major, minor, patch)
53 | if argv.patch:
54 | patch += 1
55 | postfix = ""
56 | update(major, minor, patch)
57 | if argv.postfix and len(argv.postfix) > 0:
58 | postfix = argv.postfix
59 | update(major, minor, patch, postfix)
60 |
61 | if len(postfix) == 0:
62 | print('v{}.{}.{}'.format(major, minor, patch))
63 | else:
64 | print('v{}.{}.{}-{}'.format(major, minor, patch, postfix))
65 |
66 |
67 | if __name__ == "__main__":
68 | argparser = argparse.ArgumentParser(description=\
69 | 'Set or read current version.')
70 | argparser.add_argument('--major', action='store_true',
71 | help='bumps major version')
72 | argparser.add_argument('--minor', action='store_true',
73 | help='bumps minor version')
74 | argparser.add_argument('--patch', action='store_true',
75 | help='bumps patch')
76 | argparser.add_argument('--postfix', type=str,
77 | help='set postfix')
78 | argv = argparser.parse_args()
79 | main(argv)
80 |
81 |
--------------------------------------------------------------------------------
/src/benchmark/benchmark.cc:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018-2019 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see <https://www.gnu.org/licenses/>.
17 |
18 | Additional permission under GNU GPL version 3 section 7
19 |
20 | If you modify this Program, or any covered work, by linking or
21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
23 | modified version of those libraries), containing parts covered by the
24 | terms of the respective license agreement, the licensors of this
25 | Program grant you additional permission to convey the resulting work.
26 | */
27 |
28 | #include "benchmark/benchmark.h"
29 | #include "mcts/search.h"
30 |
31 | namespace lczero {
32 | namespace {
33 | const int kDefaultThreads = 2;
34 |
35 | const OptionId kThreadsOptionId{"threads", "Threads",
36 | "Number of (CPU) worker threads to use.", 't'};
37 | const OptionId kNNCacheSizeId{
38 | "nncache", "NNCacheSize",
39 | "Number of positions to store in a memory cache. A large cache can speed "
40 | "up searching, but takes memory."};
41 | const OptionId kNodesId{"nodes", "", "Number of nodes to run as a benchmark."};
42 | const OptionId kMovetimeId{"movetime", "",
43 | "Benchmark time allocation, in milliseconds."};
44 | const OptionId kFenId{"fen", "", "Benchmark initial position FEN."};
45 |
46 | } // namespace
47 |
48 | void Benchmark::Run() {
49 | OptionsParser options;
50 | NetworkFactory::PopulateOptions(&options);
51 |   options.Add<IntOption>(kThreadsOptionId, 1, 128) = kDefaultThreads;
52 |   options.Add<IntOption>(kNNCacheSizeId, 0, 999999999) = 200000;
53 | SearchParams::Populate(&options);
54 |
55 |   options.Add<IntOption>(kNodesId, -1, 999999999) = -1;
56 |   options.Add<IntOption>(kMovetimeId, -1, 999999999) = 10000;
57 |   options.Add<StringOption>(kFenId) = ChessBoard::kStartposFen;
58 |
59 | if (!options.ProcessAllFlags()) return;
60 |
61 | try {
62 | auto option_dict = options.GetOptionsDict();
63 |
64 | auto network = NetworkFactory::LoadNetwork(option_dict);
65 |
66 | NodeTree tree;
67 |     tree.ResetToPosition(option_dict.Get<std::string>(kFenId.GetId()), {});
68 |
69 | NNCache cache;
70 |     cache.SetCapacity(option_dict.Get<int>(kNNCacheSizeId.GetId()));
71 |
72 | const auto start = std::chrono::steady_clock::now();
73 |
74 | SearchLimits limits;
75 |     int visits = option_dict.Get<int>(kNodesId.GetId());
76 |     const int movetime = option_dict.Get<int>(kMovetimeId.GetId());
77 | if (movetime > -1) {
78 | limits.search_deadline = start + std::chrono::milliseconds(movetime);
79 | }
80 | if (visits > -1) {
81 | limits.visits = visits;
82 | }
83 |
84 |     auto search = std::make_unique<Search>(
85 | tree, network.get(),
86 | std::bind(&Benchmark::OnBestMove, this, std::placeholders::_1),
87 | std::bind(&Benchmark::OnInfo, this, std::placeholders::_1), limits,
88 | option_dict, &cache, nullptr);
89 |
90 |     search->StartThreads(option_dict.Get<int>(kThreadsOptionId.GetId()));
91 |
92 | search->Wait();
93 |
94 | const auto end = std::chrono::steady_clock::now();
95 |     std::chrono::duration<double> time = end - start;
96 | std::cout << "Benchmark final time " << time.count() << "s calculating "
97 | << search->GetTotalPlayouts() / time.count()
98 | << " nodes per second." << std::endl;
99 | } catch (Exception& ex) {
100 | std::cerr << ex.what() << std::endl;
101 | }
102 | }
103 |
104 | void Benchmark::OnBestMove(const BestMoveInfo& move) {
105 | std::cout << "bestmove " << move.bestmove.as_string() << std::endl;
106 | }
107 |
108 | void Benchmark::OnInfo(const std::vector<ThinkingInfo>& infos) {
109 | std::string line = "Benchmark time " + std::to_string(infos[0].time);
110 | line += "ms, " + std::to_string(infos[0].nodes) + " nodes, ";
111 | line += std::to_string(infos[0].nps) + " nps";
112 | if (!infos[0].pv.empty()) line += ", move " + infos[0].pv[0].as_string();
113 | std::cout << line << std::endl;
114 | }
115 |
116 | } // namespace lczero
117 |
--------------------------------------------------------------------------------
/src/benchmark/benchmark.h:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see <https://www.gnu.org/licenses/>.
17 |
18 | Additional permission under GNU GPL version 3 section 7
19 |
20 | If you modify this Program, or any covered work, by linking or
21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
23 | modified version of those libraries), containing parts covered by the
24 | terms of the respective license agreement, the licensors of this
25 | Program grant you additional permission to convey the resulting work.
26 | */
27 |
28 | #pragma once
29 |
30 | #include "mcts/search.h"
31 | #include "neural/cache.h"
32 | #include "neural/factory.h"
33 | #include "utils/optionsparser.h"
34 |
35 | namespace lczero {
36 |
37 | class Benchmark{
38 | public:
39 | Benchmark() = default;
40 |
41 | void Run();
42 | void OnBestMove(const BestMoveInfo& move);
43 |   void OnInfo(const std::vector<ThinkingInfo>& infos);
44 | };
45 |
46 | } // namespace lczero
47 |
--------------------------------------------------------------------------------
/src/chess/callbacks.h:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see <https://www.gnu.org/licenses/>.
17 |
18 | Additional permission under GNU GPL version 3 section 7
19 |
20 | If you modify this Program, or any covered work, by linking or
21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
23 | modified version of those libraries), containing parts covered by the
24 | terms of the respective license agreement, the licensors of this
25 | Program grant you additional permission to convey the resulting work.
26 | */
27 |
28 | #pragma once
29 |
30 | #include <functional>
31 | #include <string>
32 | #include <vector>
33 | #include "chess/bitboard.h"
34 | #include "chess/position.h"
35 | #include "utils/optional.h"
36 |
37 | namespace lczero {
38 |
39 | // Is sent when search decides on the best move.
40 | struct BestMoveInfo {
41 | BestMoveInfo(Move bestmove, Move ponder = Move{})
42 | : bestmove(bestmove), ponder(ponder) {}
43 | Move bestmove;
44 | Move ponder;
45 | // Those are extensions and not really UCI protocol.
46 | // 1 if it's "player1", 2 if it's "player2"
47 | int player = -1;
48 | // Index of the game in the tournament (0-based).
49 | int game_id = -1;
50 | // The color of the player, if known.
51 |   optional<bool> is_black;
52 |
53 |   using Callback = std::function<void(const BestMoveInfo&)>;
54 | };
55 |
56 | // Is sent during the search.
57 | struct ThinkingInfo {
58 | // Full depth.
59 | int depth = -1;
60 | // Maximum depth.
61 | int seldepth = -1;
62 | // Time since start of thinking.
63 | int64_t time = -1;
64 | // Nodes visited.
65 | int64_t nodes = -1;
66 | // Nodes per second.
67 | int nps = -1;
68 | // Hash fullness * 1000
69 | int hashfull = -1;
70 | // Win in centipawns.
71 |   optional<int> score;
72 | // Number of successful TB probes (not the same as playouts ending in TB hit).
73 | int tb_hits = -1;
74 | // Best line found. Moves are from perspective of white player.
75 |   std::vector<Move> pv;
76 | // Multipv index.
77 | int multipv = -1;
78 | // Freeform comment.
79 | std::string comment;
80 |
81 | // Those are extensions and not really UCI protocol.
82 | // 1 if it's "player1", 2 if it's "player2"
83 | int player = -1;
84 | // Index of the game in the tournament (0-based).
85 | int game_id = -1;
86 | // The color of the player, if known.
87 |   optional<bool> is_black;
88 |
89 |   using Callback = std::function<void(const std::vector<ThinkingInfo>&)>;
90 | };
91 |
92 | // Is sent when a single game is finished.
93 | struct GameInfo {
94 | // Game result.
95 | GameResult game_result = GameResult::UNDECIDED;
96 | // Name of the file with training data.
97 | std::string training_filename;
98 | // Game moves.
99 |   std::vector<Move> moves;
100 | // Index of the game in the tournament (0-based).
101 | int game_id = -1;
102 | // The color of the player1, if known.
103 |   optional<bool> is_black;
104 | // Minimum resign threshold which would have resulted in a false positive
105 | // if resign had of been enabled.
106 | // Only provided if the game wasn't played with resign enabled.
107 |   optional<float> min_false_positive_threshold;
108 |
109 |   using Callback = std::function<void(const GameInfo&)>;
110 | };
111 |
112 | // Is sent in the end of tournament and also during the tournament.
113 | struct TournamentInfo {
114 | // Did tournament finish, so those results are final.
115 | bool finished = false;
116 |
117 | // Player1's [win/draw/lose] as [white/black].
118 | // e.g. results[2][1] is how many times player 1 lost as black.
119 | int results[3][2] = {{0, 0}, {0, 0}, {0, 0}};
120 |   using Callback = std::function<void(const TournamentInfo&)>;
121 | };
122 |
123 | } // namespace lczero
124 |
--------------------------------------------------------------------------------
/src/chess/uciloop.h:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see <https://www.gnu.org/licenses/>.
17 |
18 | Additional permission under GNU GPL version 3 section 7
19 |
20 | If you modify this Program, or any covered work, by linking or
21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
23 | modified version of those libraries), containing parts covered by the
24 | terms of the respective license agreement, the licensors of this
25 | Program grant you additional permission to convey the resulting work.
26 | */
27 |
28 | #pragma once
29 |
30 | #include <functional>
31 | #include <string>
32 | #include <unordered_map>
33 | #include <vector>
34 | #include "chess/callbacks.h"
35 | #include "utils/exception.h"
36 |
37 | namespace lczero {
38 |
39 | struct GoParams {
40 |   optional<std::int64_t> wtime;
41 |   optional<std::int64_t> btime;
42 |   optional<std::int64_t> winc;
43 |   optional<std::int64_t> binc;
44 |   optional<int> movestogo;
45 |   optional<int> depth;
46 |   optional<int> nodes;
47 |   optional<std::int64_t> movetime;
48 |   bool infinite = false;
49 |   std::vector<std::string> searchmoves;
50 | bool ponder = false;
51 | };
52 |
53 | class UciLoop {
54 | public:
55 | virtual ~UciLoop() {}
56 | virtual void RunLoop();
57 |
58 | // Sends response to host.
59 | void SendResponse(const std::string& response);
60 | // Sends responses to host ensuring they are received as a block.
61 |   virtual void SendResponses(const std::vector<std::string>& responses);
62 |   void SendBestMove(const BestMoveInfo& move);
63 |   void SendInfo(const std::vector<ThinkingInfo>& infos);
64 | void SendId();
65 |
66 | // Command handlers.
67 | virtual void CmdUci() { throw Exception("Not supported"); }
68 | virtual void CmdIsReady() { throw Exception("Not supported"); }
69 | virtual void CmdSetOption(const std::string& /*name*/,
70 | const std::string& /*value*/,
71 | const std::string& /*context*/) {
72 | throw Exception("Not supported");
73 | }
74 | virtual void CmdUciNewGame() { throw Exception("Not supported"); }
75 | virtual void CmdPosition(const std::string& /*position*/,
76 |                            const std::vector<std::string>& /*moves*/) {
77 | throw Exception("Not supported");
78 | }
79 | virtual void CmdGo(const GoParams& /*params*/) {
80 | throw Exception("Not supported");
81 | }
82 | virtual void CmdStop() { throw Exception("Not supported"); }
83 | virtual void CmdPonderHit() { throw Exception("Not supported"); }
84 | virtual void CmdStart() { throw Exception("Not supported"); }
85 |
86 | private:
87 | bool DispatchCommand(
88 | const std::string& command,
89 | const std::unordered_map& params);
90 | };
91 |
92 | } // namespace lczero
93 |
--------------------------------------------------------------------------------
/src/main.cc:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018-2019 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 |
18 | Additional permission under GNU GPL version 3 section 7
19 |
20 | If you modify this Program, or any covered work, by linking or
21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
23 | modified version of those libraries), containing parts covered by the
24 | terms of the respective license agreement, the licensors of this
25 | Program grant you additional permission to convey the resulting work.
26 | */
27 |
28 | #include "benchmark/benchmark.h"
29 | #include "chess/board.h"
30 | #include "engine.h"
31 | #include "selfplay/loop.h"
32 | #include "utils/commandline.h"
33 | #include "utils/logging.h"
34 | #include "version.h"
35 |
// Program entry point: prints a banner, initializes chess move-generation
// tables and the command-line parser, then runs one of three modes:
// "selfplay", "benchmark", or (default) "uci".
int main(int argc, const char** argv) {
  // The banner goes to the log file and to stderr (CERR), keeping stdout
  // free for the UCI protocol.
  LOGFILE << "Leelafish, based on Lc0, started.";
  CERR << "Leelafish, based on:";
  CERR << " _";
  CERR << "| _ | |";
  CERR << "|_ |_ |_| v" << GetVersionStr() << " built " << __DATE__;
  using namespace lczero;

  // One-time initialization of the magic-bitboard tables.
  InitializeMagicBitboards();

  CommandLine::Init(argc, argv);
  CommandLine::RegisterMode("uci", "(default) Act as UCI engine");
  CommandLine::RegisterMode("selfplay", "Play games with itself");
  CommandLine::RegisterMode("benchmark", "Quick benchmark");

  if (CommandLine::ConsumeCommand("selfplay")) {
    // Selfplay mode.
    SelfPlayLoop loop;
    loop.RunLoop();
  } else if (CommandLine::ConsumeCommand("benchmark")) {
    // Benchmark mode.
    Benchmark benchmark;
    benchmark.Run();
  } else {
    // Consuming optional "uci" mode.
    CommandLine::ConsumeCommand("uci");
    // Ordinary UCI engine.
    EngineLoop loop;
    loop.RunLoop();
  }
}
67 |
--------------------------------------------------------------------------------
/src/neural/blas/README.md:
--------------------------------------------------------------------------------
1 | The files in this directory comprise the BLAS backend of Lc0.
2 |
3 | ## License
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
along with Leela Chess. If not, see <http://www.gnu.org/licenses/>.
17 |
18 | **The source files of this directory are not covered by any additional
19 | permission.**
20 |
21 |
22 |
--------------------------------------------------------------------------------
/src/neural/blas/blas.h:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 | */
18 |
19 | #pragma once
20 |
21 | // Select the BLAS vendor based on defines
22 |
// NOTE(review): the header names in the #include directives below were
// restored; they had been stripped from this copy of the file.
#ifdef USE_MKL
#include <mkl.h>
#else

#ifdef USE_OPENBLAS
#include <cblas.h>

// Specific openblas routines.
extern "C" {
int openblas_get_num_procs(void);
void openblas_set_num_threads(int num_threads);
char* openblas_get_corename(void);
char* openblas_get_config(void);
}

#else

// On macOS, fall back to Apple's Accelerate framework for BLAS.
#ifdef __APPLE__
#include <Accelerate/Accelerate.h>
#define USE_ACCELERATE
#endif

#endif  // USE_OPENBLAS

#endif  // USE_MKL
48 |
--------------------------------------------------------------------------------
/src/neural/blas/convolution1.cc:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 | */
18 |
19 | #include "neural/blas/convolution1.h"
20 | #include "neural/blas/blas.h"
21 |
22 | namespace lczero {
23 |
// Computes the 1x1 convolution for each batch element as one matrix
// multiplication: for every board square, output channels are a linear
// combination (via @weights) of the input channels.
void Convolution1::Forward(const size_t batch_size, const size_t input_channels,
                           const size_t output_channels, const float* input,
                           const float* weights, float* output) {
  for (size_t i = 0; i < batch_size; i++) {
    // C←αAB + βC
    // M Number of rows in matrices A and C.
    // N Number of columns in matrices B and C.
    // K Number of columns in matrix A; number of rows in matrix B.
    // lda The size of the first dimension of matrix A; if you are
    // passing a matrix A[m][n], the value should be m.
    // cblas_sgemm(CblasRowMajor, TransA, TransB, M, N, K, alpha, A, lda, B,
    // ldb, beta, C, N);

    //           C                    A                    B
    //
    //        outputs     :=       weights        x     input
    //
    // cols: kSquares (N)      input_channels (K)   kSquares (N)
    //
    // rows: output_channels (M) output_channels (M) input_channels (K)

    const float* batch_input = input + i * kSquares * input_channels;
    float* batch_output = output + i * kSquares * output_channels;

    cblas_sgemm(CblasRowMajor,         // Row major format.
                CblasNoTrans,          // A not transposed.
                CblasNoTrans,          // B not transposed.
                (int)output_channels,  // M
                kSquares,              // N
                (int)input_channels,   // K
                1.0f,                  // Alpha
                weights,               // A
                (int)input_channels,   // lda, leading rank of A
                batch_input,           // B
                kSquares,              // ldb, leading rank of B
                0.0f,                  // beta
                batch_output,          // C
                kSquares);             // ldc, leading rank of C
  }
}
64 |
65 | } // namespace lczero
66 |
--------------------------------------------------------------------------------
/src/neural/blas/convolution1.h:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 | */
18 |
19 | #pragma once
20 |
21 | #include
22 | #include
23 |
24 | namespace lczero {
25 |
// Convolution 1x1: a per-square linear mixing of channels on the 8x8 board.
class Convolution1 {
 public:
  Convolution1() = delete;  // Static-only class; not instantiable.

  // Batched forward inference.
  static void Forward(const size_t batch_size, const size_t input_channels,
                      const size_t output_channels, const float* input,
                      const float* weights, float* output);

 private:
  static constexpr auto kWidth = 8;
  static constexpr auto kHeight = 8;
  static constexpr auto kSquares = kWidth * kHeight;  // 64 board squares.
};
41 | } // namespace lczero
42 |
--------------------------------------------------------------------------------
/src/neural/blas/fully_connected_layer.cc:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 | */
18 |
19 | #include "neural/blas/fully_connected_layer.h"
20 | #include "neural/blas/blas.h"
21 |
22 | #include
23 | #include
24 | #include
25 |
26 | namespace lczero {
27 |
// Batched dense layer: outputs := weights x inputs, then biases are added
// and ReLU optionally applied. @weights is [output_size x input_size]
// row-major; @inputs/@outputs hold batch_size vectors back to back.
void FullyConnectedLayer::Forward1D(size_t batch_size, const size_t input_size,
                                    const size_t output_size,
                                    const float* inputs, const float* weights,
                                    const float* biases, bool apply_relu,
                                    float* outputs) {
  if (batch_size == 1) {
    // Just a matrix-vector multiplication
    //
    //             C                A               B
    //
    //         outputs    :=    weights   x     inputs
    //
    // cols:       1            input_size       1
    //
    // rows:   output_size      output_size   input_size
    //

    cblas_sgemv(CblasRowMajor, CblasNoTrans,
                // M              K
                (int)output_size, (int)input_size, 1.0f, weights,
                (int)input_size, inputs, 1, 0.0f, outputs, 1);
  } else {
    // more columns, matrix-matrix multiplication
    //
    //             C                     A                         B
    //
    //          outputs      :=      weights        x          inputs
    //
    // cols:  batch_size (N)     input_size (K)          batch_size (N)
    //
    // rows:  output_size (M)    output_size (M)         input_size (K)
    //

    // C←αAB + βC
    // M Number of rows in matrices A and C.
    // N Number of columns in matrices B and C.
    // K Number of columns in matrix A; number of rows in matrix B.
    // lda The size of the first dimension of matrix A; if you are
    // passing a matrix A[m][n], the value should be m.
    // cblas_sgemm(CblasRowMajor, TransA, TransB, M, N, K, alpha, A, lda, B,
    // ldb, beta, C, N);

    cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
                (int)output_size,   // M
                (int)batch_size,    // N
                (int)input_size,    // K
                1.0f,               // alpha
                weights,            // A
                (int)input_size,    // lda, leading rank of A
                inputs,             // B
                (int)input_size,    // ldb, leading rank of B
                0.0f,               // beta
                outputs,            // C
                (int)output_size);  // ldc, leading rank of C
  }
  if (apply_relu) {
    // Fused bias-add and ReLU over every batch element.
    for (size_t i = 0; i < batch_size; i++) {
      float* batch_outputs = outputs + i * output_size;
      for (size_t o = 0; o < output_size; o++) {
        float val = biases[o] + batch_outputs[o];
        batch_outputs[o] = val >= 0 ? val : 0;
      }
    }
  } else {
    // Bias-add only.
    for (size_t i = 0; i < batch_size; i++) {
      float* batch_outputs = outputs + i * output_size;
      for (size_t o = 0; o < output_size; o++) {
        batch_outputs[o] += biases[o];
      }
    }
  }
}
100 |
// Returns the dot product of @x and @y, both of length @size.
float FullyConnectedLayer::Forward0D(const size_t size, const float* x,
                                     const float* y) {
  // A scalar product, also known as a dot-product.
  // float cblas_sdot(const int N, const float *X, const int incX, const float
  // *Y,
  //                  const int incY);
  return cblas_sdot((int)size, x, 1, y, 1);
}
109 |
110 | } // namespace lczero
111 |
--------------------------------------------------------------------------------
/src/neural/blas/fully_connected_layer.h:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 | */
18 |
19 | #pragma once
20 |
21 | #include
22 | #include
23 |
24 | namespace lczero {
25 |
// Fully-connected (dense) layers over plain float buffers, backed by BLAS.
class FullyConnectedLayer {
 public:
  FullyConnectedLayer() = delete;  // Static-only class; not instantiable.

  // Forward inference, batched, from input_size to output_size.
  static void Forward1D(const size_t batch_size, const size_t input_size,
                        const size_t output_size, const float* input,
                        const float* weights, const float* biases,
                        bool apply_relu, float* output);

  // Forward inference, non-batched, from input_size to a scalar
  // (dot product of @input and @weights).
  static float Forward0D(const size_t input_size, const float* input,
                         const float* weights);
};
41 |
42 | } // namespace lczero
43 |
--------------------------------------------------------------------------------
/src/neural/blas/se_unit.cc:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 | */
18 |
19 | #include "neural/blas/se_unit.h"
20 | #include "neural/blas/fully_connected_layer.h"
21 |
22 | #include
23 |
24 | namespace lczero {
namespace {
// Board geometry: every feature plane is 8x8 = 64 squares.
constexpr int kWidth = 8;
constexpr int kHeight = 8;
constexpr int kSquares = kWidth * kHeight;
}  // namespace
30 |
31 | static void global_avg_pooling(const size_t channels, const float* input,
32 | float* output) {
33 | for (auto c = size_t{0}; c < channels; c++) {
34 | auto acc = 0.0f;
35 | for (auto i = size_t{0}; i < kSquares; i++) {
36 | acc += input[c * kSquares + i];
37 | }
38 | output[c] = acc / kSquares;
39 | }
40 | }
41 |
// Applies the Squeeze-and-Excitation scaling:
//   output = ReLU(sigmoid(gamma) * input + beta + res), per channel.
// @scale holds, for each batch element, `channels` gamma values followed by
// `channels` beta values (the layout produced by the second SE
// fully-connected layer, which outputs 2 * channels values per element).
static void apply_se(const size_t channels, const size_t batch_size,
                     const float* input, const float* res, const float* scale,
                     float* output) {
  const auto lambda_ReLU = [](const auto val) {
    return (val > 0.0f) ? val : 0;
  };

  const auto lambda_sigmoid = [](const auto val) {
    return 1.0f / (1.0f + exp(-val));
  };

  // c enumerates (batch, channel) pairs: c = batch * channels + channel.
  for (auto c = size_t{0}; c < channels * batch_size; c++) {
    auto batch = c / channels;
    // c + batch * channels == 2 * batch * channels + channel, i.e. the gamma
    // slot of this batch element; beta is `channels` entries further on.
    auto gamma = lambda_sigmoid(scale[c + batch * channels]);
    auto beta = scale[c + batch * channels + channels];
    for (auto i = size_t{0}; i < kSquares; i++) {
      output[c * kSquares + i] = lambda_ReLU(gamma * input[c * kSquares + i] +
                                             beta + res[c * kSquares + i]);
    }
  }
}
63 |
64 | void ApplySEUnit(const size_t batch_size, const size_t channels,
65 | const size_t se_fc_outputs, const float* input,
66 | const float* residual, const float* weights_w1,
67 | const float* weights_b1, const float* weights_w2,
68 | const float* weights_b2, float* output) {
69 | std::vector pool(2 * channels * batch_size);
70 | std::vector fc_out1(batch_size * se_fc_outputs);
71 |
72 | global_avg_pooling(channels * batch_size, input, pool.data());
73 |
74 | FullyConnectedLayer::Forward1D(batch_size, channels, se_fc_outputs,
75 | pool.data(), weights_w1, weights_b1,
76 | true, // Relu On
77 | fc_out1.data());
78 |
79 | FullyConnectedLayer::Forward1D(batch_size, se_fc_outputs, 2 * channels,
80 | fc_out1.data(), weights_w2, weights_b2,
81 | false, // Relu Off
82 | pool.data());
83 |
84 | // Sigmoid, scale and add residual
85 | apply_se(channels, batch_size, input, residual, pool.data(), output);
86 | }
87 |
88 | } // namespace lczero
89 |
--------------------------------------------------------------------------------
/src/neural/blas/se_unit.h:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 | */
18 |
19 | #pragma once
20 |
21 | #include
22 |
23 | namespace lczero {
24 |
// Applies a Squeeze-and-Excitation (SE) unit to a batch of 8x8 feature
// planes: @input is globally pooled per channel, passed through two
// fully-connected layers (w1/b1 with se_fc_outputs hidden units, then
// w2/b2), and the result scales/shifts @input before @residual is added
// into @output.
void ApplySEUnit(const size_t batch_size, const size_t channels,
                 const size_t se_fc_outputs, const float* input,
                 const float* residual, const float* weights_w1,
                 const float* weights_b1, const float* weights_w2,
                 const float* weights_b2, float* output);
30 |
31 | } // namespace lczero
32 |
--------------------------------------------------------------------------------
/src/neural/blas/winograd_convolution3.h:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 | */
18 |
19 | #pragma once
20 |
21 | #include
22 | #include
23 |
24 | namespace lczero {
25 |
26 | // Convolution 3x3 on a 8x8 board using the Winograd algorithm.
27 | //
28 | // Ref:
29 | //
30 | // Fast Algorithms for Convolutional Neural Networks
31 | // https://arxiv.org/abs/1509.09308
32 | //
33 | // https://ai.intel.com/winograd/
34 | // https://ai.intel.com/winograd-2/
35 |
// Convolution 3x3 using the Winograd algorithm.
// NOTE(review): "std::vector<float>" for the members V_ and M_ was restored;
// the template arguments had been stripped from this copy of the file.
class WinogradConvolution3 {
 public:
  // The instance will allocate memory resources for the
  // largest batch size, and the largest input and output
  // layers.
  WinogradConvolution3(const size_t max_batch_size,
                       const size_t max_input_layers,
                       const size_t max_output_layers);

  // Forward inference, batched.
  void Forward(const size_t batch_size, const size_t input_channels,
               const size_t output_channels, const float* input,
               const float* weights, float* output);

 private:
  // Transforms the input into the Winograd domain (presumably into V_, per
  // Winograd naming convention — confirm in the .cc file).
  void TransformIn(const size_t batch_size, const float* input,
                   const size_t channels);

  // Batched matrix multiplications in the transformed domain.
  void Sgemm(const size_t batch_size, const float* weights,
             const size_t input_channels, const size_t output_channels);

  // Transforms the result back into the spatial domain, writing @output.
  void TransformOut(const size_t batch_size, float* output,
                    const size_t channels);

  static constexpr auto kWidth = 8;
  static constexpr auto kHeight = 8;
  static constexpr auto kSquares = kWidth * kHeight;

  static constexpr auto kWtiles = (kWidth + 1) / 2;  // 4
  static constexpr auto kTiles = kWtiles * kWtiles;  // 16

  static constexpr auto kWinogradAlpha = 4;
  static constexpr auto kWinogradTile = kWinogradAlpha * kWinogradAlpha;

  // Scratch buffers reused across Forward() calls.
  std::vector<float> V_;
  std::vector<float> M_;
};
74 | } // namespace lczero
75 |
--------------------------------------------------------------------------------
/src/neural/cache.cc:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 |
18 | Additional permission under GNU GPL version 3 section 7
19 |
20 | If you modify this Program, or any covered work, by linking or
21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
23 | modified version of those libraries), containing parts covered by the
24 | terms of the respective license agreement, the licensors of this
25 | Program grant you additional permission to convey the resulting work.
26 | */
27 | #include "neural/cache.h"
28 | #include
29 | #include
30 |
31 | namespace lczero {
32 | CachingComputation::CachingComputation(
33 | std::unique_ptr parent, NNCache* cache)
34 | : parent_(std::move(parent)), cache_(cache) {}
35 |
// Number of inputs not found in the cache — exactly those forwarded to the
// wrapped computation, hence the parent's batch size.
int CachingComputation::GetCacheMisses() const {
  return parent_->GetBatchSize();
}
39 |
// Total number of inputs added so far (cache hits and misses alike).
int CachingComputation::GetBatchSize() const { return batch_.size(); }
41 |
42 | bool CachingComputation::AddInputByHash(uint64_t hash) {
43 | NNCacheLock lock(cache_, hash);
44 | if (!lock) return false;
45 | batch_.emplace_back();
46 | batch_.back().lock = std::move(lock);
47 | batch_.back().hash = hash;
48 | return true;
49 | }
50 |
// Removes the most recently added input. Only allowed for cache hits:
// entries holding a cache lock that were never forwarded to the parent
// (idx_in_parent == -1).
void CachingComputation::PopCacheHit() {
  assert(!batch_.empty());
  assert(batch_.back().lock);
  assert(batch_.back().idx_in_parent == -1);
  batch_.pop_back();
}
57 |
58 | void CachingComputation::AddInput(
59 | uint64_t hash, InputPlanes&& input,
60 | std::vector&& probabilities_to_cache) {
61 | if (AddInputByHash(hash)) return;
62 | batch_.emplace_back();
63 | batch_.back().hash = hash;
64 | batch_.back().idx_in_parent = parent_->GetBatchSize();
65 | batch_.back().probabilities_to_cache = probabilities_to_cache;
66 | parent_->AddInput(std::move(input));
67 | }
68 |
// Removes the most recently added input; only valid when it was a cache hit
// (idx_in_parent == -1), since parent batch entries cannot be removed.
// NOTE(review): near-duplicate of PopCacheHit(), minus its lock assertion.
void CachingComputation::PopLastInputHit() {
  assert(!batch_.empty());
  assert(batch_.back().idx_in_parent == -1);
  batch_.pop_back();
}
74 |
75 | void CachingComputation::ComputeBlocking() {
76 | if (parent_->GetBatchSize() == 0) return;
77 | parent_->ComputeBlocking();
78 |
79 | // Fill cache with data from NN.
80 | for (const auto& item : batch_) {
81 | if (item.idx_in_parent == -1) continue;
82 | auto req =
83 | std::make_unique(item.probabilities_to_cache.size());
84 | req->q = parent_->GetQVal(item.idx_in_parent);
85 | req->d = parent_->GetDVal(item.idx_in_parent);
86 | int idx = 0;
87 | for (auto x : item.probabilities_to_cache) {
88 | req->p[idx++] =
89 | std::make_pair(x, parent_->GetPVal(item.idx_in_parent, x));
90 | }
91 | cache_->Insert(item.hash, std::move(req));
92 | }
93 | }
94 |
// Returns the Q value of @sample: from the parent computation for cache
// misses, from the cached entry for cache hits.
float CachingComputation::GetQVal(int sample) const {
  const auto& item = batch_[sample];
  if (item.idx_in_parent >= 0) return parent_->GetQVal(item.idx_in_parent);
  return item.lock->q;
}
100 |
// Returns the draw probability of @sample (meaningful when the network has
// a WDL value head): from the parent for misses, from the cache for hits.
float CachingComputation::GetDVal(int sample) const {
  const auto& item = batch_[sample];
  if (item.idx_in_parent >= 0) return parent_->GetDVal(item.idx_in_parent);
  return item.lock->d;
}
106 |
107 | float CachingComputation::GetPVal(int sample, int move_id) const {
108 | auto& item = batch_[sample];
109 | if (item.idx_in_parent >= 0)
110 | return parent_->GetPVal(item.idx_in_parent, move_id);
111 | const auto& moves = item.lock->p;
112 |
113 | int total_count = 0;
114 | while (total_count < moves.size()) {
115 | // Optimization: usually moves are stored in the same order as queried.
116 | const auto& move = moves[item.last_idx++];
117 | if (item.last_idx == moves.size()) item.last_idx = 0;
118 | if (move.first == move_id) return move.second;
119 | ++total_count;
120 | }
121 | assert(false); // Move not found.
122 | return 0;
123 | }
124 |
125 | } // namespace lczero
126 |
--------------------------------------------------------------------------------
/src/neural/cache.h:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 |
18 | Additional permission under GNU GPL version 3 section 7
19 |
20 | If you modify this Program, or any covered work, by linking or
21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
23 | modified version of those libraries), containing parts covered by the
24 | terms of the respective license agreement, the licensors of this
25 | Program grant you additional permission to convey the resulting work.
26 | */
27 | #pragma once
28 |
29 | #include "neural/network.h"
30 | #include "utils/cache.h"
31 | #include "utils/smallarray.h"
32 |
33 | namespace lczero {
34 |
35 | struct CachedNNRequest {
36 | CachedNNRequest(size_t size) : p(size) {}
37 | typedef std::pair IdxAndProb;
38 | float q;
39 | float d;
40 | // TODO(mooskagh) Don't really need index if using perfect hash.
41 | SmallArray p;
42 | };
43 |
44 | typedef LruCache NNCache;
45 | typedef LruCacheLock NNCacheLock;
46 |
47 | // Wraps around NetworkComputation and caches result.
48 | // While it mostly repeats NetworkComputation interface, it's not derived
49 | // from it, as AddInput() needs hash and index of probabilities to store.
50 | class CachingComputation {
51 | public:
52 | CachingComputation(std::unique_ptr parent,
53 | NNCache* cache);
54 |
55 | // How many inputs are not found in cache and will be forwarded to a wrapped
56 | // computation.
57 | int GetCacheMisses() const;
58 | // Total number of times AddInput/AddInputByHash were (successfully) called.
59 | int GetBatchSize() const;
60 | // Adds input by hash only. If that hash is not in cache, returns false
61 | // and does nothing. Otherwise adds.
62 | bool AddInputByHash(uint64_t hash);
63 | // Adds a sample to the batch.
64 | // @hash is a hash to store/lookup it in the cache.
65 | // @probabilities_to_cache is which indices of policy head to store.
66 | void AddInput(uint64_t hash, InputPlanes&& input,
67 | std::vector&& probabilities_to_cache);
68 | // Undos last AddInput. If it was a cache miss, the it's actually not removed
69 | // from parent's batch.
70 | void PopLastInputHit();
71 | // Do the computation.
72 | void ComputeBlocking();
73 | // Returns Q value of @sample.
74 | float GetQVal(int sample) const;
75 | // Returns probability of draw if NN has WDL value head
76 | float GetDVal(int sample) const;
77 | // Returns P value @move_id of @sample.
78 | float GetPVal(int sample, int move_id) const;
79 | // Pops last input from the computation. Only allowed for inputs which were
80 | // cached.
81 | void PopCacheHit();
82 |
83 | private:
84 | struct WorkItem {
85 | uint64_t hash;
86 | NNCacheLock lock;
87 | int idx_in_parent = -1;
88 | std::vector probabilities_to_cache;
89 | mutable int last_idx = 0;
90 | };
91 |
92 | std::unique_ptr parent_;
93 | NNCache* cache_;
94 | std::vector batch_;
95 | };
96 |
97 | } // namespace lczero
98 |
--------------------------------------------------------------------------------
/src/neural/cuda/cuda_common.h:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 |
18 | Additional permission under GNU GPL version 3 section 7
19 |
20 | If you modify this Program, or any covered work, by linking or
21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
23 | modified version of those libraries), containing parts covered by the
24 | terms of the respective license agreement, the licensors of this
25 | Program grant you additional permission to convey the resulting work.
26 | */
27 |
28 | #include
29 | #include
30 | #include
31 | #include
32 |
33 | #include "utils/exception.h"
34 |
35 | namespace lczero {
36 | namespace cudnn_backend {
37 |
// Report a failing cuDNN / cuBLAS / CUDA runtime status together with the
// source location that produced it. (Implementations are not visible in this
// header; given the utils/exception.h include they presumably throw an
// lczero exception — confirm in the corresponding .cc file.)
void CudnnError(cudnnStatus_t status, const char* file, const int& line);
void CublasError(cublasStatus_t status, const char* file, const int& line);
void CudaError(cudaError_t status, const char* file, const int& line);

// Wrappers that stamp the caller's __FILE__/__LINE__ into the error report.
#define ReportCUDNNErrors(status) CudnnError(status, __FILE__, __LINE__)
#define ReportCUBLASErrors(status) CublasError(status, __FILE__, __LINE__)
#define ReportCUDAErrors(status) CudaError(status, __FILE__, __LINE__)
45 |
46 | inline int DivUp(int a, int b) { return (a + b - 1) / b; }
47 |
48 | } // namespace cudnn_backend
49 | } // namespace lczero
50 |
--------------------------------------------------------------------------------
/src/neural/cuda/kernels.h:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018-2019 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 |
18 | Additional permission under GNU GPL version 3 section 7
19 |
20 | If you modify this Program, or any covered work, by linking or
21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
23 | modified version of those libraries), containing parts covered by the
24 | terms of the respective license agreement, the licensors of this
25 | Program grant you additional permission to convey the resulting work.
26 | */
27 |
28 | namespace lczero {
29 | namespace cudnn_backend {
30 |
31 | // Adds two vectors (possibly of different sizes), also do optional
32 | // activation (relu, tanh or sigmoid).
33 | template
34 | void addVectors(T* c, T* a, T* b, int size, int asize, int bsize, bool relu,
35 | bool use_tanh, bool use_sigmoid);
36 |
37 | // Add bias to convolution's output.
38 | template
39 | void addBias_NCHW(T* c, T* a, T* b, int N, int C, int H, int W);
40 |
41 | // Conversion from: fp32 -> fp16 datatype, and NCHW -> NHWC layout.
42 | // Cudnn kernels work best with NCHW layout for fp32, and with NHWC for fp16.
43 | void fp32NCHWtofp16NHWC(half* output_tensor, float* input_tensor, int Nin,
44 | int Cin, int Nout, int Cout, int H, int W);
45 |
46 | // Plain data-type conversion (no layout conversion).
47 | template
48 | void copyTypeConverted(DstType* op, SrcType* ip, int N);
49 |
50 | // Perform batch normilization.
51 | template
52 | void batchNorm(T* output, const T* input, const T* skipInput, int N, int C,
53 | int H, int W, float* means, float* var_multipliers, bool relu);
54 |
55 | // Unpack planes (input to network).
56 | void expandPlanes_Fp32_NCHW(float* output, const uint64_t* masks,
57 | const float* values, int n);
58 |
59 | void expandPlanes_Fp16_NHWC(half* output, const uint64_t* masks,
60 | const float* values, int n);
61 |
62 | // Perform global avg pool.
63 | template
64 | void globalAvgPool(int N, int C, T* output, const T* input,
65 | const T* prevLayerBias);
66 |
67 | // Perform global scale.
68 | template
69 | void globalScale(int N, int C, T* output, const T* input, const T* scaleBias,
70 | const T* prevLayerBias);
71 |
72 | // Perform Squeeze-and-Excitation (SE) in a single fused kernel.
73 | // Returns false if the fused kernel can't handle the sizes.
74 | bool Se_Fp16_NHWC(int N, int C, int numFc1Out, half* output, const half* skip,
75 | const half* input, const half* w1, const half* b1,
76 | const half* w2, const half* b2, const half* bPrev);
77 |
78 | template
79 | void PolicyMap(int N, T* output, const T* input, const short* indices,
80 | int inputSize, int usedSize, int outputSize);
81 |
82 | } // namespace cudnn_backend
83 | } // namespace lczero
84 |
--------------------------------------------------------------------------------
/src/neural/cuda/readme.txt:
--------------------------------------------------------------------------------
1 | cuda/cudnn backend for lc0. Here is a brief description of various files:
2 |
3 | 1. network_cudnn.cc -> C++ file containing the network and computation classes that connect this backend to the rest of lc0
4 | 2. layers.cc -> cpp files containing layer classes
5 | 3. layers.h -> header file for layer classes.
6 | 4. kernels.h -> header file for cuda kernels
7 | 5. common_kernels.cu -> common kernels (fp32, and fp16 that can work with old GPUs)
8 | 6. fp16_kernels.cu -> fp16 specific kernels (not used on other GPUs)
9 | 7. cuda_common.h -> header for common cuda stuff like ReportCUDAErrors, etc.
10 | 8. readme.txt -> this file
11 |
12 | High level overview: network is built of layer objects, layers are either implemented using cudnn/cublas libraries, or custom cuda kernels.
13 |
14 | lc0 search -> network_cudnn -> layers -> kernels
--------------------------------------------------------------------------------
/src/neural/encoder.h:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 |
18 | Additional permission under GNU GPL version 3 section 7
19 |
20 | If you modify this Program, or any covered work, by linking or
21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
23 | modified version of those libraries), containing parts covered by the
24 | terms of the respective license agreement, the licensors of this
25 | Program grant you additional permission to convey the resulting work.
26 | */
27 |
28 | #pragma once
29 |
30 | #include "chess/position.h"
31 | #include "neural/network.h"
32 |
33 | namespace lczero {
34 |
// How to synthesize history planes when fewer real positions are available
// than requested (exact fill semantics live in encoder.cc — not visible
// here).
enum class FillEmptyHistory {NO, FEN_ONLY, ALWAYS};

// Encodes the last position in history for the neural network request.
// @history_planes: how many most recent positions to encode.
// @fill_empty_history: what to do when history is shorter than that.
InputPlanes EncodePositionForNN(const PositionHistory& history,
                                int history_planes,
                                FillEmptyHistory fill_empty_history);
41 |
42 | } // namespace lczero
43 |
--------------------------------------------------------------------------------
/src/neural/loader.h:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 |
18 | Additional permission under GNU GPL version 3 section 7
19 |
20 | If you modify this Program, or any covered work, by linking or
21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
23 | modified version of those libraries), containing parts covered by the
24 | terms of the respective license agreement, the licensors of this
25 | Program grant you additional permission to convey the resulting work.
26 | */
27 |
28 | #pragma once
29 |
30 | #include
31 | #include
32 |
33 | #include "neural/network.h"
34 | #include "proto/net.pb.h"
35 |
36 | namespace lczero {
37 |
38 | using FloatVector = std::vector;
39 | using FloatVectors = std::vector;
40 |
41 | using WeightsFile = pblczero::Net;
42 |
43 | // Read weights file and fill the weights structure.
44 | WeightsFile LoadWeightsFromFile(const std::string& filename);
45 |
46 | // Tries to find a file which looks like a weights file, and located in
47 | // directory of binary_name or one of subdirectories. If there are several such
48 | // files, returns one which has the latest modification date.
49 | std::string DiscoverWeightsFile();
50 |
51 | } // namespace lczero
52 |
--------------------------------------------------------------------------------
/src/neural/network.h:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 |
18 | Additional permission under GNU GPL version 3 section 7
19 |
20 | If you modify this Program, or any covered work, by linking or
21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
23 | modified version of those libraries), containing parts covered by the
24 | terms of the respective license agreement, the licensors of this
25 | Program grant you additional permission to convey the resulting work.
26 | */
27 |
28 | #pragma once
29 |
30 | #include
31 | #include
32 |
33 | namespace lczero {
34 |
// Number of input planes the network consumes per position.
const int kInputPlanes = 112;

// All input planes are 64 value vectors, every element of which is either
// 0 or some value, unique for the plane. Therefore, input is defined as
// a bitmask showing where to set the value, and the value itself.
struct InputPlane {
  InputPlane() = default;
  // Sets every bit of the mask (all 64 squares get the value).
  void SetAll() { mask = ~0ull; }
  // Sets the whole plane to @val.
  void Fill(float val) {
    SetAll();
    value = val;
  }
  std::uint64_t mask = 0ull;
  float value = 1.0f;
};
using InputPlanes = std::vector<InputPlane>;
51 |
// An interface to implement by computing backends.
class NetworkComputation {
 public:
  // Adds a sample to the batch.
  virtual void AddInput(InputPlanes&& input) = 0;
  // Do the computation.
  virtual void ComputeBlocking() = 0;
  // Returns how many times AddInput() was called.
  virtual int GetBatchSize() const = 0;
  // Returns Q value of @sample.
  virtual float GetQVal(int sample) const = 0;
  // Returns probability of draw of @sample (for nets with a WDL value head).
  virtual float GetDVal(int sample) const = 0;
  // Returns P value @move_id of @sample.
  virtual float GetPVal(int sample, int move_id) const = 0;
  virtual ~NetworkComputation() {}
};
68 |
69 | class Network {
70 | public:
71 | virtual std::unique_ptr NewComputation() = 0;
72 | virtual ~Network(){};
73 | };
74 |
75 | } // namespace lczero
76 |
--------------------------------------------------------------------------------
/src/neural/network_legacy.cc:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018-2019 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 | */
18 |
19 | #include "neural/network_legacy.h"
20 |
21 | #include
22 | #include
23 | #include "utils/weights_adapter.h"
24 |
25 | namespace lczero {
26 | namespace {
27 | static constexpr float kEpsilon = 1e-5f;
28 | } // namespace
29 |
// Copies every layer of the protobuf weights into plain float vectors.
LegacyWeights::LegacyWeights(const pblczero::Weights& weights)
    : input(weights.input()),
      policy1(weights.policy1()),
      policy(weights.policy()),
      ip_pol_w(LayerAdapter(weights.ip_pol_w()).as_vector()),
      ip_pol_b(LayerAdapter(weights.ip_pol_b()).as_vector()),
      value(weights.value()),
      ip1_val_w(LayerAdapter(weights.ip1_val_w()).as_vector()),
      ip1_val_b(LayerAdapter(weights.ip1_val_b()).as_vector()),
      ip2_val_w(LayerAdapter(weights.ip2_val_w()).as_vector()),
      ip2_val_b(LayerAdapter(weights.ip2_val_b()).as_vector()) {
  // Residual tower is copied block by block; batch-norm folding happens in
  // each block's ConvBlock constructor.
  for (const auto& res : weights.residual()) {
    residual.emplace_back(res);
  }
}
45 |
// Squeeze-and-Excitation unit: weights and biases of its two
// fully-connected layers, copied out of the protobuf.
LegacyWeights::SEunit::SEunit(const pblczero::Weights::SEunit& se)
    : w1(LayerAdapter(se.w1()).as_vector()),
      b1(LayerAdapter(se.b1()).as_vector()),
      w2(LayerAdapter(se.w2()).as_vector()),
      b2(LayerAdapter(se.b2()).as_vector()) {}
51 |
// One residual block: two convolutions plus an optional SE unit
// (has_se tells whether the SE weights are meaningful).
LegacyWeights::Residual::Residual(const pblczero::Weights::Residual& residual)
    : conv1(residual.conv1()),
      conv2(residual.conv2()),
      se(residual.se()),
      has_se(residual.has_se()) {}
57 |
// Builds a ConvBlock from protobuf weights, then folds the batch-norm
// parameters into the convolution weights/biases so that inference needs no
// separate normalization pass. BN: y = gamma*(conv + bias - mean)/stddev +
// beta; with g' = gamma/stddev and m' = mean - bias this becomes
// y = g'*conv + (-g'*m' + beta), which is what the loops below compute.
LegacyWeights::ConvBlock::ConvBlock(const pblczero::Weights::ConvBlock& block)
    : weights(LayerAdapter(block.weights()).as_vector()),
      biases(LayerAdapter(block.biases()).as_vector()),
      bn_gammas(LayerAdapter(block.bn_gammas()).as_vector()),
      bn_betas(LayerAdapter(block.bn_betas()).as_vector()),
      bn_means(LayerAdapter(block.bn_means()).as_vector()),
      bn_stddivs(LayerAdapter(block.bn_stddivs()).as_vector()) {
  if (weights.size() == 0) {
    // Empty ConvBlock.
    return;
  }

  if (bn_betas.size() == 0) {
    // Old net without gamma and beta.
    for (auto i = size_t{0}; i < bn_means.size(); i++) {
      bn_betas.emplace_back(0.0f);
      bn_gammas.emplace_back(1.0f);
    }
  }
  if (biases.size() == 0) {
    // No explicit biases stored; use zeros so the folding below is uniform.
    for (auto i = size_t{0}; i < bn_means.size(); i++) {
      biases.emplace_back(0.0f);
    }
  }

  if (bn_means.size() == 0) {
    // No batch norm.
    return;
  }

  // Fold batch norm into weights and biases.
  // Variance to gamma.
  for (auto i = size_t{0}; i < bn_stddivs.size(); i++) {
    // NOTE(review): sqrt is applied here, so bn_stddivs appears to hold
    // variances despite the name — confirm against the weights exporter.
    // kEpsilon guards against division by zero.
    bn_gammas[i] *= 1.0f / std::sqrt(bn_stddivs[i] + kEpsilon);
    bn_means[i] -= biases[i];
  }

  auto outputs = biases.size();

  // We can treat the [inputs, filter_size, filter_size] dimensions as one.
  auto inputs = weights.size() / outputs;

  for (auto o = size_t{0}; o < outputs; o++) {
    for (auto c = size_t{0}; c < inputs; c++) {
      weights[o * inputs + c] *= bn_gammas[o];
    }

    biases[o] = -bn_gammas[o] * bn_means[o] + bn_betas[o];
  }

  // Batch norm weights are not needed anymore.
  bn_stddivs.clear();
  bn_means.clear();
  bn_betas.clear();
  bn_gammas.clear();
}
114 | } // namespace lczero
115 |
--------------------------------------------------------------------------------
/src/neural/network_legacy.h:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018-2019 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 | */
18 |
19 | #pragma once
20 |
21 | #include
22 | #include "proto/net.pb.h"
23 |
24 | namespace lczero {
25 |
26 | // DEPRECATED! DEPRECATED! DEPRECATED! DEPRECATED! DEPRECATED! DEPRECATED!!!
27 | // Legacy structure describing network weights.
28 | // Please try to migrate away from this structure, and do not add anything
29 | // new to it.
30 |
31 | struct LegacyWeights {
32 | explicit LegacyWeights(const pblczero::Weights& weights);
33 |
34 | using Vec = std::vector;
35 | struct ConvBlock {
36 | explicit ConvBlock(const pblczero::Weights::ConvBlock& block);
37 |
38 | Vec weights;
39 | Vec biases;
40 | Vec bn_gammas;
41 | Vec bn_betas;
42 | Vec bn_means;
43 | Vec bn_stddivs;
44 | };
45 |
46 | struct SEunit {
47 | explicit SEunit(const pblczero::Weights::SEunit& se);
48 | Vec w1;
49 | Vec b1;
50 | Vec w2;
51 | Vec b2;
52 | };
53 |
54 | struct Residual {
55 | explicit Residual(const pblczero::Weights::Residual& residual);
56 | ConvBlock conv1;
57 | ConvBlock conv2;
58 | SEunit se;
59 | bool has_se;
60 | };
61 |
62 | // Input convnet.
63 | ConvBlock input;
64 |
65 | // Residual tower.
66 | std::vector residual;
67 |
68 | // Policy head
69 | // Extra convolution for AZ-style policy head
70 | ConvBlock policy1;
71 | ConvBlock policy;
72 | Vec ip_pol_w;
73 | Vec ip_pol_b;
74 |
75 | // Value head
76 | ConvBlock value;
77 | Vec ip1_val_w;
78 | Vec ip1_val_b;
79 | Vec ip2_val_w;
80 | Vec ip2_val_b;
81 | };
82 |
83 | } // namespace lczero
84 |
--------------------------------------------------------------------------------
/src/neural/network_random.cc:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 |
18 | Additional permission under GNU GPL version 3 section 7
19 |
20 | If you modify this Program, or any covered work, by linking or
21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
23 | modified version of those libraries), containing parts covered by the
24 | terms of the respective license agreement, the licensors of this
25 | Program grant you additional permission to convey the resulting work.
26 | */
27 |
28 | #include
29 | #include
30 | #include
31 | #include
32 | #include
33 | #include "neural/factory.h"
34 | #include "utils/hashcat.h"
35 |
36 | namespace lczero {
37 | namespace {
38 |
39 | class RandomNetworkComputation : public NetworkComputation {
40 | public:
41 | RandomNetworkComputation(int delay, int seed, bool uniform_mode)
42 | : delay_ms_(delay), seed_(seed), uniform_mode_(uniform_mode) {}
43 |
44 | void AddInput(InputPlanes&& input) override {
45 | std::uint64_t hash = seed_;
46 | for (const auto& plane : input) {
47 | hash = HashCat({hash, plane.mask});
48 | std::uint32_t tmp;
49 | std::memcpy(&tmp, &plane.value, sizeof(float));
50 | const std::uint64_t value_hash = tmp;
51 | hash = HashCat({hash, value_hash});
52 | }
53 | inputs_.push_back(hash);
54 | }
55 |
56 | void ComputeBlocking() override {
57 | if (delay_ms_) {
58 | std::this_thread::sleep_for(std::chrono::milliseconds(delay_ms_));
59 | }
60 | }
61 |
62 | int GetBatchSize() const override { return inputs_.size(); }
63 |
64 | float GetQVal(int sample) const override {
65 | if (uniform_mode_) return 0.0f;
66 | return (int(inputs_[sample] % 200000) - 100000) / 100000.0;
67 | }
68 |
69 | float GetDVal(int sample) const override {
70 | if (uniform_mode_) return 0.0f;
71 | // Maximum D value is 1 - abs(Q) for W, D, L to be in range [0.0, 1.0].
72 | float q = GetQVal(sample);
73 | float max_d = 1.0f - std::fabs(q);
74 | // Hash in arbitrary constant to make D return different value from Q.
75 | float d = max_d * (HashCat({inputs_[sample], 1234}) % 10000) / 10000.0;
76 | return d;
77 | }
78 |
79 | float GetPVal(int sample, int move_id) const override {
80 | if (uniform_mode_) return 1.0f;
81 | return (HashCat({inputs_[sample], static_cast(move_id)}) %
82 | 10000) /
83 | 10000.0;
84 | }
85 |
86 | private:
87 | std::vector inputs_;
88 | int delay_ms_ = 0;
89 | int seed_ = 0;
90 | bool uniform_mode_ = false;
91 | };
92 |
93 | class RandomNetwork : public Network {
94 | public:
95 | RandomNetwork(const OptionsDict& options)
96 | : delay_ms_(options.GetOrDefault("delay", 0)),
97 | seed_(options.GetOrDefault("seed", 0)),
98 | uniform_mode_(options.GetOrDefault("uniform", false)) {}
99 | std::unique_ptr NewComputation() override {
100 | return std::make_unique(delay_ms_, seed_, uniform_mode_);
101 | }
102 |
103 | private:
104 | int delay_ms_ = 0;
105 | int seed_ = 0;
106 | bool uniform_mode_ = false;
107 | };
108 | } // namespace
109 |
110 | std::unique_ptr MakeRandomNetwork(const WeightsFile& /*weights*/,
111 | const OptionsDict& options) {
112 | return std::make_unique(options);
113 | }
114 |
115 | REGISTER_NETWORK("random", MakeRandomNetwork, -900)
116 |
117 | } // namespace lczero
118 |
--------------------------------------------------------------------------------
/src/neural/network_rr.cc:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 |
18 | Additional permission under GNU GPL version 3 section 7
19 |
20 | If you modify this Program, or any covered work, by linking or
21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
23 | modified version of those libraries), containing parts covered by the
24 | terms of the respective license agreement, the licensors of this
25 | Program grant you additional permission to convey the resulting work.
26 | */
27 |
28 | #include "neural/factory.h"
29 |
30 | #include
31 | #include
32 | #include
33 | #include "utils/exception.h"
34 |
35 | namespace lczero {
36 | namespace {
37 |
38 | class RoundRobinNetwork : public Network {
39 | public:
40 | RoundRobinNetwork(const WeightsFile& weights, const OptionsDict& options) {
41 | const auto parents = options.ListSubdicts();
42 | if (parents.empty()) {
43 | // If options are empty, or multiplexer configured in root object,
44 | // initialize on root object and default backend.
45 | auto backends = NetworkFactory::Get()->GetBackendsList();
46 | AddBackend(backends[0], weights, options);
47 | }
48 |
49 | for (const auto& name : parents) {
50 | AddBackend(name, weights, options.GetSubdict(name));
51 | }
52 | }
53 |
54 | void AddBackend(const std::string& name, const WeightsFile& weights,
55 | const OptionsDict& opts) {
56 | const std::string backend = opts.GetOrDefault("backend", name);
57 |
58 | networks_.emplace_back(
59 | NetworkFactory::Get()->Create(backend, weights, opts));
60 | }
61 |
62 | std::unique_ptr NewComputation() override {
63 | const long long val = ++counter_;
64 | return networks_[val % networks_.size()]->NewComputation();
65 | }
66 |
67 | ~RoundRobinNetwork() {}
68 |
69 | private:
70 | std::vector> networks_;
71 | std::atomic counter_;
72 | };
73 |
74 | std::unique_ptr MakeRoundRobinNetwork(const WeightsFile& weights,
75 | const OptionsDict& options) {
76 | return std::make_unique(weights, options);
77 | }
78 |
79 | REGISTER_NETWORK("roundrobin", MakeRoundRobinNetwork, -999)
80 |
81 | } // namespace
82 | } // namespace lczero
83 |
--------------------------------------------------------------------------------
/src/neural/network_st_batch.cc:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 |
18 | Additional permission under GNU GPL version 3 section 7
19 |
20 | If you modify this Program, or any covered work, by linking or
21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
23 | modified version of those libraries), containing parts covered by the
24 | terms of the respective license agreement, the licensors of this
25 | Program grant you additional permission to convey the resulting work.
26 | */
27 |
28 | #include "neural/network_st_batch.h"
29 |
30 | #include
31 |
32 | namespace lczero {
33 |
// Takes ownership of the wrapped (real) backend network.
SingleThreadBatchingNetwork::SingleThreadBatchingNetwork(
    std::unique_ptr parent)
    : parent_(std::move(parent)) {}
37 |
38 | std::unique_ptr
39 | SingleThreadBatchingNetwork::NewComputation() {
40 | ++computations_pending_;
41 | return std::make_unique(this);
42 | }
43 |
// Starts a fresh parent batch. Must only be called once all computations
// created since the previous Reset() have finished (hence the assert).
void SingleThreadBatchingNetwork::Reset() {
  assert(computations_pending_ == 0);
  parent_computation_ = parent_->NewComputation();
}
48 |
// Records where this computation's samples will begin inside the shared
// parent batch (the parent's current size at creation time).
SingleThreadBatchingNetworkComputation::SingleThreadBatchingNetworkComputation(
    SingleThreadBatchingNetwork* network)
    : network_(network),
      start_idx_(network_->parent_computation_->GetBatchSize()) {}
53 |
// Forwards the sample to the parent batch. The assert checks that no other
// computation appended to the parent since this one's last AddInput() — one
// computation's samples must stay contiguous inside the parent batch.
void SingleThreadBatchingNetworkComputation::AddInput(InputPlanes&& input) {
  assert(start_idx_ + batch_size_ ==
         network_->parent_computation_->GetBatchSize());
  ++batch_size_;
  network_->parent_computation_->AddInput(std::move(input));
}
60 |
// Only the last pending computation actually runs the parent batch; earlier
// callers return immediately and read their results afterwards.
void SingleThreadBatchingNetworkComputation::ComputeBlocking() {
  if (--network_->computations_pending_ == 0)
    network_->parent_computation_->ComputeBlocking();
}
65 |
66 | float SingleThreadBatchingNetworkComputation::GetQVal(int sample) const {
67 | return network_->parent_computation_->GetQVal(sample - start_idx_);
68 | }
69 |
70 | float SingleThreadBatchingNetworkComputation::GetDVal(int sample) const {
71 | return network_->parent_computation_->GetDVal(sample - start_idx_);
72 | }
73 |
74 | float SingleThreadBatchingNetworkComputation::GetPVal(int sample,
75 | int move_id) const {
76 | return network_->parent_computation_->GetPVal(sample - start_idx_, move_id);
77 | }
78 |
79 | } // namespace lczero
80 |
--------------------------------------------------------------------------------
/src/neural/network_st_batch.h:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 |
18 | Additional permission under GNU GPL version 3 section 7
19 |
20 | If you modify this Program, or any covered work, by linking or
21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
23 | modified version of those libraries), containing parts covered by the
24 | terms of the respective license agreement, the licensors of this
25 | Program grant you additional permission to convey the resulting work.
26 | */
27 |
28 | #pragma once
29 |
30 | #include "neural/network.h"
31 |
32 | namespace lczero {
33 |
34 | // This is a network that helps to combine batches from multiple games running
35 | // is a single thread. Not thread safe.
36 | // Usage:
37 | // network.Reset(); // Creates new parent computation
38 | // computations = []
39 | // multiple times:
40 | // x = network.NewComputation()
41 | // computations += x
42 | // x.AddInput();
43 | // x.AddInput();
44 | // x.AddInput();
45 | // ...
46 | // for x in computations:
47 | // x.ComputeBlocking() // Only last call actually computes, and they are
48 | // // computed together in one batch.
49 | // for x in computations:
50 | // use(x)
51 | class SingleThreadBatchingNetwork : public Network {
52 | public:
53 | SingleThreadBatchingNetwork(std::unique_ptr parent);
54 | std::unique_ptr NewComputation() override;
55 |
56 | // Start a fresh batch.
57 | void Reset();
58 |
59 | private:
60 | std::unique_ptr parent_;
61 | std::unique_ptr parent_computation_;
62 | int computations_pending_ = 0;
63 | friend class SingleThreadBatchingNetworkComputation;
64 | };
65 |
class SingleThreadBatchingNetworkComputation : public NetworkComputation {
 public:
  SingleThreadBatchingNetworkComputation(SingleThreadBatchingNetwork* network);

  // Adds a sample to the parent batch.
  void AddInput(InputPlanes&& input) override;
  // May not actually compute immediately. Instead computes when all
  // computations of the network called this.
  void ComputeBlocking() override;
  // Returns how many times AddInput() was called.
  int GetBatchSize() const override { return batch_size_; }
  // Returns Q value of @sample.
  float GetQVal(int sample) const override;
  // Returns draw probability of @sample (for WDL-head networks).
  float GetDVal(int sample) const override;
  // Returns P value @move_id of @sample.
  float GetPVal(int sample, int move_id) const override;

 private:
  SingleThreadBatchingNetwork* const network_;
  // Offset of this computation's first sample inside the parent batch.
  int start_idx_;
  // Number of samples this computation has contributed.
  int batch_size_ = 0;
};
88 |
89 | } // namespace lczero
90 |
--------------------------------------------------------------------------------
/src/neural/opencl/OpenCLBuffers.h:
--------------------------------------------------------------------------------
1 | /*
2 | Originally from the Leela Zero project.
3 | Copyright (C) 2017 Gian-Carlo Pascutto
4 |
5 | This file is part of Leela Chess Zero.
6 | Copyright (C) 2018-2019 The LCZero Authors
7 |
8 | Leela Chess is free software: you can redistribute it and/or modify
9 | it under the terms of the GNU General Public License as published by
10 | the Free Software Foundation, either version 3 of the License, or
11 | (at your option) any later version.
12 |
13 | Leela Chess is distributed in the hope that it will be useful,
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | GNU General Public License for more details.
17 |
18 | You should have received a copy of the GNU General Public License
along with Leela Chess. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | #pragma once
23 |
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <thread>
#include <vector>
36 |
37 | #include "neural/opencl/OpenCL.h"
38 | #include "neural/opencl/OpenCLParams.h"
39 | #include "neural/opencl/OpenCLTuner.h"
40 | #include "utils/logging.h"
41 |
42 | class OpenCL_Network;
43 |
44 | class OpenCLBuffers {
45 | friend class OpenCL;
46 | friend class OpenCL_Network;
47 |
48 | public:
49 | OpenCLBuffers(const OpenCL_Network& opencl_net);
50 |
51 | void forward(const std::vector& input, std::vector& output_pol,
52 | std::vector& output_val, const int batch_size);
53 |
54 | private:
55 | using weight_slice_t = std::vector::const_iterator;
56 |
57 | void convolve3(int channels, int outputs, cl::Buffer& bufferIn,
58 | cl::Buffer& bufferOut, cl::Buffer& bufferV,
59 | cl::Buffer& bufferM, weight_slice_t weights,
60 | cl::Buffer* bufferResidual, weight_slice_t biases,
61 | bool skip_in_transform, bool fuse_in_transform,
62 | bool store_inout, bool relu, int batch_size);
63 |
64 | void convolve1(int channels, int outputs, cl::Buffer& bufferInput,
65 | cl::Buffer& bufferOutput, cl::Buffer& bufferMerge,
66 | weight_slice_t weights, weight_slice_t biases, int batch_size);
67 |
68 | void innerproduct(cl::Buffer& input, weight_slice_t weights,
69 | weight_slice_t biases, cl::Buffer& output, const int inputs,
70 | const int outputs, const int relu, int batch_size);
71 |
72 | void squeeze_excitation(int channels, int fc_outputs, cl::Buffer& bufferIn,
73 | cl::Buffer& bufferTemp1, cl::Buffer& bufferTemp2,
74 | weight_slice_t weights, cl::Buffer& bufferResidual,
75 | int batch_size);
76 |
77 | void policymap(int N, const cl::Buffer& input, cl::Buffer& output,
78 | const cl::Buffer& indices, int inputSize, int usedSize,
79 | int outputSize);
80 |
81 | const OpenCL_Network& m_opencl_net;
82 | const OpenCL& m_opencl;
83 |
84 | cl::CommandQueue m_commandqueue;
85 | cl::Kernel m_convolve1_kernel;
86 | cl::Kernel m_merge_kernel;
87 | cl::Kernel m_in_transform_kernel;
88 | cl::Kernel m_sgemm_kernel;
89 | cl::Kernel m_sgemv_kernel;
90 | cl::Kernel m_out_transform_bn_kernel;
91 | cl::Kernel m_out_transform_bn_in_kernel;
92 | cl::Kernel m_global_avg_pooling_kernel;
93 | cl::Kernel m_apply_se_kernel;
94 | cl::Kernel m_policymap_kernel;
95 | cl::Buffer m_inBuffer;
96 | cl::Buffer m_inBuffer2;
97 | cl::Buffer m_VBuffer;
98 | cl::Buffer m_MBuffer;
99 | cl::Buffer m_pool_buffer;
100 | cl::Buffer m_pinnedOutBuffer_pol;
101 | cl::Buffer m_pinnedOutBuffer_val;
102 | };
103 |
--------------------------------------------------------------------------------
/src/neural/opencl/OpenCLParams.h:
--------------------------------------------------------------------------------
1 | /*
2 | Originally from the Leela Zero project.
3 | Copyright (C) 2017 Gian-Carlo Pascutto
4 |
5 | This file is part of Leela Chess Zero.
6 | Copyright (C) 2018 The LCZero Authors
7 |
8 | Leela Chess is free software: you can redistribute it and/or modify
9 | it under the terms of the GNU General Public License as published by
10 | the Free Software Foundation, either version 3 of the License, or
11 | (at your option) any later version.
12 |
13 | Leela Chess is distributed in the hope that it will be useful,
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | GNU General Public License for more details.
17 |
18 | You should have received a copy of the GNU General Public License
along with Leela Chess. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | #pragma once
23 |
// Options controlling OpenCL device selection and kernel tuning.
struct OpenCLParams {
  // Id of the OpenCL GPU to use; -1 presumably selects a default device —
  // verify against the backend's device enumeration.
  int gpuId = -1;

  // If true, only run the tuner (no regular network evaluation).
  bool tune_only = false;
  // If true, re-run tuning even when stored results exist. NOTE(review):
  // inferred from the name; confirm against OpenCLTuner usage.
  bool force_tune = false;
  // If true, search the larger, exhaustive tuning parameter space.
  bool tune_exhaustive = false;
  // Batch size used while benchmarking tuning candidates.
  int tune_batch_size = 1;
};
32 |
--------------------------------------------------------------------------------
/src/neural/opencl/OpenCLTuner.h:
--------------------------------------------------------------------------------
1 | /*
2 | Originally from the Leela Zero project.
3 | Copyright (C) 2017 Gian-Carlo Pascutto
4 |
5 | This file is part of Leela Chess Zero.
6 | Copyright (C) 2018 The LCZero Authors
7 |
8 | Leela Chess is free software: you can redistribute it and/or modify
9 | it under the terms of the GNU General Public License as published by
10 | the Free Software Foundation, either version 3 of the License, or
11 | (at your option) any later version.
12 |
13 | Leela Chess is distributed in the hope that it will be useful,
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | GNU General Public License for more details.
17 |
18 | You should have received a copy of the GNU General Public License
along with Leela Chess. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | #pragma once
23 |
24 | #include