├── .circleci
├── Dockerfile
└── config.yml
├── .clang-format
├── .gitignore
├── .gitmodules
├── CONTRIBUTING.md
├── COPYING
├── FLAGS.md
├── README.md
├── appveyor.yml
├── build-cl.cmd
├── build-cuda.cmd
├── build.sh
├── changelog.txt
├── checkdir.py
├── dist
└── README-cuda.txt
├── meson.build
├── meson_options.txt
├── scripts
└── bumpversion.py
├── src
├── benchmark
│ ├── benchmark.cc
│ └── benchmark.h
├── chess
│ ├── bitboard.cc
│ ├── bitboard.h
│ ├── board.cc
│ ├── board.h
│ ├── board_test.cc
│ ├── callbacks.h
│ ├── position.cc
│ ├── position.h
│ ├── position_test.cc
│ ├── uciloop.cc
│ └── uciloop.h
├── engine.cc
├── engine.h
├── main.cc
├── mcts
│ ├── auxengine.cc
│ ├── node.cc
│ ├── node.h
│ ├── params.cc
│ ├── params.h
│ ├── search.cc
│ └── search.h
├── neural
│ ├── blas
│ │ ├── README.md
│ │ ├── blas.h
│ │ ├── convolution1.cc
│ │ ├── convolution1.h
│ │ ├── fully_connected_layer.cc
│ │ ├── fully_connected_layer.h
│ │ ├── network_blas.cc
│ │ ├── se_unit.cc
│ │ ├── se_unit.h
│ │ ├── winograd_convolution3.cc
│ │ ├── winograd_convolution3.h
│ │ └── winograd_transform.ispc
│ ├── cache.cc
│ ├── cache.h
│ ├── cuda
│ │ ├── common_kernels.cu
│ │ ├── cuda_common.h
│ │ ├── fp16_kernels.cu
│ │ ├── kernels.h
│ │ ├── layers.cc
│ │ ├── layers.h
│ │ ├── network_cudnn.cc
│ │ └── readme.txt
│ ├── encoder.cc
│ ├── encoder.h
│ ├── encoder_test.cc
│ ├── factory.cc
│ ├── factory.h
│ ├── loader.cc
│ ├── loader.h
│ ├── network.h
│ ├── network_check.cc
│ ├── network_demux.cc
│ ├── network_legacy.cc
│ ├── network_legacy.h
│ ├── network_mux.cc
│ ├── network_random.cc
│ ├── network_rr.cc
│ ├── network_st_batch.cc
│ ├── network_st_batch.h
│ ├── network_tf.cc
│ ├── opencl
│ │ ├── OpenCL.cc
│ │ ├── OpenCL.h
│ │ ├── OpenCLBuffers.cc
│ │ ├── OpenCLBuffers.h
│ │ ├── OpenCLParams.h
│ │ ├── OpenCLTuner.cc
│ │ ├── OpenCLTuner.h
│ │ ├── README.md
│ │ ├── clblast_level3
│ │ │ ├── common.opencl
│ │ │ ├── xgemm_batched.opencl
│ │ │ ├── xgemm_part1.opencl
│ │ │ ├── xgemm_part2.opencl
│ │ │ ├── xgemm_part3.opencl
│ │ │ └── xgemv.opencl
│ │ ├── clsource
│ │ │ ├── config.opencl
│ │ │ ├── convolve1.opencl
│ │ │ ├── convolve3.opencl
│ │ │ ├── policymap.opencl
│ │ │ └── se.opencl
│ │ └── network_opencl.cc
│ ├── shared
│ │ ├── activation.cc
│ │ ├── activation.h
│ │ ├── policy_map.h
│ │ ├── winograd_filter.cc
│ │ └── winograd_filter.h
│ ├── writer.cc
│ └── writer.h
├── selfplay
│ ├── game.cc
│ ├── game.h
│ ├── loop.cc
│ ├── loop.h
│ ├── tournament.cc
│ └── tournament.h
├── syzygy
│ ├── syzygy.cc
│ ├── syzygy.h
│ └── syzygy_test.cc
├── utils
│ ├── bititer.h
│ ├── cache-old.h
│ ├── cache.h
│ ├── commandline.cc
│ ├── commandline.h
│ ├── configfile.cc
│ ├── configfile.h
│ ├── cppattributes.h
│ ├── exception.h
│ ├── fastmath.h
│ ├── filesystem.h
│ ├── filesystem.posix.cc
│ ├── filesystem.win32.cc
│ ├── hashcat.h
│ ├── hashcat_test.cc
│ ├── histogram.cc
│ ├── histogram.h
│ ├── logging.cc
│ ├── logging.h
│ ├── mutex.h
│ ├── optional.h
│ ├── optionsdict.cc
│ ├── optionsdict.h
│ ├── optionsparser.cc
│ ├── optionsparser.h
│ ├── optionsparser_test.cc
│ ├── random.cc
│ ├── random.h
│ ├── smallarray.h
│ ├── string.cc
│ ├── string.h
│ ├── transpose.cc
│ ├── transpose.h
│ ├── weights_adapter.cc
│ └── weights_adapter.h
├── version.cc
├── version.h
└── version.inc
├── subprojects
├── gtest.wrap
├── protobuf-3.6.0.wrap
├── protobuf.wrap
└── zlib.wrap
├── tensorflow.md
├── third_party
└── cl2.hpp
└── windows_build.md
/.circleci/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM floopcz/tensorflow_cc:ubuntu-shared-cuda
2 |
3 | RUN wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB && apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB && sh -c 'echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list' && apt-get update && apt-get install -y intel-mkl-64bit-2018.2-046
4 | RUN apt-get install -y clang-6.0 ninja-build python3-pip nvidia-opencl-dev libopenblas-dev libboost-dev nvidia-cuda-dev nvidia-cuda-toolkit libgtest-dev git ssh tar gzip ca-certificates sudo
5 | RUN pip3 install meson
6 | RUN ln -s /usr/include/ /usr/include/openblas
7 |
8 | RUN curl -OL https://github.com/google/protobuf/releases/download/v3.5.1/protoc-3.5.1-linux-x86_64.zip
9 | RUN unzip protoc-3.5.1-linux-x86_64.zip -d protoc3
10 | RUN sudo mv protoc3/bin/* /usr/local/bin/
11 | RUN sudo mv protoc3/include/* /usr/local/include/
12 |
--------------------------------------------------------------------------------
/.circleci/config.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | jobs:
3 | build:
4 | docker:
5 | - image: danieluranga/leela_chess_zero-lc0_ubuntu_builder:0.0.4
6 | steps:
7 | - checkout
8 | - run:
9 | name: "Pull Submodules"
10 | command: |
11 | git submodule init
12 | git submodule update --remote
13 | - run:
14 | name: Build clang version
15 | command: CC=clang-6.0 CXX=clang++-6.0 ./build.sh
16 | - run:
17 | command: cp build/release/lc0 /tmp/lc0-clang
18 | - run:
19 | name: Build g++ version
20 | command: ./build.sh
21 | - run:
22 | command: cp build/release/lc0 /tmp/lc0-g++
23 | - store_artifacts:
24 | path: /tmp/lc0-clang
25 | destination: lc0-ubuntu-18-04-clang
26 | - store_artifacts:
27 | path: /tmp/lc0-g++
28 | destination: lc0-ubuntu-18-04-g++
29 |
--------------------------------------------------------------------------------
/.clang-format:
--------------------------------------------------------------------------------
1 | ---
2 | Language: Cpp
3 | BasedOnStyle: Google
4 | DerivePointerAlignment: false
5 | ...
6 |
7 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | build/
2 | testdata/
3 | LC0VSProj/
4 | CUDA_NN/
5 | .DS_Store
6 | xcuserdata
7 | subprojects/*
8 | !subprojects/*.wrap
9 | lc0.xcodeproj/
10 | *.swp
11 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "libs/lczero-common"]
2 | path = libs/lczero-common
3 | url = https://github.com/LeelaChessZero/lczero-common.git
4 |
--------------------------------------------------------------------------------
/build-cl.cmd:
--------------------------------------------------------------------------------
1 | rd /s build
2 |
3 | rem set MSBuild="C:\Program Files (x86)\MSBuild\14.0\Bin\MSBuild.exe"
4 | set MSBuild="C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\MSBuild\15.0\Bin\MSBuild.exe"
5 |
6 | rem call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64
7 | call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64
8 |
9 | meson.py build --backend vs2017 --buildtype release ^
10 | -Dmkl_include="C:\Program Files (x86)\IntelSWTools\compilers_and_libraries\windows\mkl\include" ^
11 | -Dmkl_libdirs="C:\Program Files (x86)\IntelSWTools\compilers_and_libraries\windows\mkl\lib\intel64" ^
12 | -Dopencl_libdirs="C:\Program Files (x86)\AMD APP SDK\3.0\lib\x86_64" ^
13 | -Dopencl_include="C:\Program Files (x86)\AMD APP SDK\3.0\include" ^
14 | -Ddefault_library=static
15 |
16 | pause
17 |
18 | cd build
19 |
20 | %MSBuild% /p:Configuration=Release /p:Platform=x64 ^
21 | /p:PreferredToolArchitecture=x64 "subprojects\zlib-1.2.11\Windows resource for file 'win32_zlib1.rc'@cus.vcxproj" ^
22 | /filelogger
23 |
24 | %MSBuild% /p:Configuration=Release /p:Platform=x64 ^
25 | /p:PreferredToolArchitecture=x64 subprojects\zlib-1.2.11\subprojects@zlib-1.2.11@@z@sta.vcxproj ^
26 | /filelogger
27 |
28 | %MSBuild% /p:Configuration=Release /p:Platform=x64 ^
29 | /p:PreferredToolArchitecture=x64 lc0@exe.vcxproj ^
30 | /filelogger
31 |
32 |
--------------------------------------------------------------------------------
/build-cuda.cmd:
--------------------------------------------------------------------------------
1 | rd /s build
2 |
3 | rem set MSBuild="C:\Program Files (x86)\MSBuild\14.0\Bin\MSBuild.exe"
4 | set MSBuild="C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\MSBuild\15.0\Bin\MSBuild.exe"
5 | rem call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64
6 | call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64
7 | meson.py build --backend vs2017 --buildtype release ^
8 | -Dcudnn_libdirs="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0\lib\x64","C:\dev\cuDNN\cuda\lib\x64" ^
9 | -Dcudnn_include="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0\include","C:\dev\cuDNN\cuda\include" ^
10 | -Ddefault_library=static
11 |
12 | pause
13 |
14 |
15 | cd build
16 |
17 | %MSBuild% ^
18 | /p:Configuration=Release ^
19 | /p:Platform=x64 ^
20 | /p:PreferredToolArchitecture=x64 lc0.sln ^
21 | /filelogger
22 |
23 |
--------------------------------------------------------------------------------
/build.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | set -e
4 |
5 | case $1 in
6 | plain|debug|debugoptimized|release|minsize)
7 | BUILDTYPE=$1
8 | shift
9 | ;;
10 | *)
11 | BUILDTYPE=release
12 | ;;
13 | esac
14 |
15 | BUILDDIR=build/${BUILDTYPE}
16 |
17 | if [ -f ${BUILDDIR}/build.ninja ]
18 | then
19 | meson configure ${BUILDDIR} -Dbuildtype=${BUILDTYPE} -Dprefix=${INSTALL_PREFIX:-/usr/local} "$@"
20 | else
21 | meson ${BUILDDIR} --buildtype ${BUILDTYPE} --prefix ${INSTALL_PREFIX:-/usr/local} "$@"
22 | fi
23 |
24 | pushd ${BUILDDIR}
25 |
26 | NINJA=$(awk '/ninja/ {ninja=$4} END {print ninja}' meson-logs/meson-log.txt)
27 |
28 | if [ -n "${INSTALL_PREFIX}" ]
29 | then
30 | ${NINJA} install
31 | else
32 | ${NINJA}
33 | fi
34 |
35 | popd
36 |
--------------------------------------------------------------------------------
/checkdir.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | import sys
4 | import os
5 | if len(sys.argv) > 1 and os.path.isdir(sys.argv[1]):
6 | exit(0)
7 | exit(1)
8 |
--------------------------------------------------------------------------------
/dist/README-cuda.txt:
--------------------------------------------------------------------------------
1 | Lc0
2 |
3 | Lc0 is a UCI-compliant chess engine designed to play chess via
4 | neural network, specifically those of the LeelaChessZero project
5 | (https://lczero.org).
6 |
7 | This binary uses CUDA and cuDNN dynamic link libraries copyrighted
8 | by Nvidia corporation (http://www.nvidia.com), and redistributed as
9 | permitted by the respective license file (see CUDA.txt section 2.2
10 | and CUDNN.txt section "CUDNN DISTRIBUTION" for details). You are
11 | authorized to redistribute these libraries together with this
12 | package as a whole but not individually.
13 |
14 |
15 | License
16 |
17 | Leela Chess is free software: you can redistribute it and/or modify
18 | it under the terms of the GNU General Public License as published by
19 | the Free Software Foundation, either version 3 of the License, or
20 | (at your option) any later version.
21 |
22 | Leela Chess is distributed in the hope that it will be useful,
23 | but WITHOUT ANY WARRANTY; without even the implied warranty of
24 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 | GNU General Public License for more details.
26 |
27 | You should have received a copy of the GNU General Public License
28 | along with Leela Chess. If not, see <https://www.gnu.org/licenses/>.
29 |
30 | Additional permission under GNU GPL version 3 section 7
31 |
32 | If you modify this Program, or any covered work, by linking or
33 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
34 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
35 | modified version of those libraries), containing parts covered by the
36 | terms of the respective license agreement, the licensors of this
37 | Program grant you additional permission to convey the resulting work.
38 |
39 |
--------------------------------------------------------------------------------
/meson_options.txt:
--------------------------------------------------------------------------------
1 | option('tensorflow_include',
2 | type: 'array',
3 | value: ['/usr/local/include/tensorflow/'],
4 | description: 'Paths to tensorflow include directories')
5 |
6 | option('protobuf_include',
7 | type: 'array',
8 | value: ['/usr/local/include/'],
9 | description: 'Paths to protobuf include directories')
10 |
11 | option('openblas_include',
12 | type: 'array',
13 | value: ['/usr/include/openblas/'],
14 | description: 'Paths to openblas include directories')
15 |
16 | option('opencl_include',
17 | type: 'array',
18 | value: ['/usr/include/'],
19 | description: 'Paths to OpenCL include directories')
20 |
21 | option('tensorflow_libdir',
22 | type: 'array',
23 | value: ['/usr/local/lib/tensorflow_cc/'],
24 | description: 'Paths to tensorflow libraries')
25 |
26 | option('protobuf_libdir',
27 | type: 'array',
28 | value: ['/usr/lib/x86_64-linux-gnu/'],
29 | description: 'Paths to protobuf libraries')
30 |
31 | option('openblas_libdirs',
32 | type: 'array',
33 | value: ['/usr/lib/'],
34 | description: 'Paths to OpenBLAS libraries')
35 |
36 | option('opencl_libdirs',
37 | type: 'array',
38 | value: ['/opt/cuda/lib64/', '/usr/local/cuda/lib64/'],
39 | description: 'Paths to OpenCL libraries')
40 |
41 | option('cudnn_libdirs',
42 | type: 'array',
43 | value: ['/opt/cuda/lib64/', '/usr/local/cuda/lib64/'],
44 | description: 'Paths to Cuda/cudnn libraries')
45 |
46 | option('mkl_libdirs',
47 | type: 'array',
48 | value: ['/opt/intel/lib/intel64', '/opt/intel/mkl/lib/intel64', '/opt/intel/mkl/lib'],
49 | description: 'Paths to MKL libraries')
50 |
51 | option('mkl_include',
52 | type: 'array',
53 | value: ['/opt/intel/mkl/include'],
54 |        description: 'Paths to MKL include directories')
55 |
56 | option('cudnn_include',
57 | type: 'array',
58 | value: ['/opt/cuda/include/', '/usr/local/cuda/include/'],
59 | description: 'Paths to cudnn include directory')
60 |
61 | option('build_backends',
62 | type: 'boolean',
63 | value: true,
64 | description: 'Build backends for NN computation')
65 |
66 | option('blas',
67 | type: 'boolean',
68 | value: true,
69 | description: 'Enable BLAS backend')
70 |
71 | option('ispc',
72 | type: 'boolean',
73 | value: true,
74 | description: 'use ispc')
75 |
76 | option('ispc_native_only',
77 | type: 'boolean',
78 | value: true,
79 | description: 'use ispc and enable native arch only')
80 |
81 | option('cudnn',
82 | type: 'boolean',
83 | value: true,
84 | description: 'Enable cuDNN backend')
85 |
86 | option('opencl',
87 | type: 'boolean',
88 | value: true,
89 | description: 'Enable OpenCL backend')
90 |
91 | option('tensorflow',
92 | type: 'boolean',
93 | value: false,
94 | description: 'Enable TensorFlow backend')
95 |
96 | option('openblas',
97 | type: 'boolean',
98 | value: true,
99 | description: 'Enable OpenBLAS support')
100 |
101 | option('mkl',
102 | type: 'boolean',
103 | value: true,
104 | description: 'Enable MKL BLAS support')
105 |
106 | option('accelerate',
107 | type: 'boolean',
108 | value: true,
109 | description: 'Enable Accelerate BLAS support')
110 |
111 | option('popcnt',
112 | type: 'boolean',
113 | value: true,
114 | description: 'Use the popcnt instruction')
115 |
116 | option('pext',
117 | type: 'boolean',
118 | value: false,
119 | description: 'Use the pext instruction')
120 |
121 | option('gtest',
122 | type: 'boolean',
123 | value: true,
124 | description: 'Build gtest tests')
125 |
126 | option('protobuf-3-6-0',
127 | type: 'boolean',
128 | value: false,
129 | description: 'Use the protobuf 3.6.0 subproject')
130 |
--------------------------------------------------------------------------------
/scripts/bumpversion.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import argparse
4 | import os
5 |
6 |
7 | VERSION_FILE = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../src/version.inc")
8 | VERSION_CONTENT = """
9 | #define LC0_VERSION_MAJOR {}
10 | #define LC0_VERSION_MINOR {}
11 | #define LC0_VERSION_PATCH {}
12 | #define LC0_VERSION_POSTFIX "{}"
13 | """
14 | VERSION_CONTENT = VERSION_CONTENT.strip()
15 |
16 |
17 | def get_version():
18 | with open(VERSION_FILE, 'r') as f:
19 | major = int(f.readline().split()[2])
20 | minor = int(f.readline().split()[2])
21 | patch = int(f.readline().split()[2])
22 | postfix = f.readline().split()[2]
23 |
24 | postfix = postfix.replace('"', '')
25 | return major, minor, patch, postfix
26 |
27 |
28 | def set_version(major, minor, patch, postfix=""):
29 | version_inc = VERSION_CONTENT.format(major, minor, patch, postfix)
30 |
31 | with open(VERSION_FILE, 'w') as f:
32 | f.write(version_inc)
33 |
34 |
35 | def update(major, minor, patch, postfix=""):
36 | set_version(major, minor, patch, postfix)
37 |
38 |
39 | def main(argv):
40 | major, minor, patch, postfix = get_version()
41 |
42 | if argv.major:
43 | major += 1
44 | minor = 0
45 | patch = 0
46 | postfix = ""
47 | update(major, minor, patch)
48 | if argv.minor:
49 | minor += 1
50 | patch = 0
51 | postfix = ""
52 | update(major, minor, patch)
53 | if argv.patch:
54 | patch += 1
55 | postfix = ""
56 | update(major, minor, patch)
57 | if argv.postfix and len(argv.postfix) > 0:
58 | postfix = argv.postfix
59 | update(major, minor, patch, postfix)
60 |
61 | if len(postfix) == 0:
62 | print('v{}.{}.{}'.format(major, minor, patch))
63 | else:
64 | print('v{}.{}.{}-{}'.format(major, minor, patch, postfix))
65 |
66 |
67 | if __name__ == "__main__":
68 | argparser = argparse.ArgumentParser(description=\
69 | 'Set or read current version.')
70 | argparser.add_argument('--major', action='store_true',
71 | help='bumps major version')
72 | argparser.add_argument('--minor', action='store_true',
73 | help='bumps minor version')
74 | argparser.add_argument('--patch', action='store_true',
75 | help='bumps patch')
76 | argparser.add_argument('--postfix', type=str,
77 | help='set postfix')
78 | argv = argparser.parse_args()
79 | main(argv)
80 |
81 |
--------------------------------------------------------------------------------
/src/benchmark/benchmark.cc:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018-2019 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see <https://www.gnu.org/licenses/>.
17 |
18 | Additional permission under GNU GPL version 3 section 7
19 |
20 | If you modify this Program, or any covered work, by linking or
21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
23 | modified version of those libraries), containing parts covered by the
24 | terms of the respective license agreement, the licensors of this
25 | Program grant you additional permission to convey the resulting work.
26 | */
27 |
28 | #include "benchmark/benchmark.h"
29 | #include "mcts/search.h"
30 |
31 | namespace lczero {
32 | namespace {
33 | const int kDefaultThreads = 2;
34 |
35 | const OptionId kThreadsOptionId{"threads", "Threads",
36 | "Number of (CPU) worker threads to use.", 't'};
37 | const OptionId kNNCacheSizeId{
38 | "nncache", "NNCacheSize",
39 | "Number of positions to store in a memory cache. A large cache can speed "
40 | "up searching, but takes memory."};
41 | const OptionId kNodesId{"nodes", "", "Number of nodes to run as a benchmark."};
42 | const OptionId kMovetimeId{"movetime", "",
43 | "Benchmark time allocation, in milliseconds."};
44 | const OptionId kFenId{"fen", "", "Benchmark initial position FEN."};
45 |
46 | } // namespace
47 |
48 | void Benchmark::Run() {
49 | OptionsParser options;
50 | NetworkFactory::PopulateOptions(&options);
51 |   options.Add<IntOption>(kThreadsOptionId, 1, 128) = kDefaultThreads;
52 |   options.Add<IntOption>(kNNCacheSizeId, 0, 999999999) = 200000;
53 | SearchParams::Populate(&options);
54 |
55 |   options.Add<IntOption>(kNodesId, -1, 999999999) = -1;
56 |   options.Add<IntOption>(kMovetimeId, -1, 999999999) = 10000;
57 |   options.Add<StringOption>(kFenId) = ChessBoard::kStartposFen;
58 |
59 | if (!options.ProcessAllFlags()) return;
60 |
61 | try {
62 | auto option_dict = options.GetOptionsDict();
63 |
64 | auto network = NetworkFactory::LoadNetwork(option_dict);
65 |
66 | NodeTree tree;
67 |     tree.ResetToPosition(option_dict.Get<std::string>(kFenId.GetId()), {});
68 |
69 | NNCache cache;
70 |     cache.SetCapacity(option_dict.Get<int>(kNNCacheSizeId.GetId()));
71 |
72 | const auto start = std::chrono::steady_clock::now();
73 |
74 | SearchLimits limits;
75 |     int visits = option_dict.Get<int>(kNodesId.GetId());
76 |     const int movetime = option_dict.Get<int>(kMovetimeId.GetId());
77 | if (movetime > -1) {
78 | limits.search_deadline = start + std::chrono::milliseconds(movetime);
79 | }
80 | if (visits > -1) {
81 | limits.visits = visits;
82 | }
83 |
84 |     auto search = std::make_unique<Search>(
85 | tree, network.get(),
86 | std::bind(&Benchmark::OnBestMove, this, std::placeholders::_1),
87 | std::bind(&Benchmark::OnInfo, this, std::placeholders::_1), limits,
88 | option_dict, &cache, nullptr);
89 |
90 |     search->StartThreads(option_dict.Get<int>(kThreadsOptionId.GetId()));
91 |
92 | search->Wait();
93 |
94 | const auto end = std::chrono::steady_clock::now();
95 |     std::chrono::duration<double> time = end - start;
96 | std::cout << "Benchmark final time " << time.count() << "s calculating "
97 | << search->GetTotalPlayouts() / time.count()
98 | << " nodes per second." << std::endl;
99 | } catch (Exception& ex) {
100 | std::cerr << ex.what() << std::endl;
101 | }
102 | }
103 |
104 | void Benchmark::OnBestMove(const BestMoveInfo& move) {
105 | std::cout << "bestmove " << move.bestmove.as_string() << std::endl;
106 | }
107 |
108 | void Benchmark::OnInfo(const std::vector<ThinkingInfo>& infos) {
109 | std::string line = "Benchmark time " + std::to_string(infos[0].time);
110 | line += "ms, " + std::to_string(infos[0].nodes) + " nodes, ";
111 | line += std::to_string(infos[0].nps) + " nps";
112 | if (!infos[0].pv.empty()) line += ", move " + infos[0].pv[0].as_string();
113 | std::cout << line << std::endl;
114 | }
115 |
116 | } // namespace lczero
117 |
--------------------------------------------------------------------------------
/src/benchmark/benchmark.h:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see <https://www.gnu.org/licenses/>.
17 |
18 | Additional permission under GNU GPL version 3 section 7
19 |
20 | If you modify this Program, or any covered work, by linking or
21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
23 | modified version of those libraries), containing parts covered by the
24 | terms of the respective license agreement, the licensors of this
25 | Program grant you additional permission to convey the resulting work.
26 | */
27 |
28 | #pragma once
29 |
30 | #include "mcts/search.h"
31 | #include "neural/cache.h"
32 | #include "neural/factory.h"
33 | #include "utils/optionsparser.h"
34 |
35 | namespace lczero {
36 |
37 | class Benchmark{
38 | public:
39 | Benchmark() = default;
40 |
41 | void Run();
42 | void OnBestMove(const BestMoveInfo& move);
43 |   void OnInfo(const std::vector<ThinkingInfo>& infos);
44 | };
45 |
46 | } // namespace lczero
47 |
--------------------------------------------------------------------------------
/src/chess/callbacks.h:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see <https://www.gnu.org/licenses/>.
17 |
18 | Additional permission under GNU GPL version 3 section 7
19 |
20 | If you modify this Program, or any covered work, by linking or
21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
23 | modified version of those libraries), containing parts covered by the
24 | terms of the respective license agreement, the licensors of this
25 | Program grant you additional permission to convey the resulting work.
26 | */
27 |
28 | #pragma once
29 |
30 | #include <functional>
31 | #include <string>
32 | #include <vector>
33 | #include "chess/bitboard.h"
34 | #include "chess/position.h"
35 | #include "utils/optional.h"
36 |
37 | namespace lczero {
38 |
39 | // Is sent when search decides on the best move.
40 | struct BestMoveInfo {
41 | BestMoveInfo(Move bestmove, Move ponder = Move{})
42 | : bestmove(bestmove), ponder(ponder) {}
43 | Move bestmove;
44 | Move ponder;
45 | // Those are extensions and not really UCI protocol.
46 | // 1 if it's "player1", 2 if it's "player2"
47 | int player = -1;
48 | // Index of the game in the tournament (0-based).
49 | int game_id = -1;
50 | // The color of the player, if known.
51 |   optional<bool> is_black;
52 |
53 |   using Callback = std::function<void(const BestMoveInfo&)>;
54 | };
55 |
56 | // Is sent during the search.
57 | struct ThinkingInfo {
58 | // Full depth.
59 | int depth = -1;
60 | // Maximum depth.
61 | int seldepth = -1;
62 | // Time since start of thinking.
63 | int64_t time = -1;
64 | // Nodes visited.
65 | int64_t nodes = -1;
66 | // Nodes per second.
67 | int nps = -1;
68 | // Hash fullness * 1000
69 | int hashfull = -1;
70 | // Win in centipawns.
71 |   optional<int> score;
72 | // Number of successful TB probes (not the same as playouts ending in TB hit).
73 | int tb_hits = -1;
74 | // Best line found. Moves are from perspective of white player.
75 |   std::vector<Move> pv;
76 | // Multipv index.
77 | int multipv = -1;
78 | // Freeform comment.
79 | std::string comment;
80 |
81 | // Those are extensions and not really UCI protocol.
82 | // 1 if it's "player1", 2 if it's "player2"
83 | int player = -1;
84 | // Index of the game in the tournament (0-based).
85 | int game_id = -1;
86 | // The color of the player, if known.
87 |   optional<bool> is_black;
88 |
89 |   using Callback = std::function<void(const std::vector<ThinkingInfo>&)>;
90 | };
91 |
92 | // Is sent when a single game is finished.
93 | struct GameInfo {
94 | // Game result.
95 | GameResult game_result = GameResult::UNDECIDED;
96 | // Name of the file with training data.
97 | std::string training_filename;
98 | // Game moves.
99 |   std::vector<Move> moves;
100 | // Index of the game in the tournament (0-based).
101 | int game_id = -1;
102 | // The color of the player1, if known.
103 |   optional<bool> is_black;
104 | // Minimum resign threshold which would have resulted in a false positive
105 | // if resign had of been enabled.
106 | // Only provided if the game wasn't played with resign enabled.
107 |   optional<float> min_false_positive_threshold;
108 |
109 |   using Callback = std::function<void(const GameInfo&)>;
110 | };
111 |
112 | // Is sent in the end of tournament and also during the tournament.
113 | struct TournamentInfo {
114 | // Did tournament finish, so those results are final.
115 | bool finished = false;
116 |
117 | // Player1's [win/draw/lose] as [white/black].
118 | // e.g. results[2][1] is how many times player 1 lost as black.
119 | int results[3][2] = {{0, 0}, {0, 0}, {0, 0}};
120 |   using Callback = std::function<void(const TournamentInfo&)>;
121 | };
122 |
123 | } // namespace lczero
124 |
--------------------------------------------------------------------------------
/src/chess/uciloop.h:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see <https://www.gnu.org/licenses/>.
17 |
18 | Additional permission under GNU GPL version 3 section 7
19 |
20 | If you modify this Program, or any covered work, by linking or
21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
23 | modified version of those libraries), containing parts covered by the
24 | terms of the respective license agreement, the licensors of this
25 | Program grant you additional permission to convey the resulting work.
26 | */
27 |
28 | #pragma once
29 |
30 | #include <functional>
31 | #include <string>
32 | #include <unordered_map>
33 | #include <vector>
34 | #include "chess/callbacks.h"
35 | #include "utils/exception.h"
36 |
37 | namespace lczero {
38 |
39 | struct GoParams {
40 |   optional<std::int64_t> wtime;
41 |   optional<std::int64_t> btime;
42 |   optional<std::int64_t> winc;
43 |   optional<std::int64_t> binc;
44 |   optional<int> movestogo;
45 |   optional<int> depth;
46 |   optional<int> nodes;
47 |   optional<std::int64_t> movetime;
48 |   bool infinite = false;
49 |   std::vector<std::string> searchmoves;
50 | bool ponder = false;
51 | };
52 |
53 | class UciLoop {
54 | public:
55 | virtual ~UciLoop() {}
56 | virtual void RunLoop();
57 |
58 | // Sends response to host.
59 | void SendResponse(const std::string& response);
60 | // Sends responses to host ensuring they are received as a block.
61 |   virtual void SendResponses(const std::vector<std::string>& responses);
62 |   void SendBestMove(const BestMoveInfo& move);
63 |   void SendInfo(const std::vector<ThinkingInfo>& infos);
64 | void SendId();
65 |
66 | // Command handlers.
67 | virtual void CmdUci() { throw Exception("Not supported"); }
68 | virtual void CmdIsReady() { throw Exception("Not supported"); }
69 | virtual void CmdSetOption(const std::string& /*name*/,
70 | const std::string& /*value*/,
71 | const std::string& /*context*/) {
72 | throw Exception("Not supported");
73 | }
74 | virtual void CmdUciNewGame() { throw Exception("Not supported"); }
75 | virtual void CmdPosition(const std::string& /*position*/,
76 |                            const std::vector<std::string>& /*moves*/) {
77 | throw Exception("Not supported");
78 | }
79 | virtual void CmdGo(const GoParams& /*params*/) {
80 | throw Exception("Not supported");
81 | }
82 | virtual void CmdStop() { throw Exception("Not supported"); }
83 | virtual void CmdPonderHit() { throw Exception("Not supported"); }
84 | virtual void CmdStart() { throw Exception("Not supported"); }
85 |
86 | private:
87 | bool DispatchCommand(
88 | const std::string& command,
89 | const std::unordered_map& params);
90 | };
91 |
92 | } // namespace lczero
93 |
--------------------------------------------------------------------------------
/src/main.cc:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018-2019 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 |
18 | Additional permission under GNU GPL version 3 section 7
19 |
20 | If you modify this Program, or any covered work, by linking or
21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
23 | modified version of those libraries), containing parts covered by the
24 | terms of the respective license agreement, the licensors of this
25 | Program grant you additional permission to convey the resulting work.
26 | */
27 |
28 | #include "benchmark/benchmark.h"
29 | #include "chess/board.h"
30 | #include "engine.h"
31 | #include "selfplay/loop.h"
32 | #include "utils/commandline.h"
33 | #include "utils/logging.h"
34 | #include "version.h"
35 |
// Program entry point: prints a banner, initializes chess move-generation
// tables and the command-line parser, then runs one of three modes:
// "selfplay", "benchmark", or (default) "uci".
int main(int argc, const char** argv) {
  // The banner goes to the log file and to stderr (CERR), keeping stdout
  // free for the UCI protocol.
  LOGFILE << "Leelafish, based on Lc0, started.";
  CERR << "Leelafish, based on:";
  CERR << " _";
  CERR << "| _ | |";
  CERR << "|_ |_ |_| v" << GetVersionStr() << " built " << __DATE__;
  using namespace lczero;

  // One-time initialization of the magic-bitboard tables.
  InitializeMagicBitboards();

  CommandLine::Init(argc, argv);
  CommandLine::RegisterMode("uci", "(default) Act as UCI engine");
  CommandLine::RegisterMode("selfplay", "Play games with itself");
  CommandLine::RegisterMode("benchmark", "Quick benchmark");

  if (CommandLine::ConsumeCommand("selfplay")) {
    // Selfplay mode.
    SelfPlayLoop loop;
    loop.RunLoop();
  } else if (CommandLine::ConsumeCommand("benchmark")) {
    // Benchmark mode.
    Benchmark benchmark;
    benchmark.Run();
  } else {
    // Consuming optional "uci" mode.
    CommandLine::ConsumeCommand("uci");
    // Ordinary UCI engine.
    EngineLoop loop;
    loop.RunLoop();
  }
}
67 |
--------------------------------------------------------------------------------
/src/neural/blas/README.md:
--------------------------------------------------------------------------------
1 | The files in this directory comprise the BLAS backend of Lc0.
2 |
3 | ## License
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
along with Leela Chess. If not, see <http://www.gnu.org/licenses/>.
17 |
18 | **The source files of this directory are not covered by any additional
19 | permission.**
20 |
21 |
22 |
--------------------------------------------------------------------------------
/src/neural/blas/blas.h:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 | */
18 |
19 | #pragma once
20 |
21 | // Select the BLAS vendor based on defines
22 |
// NOTE(review): the header names in the #include directives below were
// restored; they had been stripped from this copy of the file.
#ifdef USE_MKL
#include <mkl.h>
#else

#ifdef USE_OPENBLAS
#include <cblas.h>

// Specific openblas routines.
extern "C" {
int openblas_get_num_procs(void);
void openblas_set_num_threads(int num_threads);
char* openblas_get_corename(void);
char* openblas_get_config(void);
}

#else

// On macOS, fall back to Apple's Accelerate framework for BLAS.
#ifdef __APPLE__
#include <Accelerate/Accelerate.h>
#define USE_ACCELERATE
#endif

#endif  // USE_OPENBLAS

#endif  // USE_MKL
48 |
--------------------------------------------------------------------------------
/src/neural/blas/convolution1.cc:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 | */
18 |
19 | #include "neural/blas/convolution1.h"
20 | #include "neural/blas/blas.h"
21 |
22 | namespace lczero {
23 |
// Computes the 1x1 convolution for each batch element as one matrix
// multiplication: for every board square, output channels are a linear
// combination (via @weights) of the input channels.
void Convolution1::Forward(const size_t batch_size, const size_t input_channels,
                           const size_t output_channels, const float* input,
                           const float* weights, float* output) {
  for (size_t i = 0; i < batch_size; i++) {
    // C←αAB + βC
    // M Number of rows in matrices A and C.
    // N Number of columns in matrices B and C.
    // K Number of columns in matrix A; number of rows in matrix B.
    // lda The size of the first dimension of matrix A; if you are
    // passing a matrix A[m][n], the value should be m.
    // cblas_sgemm(CblasRowMajor, TransA, TransB, M, N, K, alpha, A, lda, B,
    // ldb, beta, C, N);

    //           C                    A                    B
    //
    //        outputs     :=       weights        x     input
    //
    // cols: kSquares (N)      input_channels (K)   kSquares (N)
    //
    // rows: output_channels (M) output_channels (M) input_channels (K)

    const float* batch_input = input + i * kSquares * input_channels;
    float* batch_output = output + i * kSquares * output_channels;

    cblas_sgemm(CblasRowMajor,         // Row major format.
                CblasNoTrans,          // A not transposed.
                CblasNoTrans,          // B not transposed.
                (int)output_channels,  // M
                kSquares,              // N
                (int)input_channels,   // K
                1.0f,                  // Alpha
                weights,               // A
                (int)input_channels,   // lda, leading rank of A
                batch_input,           // B
                kSquares,              // ldb, leading rank of B
                0.0f,                  // beta
                batch_output,          // C
                kSquares);             // ldc, leading rank of C
  }
}
64 |
65 | } // namespace lczero
66 |
--------------------------------------------------------------------------------
/src/neural/blas/convolution1.h:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 | */
18 |
19 | #pragma once
20 |
21 | #include
22 | #include
23 |
24 | namespace lczero {
25 |
// Convolution 1x1: a per-square linear mixing of channels on the 8x8 board.
class Convolution1 {
 public:
  Convolution1() = delete;  // Static-only class; not instantiable.

  // Batched forward inference.
  static void Forward(const size_t batch_size, const size_t input_channels,
                      const size_t output_channels, const float* input,
                      const float* weights, float* output);

 private:
  static constexpr auto kWidth = 8;
  static constexpr auto kHeight = 8;
  static constexpr auto kSquares = kWidth * kHeight;  // 64 board squares.
};
41 | } // namespace lczero
42 |
--------------------------------------------------------------------------------
/src/neural/blas/fully_connected_layer.cc:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 | */
18 |
19 | #include "neural/blas/fully_connected_layer.h"
20 | #include "neural/blas/blas.h"
21 |
22 | #include
23 | #include
24 | #include
25 |
26 | namespace lczero {
27 |
// Batched dense layer: outputs := weights x inputs, then biases are added
// and ReLU optionally applied. @weights is [output_size x input_size]
// row-major; @inputs/@outputs hold batch_size vectors back to back.
void FullyConnectedLayer::Forward1D(size_t batch_size, const size_t input_size,
                                    const size_t output_size,
                                    const float* inputs, const float* weights,
                                    const float* biases, bool apply_relu,
                                    float* outputs) {
  if (batch_size == 1) {
    // Just a matrix-vector multiplication
    //
    //             C                A               B
    //
    //         outputs    :=    weights   x     inputs
    //
    // cols:       1            input_size       1
    //
    // rows:   output_size      output_size   input_size
    //

    cblas_sgemv(CblasRowMajor, CblasNoTrans,
                // M              K
                (int)output_size, (int)input_size, 1.0f, weights,
                (int)input_size, inputs, 1, 0.0f, outputs, 1);
  } else {
    // more columns, matrix-matrix multiplication
    //
    //             C                     A                         B
    //
    //          outputs      :=      weights        x          inputs
    //
    // cols:  batch_size (N)     input_size (K)          batch_size (N)
    //
    // rows:  output_size (M)    output_size (M)         input_size (K)
    //

    // C←αAB + βC
    // M Number of rows in matrices A and C.
    // N Number of columns in matrices B and C.
    // K Number of columns in matrix A; number of rows in matrix B.
    // lda The size of the first dimension of matrix A; if you are
    // passing a matrix A[m][n], the value should be m.
    // cblas_sgemm(CblasRowMajor, TransA, TransB, M, N, K, alpha, A, lda, B,
    // ldb, beta, C, N);

    cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
                (int)output_size,   // M
                (int)batch_size,    // N
                (int)input_size,    // K
                1.0f,               // alpha
                weights,            // A
                (int)input_size,    // lda, leading rank of A
                inputs,             // B
                (int)input_size,    // ldb, leading rank of B
                0.0f,               // beta
                outputs,            // C
                (int)output_size);  // ldc, leading rank of C
  }
  if (apply_relu) {
    // Fused bias-add and ReLU over every batch element.
    for (size_t i = 0; i < batch_size; i++) {
      float* batch_outputs = outputs + i * output_size;
      for (size_t o = 0; o < output_size; o++) {
        float val = biases[o] + batch_outputs[o];
        batch_outputs[o] = val >= 0 ? val : 0;
      }
    }
  } else {
    // Bias-add only.
    for (size_t i = 0; i < batch_size; i++) {
      float* batch_outputs = outputs + i * output_size;
      for (size_t o = 0; o < output_size; o++) {
        batch_outputs[o] += biases[o];
      }
    }
  }
}
100 |
// Returns the dot product of @x and @y, both of length @size.
float FullyConnectedLayer::Forward0D(const size_t size, const float* x,
                                     const float* y) {
  // A scalar product, also known as a dot-product.
  // float cblas_sdot(const int N, const float *X, const int incX, const float
  // *Y,
  //                  const int incY);
  return cblas_sdot((int)size, x, 1, y, 1);
}
109 |
110 | } // namespace lczero
111 |
--------------------------------------------------------------------------------
/src/neural/blas/fully_connected_layer.h:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 | */
18 |
19 | #pragma once
20 |
21 | #include
22 | #include
23 |
24 | namespace lczero {
25 |
// Fully-connected (dense) layers over plain float buffers, backed by BLAS.
class FullyConnectedLayer {
 public:
  FullyConnectedLayer() = delete;  // Static-only class; not instantiable.

  // Forward inference, batched, from input_size to output_size.
  static void Forward1D(const size_t batch_size, const size_t input_size,
                        const size_t output_size, const float* input,
                        const float* weights, const float* biases,
                        bool apply_relu, float* output);

  // Forward inference, non-batched, from input_size to a scalar
  // (dot product of @input and @weights).
  static float Forward0D(const size_t input_size, const float* input,
                         const float* weights);
};
41 |
42 | } // namespace lczero
43 |
--------------------------------------------------------------------------------
/src/neural/blas/se_unit.cc:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 | */
18 |
19 | #include "neural/blas/se_unit.h"
20 | #include "neural/blas/fully_connected_layer.h"
21 |
22 | #include
23 |
24 | namespace lczero {
namespace {
// Board geometry: every feature plane is 8x8 = 64 squares.
constexpr int kWidth = 8;
constexpr int kHeight = 8;
constexpr int kSquares = kWidth * kHeight;
}  // namespace
30 |
31 | static void global_avg_pooling(const size_t channels, const float* input,
32 | float* output) {
33 | for (auto c = size_t{0}; c < channels; c++) {
34 | auto acc = 0.0f;
35 | for (auto i = size_t{0}; i < kSquares; i++) {
36 | acc += input[c * kSquares + i];
37 | }
38 | output[c] = acc / kSquares;
39 | }
40 | }
41 |
// Applies the Squeeze-and-Excitation scaling:
//   output = ReLU(sigmoid(gamma) * input + beta + res), per channel.
// @scale holds, for each batch element, `channels` gamma values followed by
// `channels` beta values (the layout produced by the second SE
// fully-connected layer, which outputs 2 * channels values per element).
static void apply_se(const size_t channels, const size_t batch_size,
                     const float* input, const float* res, const float* scale,
                     float* output) {
  const auto lambda_ReLU = [](const auto val) {
    return (val > 0.0f) ? val : 0;
  };

  const auto lambda_sigmoid = [](const auto val) {
    return 1.0f / (1.0f + exp(-val));
  };

  // c enumerates (batch, channel) pairs: c = batch * channels + channel.
  for (auto c = size_t{0}; c < channels * batch_size; c++) {
    auto batch = c / channels;
    // c + batch * channels == 2 * batch * channels + channel, i.e. the gamma
    // slot of this batch element; beta is `channels` entries further on.
    auto gamma = lambda_sigmoid(scale[c + batch * channels]);
    auto beta = scale[c + batch * channels + channels];
    for (auto i = size_t{0}; i < kSquares; i++) {
      output[c * kSquares + i] = lambda_ReLU(gamma * input[c * kSquares + i] +
                                             beta + res[c * kSquares + i]);
    }
  }
}
63 |
64 | void ApplySEUnit(const size_t batch_size, const size_t channels,
65 | const size_t se_fc_outputs, const float* input,
66 | const float* residual, const float* weights_w1,
67 | const float* weights_b1, const float* weights_w2,
68 | const float* weights_b2, float* output) {
69 | std::vector pool(2 * channels * batch_size);
70 | std::vector fc_out1(batch_size * se_fc_outputs);
71 |
72 | global_avg_pooling(channels * batch_size, input, pool.data());
73 |
74 | FullyConnectedLayer::Forward1D(batch_size, channels, se_fc_outputs,
75 | pool.data(), weights_w1, weights_b1,
76 | true, // Relu On
77 | fc_out1.data());
78 |
79 | FullyConnectedLayer::Forward1D(batch_size, se_fc_outputs, 2 * channels,
80 | fc_out1.data(), weights_w2, weights_b2,
81 | false, // Relu Off
82 | pool.data());
83 |
84 | // Sigmoid, scale and add residual
85 | apply_se(channels, batch_size, input, residual, pool.data(), output);
86 | }
87 |
88 | } // namespace lczero
89 |
--------------------------------------------------------------------------------
/src/neural/blas/se_unit.h:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 | */
18 |
19 | #pragma once
20 |
21 | #include
22 |
23 | namespace lczero {
24 |
// Applies a Squeeze-and-Excitation (SE) unit to a batch of 8x8 feature
// planes: @input is globally pooled per channel, passed through two
// fully-connected layers (w1/b1 with se_fc_outputs hidden units, then
// w2/b2), and the result scales/shifts @input before @residual is added
// into @output.
void ApplySEUnit(const size_t batch_size, const size_t channels,
                 const size_t se_fc_outputs, const float* input,
                 const float* residual, const float* weights_w1,
                 const float* weights_b1, const float* weights_w2,
                 const float* weights_b2, float* output);
30 |
31 | } // namespace lczero
32 |
--------------------------------------------------------------------------------
/src/neural/blas/winograd_convolution3.h:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 | */
18 |
19 | #pragma once
20 |
21 | #include
22 | #include
23 |
24 | namespace lczero {
25 |
26 | // Convolution 3x3 on a 8x8 board using the Winograd algorithm.
27 | //
28 | // Ref:
29 | //
30 | // Fast Algorithms for Convolutional Neural Networks
31 | // https://arxiv.org/abs/1509.09308
32 | //
33 | // https://ai.intel.com/winograd/
34 | // https://ai.intel.com/winograd-2/
35 |
// Convolution 3x3 using the Winograd algorithm.
// NOTE(review): "std::vector<float>" for the members V_ and M_ was restored;
// the template arguments had been stripped from this copy of the file.
class WinogradConvolution3 {
 public:
  // The instance will allocate memory resources for the
  // largest batch size, and the largest input and output
  // layers.
  WinogradConvolution3(const size_t max_batch_size,
                       const size_t max_input_layers,
                       const size_t max_output_layers);

  // Forward inference, batched.
  void Forward(const size_t batch_size, const size_t input_channels,
               const size_t output_channels, const float* input,
               const float* weights, float* output);

 private:
  // Transforms the input into the Winograd domain (presumably into V_, per
  // Winograd naming convention — confirm in the .cc file).
  void TransformIn(const size_t batch_size, const float* input,
                   const size_t channels);

  // Batched matrix multiplications in the transformed domain.
  void Sgemm(const size_t batch_size, const float* weights,
             const size_t input_channels, const size_t output_channels);

  // Transforms the result back into the spatial domain, writing @output.
  void TransformOut(const size_t batch_size, float* output,
                    const size_t channels);

  static constexpr auto kWidth = 8;
  static constexpr auto kHeight = 8;
  static constexpr auto kSquares = kWidth * kHeight;

  static constexpr auto kWtiles = (kWidth + 1) / 2;  // 4
  static constexpr auto kTiles = kWtiles * kWtiles;  // 16

  static constexpr auto kWinogradAlpha = 4;
  static constexpr auto kWinogradTile = kWinogradAlpha * kWinogradAlpha;

  // Scratch buffers reused across Forward() calls.
  std::vector<float> V_;
  std::vector<float> M_;
};
74 | } // namespace lczero
75 |
--------------------------------------------------------------------------------
/src/neural/cache.cc:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 |
18 | Additional permission under GNU GPL version 3 section 7
19 |
20 | If you modify this Program, or any covered work, by linking or
21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
23 | modified version of those libraries), containing parts covered by the
24 | terms of the respective license agreement, the licensors of this
25 | Program grant you additional permission to convey the resulting work.
26 | */
27 | #include "neural/cache.h"
28 | #include
29 | #include
30 |
31 | namespace lczero {
32 | CachingComputation::CachingComputation(
33 | std::unique_ptr parent, NNCache* cache)
34 | : parent_(std::move(parent)), cache_(cache) {}
35 |
// Number of inputs not found in the cache — exactly those forwarded to the
// wrapped computation, hence the parent's batch size.
int CachingComputation::GetCacheMisses() const {
  return parent_->GetBatchSize();
}
39 |
// Total number of inputs added so far (cache hits and misses alike).
int CachingComputation::GetBatchSize() const { return batch_.size(); }
41 |
42 | bool CachingComputation::AddInputByHash(uint64_t hash) {
43 | NNCacheLock lock(cache_, hash);
44 | if (!lock) return false;
45 | batch_.emplace_back();
46 | batch_.back().lock = std::move(lock);
47 | batch_.back().hash = hash;
48 | return true;
49 | }
50 |
// Removes the most recently added input. Only allowed for cache hits:
// entries holding a cache lock that were never forwarded to the parent
// (idx_in_parent == -1).
void CachingComputation::PopCacheHit() {
  assert(!batch_.empty());
  assert(batch_.back().lock);
  assert(batch_.back().idx_in_parent == -1);
  batch_.pop_back();
}
57 |
58 | void CachingComputation::AddInput(
59 | uint64_t hash, InputPlanes&& input,
60 | std::vector&& probabilities_to_cache) {
61 | if (AddInputByHash(hash)) return;
62 | batch_.emplace_back();
63 | batch_.back().hash = hash;
64 | batch_.back().idx_in_parent = parent_->GetBatchSize();
65 | batch_.back().probabilities_to_cache = probabilities_to_cache;
66 | parent_->AddInput(std::move(input));
67 | }
68 |
// Removes the most recently added input; only valid when it was a cache hit
// (idx_in_parent == -1), since parent batch entries cannot be removed.
// NOTE(review): near-duplicate of PopCacheHit(), minus its lock assertion.
void CachingComputation::PopLastInputHit() {
  assert(!batch_.empty());
  assert(batch_.back().idx_in_parent == -1);
  batch_.pop_back();
}
74 |
75 | void CachingComputation::ComputeBlocking() {
76 | if (parent_->GetBatchSize() == 0) return;
77 | parent_->ComputeBlocking();
78 |
79 | // Fill cache with data from NN.
80 | for (const auto& item : batch_) {
81 | if (item.idx_in_parent == -1) continue;
82 | auto req =
83 | std::make_unique(item.probabilities_to_cache.size());
84 | req->q = parent_->GetQVal(item.idx_in_parent);
85 | req->d = parent_->GetDVal(item.idx_in_parent);
86 | int idx = 0;
87 | for (auto x : item.probabilities_to_cache) {
88 | req->p[idx++] =
89 | std::make_pair(x, parent_->GetPVal(item.idx_in_parent, x));
90 | }
91 | cache_->Insert(item.hash, std::move(req));
92 | }
93 | }
94 |
// Returns the Q value of @sample: from the parent computation for cache
// misses, from the cached entry for cache hits.
float CachingComputation::GetQVal(int sample) const {
  const auto& item = batch_[sample];
  if (item.idx_in_parent >= 0) return parent_->GetQVal(item.idx_in_parent);
  return item.lock->q;
}
100 |
// Returns the draw probability of @sample (meaningful when the network has
// a WDL value head): from the parent for misses, from the cache for hits.
float CachingComputation::GetDVal(int sample) const {
  const auto& item = batch_[sample];
  if (item.idx_in_parent >= 0) return parent_->GetDVal(item.idx_in_parent);
  return item.lock->d;
}
106 |
107 | float CachingComputation::GetPVal(int sample, int move_id) const {
108 | auto& item = batch_[sample];
109 | if (item.idx_in_parent >= 0)
110 | return parent_->GetPVal(item.idx_in_parent, move_id);
111 | const auto& moves = item.lock->p;
112 |
113 | int total_count = 0;
114 | while (total_count < moves.size()) {
115 | // Optimization: usually moves are stored in the same order as queried.
116 | const auto& move = moves[item.last_idx++];
117 | if (item.last_idx == moves.size()) item.last_idx = 0;
118 | if (move.first == move_id) return move.second;
119 | ++total_count;
120 | }
121 | assert(false); // Move not found.
122 | return 0;
123 | }
124 |
125 | } // namespace lczero
126 |
--------------------------------------------------------------------------------
/src/neural/cache.h:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 |
18 | Additional permission under GNU GPL version 3 section 7
19 |
20 | If you modify this Program, or any covered work, by linking or
21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
23 | modified version of those libraries), containing parts covered by the
24 | terms of the respective license agreement, the licensors of this
25 | Program grant you additional permission to convey the resulting work.
26 | */
27 | #pragma once
28 |
29 | #include "neural/network.h"
30 | #include "utils/cache.h"
31 | #include "utils/smallarray.h"
32 |
33 | namespace lczero {
34 |
35 | struct CachedNNRequest {
36 | CachedNNRequest(size_t size) : p(size) {}
37 | typedef std::pair IdxAndProb;
38 | float q;
39 | float d;
40 | // TODO(mooskagh) Don't really need index if using perfect hash.
41 | SmallArray p;
42 | };
43 |
44 | typedef LruCache NNCache;
45 | typedef LruCacheLock NNCacheLock;
46 |
47 | // Wraps around NetworkComputation and caches result.
48 | // While it mostly repeats NetworkComputation interface, it's not derived
49 | // from it, as AddInput() needs hash and index of probabilities to store.
50 | class CachingComputation {
51 | public:
52 | CachingComputation(std::unique_ptr parent,
53 | NNCache* cache);
54 |
55 | // How many inputs are not found in cache and will be forwarded to a wrapped
56 | // computation.
57 | int GetCacheMisses() const;
58 | // Total number of times AddInput/AddInputByHash were (successfully) called.
59 | int GetBatchSize() const;
60 | // Adds input by hash only. If that hash is not in cache, returns false
61 | // and does nothing. Otherwise adds.
62 | bool AddInputByHash(uint64_t hash);
63 | // Adds a sample to the batch.
64 | // @hash is a hash to store/lookup it in the cache.
65 | // @probabilities_to_cache is which indices of policy head to store.
66 | void AddInput(uint64_t hash, InputPlanes&& input,
67 | std::vector&& probabilities_to_cache);
68 | // Undos last AddInput. If it was a cache miss, the it's actually not removed
69 | // from parent's batch.
70 | void PopLastInputHit();
71 | // Do the computation.
72 | void ComputeBlocking();
73 | // Returns Q value of @sample.
74 | float GetQVal(int sample) const;
75 | // Returns probability of draw if NN has WDL value head
76 | float GetDVal(int sample) const;
77 | // Returns P value @move_id of @sample.
78 | float GetPVal(int sample, int move_id) const;
79 | // Pops last input from the computation. Only allowed for inputs which were
80 | // cached.
81 | void PopCacheHit();
82 |
83 | private:
84 | struct WorkItem {
85 | uint64_t hash;
86 | NNCacheLock lock;
87 | int idx_in_parent = -1;
88 | std::vector probabilities_to_cache;
89 | mutable int last_idx = 0;
90 | };
91 |
92 | std::unique_ptr parent_;
93 | NNCache* cache_;
94 | std::vector batch_;
95 | };
96 |
97 | } // namespace lczero
98 |
--------------------------------------------------------------------------------
/src/neural/cuda/cuda_common.h:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 |
18 | Additional permission under GNU GPL version 3 section 7
19 |
20 | If you modify this Program, or any covered work, by linking or
21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
23 | modified version of those libraries), containing parts covered by the
24 | terms of the respective license agreement, the licensors of this
25 | Program grant you additional permission to convey the resulting work.
26 | */
27 |
28 | #include
29 | #include
30 | #include
31 | #include
32 |
33 | #include "utils/exception.h"
34 |
35 | namespace lczero {
36 | namespace cudnn_backend {
37 |
// Report a failing cuDNN / cuBLAS / CUDA runtime status together with the
// source location that produced it. (Implementations are not visible in this
// header; given the utils/exception.h include they presumably throw an
// lczero exception — confirm in the corresponding .cc file.)
void CudnnError(cudnnStatus_t status, const char* file, const int& line);
void CublasError(cublasStatus_t status, const char* file, const int& line);
void CudaError(cudaError_t status, const char* file, const int& line);

// Wrappers that stamp the caller's __FILE__/__LINE__ into the error report.
#define ReportCUDNNErrors(status) CudnnError(status, __FILE__, __LINE__)
#define ReportCUBLASErrors(status) CublasError(status, __FILE__, __LINE__)
#define ReportCUDAErrors(status) CudaError(status, __FILE__, __LINE__)
45 |
46 | inline int DivUp(int a, int b) { return (a + b - 1) / b; }
47 |
48 | } // namespace cudnn_backend
49 | } // namespace lczero
50 |
--------------------------------------------------------------------------------
/src/neural/cuda/kernels.h:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018-2019 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 |
18 | Additional permission under GNU GPL version 3 section 7
19 |
20 | If you modify this Program, or any covered work, by linking or
21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
23 | modified version of those libraries), containing parts covered by the
24 | terms of the respective license agreement, the licensors of this
25 | Program grant you additional permission to convey the resulting work.
26 | */
27 |
28 | namespace lczero {
29 | namespace cudnn_backend {
30 |
31 | // Adds two vectors (possibly of different sizes), also do optional
32 | // activation (relu, tanh or sigmoid).
33 | template
34 | void addVectors(T* c, T* a, T* b, int size, int asize, int bsize, bool relu,
35 | bool use_tanh, bool use_sigmoid);
36 |
37 | // Add bias to convolution's output.
38 | template
39 | void addBias_NCHW(T* c, T* a, T* b, int N, int C, int H, int W);
40 |
41 | // Conversion from: fp32 -> fp16 datatype, and NCHW -> NHWC layout.
42 | // Cudnn kernels work best with NCHW layout for fp32, and with NHWC for fp16.
43 | void fp32NCHWtofp16NHWC(half* output_tensor, float* input_tensor, int Nin,
44 | int Cin, int Nout, int Cout, int H, int W);
45 |
46 | // Plain data-type conversion (no layout conversion).
47 | template
48 | void copyTypeConverted(DstType* op, SrcType* ip, int N);
49 |
50 | // Perform batch normilization.
51 | template
52 | void batchNorm(T* output, const T* input, const T* skipInput, int N, int C,
53 | int H, int W, float* means, float* var_multipliers, bool relu);
54 |
55 | // Unpack planes (input to network).
56 | void expandPlanes_Fp32_NCHW(float* output, const uint64_t* masks,
57 | const float* values, int n);
58 |
59 | void expandPlanes_Fp16_NHWC(half* output, const uint64_t* masks,
60 | const float* values, int n);
61 |
62 | // Perform global avg pool.
63 | template
64 | void globalAvgPool(int N, int C, T* output, const T* input,
65 | const T* prevLayerBias);
66 |
67 | // Perform global scale.
68 | template
69 | void globalScale(int N, int C, T* output, const T* input, const T* scaleBias,
70 | const T* prevLayerBias);
71 |
72 | // Perform Squeeze-and-Excitation (SE) in a single fused kernel.
73 | // Returns false if the fused kernel can't handle the sizes.
74 | bool Se_Fp16_NHWC(int N, int C, int numFc1Out, half* output, const half* skip,
75 | const half* input, const half* w1, const half* b1,
76 | const half* w2, const half* b2, const half* bPrev);
77 |
78 | template
79 | void PolicyMap(int N, T* output, const T* input, const short* indices,
80 | int inputSize, int usedSize, int outputSize);
81 |
82 | } // namespace cudnn_backend
83 | } // namespace lczero
84 |
--------------------------------------------------------------------------------
/src/neural/cuda/readme.txt:
--------------------------------------------------------------------------------
1 | cuda/cudnn backend for lc0. Here is a brief description of various files:
2 |
3 | 1. network_cudnn.cc -> C++ file containing the network and computation classes that connect this backend to the rest of lc0
4 | 2. layers.cc -> cpp files containing layer classes
5 | 3. layers.h -> header file for layer classes.
6 | 4. kernels.h -> header file for cuda kernels
7 | 5. common_kernels.cu -> common kernels (fp32, and fp16 that can work with old GPUs)
8 | 6. fp16_kernels.cu -> fp16 specific kernels (not used on other GPUs)
9 | 7. cuda_common.h -> header for common cuda stuff like ReportCUDAErrors, etc.
10 | 8. readme.txt -> this file
11 |
12 | High level overview: network is built of layer objects, layers are either implemented using cudnn/cublas libraries, or custom cuda kernels.
13 |
14 | lc0 search -> network_cudnn -> layers -> kernels
--------------------------------------------------------------------------------
/src/neural/encoder.h:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 |
18 | Additional permission under GNU GPL version 3 section 7
19 |
20 | If you modify this Program, or any covered work, by linking or
21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
23 | modified version of those libraries), containing parts covered by the
24 | terms of the respective license agreement, the licensors of this
25 | Program grant you additional permission to convey the resulting work.
26 | */
27 |
28 | #pragma once
29 |
30 | #include "chess/position.h"
31 | #include "neural/network.h"
32 |
33 | namespace lczero {
34 |
// How to synthesize history planes when fewer real positions are available
// than requested (exact fill semantics live in encoder.cc — not visible
// here).
enum class FillEmptyHistory {NO, FEN_ONLY, ALWAYS};

// Encodes the last position in history for the neural network request.
// @history_planes: how many most recent positions to encode.
// @fill_empty_history: what to do when history is shorter than that.
InputPlanes EncodePositionForNN(const PositionHistory& history,
                                int history_planes,
                                FillEmptyHistory fill_empty_history);
41 |
42 | } // namespace lczero
43 |
--------------------------------------------------------------------------------
/src/neural/loader.h:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 |
18 | Additional permission under GNU GPL version 3 section 7
19 |
20 | If you modify this Program, or any covered work, by linking or
21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
23 | modified version of those libraries), containing parts covered by the
24 | terms of the respective license agreement, the licensors of this
25 | Program grant you additional permission to convey the resulting work.
26 | */
27 |
28 | #pragma once
29 |
30 | #include
31 | #include
32 |
33 | #include "neural/network.h"
34 | #include "proto/net.pb.h"
35 |
36 | namespace lczero {
37 |
38 | using FloatVector = std::vector;
39 | using FloatVectors = std::vector;
40 |
41 | using WeightsFile = pblczero::Net;
42 |
43 | // Read weights file and fill the weights structure.
44 | WeightsFile LoadWeightsFromFile(const std::string& filename);
45 |
46 | // Tries to find a file which looks like a weights file, and located in
47 | // directory of binary_name or one of subdirectories. If there are several such
48 | // files, returns one which has the latest modification date.
49 | std::string DiscoverWeightsFile();
50 |
51 | } // namespace lczero
52 |
--------------------------------------------------------------------------------
/src/neural/network.h:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 |
18 | Additional permission under GNU GPL version 3 section 7
19 |
20 | If you modify this Program, or any covered work, by linking or
21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
23 | modified version of those libraries), containing parts covered by the
24 | terms of the respective license agreement, the licensors of this
25 | Program grant you additional permission to convey the resulting work.
26 | */
27 |
28 | #pragma once
29 |
30 | #include
31 | #include
32 |
33 | namespace lczero {
34 |
// Number of input planes the network consumes per position.
const int kInputPlanes = 112;

// All input planes are 64 value vectors, every element of which is either
// 0 or some value, unique for the plane. Therefore, input is defined as
// a bitmask showing where to set the value, and the value itself.
struct InputPlane {
  InputPlane() = default;
  // Sets every bit of the mask (all 64 squares get the value).
  void SetAll() { mask = ~0ull; }
  // Sets the whole plane to @val.
  void Fill(float val) {
    SetAll();
    value = val;
  }
  std::uint64_t mask = 0ull;
  float value = 1.0f;
};
using InputPlanes = std::vector<InputPlane>;
51 |
// An interface to implement by computing backends.
class NetworkComputation {
 public:
  // Adds a sample to the batch.
  virtual void AddInput(InputPlanes&& input) = 0;
  // Do the computation.
  virtual void ComputeBlocking() = 0;
  // Returns how many times AddInput() was called.
  virtual int GetBatchSize() const = 0;
  // Returns Q value of @sample.
  virtual float GetQVal(int sample) const = 0;
  // Returns probability of draw of @sample (for nets with a WDL value head).
  virtual float GetDVal(int sample) const = 0;
  // Returns P value @move_id of @sample.
  virtual float GetPVal(int sample, int move_id) const = 0;
  virtual ~NetworkComputation() {}
};
68 |
69 | class Network {
70 | public:
71 | virtual std::unique_ptr NewComputation() = 0;
72 | virtual ~Network(){};
73 | };
74 |
75 | } // namespace lczero
76 |
--------------------------------------------------------------------------------
/src/neural/network_legacy.cc:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018-2019 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 | */
18 |
19 | #include "neural/network_legacy.h"
20 |
21 | #include
22 | #include
23 | #include "utils/weights_adapter.h"
24 |
25 | namespace lczero {
26 | namespace {
27 | static constexpr float kEpsilon = 1e-5f;
28 | } // namespace
29 |
// Copies every layer of the protobuf weights into plain float vectors.
LegacyWeights::LegacyWeights(const pblczero::Weights& weights)
    : input(weights.input()),
      policy1(weights.policy1()),
      policy(weights.policy()),
      ip_pol_w(LayerAdapter(weights.ip_pol_w()).as_vector()),
      ip_pol_b(LayerAdapter(weights.ip_pol_b()).as_vector()),
      value(weights.value()),
      ip1_val_w(LayerAdapter(weights.ip1_val_w()).as_vector()),
      ip1_val_b(LayerAdapter(weights.ip1_val_b()).as_vector()),
      ip2_val_w(LayerAdapter(weights.ip2_val_w()).as_vector()),
      ip2_val_b(LayerAdapter(weights.ip2_val_b()).as_vector()) {
  // Residual tower is copied block by block; batch-norm folding happens in
  // each block's ConvBlock constructor.
  for (const auto& res : weights.residual()) {
    residual.emplace_back(res);
  }
}
45 |
// Squeeze-and-Excitation unit: weights and biases of its two
// fully-connected layers, copied out of the protobuf.
LegacyWeights::SEunit::SEunit(const pblczero::Weights::SEunit& se)
    : w1(LayerAdapter(se.w1()).as_vector()),
      b1(LayerAdapter(se.b1()).as_vector()),
      w2(LayerAdapter(se.w2()).as_vector()),
      b2(LayerAdapter(se.b2()).as_vector()) {}
51 |
// One residual block: two convolutions plus an optional SE unit
// (has_se tells whether the SE weights are meaningful).
LegacyWeights::Residual::Residual(const pblczero::Weights::Residual& residual)
    : conv1(residual.conv1()),
      conv2(residual.conv2()),
      se(residual.se()),
      has_se(residual.has_se()) {}
57 |
// Builds a ConvBlock from protobuf weights, then folds the batch-norm
// parameters into the convolution weights/biases so that inference needs no
// separate normalization pass. BN: y = gamma*(conv + bias - mean)/stddev +
// beta; with g' = gamma/stddev and m' = mean - bias this becomes
// y = g'*conv + (-g'*m' + beta), which is what the loops below compute.
LegacyWeights::ConvBlock::ConvBlock(const pblczero::Weights::ConvBlock& block)
    : weights(LayerAdapter(block.weights()).as_vector()),
      biases(LayerAdapter(block.biases()).as_vector()),
      bn_gammas(LayerAdapter(block.bn_gammas()).as_vector()),
      bn_betas(LayerAdapter(block.bn_betas()).as_vector()),
      bn_means(LayerAdapter(block.bn_means()).as_vector()),
      bn_stddivs(LayerAdapter(block.bn_stddivs()).as_vector()) {
  if (weights.size() == 0) {
    // Empty ConvBlock.
    return;
  }

  if (bn_betas.size() == 0) {
    // Old net without gamma and beta.
    for (auto i = size_t{0}; i < bn_means.size(); i++) {
      bn_betas.emplace_back(0.0f);
      bn_gammas.emplace_back(1.0f);
    }
  }
  if (biases.size() == 0) {
    // No explicit biases stored; use zeros so the folding below is uniform.
    for (auto i = size_t{0}; i < bn_means.size(); i++) {
      biases.emplace_back(0.0f);
    }
  }

  if (bn_means.size() == 0) {
    // No batch norm.
    return;
  }

  // Fold batch norm into weights and biases.
  // Variance to gamma.
  for (auto i = size_t{0}; i < bn_stddivs.size(); i++) {
    // NOTE(review): sqrt is applied here, so bn_stddivs appears to hold
    // variances despite the name — confirm against the weights exporter.
    // kEpsilon guards against division by zero.
    bn_gammas[i] *= 1.0f / std::sqrt(bn_stddivs[i] + kEpsilon);
    bn_means[i] -= biases[i];
  }

  auto outputs = biases.size();

  // We can treat the [inputs, filter_size, filter_size] dimensions as one.
  auto inputs = weights.size() / outputs;

  for (auto o = size_t{0}; o < outputs; o++) {
    for (auto c = size_t{0}; c < inputs; c++) {
      weights[o * inputs + c] *= bn_gammas[o];
    }

    biases[o] = -bn_gammas[o] * bn_means[o] + bn_betas[o];
  }

  // Batch norm weights are not needed anymore.
  bn_stddivs.clear();
  bn_means.clear();
  bn_betas.clear();
  bn_gammas.clear();
}
114 | } // namespace lczero
115 |
--------------------------------------------------------------------------------
/src/neural/network_legacy.h:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018-2019 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 | */
18 |
19 | #pragma once
20 |
21 | #include
22 | #include "proto/net.pb.h"
23 |
24 | namespace lczero {
25 |
26 | // DEPRECATED! DEPRECATED! DEPRECATED! DEPRECATED! DEPRECATED! DEPRECATED!!!
27 | // Legacy structure describing network weights.
28 | // Please try to migrate away from this structure, and do not add anything
29 | // new to it.
30 |
31 | struct LegacyWeights {
32 | explicit LegacyWeights(const pblczero::Weights& weights);
33 |
34 | using Vec = std::vector;
35 | struct ConvBlock {
36 | explicit ConvBlock(const pblczero::Weights::ConvBlock& block);
37 |
38 | Vec weights;
39 | Vec biases;
40 | Vec bn_gammas;
41 | Vec bn_betas;
42 | Vec bn_means;
43 | Vec bn_stddivs;
44 | };
45 |
46 | struct SEunit {
47 | explicit SEunit(const pblczero::Weights::SEunit& se);
48 | Vec w1;
49 | Vec b1;
50 | Vec w2;
51 | Vec b2;
52 | };
53 |
54 | struct Residual {
55 | explicit Residual(const pblczero::Weights::Residual& residual);
56 | ConvBlock conv1;
57 | ConvBlock conv2;
58 | SEunit se;
59 | bool has_se;
60 | };
61 |
62 | // Input convnet.
63 | ConvBlock input;
64 |
65 | // Residual tower.
66 | std::vector residual;
67 |
68 | // Policy head
69 | // Extra convolution for AZ-style policy head
70 | ConvBlock policy1;
71 | ConvBlock policy;
72 | Vec ip_pol_w;
73 | Vec ip_pol_b;
74 |
75 | // Value head
76 | ConvBlock value;
77 | Vec ip1_val_w;
78 | Vec ip1_val_b;
79 | Vec ip2_val_w;
80 | Vec ip2_val_b;
81 | };
82 |
83 | } // namespace lczero
84 |
--------------------------------------------------------------------------------
/src/neural/network_random.cc:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 |
18 | Additional permission under GNU GPL version 3 section 7
19 |
20 | If you modify this Program, or any covered work, by linking or
21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
23 | modified version of those libraries), containing parts covered by the
24 | terms of the respective license agreement, the licensors of this
25 | Program grant you additional permission to convey the resulting work.
26 | */
27 |
28 | #include
29 | #include
30 | #include
31 | #include
32 | #include
33 | #include "neural/factory.h"
34 | #include "utils/hashcat.h"
35 |
36 | namespace lczero {
37 | namespace {
38 |
39 | class RandomNetworkComputation : public NetworkComputation {
40 | public:
41 | RandomNetworkComputation(int delay, int seed, bool uniform_mode)
42 | : delay_ms_(delay), seed_(seed), uniform_mode_(uniform_mode) {}
43 |
44 | void AddInput(InputPlanes&& input) override {
45 | std::uint64_t hash = seed_;
46 | for (const auto& plane : input) {
47 | hash = HashCat({hash, plane.mask});
48 | std::uint32_t tmp;
49 | std::memcpy(&tmp, &plane.value, sizeof(float));
50 | const std::uint64_t value_hash = tmp;
51 | hash = HashCat({hash, value_hash});
52 | }
53 | inputs_.push_back(hash);
54 | }
55 |
56 | void ComputeBlocking() override {
57 | if (delay_ms_) {
58 | std::this_thread::sleep_for(std::chrono::milliseconds(delay_ms_));
59 | }
60 | }
61 |
62 | int GetBatchSize() const override { return inputs_.size(); }
63 |
64 | float GetQVal(int sample) const override {
65 | if (uniform_mode_) return 0.0f;
66 | return (int(inputs_[sample] % 200000) - 100000) / 100000.0;
67 | }
68 |
69 | float GetDVal(int sample) const override {
70 | if (uniform_mode_) return 0.0f;
71 | // Maximum D value is 1 - abs(Q) for W, D, L to be in range [0.0, 1.0].
72 | float q = GetQVal(sample);
73 | float max_d = 1.0f - std::fabs(q);
74 | // Hash in arbitrary constant to make D return different value from Q.
75 | float d = max_d * (HashCat({inputs_[sample], 1234}) % 10000) / 10000.0;
76 | return d;
77 | }
78 |
79 | float GetPVal(int sample, int move_id) const override {
80 | if (uniform_mode_) return 1.0f;
81 | return (HashCat({inputs_[sample], static_cast(move_id)}) %
82 | 10000) /
83 | 10000.0;
84 | }
85 |
86 | private:
87 | std::vector inputs_;
88 | int delay_ms_ = 0;
89 | int seed_ = 0;
90 | bool uniform_mode_ = false;
91 | };
92 |
93 | class RandomNetwork : public Network {
94 | public:
95 | RandomNetwork(const OptionsDict& options)
96 | : delay_ms_(options.GetOrDefault("delay", 0)),
97 | seed_(options.GetOrDefault("seed", 0)),
98 | uniform_mode_(options.GetOrDefault("uniform", false)) {}
99 | std::unique_ptr NewComputation() override {
100 | return std::make_unique(delay_ms_, seed_, uniform_mode_);
101 | }
102 |
103 | private:
104 | int delay_ms_ = 0;
105 | int seed_ = 0;
106 | bool uniform_mode_ = false;
107 | };
108 | } // namespace
109 |
110 | std::unique_ptr MakeRandomNetwork(const WeightsFile& /*weights*/,
111 | const OptionsDict& options) {
112 | return std::make_unique(options);
113 | }
114 |
115 | REGISTER_NETWORK("random", MakeRandomNetwork, -900)
116 |
117 | } // namespace lczero
118 |
--------------------------------------------------------------------------------
/src/neural/network_rr.cc:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 |
18 | Additional permission under GNU GPL version 3 section 7
19 |
20 | If you modify this Program, or any covered work, by linking or
21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
23 | modified version of those libraries), containing parts covered by the
24 | terms of the respective license agreement, the licensors of this
25 | Program grant you additional permission to convey the resulting work.
26 | */
27 |
28 | #include "neural/factory.h"
29 |
30 | #include
31 | #include
32 | #include
33 | #include "utils/exception.h"
34 |
35 | namespace lczero {
36 | namespace {
37 |
38 | class RoundRobinNetwork : public Network {
39 | public:
40 | RoundRobinNetwork(const WeightsFile& weights, const OptionsDict& options) {
41 | const auto parents = options.ListSubdicts();
42 | if (parents.empty()) {
43 | // If options are empty, or multiplexer configured in root object,
44 | // initialize on root object and default backend.
45 | auto backends = NetworkFactory::Get()->GetBackendsList();
46 | AddBackend(backends[0], weights, options);
47 | }
48 |
49 | for (const auto& name : parents) {
50 | AddBackend(name, weights, options.GetSubdict(name));
51 | }
52 | }
53 |
54 | void AddBackend(const std::string& name, const WeightsFile& weights,
55 | const OptionsDict& opts) {
56 | const std::string backend = opts.GetOrDefault("backend", name);
57 |
58 | networks_.emplace_back(
59 | NetworkFactory::Get()->Create(backend, weights, opts));
60 | }
61 |
62 | std::unique_ptr NewComputation() override {
63 | const long long val = ++counter_;
64 | return networks_[val % networks_.size()]->NewComputation();
65 | }
66 |
67 | ~RoundRobinNetwork() {}
68 |
69 | private:
70 | std::vector> networks_;
71 | std::atomic counter_;
72 | };
73 |
74 | std::unique_ptr MakeRoundRobinNetwork(const WeightsFile& weights,
75 | const OptionsDict& options) {
76 | return std::make_unique(weights, options);
77 | }
78 |
79 | REGISTER_NETWORK("roundrobin", MakeRoundRobinNetwork, -999)
80 |
81 | } // namespace
82 | } // namespace lczero
83 |
--------------------------------------------------------------------------------
/src/neural/network_st_batch.cc:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 |
18 | Additional permission under GNU GPL version 3 section 7
19 |
20 | If you modify this Program, or any covered work, by linking or
21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
23 | modified version of those libraries), containing parts covered by the
24 | terms of the respective license agreement, the licensors of this
25 | Program grant you additional permission to convey the resulting work.
26 | */
27 |
28 | #include "neural/network_st_batch.h"
29 |
30 | #include
31 |
32 | namespace lczero {
33 |
// Takes ownership of the wrapped (real) backend network.
SingleThreadBatchingNetwork::SingleThreadBatchingNetwork(
    std::unique_ptr parent)
    : parent_(std::move(parent)) {}
37 |
38 | std::unique_ptr
39 | SingleThreadBatchingNetwork::NewComputation() {
40 | ++computations_pending_;
41 | return std::make_unique(this);
42 | }
43 |
// Starts a fresh parent batch. Must only be called once all computations
// created since the previous Reset() have finished (hence the assert).
void SingleThreadBatchingNetwork::Reset() {
  assert(computations_pending_ == 0);
  parent_computation_ = parent_->NewComputation();
}
48 |
// Records where this computation's samples will begin inside the shared
// parent batch (the parent's current size at creation time).
SingleThreadBatchingNetworkComputation::SingleThreadBatchingNetworkComputation(
    SingleThreadBatchingNetwork* network)
    : network_(network),
      start_idx_(network_->parent_computation_->GetBatchSize()) {}
53 |
// Forwards the sample to the parent batch. The assert checks that no other
// computation appended to the parent since this one's last AddInput() — one
// computation's samples must stay contiguous inside the parent batch.
void SingleThreadBatchingNetworkComputation::AddInput(InputPlanes&& input) {
  assert(start_idx_ + batch_size_ ==
         network_->parent_computation_->GetBatchSize());
  ++batch_size_;
  network_->parent_computation_->AddInput(std::move(input));
}
60 |
// Only the last pending computation actually runs the parent batch; earlier
// callers return immediately and read their results afterwards.
void SingleThreadBatchingNetworkComputation::ComputeBlocking() {
  if (--network_->computations_pending_ == 0)
    network_->parent_computation_->ComputeBlocking();
}
65 |
66 | float SingleThreadBatchingNetworkComputation::GetQVal(int sample) const {
67 | return network_->parent_computation_->GetQVal(sample - start_idx_);
68 | }
69 |
70 | float SingleThreadBatchingNetworkComputation::GetDVal(int sample) const {
71 | return network_->parent_computation_->GetDVal(sample - start_idx_);
72 | }
73 |
74 | float SingleThreadBatchingNetworkComputation::GetPVal(int sample,
75 | int move_id) const {
76 | return network_->parent_computation_->GetPVal(sample - start_idx_, move_id);
77 | }
78 |
79 | } // namespace lczero
80 |
--------------------------------------------------------------------------------
/src/neural/network_st_batch.h:
--------------------------------------------------------------------------------
1 | /*
2 | This file is part of Leela Chess Zero.
3 | Copyright (C) 2018 The LCZero Authors
4 |
5 | Leela Chess is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | Leela Chess is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with Leela Chess. If not, see .
17 |
18 | Additional permission under GNU GPL version 3 section 7
19 |
20 | If you modify this Program, or any covered work, by linking or
21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
23 | modified version of those libraries), containing parts covered by the
24 | terms of the respective license agreement, the licensors of this
25 | Program grant you additional permission to convey the resulting work.
26 | */
27 |
28 | #pragma once
29 |
30 | #include "neural/network.h"
31 |
32 | namespace lczero {
33 |
34 | // This is a network that helps to combine batches from multiple games running
35 | // is a single thread. Not thread safe.
36 | // Usage:
37 | // network.Reset(); // Creates new parent computation
38 | // computations = []
39 | // multiple times:
40 | // x = network.NewComputation()
41 | // computations += x
42 | // x.AddInput();
43 | // x.AddInput();
44 | // x.AddInput();
45 | // ...
46 | // for x in computations:
47 | // x.ComputeBlocking() // Only last call actually computes, and they are
48 | // // computed together in one batch.
49 | // for x in computations:
50 | // use(x)
51 | class SingleThreadBatchingNetwork : public Network {
52 | public:
53 | SingleThreadBatchingNetwork(std::unique_ptr parent);
54 | std::unique_ptr NewComputation() override;
55 |
56 | // Start a fresh batch.
57 | void Reset();
58 |
59 | private:
60 | std::unique_ptr parent_;
61 | std::unique_ptr parent_computation_;
62 | int computations_pending_ = 0;
63 | friend class SingleThreadBatchingNetworkComputation;
64 | };
65 |
class SingleThreadBatchingNetworkComputation : public NetworkComputation {
 public:
  SingleThreadBatchingNetworkComputation(SingleThreadBatchingNetwork* network);

  // Adds a sample to the parent batch.
  void AddInput(InputPlanes&& input) override;
  // May not actually compute immediately. Instead computes when all
  // computations of the network called this.
  void ComputeBlocking() override;
  // Returns how many times AddInput() was called.
  int GetBatchSize() const override { return batch_size_; }
  // Returns Q value of @sample.
  float GetQVal(int sample) const override;
  // Returns draw probability of @sample (for WDL-head networks).
  float GetDVal(int sample) const override;
  // Returns P value @move_id of @sample.
  float GetPVal(int sample, int move_id) const override;

 private:
  SingleThreadBatchingNetwork* const network_;
  // Offset of this computation's first sample inside the parent batch.
  int start_idx_;
  // Number of samples this computation has contributed.
  int batch_size_ = 0;
};
88 |
89 | } // namespace lczero
90 |
--------------------------------------------------------------------------------
/src/neural/opencl/OpenCLBuffers.h:
--------------------------------------------------------------------------------
1 | /*
2 | Originally from the Leela Zero project.
3 | Copyright (C) 2017 Gian-Carlo Pascutto
4 |
5 | This file is part of Leela Chess Zero.
6 | Copyright (C) 2018-2019 The LCZero Authors
7 |
8 | Leela Chess is free software: you can redistribute it and/or modify
9 | it under the terms of the GNU General Public License as published by
10 | the Free Software Foundation, either version 3 of the License, or
11 | (at your option) any later version.
12 |
13 | Leela Chess is distributed in the hope that it will be useful,
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | GNU General Public License for more details.
17 |
18 | You should have received a copy of the GNU General Public License
along with Leela Chess. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | #pragma once
23 |
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <thread>
#include <vector>
36 |
37 | #include "neural/opencl/OpenCL.h"
38 | #include "neural/opencl/OpenCLParams.h"
39 | #include "neural/opencl/OpenCLTuner.h"
40 | #include "utils/logging.h"
41 |
42 | class OpenCL_Network;
43 |
44 | class OpenCLBuffers {
45 | friend class OpenCL;
46 | friend class OpenCL_Network;
47 |
48 | public:
49 | OpenCLBuffers(const OpenCL_Network& opencl_net);
50 |
51 | void forward(const std::vector& input, std::vector& output_pol,
52 | std::vector& output_val, const int batch_size);
53 |
54 | private:
55 | using weight_slice_t = std::vector::const_iterator;
56 |
57 | void convolve3(int channels, int outputs, cl::Buffer& bufferIn,
58 | cl::Buffer& bufferOut, cl::Buffer& bufferV,
59 | cl::Buffer& bufferM, weight_slice_t weights,
60 | cl::Buffer* bufferResidual, weight_slice_t biases,
61 | bool skip_in_transform, bool fuse_in_transform,
62 | bool store_inout, bool relu, int batch_size);
63 |
64 | void convolve1(int channels, int outputs, cl::Buffer& bufferInput,
65 | cl::Buffer& bufferOutput, cl::Buffer& bufferMerge,
66 | weight_slice_t weights, weight_slice_t biases, int batch_size);
67 |
68 | void innerproduct(cl::Buffer& input, weight_slice_t weights,
69 | weight_slice_t biases, cl::Buffer& output, const int inputs,
70 | const int outputs, const int relu, int batch_size);
71 |
72 | void squeeze_excitation(int channels, int fc_outputs, cl::Buffer& bufferIn,
73 | cl::Buffer& bufferTemp1, cl::Buffer& bufferTemp2,
74 | weight_slice_t weights, cl::Buffer& bufferResidual,
75 | int batch_size);
76 |
77 | void policymap(int N, const cl::Buffer& input, cl::Buffer& output,
78 | const cl::Buffer& indices, int inputSize, int usedSize,
79 | int outputSize);
80 |
81 | const OpenCL_Network& m_opencl_net;
82 | const OpenCL& m_opencl;
83 |
84 | cl::CommandQueue m_commandqueue;
85 | cl::Kernel m_convolve1_kernel;
86 | cl::Kernel m_merge_kernel;
87 | cl::Kernel m_in_transform_kernel;
88 | cl::Kernel m_sgemm_kernel;
89 | cl::Kernel m_sgemv_kernel;
90 | cl::Kernel m_out_transform_bn_kernel;
91 | cl::Kernel m_out_transform_bn_in_kernel;
92 | cl::Kernel m_global_avg_pooling_kernel;
93 | cl::Kernel m_apply_se_kernel;
94 | cl::Kernel m_policymap_kernel;
95 | cl::Buffer m_inBuffer;
96 | cl::Buffer m_inBuffer2;
97 | cl::Buffer m_VBuffer;
98 | cl::Buffer m_MBuffer;
99 | cl::Buffer m_pool_buffer;
100 | cl::Buffer m_pinnedOutBuffer_pol;
101 | cl::Buffer m_pinnedOutBuffer_val;
102 | };
103 |
--------------------------------------------------------------------------------
/src/neural/opencl/OpenCLParams.h:
--------------------------------------------------------------------------------
1 | /*
2 | Originally from the Leela Zero project.
3 | Copyright (C) 2017 Gian-Carlo Pascutto
4 |
5 | This file is part of Leela Chess Zero.
6 | Copyright (C) 2018 The LCZero Authors
7 |
8 | Leela Chess is free software: you can redistribute it and/or modify
9 | it under the terms of the GNU General Public License as published by
10 | the Free Software Foundation, either version 3 of the License, or
11 | (at your option) any later version.
12 |
13 | Leela Chess is distributed in the hope that it will be useful,
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | GNU General Public License for more details.
17 |
18 | You should have received a copy of the GNU General Public License
along with Leela Chess. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | #pragma once
23 |
// Options controlling OpenCL device selection and kernel tuning.
struct OpenCLParams {
  // Id of the OpenCL GPU to use; -1 presumably selects a default device —
  // verify against the backend's device enumeration.
  int gpuId = -1;

  // If true, only run the tuner (no regular network evaluation).
  bool tune_only = false;
  // If true, re-run tuning even when stored results exist. NOTE(review):
  // inferred from the name; confirm against OpenCLTuner usage.
  bool force_tune = false;
  // If true, search the larger, exhaustive tuning parameter space.
  bool tune_exhaustive = false;
  // Batch size used while benchmarking tuning candidates.
  int tune_batch_size = 1;
};
32 |
--------------------------------------------------------------------------------
/src/neural/opencl/OpenCLTuner.h:
--------------------------------------------------------------------------------
1 | /*
2 | Originally from the Leela Zero project.
3 | Copyright (C) 2017 Gian-Carlo Pascutto
4 |
5 | This file is part of Leela Chess Zero.
6 | Copyright (C) 2018 The LCZero Authors
7 |
8 | Leela Chess is free software: you can redistribute it and/or modify
9 | it under the terms of the GNU General Public License as published by
10 | the Free Software Foundation, either version 3 of the License, or
11 | (at your option) any later version.
12 |
13 | Leela Chess is distributed in the hope that it will be useful,
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | GNU General Public License for more details.
17 |
18 | You should have received a copy of the GNU General Public License
along with Leela Chess. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | #pragma once
23 |
24 | #include