├── .circleci ├── Dockerfile └── config.yml ├── .clang-format ├── .gitignore ├── .gitmodules ├── CONTRIBUTING.md ├── COPYING ├── FLAGS.md ├── README.md ├── appveyor.yml ├── build-cl.cmd ├── build-cuda.cmd ├── build.sh ├── changelog.txt ├── checkdir.py ├── dist └── README-cuda.txt ├── meson.build ├── meson_options.txt ├── scripts └── bumpversion.py ├── src ├── benchmark │ ├── benchmark.cc │ └── benchmark.h ├── chess │ ├── bitboard.cc │ ├── bitboard.h │ ├── board.cc │ ├── board.h │ ├── board_test.cc │ ├── callbacks.h │ ├── position.cc │ ├── position.h │ ├── position_test.cc │ ├── uciloop.cc │ └── uciloop.h ├── engine.cc ├── engine.h ├── main.cc ├── mcts │ ├── auxengine.cc │ ├── node.cc │ ├── node.h │ ├── params.cc │ ├── params.h │ ├── search.cc │ └── search.h ├── neural │ ├── blas │ │ ├── README.md │ │ ├── blas.h │ │ ├── convolution1.cc │ │ ├── convolution1.h │ │ ├── fully_connected_layer.cc │ │ ├── fully_connected_layer.h │ │ ├── network_blas.cc │ │ ├── se_unit.cc │ │ ├── se_unit.h │ │ ├── winograd_convolution3.cc │ │ ├── winograd_convolution3.h │ │ └── winograd_transform.ispc │ ├── cache.cc │ ├── cache.h │ ├── cuda │ │ ├── common_kernels.cu │ │ ├── cuda_common.h │ │ ├── fp16_kernels.cu │ │ ├── kernels.h │ │ ├── layers.cc │ │ ├── layers.h │ │ ├── network_cudnn.cc │ │ └── readme.txt │ ├── encoder.cc │ ├── encoder.h │ ├── encoder_test.cc │ ├── factory.cc │ ├── factory.h │ ├── loader.cc │ ├── loader.h │ ├── network.h │ ├── network_check.cc │ ├── network_demux.cc │ ├── network_legacy.cc │ ├── network_legacy.h │ ├── network_mux.cc │ ├── network_random.cc │ ├── network_rr.cc │ ├── network_st_batch.cc │ ├── network_st_batch.h │ ├── network_tf.cc │ ├── opencl │ │ ├── OpenCL.cc │ │ ├── OpenCL.h │ │ ├── OpenCLBuffers.cc │ │ ├── OpenCLBuffers.h │ │ ├── OpenCLParams.h │ │ ├── OpenCLTuner.cc │ │ ├── OpenCLTuner.h │ │ ├── README.md │ │ ├── clblast_level3 │ │ │ ├── common.opencl │ │ │ ├── xgemm_batched.opencl │ │ │ ├── xgemm_part1.opencl │ │ │ ├── xgemm_part2.opencl │ │ │ ├── 
xgemm_part3.opencl │ │ │ └── xgemv.opencl │ │ ├── clsource │ │ │ ├── config.opencl │ │ │ ├── convolve1.opencl │ │ │ ├── convolve3.opencl │ │ │ ├── policymap.opencl │ │ │ └── se.opencl │ │ └── network_opencl.cc │ ├── shared │ │ ├── activation.cc │ │ ├── activation.h │ │ ├── policy_map.h │ │ ├── winograd_filter.cc │ │ └── winograd_filter.h │ ├── writer.cc │ └── writer.h ├── selfplay │ ├── game.cc │ ├── game.h │ ├── loop.cc │ ├── loop.h │ ├── tournament.cc │ └── tournament.h ├── syzygy │ ├── syzygy.cc │ ├── syzygy.h │ └── syzygy_test.cc ├── utils │ ├── bititer.h │ ├── cache-old.h │ ├── cache.h │ ├── commandline.cc │ ├── commandline.h │ ├── configfile.cc │ ├── configfile.h │ ├── cppattributes.h │ ├── exception.h │ ├── fastmath.h │ ├── filesystem.h │ ├── filesystem.posix.cc │ ├── filesystem.win32.cc │ ├── hashcat.h │ ├── hashcat_test.cc │ ├── histogram.cc │ ├── histogram.h │ ├── logging.cc │ ├── logging.h │ ├── mutex.h │ ├── optional.h │ ├── optionsdict.cc │ ├── optionsdict.h │ ├── optionsparser.cc │ ├── optionsparser.h │ ├── optionsparser_test.cc │ ├── random.cc │ ├── random.h │ ├── smallarray.h │ ├── string.cc │ ├── string.h │ ├── transpose.cc │ ├── transpose.h │ ├── weights_adapter.cc │ └── weights_adapter.h ├── version.cc ├── version.h └── version.inc ├── subprojects ├── gtest.wrap ├── protobuf-3.6.0.wrap ├── protobuf.wrap └── zlib.wrap ├── tensorflow.md ├── third_party └── cl2.hpp └── windows_build.md /.circleci/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM floopcz/tensorflow_cc:ubuntu-shared-cuda 2 | 3 | RUN wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB && apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB && sh -c 'echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list' && apt-get update && apt-get install -y intel-mkl-64bit-2018.2-046 4 | RUN apt-get install -y clang-6.0 ninja-build python3-pip nvidia-opencl-dev libopenblas-dev 
libboost-dev nvidia-cuda-dev nvidia-cuda-toolkit libgtest-dev git ssh tar gzip ca-certificates sudo 5 | RUN pip3 install meson 6 | RUN ln -s /usr/include/ /usr/include/openblas 7 | 8 | RUN curl -OL https://github.com/google/protobuf/releases/download/v3.5.1/protoc-3.5.1-linux-x86_64.zip 9 | RUN unzip protoc-3.5.1-linux-x86_64.zip -d protoc3 10 | RUN sudo mv protoc3/bin/* /usr/local/bin/ 11 | RUN sudo mv protoc3/include/* /usr/local/include/ 12 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | jobs: 3 | build: 4 | docker: 5 | - image: danieluranga/leela_chess_zero-lc0_ubuntu_builder:0.0.4 6 | steps: 7 | - checkout 8 | - run: 9 | name: "Pull Submodules" 10 | command: | 11 | git submodule init 12 | git submodule update --remote 13 | - run: 14 | name: Build clang version 15 | command: CC=clang-6.0 CXX=clang++-6.0 ./build.sh 16 | - run: 17 | command: cp build/release/lc0 /tmp/lc0-clang 18 | - run: 19 | name: Build g++ version 20 | command: ./build.sh 21 | - run: 22 | command: cp build/release/lc0 /tmp/lc0-g++ 23 | - store_artifacts: 24 | path: /tmp/lc0-clang 25 | destination: lc0-ubuntu-18-04-clang 26 | - store_artifacts: 27 | path: /tmp/lc0-g++ 28 | destination: lc0-ubuntu-18-04-g++ 29 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | BasedOnStyle: Google 4 | DerivePointerAlignment: false 5 | ... 
6 | 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | testdata/ 3 | LC0VSProj/ 4 | CUDA_NN/ 5 | .DS_Store 6 | xcuserdata 7 | subprojects/* 8 | !subprojects/*.wrap 9 | lc0.xcodeproj/ 10 | *.swp 11 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "libs/lczero-common"] 2 | path = libs/lczero-common 3 | url = https://github.com/LeelaChessZero/lczero-common.git 4 | -------------------------------------------------------------------------------- /build-cl.cmd: -------------------------------------------------------------------------------- 1 | rd /s build 2 | 3 | rem set MSBuild="C:\Program Files (x86)\MSBuild\14.0\Bin\MSBuild.exe" 4 | set MSBuild="C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\MSBuild\15.0\Bin\MSBuild.exe" 5 | 6 | rem call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 7 | call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64 8 | 9 | meson.py build --backend vs2017 --buildtype release ^ 10 | -Dmkl_include="C:\Program Files (x86)\IntelSWTools\compilers_and_libraries\windows\mkl\include" ^ 11 | -Dmkl_libdirs="C:\Program Files (x86)\IntelSWTools\compilers_and_libraries\windows\mkl\lib\intel64" ^ 12 | -Dopencl_libdirs="C:\Program Files (x86)\AMD APP SDK\3.0\lib\x86_64" ^ 13 | -Dopencl_include="C:\Program Files (x86)\AMD APP SDK\3.0\include" ^ 14 | -Ddefault_library=static 15 | 16 | pause 17 | 18 | cd build 19 | 20 | %MSBuild% /p:Configuration=Release /p:Platform=x64 ^ 21 | /p:PreferredToolArchitecture=x64 "subprojects\zlib-1.2.11\Windows resource for file 'win32_zlib1.rc'@cus.vcxproj" ^ 22 | /filelogger 23 | 24 | %MSBuild% /p:Configuration=Release /p:Platform=x64 ^ 25 | 
/p:PreferredToolArchitecture=x64 subprojects\zlib-1.2.11\subprojects@zlib-1.2.11@@z@sta.vcxproj ^ 26 | /filelogger 27 | 28 | %MSBuild% /p:Configuration=Release /p:Platform=x64 ^ 29 | /p:PreferredToolArchitecture=x64 lc0@exe.vcxproj ^ 30 | /filelogger 31 | 32 | -------------------------------------------------------------------------------- /build-cuda.cmd: -------------------------------------------------------------------------------- 1 | rd /s build 2 | 3 | rem set MSBuild="C:\Program Files (x86)\MSBuild\14.0\Bin\MSBuild.exe" 4 | set MSBuild="C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\MSBuild\15.0\Bin\MSBuild.exe" 5 | rem call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 6 | call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64 7 | meson.py build --backend vs2017 --buildtype release ^ 8 | -Dcudnn_libdirs="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0\lib\x64","C:\dev\cuDNN\cuda\lib\x64" ^ 9 | -Dcudnn_include="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0\include","C:\dev\cuDNN\cuda\include" ^ 10 | -Ddefault_library=static 11 | 12 | pause 13 | 14 | 15 | cd build 16 | 17 | %MSBuild% ^ 18 | /p:Configuration=Release ^ 19 | /p:Platform=x64 ^ 20 | /p:PreferredToolArchitecture=x64 lc0.sln ^ 21 | /filelogger 22 | 23 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | case $1 in 6 | plain|debug|debugoptimized|release|minsize) 7 | BUILDTYPE=$1 8 | shift 9 | ;; 10 | *) 11 | BUILDTYPE=release 12 | ;; 13 | esac 14 | 15 | BUILDDIR=build/${BUILDTYPE} 16 | 17 | if [ -f ${BUILDDIR}/build.ninja ] 18 | then 19 | meson configure ${BUILDDIR} -Dbuildtype=${BUILDTYPE} -Dprefix=${INSTALL_PREFIX:-/usr/local} "$@" 20 | else 21 | meson ${BUILDDIR} --buildtype ${BUILDTYPE} --prefix 
${INSTALL_PREFIX:-/usr/local} "$@" 22 | fi 23 | 24 | pushd ${BUILDDIR} 25 | 26 | NINJA=$(awk '/ninja/ {ninja=$4} END {print ninja}' meson-logs/meson-log.txt) 27 | 28 | if [ -n "${INSTALL_PREFIX}" ] 29 | then 30 | ${NINJA} install 31 | else 32 | ${NINJA} 33 | fi 34 | 35 | popd 36 | -------------------------------------------------------------------------------- /checkdir.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | import os 5 | if len(sys.argv) > 1 and os.path.isdir(sys.argv[1]): 6 | exit(0) 7 | exit(1) 8 | -------------------------------------------------------------------------------- /dist/README-cuda.txt: -------------------------------------------------------------------------------- 1 | Lc0 2 | 3 | Lc0 is a UCI-compliant chess engine designed to play chess via 4 | neural network, specifically those of the LeelaChessZero project 5 | (https://lczero.org). 6 | 7 | This binary uses CUDA and cuDNN dynamic link libraries copyrighted 8 | by Nvidia corporation (http://www.nvidia.com), and redistributed as 9 | permitted by the respective license file (see CUDA.txt section 2.2 10 | and CUDNN.txt section "CUDNN DISTRIBUTION" for details). You are 11 | authorized to redistribute these libraries together with this 12 | package as a whole but not individually. 13 | 14 | 15 | License 16 | 17 | Leela Chess is free software: you can redistribute it and/or modify 18 | it under the terms of the GNU General Public License as published by 19 | the Free Software Foundation, either version 3 of the License, or 20 | (at your option) any later version. 21 | 22 | Leela Chess is distributed in the hope that it will be useful, 23 | but WITHOUT ANY WARRANTY; without even the implied warranty of 24 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 25 | GNU General Public License for more details. 
26 | 27 | You should have received a copy of the GNU General Public License 28 | along with Leela Chess. If not, see <https://www.gnu.org/licenses/>. 29 | 30 | Additional permission under GNU GPL version 3 section 7 31 | 32 | If you modify this Program, or any covered work, by linking or 33 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA 34 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a 35 | modified version of those libraries), containing parts covered by the 36 | terms of the respective license agreement, the licensors of this 37 | Program grant you additional permission to convey the resulting work. 38 | 39 | -------------------------------------------------------------------------------- /meson_options.txt: -------------------------------------------------------------------------------- 1 | option('tensorflow_include', 2 | type: 'array', 3 | value: ['/usr/local/include/tensorflow/'], 4 | description: 'Paths to tensorflow include directories') 5 | 6 | option('protobuf_include', 7 | type: 'array', 8 | value: ['/usr/local/include/'], 9 | description: 'Paths to protobuf include directories') 10 | 11 | option('openblas_include', 12 | type: 'array', 13 | value: ['/usr/include/openblas/'], 14 | description: 'Paths to openblas include directories') 15 | 16 | option('opencl_include', 17 | type: 'array', 18 | value: ['/usr/include/'], 19 | description: 'Paths to OpenCL include directories') 20 | 21 | option('tensorflow_libdir', 22 | type: 'array', 23 | value: ['/usr/local/lib/tensorflow_cc/'], 24 | description: 'Paths to tensorflow libraries') 25 | 26 | option('protobuf_libdir', 27 | type: 'array', 28 | value: ['/usr/lib/x86_64-linux-gnu/'], 29 | description: 'Paths to protobuf libraries') 30 | 31 | option('openblas_libdirs', 32 | type: 'array', 33 | value: ['/usr/lib/'], 34 | description: 'Paths to OpenBLAS libraries') 35 | 36 | option('opencl_libdirs', 37 | type: 'array', 38 | value: ['/opt/cuda/lib64/', '/usr/local/cuda/lib64/'], 39 | description: 'Paths to 
OpenCL libraries') 40 | 41 | option('cudnn_libdirs', 42 | type: 'array', 43 | value: ['/opt/cuda/lib64/', '/usr/local/cuda/lib64/'], 44 | description: 'Paths to Cuda/cudnn libraries') 45 | 46 | option('mkl_libdirs', 47 | type: 'array', 48 | value: ['/opt/intel/lib/intel64', '/opt/intel/mkl/lib/intel64', '/opt/intel/mkl/lib'], 49 | description: 'Paths to MKL libraries') 50 | 51 | option('mkl_include', 52 | type: 'array', 53 | value: ['/opt/intel/mkl/include'], 54 | description: 'Paths to MKL include directories') 55 | 56 | option('cudnn_include', 57 | type: 'array', 58 | value: ['/opt/cuda/include/', '/usr/local/cuda/include/'], 59 | description: 'Paths to cudnn include directory') 60 | 61 | option('build_backends', 62 | type: 'boolean', 63 | value: true, 64 | description: 'Build backends for NN computation') 65 | 66 | option('blas', 67 | type: 'boolean', 68 | value: true, 69 | description: 'Enable BLAS backend') 70 | 71 | option('ispc', 72 | type: 'boolean', 73 | value: true, 74 | description: 'use ispc') 75 | 76 | option('ispc_native_only', 77 | type: 'boolean', 78 | value: true, 79 | description: 'use ispc and enable native arch only') 80 | 81 | option('cudnn', 82 | type: 'boolean', 83 | value: true, 84 | description: 'Enable cuDNN backend') 85 | 86 | option('opencl', 87 | type: 'boolean', 88 | value: true, 89 | description: 'Enable OpenCL backend') 90 | 91 | option('tensorflow', 92 | type: 'boolean', 93 | value: false, 94 | description: 'Enable TensorFlow backend') 95 | 96 | option('openblas', 97 | type: 'boolean', 98 | value: true, 99 | description: 'Enable OpenBLAS support') 100 | 101 | option('mkl', 102 | type: 'boolean', 103 | value: true, 104 | description: 'Enable MKL BLAS support') 105 | 106 | option('accelerate', 107 | type: 'boolean', 108 | value: true, 109 | description: 'Enable Accelerate BLAS support') 110 | 111 | option('popcnt', 112 | type: 'boolean', 113 | value: true, 114 | description: 'Use the popcnt instruction') 115 | 116 | option('pext', 117 | type: 
'boolean', 118 | value: false, 119 | description: 'Use the pext instruction') 120 | 121 | option('gtest', 122 | type: 'boolean', 123 | value: true, 124 | description: 'Build gtest tests') 125 | 126 | option('protobuf-3-6-0', 127 | type: 'boolean', 128 | value: false, 129 | description: 'Use the protobuf 3.6.0 subproject') 130 | -------------------------------------------------------------------------------- /scripts/bumpversion.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import os 5 | 6 | 7 | VERSION_FILE = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../src/version.inc") 8 | VERSION_CONTENT = """ 9 | #define LC0_VERSION_MAJOR {} 10 | #define LC0_VERSION_MINOR {} 11 | #define LC0_VERSION_PATCH {} 12 | #define LC0_VERSION_POSTFIX "{}" 13 | """ 14 | VERSION_CONTENT = VERSION_CONTENT.strip() 15 | 16 | 17 | def get_version(): 18 | with open(VERSION_FILE, 'r') as f: 19 | major = int(f.readline().split()[2]) 20 | minor = int(f.readline().split()[2]) 21 | patch = int(f.readline().split()[2]) 22 | postfix = f.readline().split()[2] 23 | 24 | postfix = postfix.replace('"', '') 25 | return major, minor, patch, postfix 26 | 27 | 28 | def set_version(major, minor, patch, postfix=""): 29 | version_inc = VERSION_CONTENT.format(major, minor, patch, postfix) 30 | 31 | with open(VERSION_FILE, 'w') as f: 32 | f.write(version_inc) 33 | 34 | 35 | def update(major, minor, patch, postfix=""): 36 | set_version(major, minor, patch, postfix) 37 | 38 | 39 | def main(argv): 40 | major, minor, patch, postfix = get_version() 41 | 42 | if argv.major: 43 | major += 1 44 | minor = 0 45 | patch = 0 46 | postfix = "" 47 | update(major, minor, patch) 48 | if argv.minor: 49 | minor += 1 50 | patch = 0 51 | postfix = "" 52 | update(major, minor, patch) 53 | if argv.patch: 54 | patch += 1 55 | postfix = "" 56 | update(major, minor, patch) 57 | if argv.postfix and len(argv.postfix) > 0: 58 | 
postfix = argv.postfix 59 | update(major, minor, patch, postfix) 60 | 61 | if len(postfix) == 0: 62 | print('v{}.{}.{}'.format(major, minor, patch)) 63 | else: 64 | print('v{}.{}.{}-{}'.format(major, minor, patch, postfix)) 65 | 66 | 67 | if __name__ == "__main__": 68 | argparser = argparse.ArgumentParser(description=\ 69 | 'Set or read current version.') 70 | argparser.add_argument('--major', action='store_true', 71 | help='bumps major version') 72 | argparser.add_argument('--minor', action='store_true', 73 | help='bumps minor version') 74 | argparser.add_argument('--patch', action='store_true', 75 | help='bumps patch') 76 | argparser.add_argument('--postfix', type=str, 77 | help='set postfix') 78 | argv = argparser.parse_args() 79 | main(argv) 80 | 81 | -------------------------------------------------------------------------------- /src/benchmark/benchmark.cc: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018-2019 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see <https://www.gnu.org/licenses/>. 
17 | 18 | Additional permission under GNU GPL version 3 section 7 19 | 20 | If you modify this Program, or any covered work, by linking or 21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA 22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a 23 | modified version of those libraries), containing parts covered by the 24 | terms of the respective license agreement, the licensors of this 25 | Program grant you additional permission to convey the resulting work. 26 | */ 27 | 28 | #include "benchmark/benchmark.h" 29 | #include "mcts/search.h" 30 | 31 | namespace lczero { 32 | namespace { 33 | const int kDefaultThreads = 2; 34 | 35 | const OptionId kThreadsOptionId{"threads", "Threads", 36 | "Number of (CPU) worker threads to use.", 't'}; 37 | const OptionId kNNCacheSizeId{ 38 | "nncache", "NNCacheSize", 39 | "Number of positions to store in a memory cache. A large cache can speed " 40 | "up searching, but takes memory."}; 41 | const OptionId kNodesId{"nodes", "", "Number of nodes to run as a benchmark."}; 42 | const OptionId kMovetimeId{"movetime", "", 43 | "Benchmark time allocation, in milliseconds."}; 44 | const OptionId kFenId{"fen", "", "Benchmark initial position FEN."}; 45 | 46 | } // namespace 47 | 48 | void Benchmark::Run() { 49 | OptionsParser options; 50 | NetworkFactory::PopulateOptions(&options); 51 | options.Add(kThreadsOptionId, 1, 128) = kDefaultThreads; 52 | options.Add(kNNCacheSizeId, 0, 999999999) = 200000; 53 | SearchParams::Populate(&options); 54 | 55 | options.Add(kNodesId, -1, 999999999) = -1; 56 | options.Add(kMovetimeId, -1, 999999999) = 10000; 57 | options.Add(kFenId) = ChessBoard::kStartposFen; 58 | 59 | if (!options.ProcessAllFlags()) return; 60 | 61 | try { 62 | auto option_dict = options.GetOptionsDict(); 63 | 64 | auto network = NetworkFactory::LoadNetwork(option_dict); 65 | 66 | NodeTree tree; 67 | tree.ResetToPosition(option_dict.Get(kFenId.GetId()), {}); 68 | 69 | NNCache cache; 70 | 
cache.SetCapacity(option_dict.Get(kNNCacheSizeId.GetId())); 71 | 72 | const auto start = std::chrono::steady_clock::now(); 73 | 74 | SearchLimits limits; 75 | int visits = option_dict.Get(kNodesId.GetId()); 76 | const int movetime = option_dict.Get(kMovetimeId.GetId()); 77 | if (movetime > -1) { 78 | limits.search_deadline = start + std::chrono::milliseconds(movetime); 79 | } 80 | if (visits > -1) { 81 | limits.visits = visits; 82 | } 83 | 84 | auto search = std::make_unique( 85 | tree, network.get(), 86 | std::bind(&Benchmark::OnBestMove, this, std::placeholders::_1), 87 | std::bind(&Benchmark::OnInfo, this, std::placeholders::_1), limits, 88 | option_dict, &cache, nullptr); 89 | 90 | search->StartThreads(option_dict.Get(kThreadsOptionId.GetId())); 91 | 92 | search->Wait(); 93 | 94 | const auto end = std::chrono::steady_clock::now(); 95 | std::chrono::duration time = end - start; 96 | std::cout << "Benchmark final time " << time.count() << "s calculating " 97 | << search->GetTotalPlayouts() / time.count() 98 | << " nodes per second." << std::endl; 99 | } catch (Exception& ex) { 100 | std::cerr << ex.what() << std::endl; 101 | } 102 | } 103 | 104 | void Benchmark::OnBestMove(const BestMoveInfo& move) { 105 | std::cout << "bestmove " << move.bestmove.as_string() << std::endl; 106 | } 107 | 108 | void Benchmark::OnInfo(const std::vector& infos) { 109 | std::string line = "Benchmark time " + std::to_string(infos[0].time); 110 | line += "ms, " + std::to_string(infos[0].nodes) + " nodes, "; 111 | line += std::to_string(infos[0].nps) + " nps"; 112 | if (!infos[0].pv.empty()) line += ", move " + infos[0].pv[0].as_string(); 113 | std::cout << line << std::endl; 114 | } 115 | 116 | } // namespace lczero 117 | -------------------------------------------------------------------------------- /src/benchmark/benchmark.h: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 
3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see <https://www.gnu.org/licenses/>. 17 | 18 | Additional permission under GNU GPL version 3 section 7 19 | 20 | If you modify this Program, or any covered work, by linking or 21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA 22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a 23 | modified version of those libraries), containing parts covered by the 24 | terms of the respective license agreement, the licensors of this 25 | Program grant you additional permission to convey the resulting work. 26 | */ 27 | 28 | #pragma once 29 | 30 | #include "mcts/search.h" 31 | #include "neural/cache.h" 32 | #include "neural/factory.h" 33 | #include "utils/optionsparser.h" 34 | 35 | namespace lczero { 36 | 37 | class Benchmark{ 38 | public: 39 | Benchmark() = default; 40 | 41 | void Run(); 42 | void OnBestMove(const BestMoveInfo& move); 43 | void OnInfo(const std::vector& infos); 44 | }; 45 | 46 | } // namespace lczero 47 | -------------------------------------------------------------------------------- /src/chess/callbacks.h: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 
3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see <https://www.gnu.org/licenses/>. 17 | 18 | Additional permission under GNU GPL version 3 section 7 19 | 20 | If you modify this Program, or any covered work, by linking or 21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA 22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a 23 | modified version of those libraries), containing parts covered by the 24 | terms of the respective license agreement, the licensors of this 25 | Program grant you additional permission to convey the resulting work. 26 | */ 27 | 28 | #pragma once 29 | 30 | #include 31 | #include 32 | #include 33 | #include "chess/bitboard.h" 34 | #include "chess/position.h" 35 | #include "utils/optional.h" 36 | 37 | namespace lczero { 38 | 39 | // Is sent when search decides on the best move. 40 | struct BestMoveInfo { 41 | BestMoveInfo(Move bestmove, Move ponder = Move{}) 42 | : bestmove(bestmove), ponder(ponder) {} 43 | Move bestmove; 44 | Move ponder; 45 | // Those are extensions and not really UCI protocol. 46 | // 1 if it's "player1", 2 if it's "player2" 47 | int player = -1; 48 | // Index of the game in the tournament (0-based). 49 | int game_id = -1; 50 | // The color of the player, if known. 51 | optional is_black; 52 | 53 | using Callback = std::function; 54 | }; 55 | 56 | // Is sent during the search. 
57 | struct ThinkingInfo { 58 | // Full depth. 59 | int depth = -1; 60 | // Maximum depth. 61 | int seldepth = -1; 62 | // Time since start of thinking. 63 | int64_t time = -1; 64 | // Nodes visited. 65 | int64_t nodes = -1; 66 | // Nodes per second. 67 | int nps = -1; 68 | // Hash fullness * 1000 69 | int hashfull = -1; 70 | // Win in centipawns. 71 | optional score; 72 | // Number of successful TB probes (not the same as playouts ending in TB hit). 73 | int tb_hits = -1; 74 | // Best line found. Moves are from perspective of white player. 75 | std::vector pv; 76 | // Multipv index. 77 | int multipv = -1; 78 | // Freeform comment. 79 | std::string comment; 80 | 81 | // Those are extensions and not really UCI protocol. 82 | // 1 if it's "player1", 2 if it's "player2" 83 | int player = -1; 84 | // Index of the game in the tournament (0-based). 85 | int game_id = -1; 86 | // The color of the player, if known. 87 | optional is_black; 88 | 89 | using Callback = std::function&)>; 90 | }; 91 | 92 | // Is sent when a single game is finished. 93 | struct GameInfo { 94 | // Game result. 95 | GameResult game_result = GameResult::UNDECIDED; 96 | // Name of the file with training data. 97 | std::string training_filename; 98 | // Game moves. 99 | std::vector moves; 100 | // Index of the game in the tournament (0-based). 101 | int game_id = -1; 102 | // The color of the player1, if known. 103 | optional is_black; 104 | // Minimum resign threshold which would have resulted in a false positive 105 | // if resign had been enabled. 106 | // Only provided if the game wasn't played with resign enabled. 107 | optional min_false_positive_threshold; 108 | 109 | using Callback = std::function; 110 | }; 111 | 112 | // Is sent at the end of the tournament and also during the tournament. 113 | struct TournamentInfo { 114 | // Did tournament finish, so those results are final. 115 | bool finished = false; 116 | 117 | // Player1's [win/draw/lose] as [white/black]. 118 | // e.g. 
results[2][1] is how many times player 1 lost as black. 119 | int results[3][2] = {{0, 0}, {0, 0}, {0, 0}}; 120 | using Callback = std::function; 121 | }; 122 | 123 | } // namespace lczero 124 | -------------------------------------------------------------------------------- /src/chess/uciloop.h: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see <https://www.gnu.org/licenses/>. 17 | 18 | Additional permission under GNU GPL version 3 section 7 19 | 20 | If you modify this Program, or any covered work, by linking or 21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA 22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a 23 | modified version of those libraries), containing parts covered by the 24 | terms of the respective license agreement, the licensors of this 25 | Program grant you additional permission to convey the resulting work. 
26 | */ 27 | 28 | #pragma once 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include "chess/callbacks.h" 35 | #include "utils/exception.h" 36 | 37 | namespace lczero { 38 | 39 | struct GoParams { 40 | optional wtime; 41 | optional btime; 42 | optional winc; 43 | optional binc; 44 | optional movestogo; 45 | optional depth; 46 | optional nodes; 47 | optional movetime; 48 | bool infinite = false; 49 | std::vector searchmoves; 50 | bool ponder = false; 51 | }; 52 | 53 | class UciLoop { 54 | public: 55 | virtual ~UciLoop() {} 56 | virtual void RunLoop(); 57 | 58 | // Sends response to host. 59 | void SendResponse(const std::string& response); 60 | // Sends responses to host ensuring they are received as a block. 61 | virtual void SendResponses(const std::vector& responses); 62 | void SendBestMove(const BestMoveInfo& move); 63 | void SendInfo(const std::vector& infos); 64 | void SendId(); 65 | 66 | // Command handlers. 67 | virtual void CmdUci() { throw Exception("Not supported"); } 68 | virtual void CmdIsReady() { throw Exception("Not supported"); } 69 | virtual void CmdSetOption(const std::string& /*name*/, 70 | const std::string& /*value*/, 71 | const std::string& /*context*/) { 72 | throw Exception("Not supported"); 73 | } 74 | virtual void CmdUciNewGame() { throw Exception("Not supported"); } 75 | virtual void CmdPosition(const std::string& /*position*/, 76 | const std::vector& /*moves*/) { 77 | throw Exception("Not supported"); 78 | } 79 | virtual void CmdGo(const GoParams& /*params*/) { 80 | throw Exception("Not supported"); 81 | } 82 | virtual void CmdStop() { throw Exception("Not supported"); } 83 | virtual void CmdPonderHit() { throw Exception("Not supported"); } 84 | virtual void CmdStart() { throw Exception("Not supported"); } 85 | 86 | private: 87 | bool DispatchCommand( 88 | const std::string& command, 89 | const std::unordered_map& params); 90 | }; 91 | 92 | } // namespace lczero 93 | 
-------------------------------------------------------------------------------- /src/main.cc: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018-2019 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 17 | 18 | Additional permission under GNU GPL version 3 section 7 19 | 20 | If you modify this Program, or any covered work, by linking or 21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA 22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a 23 | modified version of those libraries), containing parts covered by the 24 | terms of the respective license agreement, the licensors of this 25 | Program grant you additional permission to convey the resulting work. 
26 | */ 27 | 28 | #include "benchmark/benchmark.h" 29 | #include "chess/board.h" 30 | #include "engine.h" 31 | #include "selfplay/loop.h" 32 | #include "utils/commandline.h" 33 | #include "utils/logging.h" 34 | #include "version.h" 35 | 36 | int main(int argc, const char** argv) { 37 | LOGFILE << "Leelafish, based on Lc0, started."; 38 | CERR << "Leelafish, based on:"; 39 | CERR << " _"; 40 | CERR << "| _ | |"; 41 | CERR << "|_ |_ |_| v" << GetVersionStr() << " built " << __DATE__; 42 | using namespace lczero; 43 | 44 | InitializeMagicBitboards(); 45 | 46 | CommandLine::Init(argc, argv); 47 | CommandLine::RegisterMode("uci", "(default) Act as UCI engine"); 48 | CommandLine::RegisterMode("selfplay", "Play games with itself"); 49 | CommandLine::RegisterMode("benchmark", "Quick benchmark"); 50 | 51 | if (CommandLine::ConsumeCommand("selfplay")) { 52 | // Selfplay mode. 53 | SelfPlayLoop loop; 54 | loop.RunLoop(); 55 | } else if (CommandLine::ConsumeCommand("benchmark")) { 56 | // Benchmark mode. 57 | Benchmark benchmark; 58 | benchmark.Run(); 59 | } else { 60 | // Consuming optional "uci" mode. 61 | CommandLine::ConsumeCommand("uci"); 62 | // Ordinary UCI engine. 63 | EngineLoop loop; 64 | loop.RunLoop(); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/neural/blas/README.md: -------------------------------------------------------------------------------- 1 | The files in this directory comprise the BLAS backend of Lc0. 2 | 3 | ## License 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 17 | 18 | **The source files of this directory are not covered by any additional 19 | permission.** 20 | 21 | 22 | -------------------------------------------------------------------------------- /src/neural/blas/blas.h: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 17 | */ 18 | 19 | #pragma once 20 | 21 | // Select the BLAS vendor based on defines 22 | 23 | #ifdef USE_MKL 24 | #include 25 | #else 26 | 27 | #ifdef USE_OPENBLAS 28 | #include 29 | 30 | // Specific openblas routines. 
31 | extern "C" { 32 | int openblas_get_num_procs(void); 33 | void openblas_set_num_threads(int num_threads); 34 | char* openblas_get_corename(void); 35 | char* openblas_get_config(void); 36 | } 37 | 38 | #else 39 | 40 | #ifdef __APPLE__ 41 | #include 42 | #define USE_ACCELERATE 43 | #endif 44 | 45 | #endif // USE_OPENBLAS 46 | 47 | #endif // USE_MKL 48 | -------------------------------------------------------------------------------- /src/neural/blas/convolution1.cc: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see <http://www.gnu.org/licenses/>. 17 | */ 18 | 19 | #include "neural/blas/convolution1.h" 20 | #include "neural/blas/blas.h" 21 | 22 | namespace lczero { 23 | // Batched 1x1 convolution: one SGEMM per sample, outputs := weights x input. 24 | void Convolution1::Forward(const size_t batch_size, const size_t input_channels, 25 | const size_t output_channels, const float* input, 26 | const float* weights, float* output) { 27 | for (size_t i = 0; i < batch_size; i++) { 28 | // C←αAB + βC 29 | // M Number of rows in matrices A and C. 30 | // N Number of columns in matrices B and C. 31 | // K Number of columns in matrix A; number of rows in matrix B. 32 | // lda The size of the first dimension of matrix A; if you are 33 | // passing a matrix A[m][n], the value should be m. 
34 | // cblas_sgemm(CblasRowMajor, TransA, TransB, M, N, K, alpha, A, lda, B, 35 | // ldb, beta, C, N); 36 | 37 | // C A B 38 | // 39 | // outputs := weights x input 40 | // 41 | // cols: kSquares (N) input_channels (K) kSquares(N) 42 | // 43 | // rows: output_channels (M) output_channels (M) input_channels (K) 44 | 45 | const float* batch_input = input + i * kSquares * input_channels; 46 | float* batch_output = output + i * kSquares * output_channels; 47 | 48 | cblas_sgemm(CblasRowMajor, // Row major format 49 | CblasNoTrans, // A not transposed 50 | CblasNoTrans, // B not transposed 51 | (int)output_channels, // M 52 | kSquares, // N 53 | (int)input_channels, // K 54 | 1.0f, // Alpha 55 | weights, // A 56 | (int)input_channels, // lda, leading rank of A 57 | batch_input, // B 58 | kSquares, // ldb, leading rank of B 59 | 0.0f, // beta 60 | batch_output, // C 61 | kSquares); // ldc, leading rank of C 62 | } 63 | } 64 | 65 | } // namespace lczero 66 | -------------------------------------------------------------------------------- /src/neural/blas/convolution1.h: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see <http://www.gnu.org/licenses/>. 
17 | */ 18 | 19 | #pragma once 20 | 21 | #include 22 | #include 23 | 24 | namespace lczero { 25 | 26 | // Convolution 1x1 27 | class Convolution1 { 28 | public: 29 | Convolution1() = delete; 30 | 31 | // Batched forward inference. 32 | static void Forward(const size_t batch_size, const size_t input_channels, 33 | const size_t output_channels, const float* input, 34 | const float* weights, float* output); 35 | 36 | private: 37 | static constexpr auto kWidth = 8; 38 | static constexpr auto kHeight = 8; 39 | static constexpr auto kSquares = kWidth * kHeight; 40 | }; 41 | } // namespace lczero 42 | -------------------------------------------------------------------------------- /src/neural/blas/fully_connected_layer.cc: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 
17 | */ 18 | 19 | #include "neural/blas/fully_connected_layer.h" 20 | #include "neural/blas/blas.h" 21 | 22 | #include 23 | #include 24 | #include 25 | 26 | namespace lczero { 27 | 28 | void FullyConnectedLayer::Forward1D(size_t batch_size, const size_t input_size, 29 | const size_t output_size, 30 | const float* inputs, const float* weights, 31 | const float* biases, bool apply_relu, 32 | float* outputs) { 33 | if (batch_size == 1) { 34 | // Just a matrix-vector multiplication 35 | // 36 | // C A B 37 | // 38 | // outputs := weights x inputs 39 | // 40 | // cols: 1 input_size 1 41 | // 42 | // rows output_size output_size input_size 43 | // 44 | 45 | cblas_sgemv(CblasRowMajor, CblasNoTrans, 46 | // M K 47 | (int)output_size, (int)input_size, 1.0f, weights, 48 | (int)input_size, inputs, 1, 0.0f, outputs, 1); 49 | } else { 50 | // more columns, matrix-matrix multiplication 51 | // 52 | // C A B 53 | // 54 | // outputs := weights x inputs 55 | // 56 | // cols: batch_size (N) input_size (K) batch_size (N) 57 | // 58 | // rows output_size (M) output_size (M) input_size (K) 59 | // 60 | 61 | // C←αAB + βC 62 | // M Number of rows in matrices A and C. 63 | // N Number of columns in matrices B and C. 64 | // K Number of columns in matrix A; number of rows in matrix B. 65 | // lda The size of the first dimension of matrix A; if you are 66 | // passing a matrix A[m][n], the value should be m. 
67 | // cblas_sgemm(CblasRowMajor, TransA, TransB, M, N, K, alpha, A, lda, B, 68 | // ldb, beta, C, N); 69 | 70 | cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans, 71 | (int)output_size, // M 72 | (int)batch_size, // N 73 | (int)input_size, // K 74 | 1.0f, // alpha 75 | weights, // A 76 | (int)input_size, // lda, leading rank of A 77 | inputs, // B 78 | (int)input_size, // ldb, leading rank of B 79 | 0.0f, // beta 80 | outputs, // C 81 | (int)output_size); // ldc, leading rank of C 82 | } 83 | if (apply_relu) { 84 | for (size_t i = 0; i < batch_size; i++) { 85 | float* batch_outputs = outputs + i * output_size; 86 | for (size_t o = 0; o < output_size; o++) { 87 | float val = biases[o] + batch_outputs[o]; 88 | batch_outputs[o] = val >= 0 ? val : 0; 89 | } 90 | } 91 | } else { 92 | for (size_t i = 0; i < batch_size; i++) { 93 | float* batch_outputs = outputs + i * output_size; 94 | for (size_t o = 0; o < output_size; o++) { 95 | batch_outputs[o] += biases[o]; 96 | } 97 | } 98 | } 99 | } 100 | 101 | float FullyConnectedLayer::Forward0D(const size_t size, const float* x, 102 | const float* y) { 103 | // A scalar product, also known as a dot-product. 104 | // float cblas_sdot(const int N, const float *X, const int incX, const float 105 | // *Y, 106 | // const int incY); 107 | return cblas_sdot((int)size, x, 1, y, 1); 108 | } 109 | 110 | } // namespace lczero 111 | -------------------------------------------------------------------------------- /src/neural/blas/fully_connected_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 
9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 17 | */ 18 | 19 | #pragma once 20 | 21 | #include 22 | #include 23 | 24 | namespace lczero { 25 | 26 | class FullyConnectedLayer { 27 | public: 28 | FullyConnectedLayer() = delete; 29 | 30 | // Forward inference, batched, from input_size to output_size 31 | static void Forward1D(const size_t batch_size, const size_t input_size, 32 | const size_t output_size, const float* input, 33 | const float* weights, const float* biases, 34 | bool apply_relu, float* output); 35 | 36 | // Forward inference, no batched, from input_size to scalar 37 | static float Forward0D(const size_t input_size, const float* input, 38 | const float* weights); 39 | 40 | }; 41 | 42 | } // namespace lczero 43 | -------------------------------------------------------------------------------- /src/neural/blas/se_unit.cc: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 
17 | */ 18 | 19 | #include "neural/blas/se_unit.h" 20 | #include "neural/blas/fully_connected_layer.h" 21 | 22 | #include 23 | 24 | namespace lczero { 25 | namespace { 26 | constexpr int kWidth = 8; 27 | constexpr int kHeight = 8; 28 | constexpr int kSquares = kWidth * kHeight; 29 | } // namespace 30 | 31 | static void global_avg_pooling(const size_t channels, const float* input, 32 | float* output) { 33 | for (auto c = size_t{0}; c < channels; c++) { 34 | auto acc = 0.0f; 35 | for (auto i = size_t{0}; i < kSquares; i++) { 36 | acc += input[c * kSquares + i]; 37 | } 38 | output[c] = acc / kSquares; 39 | } 40 | } 41 | 42 | static void apply_se(const size_t channels, const size_t batch_size, 43 | const float* input, const float* res, const float* scale, 44 | float* output) { 45 | const auto lambda_ReLU = [](const auto val) { 46 | return (val > 0.0f) ? val : 0; 47 | }; 48 | 49 | const auto lambda_sigmoid = [](const auto val) { 50 | return 1.0f / (1.0f + exp(-val)); 51 | }; 52 | 53 | for (auto c = size_t{0}; c < channels * batch_size; c++) { 54 | auto batch = c / channels; 55 | auto gamma = lambda_sigmoid(scale[c + batch * channels]); 56 | auto beta = scale[c + batch * channels + channels]; 57 | for (auto i = size_t{0}; i < kSquares; i++) { 58 | output[c * kSquares + i] = lambda_ReLU(gamma * input[c * kSquares + i] + 59 | beta + res[c * kSquares + i]); 60 | } 61 | } 62 | } 63 | 64 | void ApplySEUnit(const size_t batch_size, const size_t channels, 65 | const size_t se_fc_outputs, const float* input, 66 | const float* residual, const float* weights_w1, 67 | const float* weights_b1, const float* weights_w2, 68 | const float* weights_b2, float* output) { 69 | std::vector pool(2 * channels * batch_size); 70 | std::vector fc_out1(batch_size * se_fc_outputs); 71 | 72 | global_avg_pooling(channels * batch_size, input, pool.data()); 73 | 74 | FullyConnectedLayer::Forward1D(batch_size, channels, se_fc_outputs, 75 | pool.data(), weights_w1, weights_b1, 76 | true, // Relu On 77 | 
fc_out1.data()); 78 | 79 | FullyConnectedLayer::Forward1D(batch_size, se_fc_outputs, 2 * channels, 80 | fc_out1.data(), weights_w2, weights_b2, 81 | false, // Relu Off 82 | pool.data()); 83 | 84 | // Sigmoid, scale and add residual 85 | apply_se(channels, batch_size, input, residual, pool.data(), output); 86 | } 87 | 88 | } // namespace lczero 89 | -------------------------------------------------------------------------------- /src/neural/blas/se_unit.h: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 17 | */ 18 | 19 | #pragma once 20 | 21 | #include 22 | 23 | namespace lczero { 24 | 25 | void ApplySEUnit(const size_t batch_size, const size_t channels, 26 | const size_t se_fc_outputs, const float* input, 27 | const float* residual, const float* weights_w1, 28 | const float* weights_b1, const float* weights_w2, 29 | const float* weights_b2, float* output); 30 | 31 | } // namespace lczero 32 | -------------------------------------------------------------------------------- /src/neural/blas/winograd_convolution3.h: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 
3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 17 | */ 18 | 19 | #pragma once 20 | 21 | #include 22 | #include 23 | 24 | namespace lczero { 25 | 26 | // Convolution 3x3 on a 8x8 board using the Winograd algorithm. 27 | // 28 | // Ref: 29 | // 30 | // Fast Algorithms for Convolutional Neural Networks 31 | // https://arxiv.org/abs/1509.09308 32 | // 33 | // https://ai.intel.com/winograd/ 34 | // https://ai.intel.com/winograd-2/ 35 | 36 | // Convolution 3x3 using the Winograd algorithm 37 | class WinogradConvolution3 { 38 | public: 39 | // The instance will allocate memory resources for the 40 | // largest batch size, and the largest input and output 41 | // layers. 42 | WinogradConvolution3(const size_t max_batch_size, 43 | const size_t max_input_layers, 44 | const size_t max_output_layers); 45 | 46 | // Forward inference, batched. 
47 | void Forward(const size_t batch_size, const size_t input_channels, 48 | const size_t output_channels, const float* input, 49 | const float* weights, float* output); 50 | 51 | private: 52 | void TransformIn(const size_t batch_size, const float* input, 53 | const size_t channels); 54 | 55 | void Sgemm(const size_t batch_size, const float* weights, 56 | const size_t input_channels, const size_t output_channels); 57 | 58 | void TransformOut(const size_t batch_size, float* output, 59 | const size_t channels); 60 | 61 | static constexpr auto kWidth = 8; 62 | static constexpr auto kHeight = 8; 63 | static constexpr auto kSquares = kWidth * kHeight; 64 | 65 | static constexpr auto kWtiles = (kWidth + 1) / 2; // 4 66 | static constexpr auto kTiles = kWtiles * kWtiles; // 16 67 | 68 | static constexpr auto kWinogradAlpha = 4; 69 | static constexpr auto kWinogradTile = kWinogradAlpha * kWinogradAlpha; 70 | 71 | std::vector V_; 72 | std::vector M_; 73 | }; 74 | } // namespace lczero 75 | -------------------------------------------------------------------------------- /src/neural/cache.cc: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 
17 | 18 | Additional permission under GNU GPL version 3 section 7 19 | 20 | If you modify this Program, or any covered work, by linking or 21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA 22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a 23 | modified version of those libraries), containing parts covered by the 24 | terms of the respective license agreement, the licensors of this 25 | Program grant you additional permission to convey the resulting work. 26 | */ 27 | #include "neural/cache.h" 28 | #include 29 | #include 30 | 31 | namespace lczero { 32 | CachingComputation::CachingComputation( 33 | std::unique_ptr parent, NNCache* cache) 34 | : parent_(std::move(parent)), cache_(cache) {} 35 | 36 | int CachingComputation::GetCacheMisses() const { 37 | return parent_->GetBatchSize(); 38 | } 39 | 40 | int CachingComputation::GetBatchSize() const { return batch_.size(); } 41 | 42 | bool CachingComputation::AddInputByHash(uint64_t hash) { 43 | NNCacheLock lock(cache_, hash); 44 | if (!lock) return false; 45 | batch_.emplace_back(); 46 | batch_.back().lock = std::move(lock); 47 | batch_.back().hash = hash; 48 | return true; 49 | } 50 | 51 | void CachingComputation::PopCacheHit() { 52 | assert(!batch_.empty()); 53 | assert(batch_.back().lock); 54 | assert(batch_.back().idx_in_parent == -1); 55 | batch_.pop_back(); 56 | } 57 | // On a cache hit only the hash is recorded; otherwise the sample is queued in the parent computation. 58 | void CachingComputation::AddInput( 59 | uint64_t hash, InputPlanes&& input, 60 | std::vector&& probabilities_to_cache) { 61 | if (AddInputByHash(hash)) return; 62 | batch_.emplace_back(); 63 | batch_.back().hash = hash; 64 | batch_.back().idx_in_parent = parent_->GetBatchSize(); 65 | batch_.back().probabilities_to_cache = std::move(probabilities_to_cache); 66 | parent_->AddInput(std::move(input)); 67 | } 68 | 69 | void CachingComputation::PopLastInputHit() { 70 | assert(!batch_.empty()); 71 | assert(batch_.back().idx_in_parent == -1); 72 | batch_.pop_back(); 73 | } 74 | 75 | void 
CachingComputation::ComputeBlocking() { 76 | if (parent_->GetBatchSize() == 0) return; 77 | parent_->ComputeBlocking(); 78 | 79 | // Fill cache with data from NN. 80 | for (const auto& item : batch_) { 81 | if (item.idx_in_parent == -1) continue; 82 | auto req = 83 | std::make_unique(item.probabilities_to_cache.size()); 84 | req->q = parent_->GetQVal(item.idx_in_parent); 85 | req->d = parent_->GetDVal(item.idx_in_parent); 86 | int idx = 0; 87 | for (auto x : item.probabilities_to_cache) { 88 | req->p[idx++] = 89 | std::make_pair(x, parent_->GetPVal(item.idx_in_parent, x)); 90 | } 91 | cache_->Insert(item.hash, std::move(req)); 92 | } 93 | } 94 | 95 | float CachingComputation::GetQVal(int sample) const { 96 | const auto& item = batch_[sample]; 97 | if (item.idx_in_parent >= 0) return parent_->GetQVal(item.idx_in_parent); 98 | return item.lock->q; 99 | } 100 | 101 | float CachingComputation::GetDVal(int sample) const { 102 | const auto& item = batch_[sample]; 103 | if (item.idx_in_parent >= 0) return parent_->GetDVal(item.idx_in_parent); 104 | return item.lock->d; 105 | } 106 | 107 | float CachingComputation::GetPVal(int sample, int move_id) const { 108 | auto& item = batch_[sample]; 109 | if (item.idx_in_parent >= 0) 110 | return parent_->GetPVal(item.idx_in_parent, move_id); 111 | const auto& moves = item.lock->p; 112 | 113 | int total_count = 0; 114 | while (total_count < moves.size()) { 115 | // Optimization: usually moves are stored in the same order as queried. 116 | const auto& move = moves[item.last_idx++]; 117 | if (item.last_idx == moves.size()) item.last_idx = 0; 118 | if (move.first == move_id) return move.second; 119 | ++total_count; 120 | } 121 | assert(false); // Move not found. 
122 | return 0; 123 | } 124 | 125 | } // namespace lczero 126 | -------------------------------------------------------------------------------- /src/neural/cache.h: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 17 | 18 | Additional permission under GNU GPL version 3 section 7 19 | 20 | If you modify this Program, or any covered work, by linking or 21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA 22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a 23 | modified version of those libraries), containing parts covered by the 24 | terms of the respective license agreement, the licensors of this 25 | Program grant you additional permission to convey the resulting work. 26 | */ 27 | #pragma once 28 | 29 | #include "neural/network.h" 30 | #include "utils/cache.h" 31 | #include "utils/smallarray.h" 32 | 33 | namespace lczero { 34 | 35 | struct CachedNNRequest { 36 | CachedNNRequest(size_t size) : p(size) {} 37 | typedef std::pair IdxAndProb; 38 | float q; 39 | float d; 40 | // TODO(mooskagh) Don't really need index if using perfect hash. 
41 | SmallArray p; 42 | }; 43 | 44 | typedef LruCache NNCache; 45 | typedef LruCacheLock NNCacheLock; 46 | 47 | // Wraps around NetworkComputation and caches result. 48 | // While it mostly repeats NetworkComputation interface, it's not derived 49 | // from it, as AddInput() needs hash and index of probabilities to store. 50 | class CachingComputation { 51 | public: 52 | CachingComputation(std::unique_ptr parent, 53 | NNCache* cache); 54 | 55 | // How many inputs are not found in cache and will be forwarded to a wrapped 56 | // computation. 57 | int GetCacheMisses() const; 58 | // Total number of times AddInput/AddInputByHash were (successfully) called. 59 | int GetBatchSize() const; 60 | // Adds input by hash only. If that hash is not in cache, returns false 61 | // and does nothing. Otherwise adds. 62 | bool AddInputByHash(uint64_t hash); 63 | // Adds a sample to the batch. 64 | // @hash is a hash to store/lookup it in the cache. 65 | // @probabilities_to_cache is which indices of policy head to store. 66 | void AddInput(uint64_t hash, InputPlanes&& input, 67 | std::vector&& probabilities_to_cache); 68 | // Undoes the last AddInput. If it was a cache miss, then it's actually not 69 | // removed from parent's batch. 70 | void PopLastInputHit(); 71 | // Do the computation. 72 | void ComputeBlocking(); 73 | // Returns Q value of @sample. 74 | float GetQVal(int sample) const; 75 | // Returns probability of draw if NN has WDL value head. 76 | float GetDVal(int sample) const; 77 | // Returns P value @move_id of @sample. 78 | float GetPVal(int sample, int move_id) const; 79 | // Pops last input from the computation. Only allowed for inputs which were 80 | // cached. 
81 | void PopCacheHit(); 82 | 83 | private: 84 | struct WorkItem { 85 | uint64_t hash; 86 | NNCacheLock lock; 87 | int idx_in_parent = -1; 88 | std::vector probabilities_to_cache; 89 | mutable int last_idx = 0; 90 | }; 91 | 92 | std::unique_ptr parent_; 93 | NNCache* cache_; 94 | std::vector batch_; 95 | }; 96 | 97 | } // namespace lczero 98 | -------------------------------------------------------------------------------- /src/neural/cuda/cuda_common.h: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 17 | 18 | Additional permission under GNU GPL version 3 section 7 19 | 20 | If you modify this Program, or any covered work, by linking or 21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA 22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a 23 | modified version of those libraries), containing parts covered by the 24 | terms of the respective license agreement, the licensors of this 25 | Program grant you additional permission to convey the resulting work. 
26 | */ 27 | 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | #include "utils/exception.h" 34 | 35 | namespace lczero { 36 | namespace cudnn_backend { 37 | 38 | void CudnnError(cudnnStatus_t status, const char* file, const int& line); 39 | void CublasError(cublasStatus_t status, const char* file, const int& line); 40 | void CudaError(cudaError_t status, const char* file, const int& line); 41 | 42 | #define ReportCUDNNErrors(status) CudnnError(status, __FILE__, __LINE__) 43 | #define ReportCUBLASErrors(status) CublasError(status, __FILE__, __LINE__) 44 | #define ReportCUDAErrors(status) CudaError(status, __FILE__, __LINE__) 45 | 46 | inline int DivUp(int a, int b) { return (a + b - 1) / b; } 47 | 48 | } // namespace cudnn_backend 49 | } // namespace lczero 50 | -------------------------------------------------------------------------------- /src/neural/cuda/kernels.h: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018-2019 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 
17 | 18 | Additional permission under GNU GPL version 3 section 7 19 | 20 | If you modify this Program, or any covered work, by linking or 21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA 22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a 23 | modified version of those libraries), containing parts covered by the 24 | terms of the respective license agreement, the licensors of this 25 | Program grant you additional permission to convey the resulting work. 26 | */ 27 | 28 | namespace lczero { 29 | namespace cudnn_backend { 30 | 31 | // Adds two vectors (possibly of different sizes), also do optional 32 | // activation (relu, tanh or sigmoid). 33 | template 34 | void addVectors(T* c, T* a, T* b, int size, int asize, int bsize, bool relu, 35 | bool use_tanh, bool use_sigmoid); 36 | 37 | // Add bias to convolution's output. 38 | template 39 | void addBias_NCHW(T* c, T* a, T* b, int N, int C, int H, int W); 40 | 41 | // Conversion from: fp32 -> fp16 datatype, and NCHW -> NHWC layout. 42 | // Cudnn kernels work best with NCHW layout for fp32, and with NHWC for fp16. 43 | void fp32NCHWtofp16NHWC(half* output_tensor, float* input_tensor, int Nin, 44 | int Cin, int Nout, int Cout, int H, int W); 45 | 46 | // Plain data-type conversion (no layout conversion). 47 | template 48 | void copyTypeConverted(DstType* op, SrcType* ip, int N); 49 | 50 | // Perform batch normilization. 51 | template 52 | void batchNorm(T* output, const T* input, const T* skipInput, int N, int C, 53 | int H, int W, float* means, float* var_multipliers, bool relu); 54 | 55 | // Unpack planes (input to network). 56 | void expandPlanes_Fp32_NCHW(float* output, const uint64_t* masks, 57 | const float* values, int n); 58 | 59 | void expandPlanes_Fp16_NHWC(half* output, const uint64_t* masks, 60 | const float* values, int n); 61 | 62 | // Perform global avg pool. 
63 | template 64 | void globalAvgPool(int N, int C, T* output, const T* input, 65 | const T* prevLayerBias); 66 | 67 | // Perform global scale. 68 | template 69 | void globalScale(int N, int C, T* output, const T* input, const T* scaleBias, 70 | const T* prevLayerBias); 71 | 72 | // Perform Squeeze-and-Excitation (SE) in a single fused kernel. 73 | // Returns false if the fused kernel can't handle the sizes. 74 | bool Se_Fp16_NHWC(int N, int C, int numFc1Out, half* output, const half* skip, 75 | const half* input, const half* w1, const half* b1, 76 | const half* w2, const half* b2, const half* bPrev); 77 | 78 | template 79 | void PolicyMap(int N, T* output, const T* input, const short* indices, 80 | int inputSize, int usedSize, int outputSize); 81 | 82 | } // namespace cudnn_backend 83 | } // namespace lczero 84 | -------------------------------------------------------------------------------- /src/neural/cuda/readme.txt: -------------------------------------------------------------------------------- 1 | cuda/cudnn backend for lc0. Here is a brief description of various files: 2 | 3 | 1. network_cudnn.cc -> cpp file containing network, computation, etc stuff related to lc0 4 | 2. layers.cc -> cpp files containing layer classes 5 | 3. layers.h -> header file for layer classes. 6 | 4. kernels.h -> header file for cuda kernels 7 | 5. common_kernels.cu -> common kernels (fp32, and fp16 that can work with old GPUs) 8 | 6. fp16_kernels.cu -> fp16 specific kernels (not used on other GPUs) 9 | 7. cuda_common.h -> header for common cuda stuff like ReportCUDAErrors, etc. 10 | 8. readme.txt -> this file 11 | 12 | High level overview: network is built of layer objects, layers are either implemented using cudnn/cublas libraries, or custom cuda kernels. 
namespace lczero {

// Selector passed to EncodePositionForNN() as `fill_empty_history`.
// NOTE(review): judging by the enumerator names it controls whether missing
// history is synthesized (never / only in some FEN-related case / always);
// the precise rule lives in encoder.cc - confirm there.
enum class FillEmptyHistory {NO, FEN_ONLY, ALWAYS};

// Encodes the last position in history for the neural network request.
//   history            - position history; its last position is encoded.
//   history_planes     - NOTE(review): presumably how many past positions
//                        are encoded into planes - confirm in encoder.cc.
//   fill_empty_history - see FillEmptyHistory.
// Returns the planes to hand to NetworkComputation::AddInput().
InputPlanes EncodePositionForNN(const PositionHistory& history,
                                int history_planes,
                                FillEmptyHistory fill_empty_history);

}  // namespace lczero
// Reads the weights file `filename` and returns the parsed weights structure
// (WeightsFile is an alias for the protobuf message pblczero::Net).
WeightsFile LoadWeightsFromFile(const std::string& filename);

// Tries to find a file which looks like a weights file, and located in
// directory of binary_name or one of subdirectories. If there are several such
// files, returns one which has the latest modification date.
// NOTE(review): the text above mentions `binary_name`, but the function takes
// no parameters - presumably the directory of the running binary is meant;
// confirm against loader.cc and update the wording.
std::string DiscoverWeightsFile();
#pragma once

#include <cstdint>  // std::uint64_t is used below; do not rely on transitive includes.
#include <memory>
#include <vector>

namespace lczero {

// Number of input planes.
const int kInputPlanes = 112;

// All input planes are 64 value vectors, every element of which is either
// 0 or some value, unique for the plane. Therefore, input is defined as
// a bitmask showing where to set the value, and the value itself.
struct InputPlane {
  InputPlane() = default;

  // Marks all 64 elements as set; the plane value is left untouched.
  void SetAll() { mask = ~0ull; }

  // Marks all 64 elements as set and assigns `val` as the plane value.
  void Fill(float val) {
    SetAll();
    value = val;
  }

  // Bit i set <=> element i of the plane carries `value` (otherwise 0).
  std::uint64_t mask = 0ull;
  // Value taken by the elements selected in `mask`.
  float value = 1.0f;
};
using InputPlanes = std::vector<InputPlane>;

// An interface to implement by computing backends.
class NetworkComputation {
 public:
  // Adds a sample to the batch.
  virtual void AddInput(InputPlanes&& input) = 0;
  // Do the computation.
  virtual void ComputeBlocking() = 0;
  // Returns how many times AddInput() was called.
  virtual int GetBatchSize() const = 0;
  // Returns Q value of @sample.
  virtual float GetQVal(int sample) const = 0;
  // Returns D value of @sample.
  virtual float GetDVal(int sample) const = 0;
  // Returns P value @move_id of @sample.
  virtual float GetPVal(int sample, int move_id) const = 0;
  virtual ~NetworkComputation() = default;
};

// A backend: a factory of computations (batches).
class Network {
 public:
  // Creates a new, empty computation.
  virtual std::unique_ptr<NetworkComputation> NewComputation() = 0;
  virtual ~Network() = default;
};

}  // namespace lczero
9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 17 | */ 18 | 19 | #include "neural/network_legacy.h" 20 | 21 | #include 22 | #include 23 | #include "utils/weights_adapter.h" 24 | 25 | namespace lczero { 26 | namespace { 27 | static constexpr float kEpsilon = 1e-5f; 28 | } // namespace 29 | 30 | LegacyWeights::LegacyWeights(const pblczero::Weights& weights) 31 | : input(weights.input()), 32 | policy1(weights.policy1()), 33 | policy(weights.policy()), 34 | ip_pol_w(LayerAdapter(weights.ip_pol_w()).as_vector()), 35 | ip_pol_b(LayerAdapter(weights.ip_pol_b()).as_vector()), 36 | value(weights.value()), 37 | ip1_val_w(LayerAdapter(weights.ip1_val_w()).as_vector()), 38 | ip1_val_b(LayerAdapter(weights.ip1_val_b()).as_vector()), 39 | ip2_val_w(LayerAdapter(weights.ip2_val_w()).as_vector()), 40 | ip2_val_b(LayerAdapter(weights.ip2_val_b()).as_vector()) { 41 | for (const auto& res : weights.residual()) { 42 | residual.emplace_back(res); 43 | } 44 | } 45 | 46 | LegacyWeights::SEunit::SEunit(const pblczero::Weights::SEunit& se) 47 | : w1(LayerAdapter(se.w1()).as_vector()), 48 | b1(LayerAdapter(se.b1()).as_vector()), 49 | w2(LayerAdapter(se.w2()).as_vector()), 50 | b2(LayerAdapter(se.b2()).as_vector()) {} 51 | 52 | LegacyWeights::Residual::Residual(const pblczero::Weights::Residual& residual) 53 | : conv1(residual.conv1()), 54 | conv2(residual.conv2()), 55 | se(residual.se()), 56 | has_se(residual.has_se()) {} 57 | 58 | LegacyWeights::ConvBlock::ConvBlock(const pblczero::Weights::ConvBlock& block) 59 | : weights(LayerAdapter(block.weights()).as_vector()), 60 | biases(LayerAdapter(block.biases()).as_vector()), 61 | 
bn_gammas(LayerAdapter(block.bn_gammas()).as_vector()), 62 | bn_betas(LayerAdapter(block.bn_betas()).as_vector()), 63 | bn_means(LayerAdapter(block.bn_means()).as_vector()), 64 | bn_stddivs(LayerAdapter(block.bn_stddivs()).as_vector()) { 65 | if (weights.size() == 0) { 66 | // Empty ConvBlock. 67 | return; 68 | } 69 | 70 | if (bn_betas.size() == 0) { 71 | // Old net without gamma and beta. 72 | for (auto i = size_t{0}; i < bn_means.size(); i++) { 73 | bn_betas.emplace_back(0.0f); 74 | bn_gammas.emplace_back(1.0f); 75 | } 76 | } 77 | if (biases.size() == 0) { 78 | for (auto i = size_t{0}; i < bn_means.size(); i++) { 79 | biases.emplace_back(0.0f); 80 | } 81 | } 82 | 83 | if (bn_means.size() == 0) { 84 | // No batch norm. 85 | return; 86 | } 87 | 88 | // Fold batch norm into weights and biases. 89 | // Variance to gamma. 90 | for (auto i = size_t{0}; i < bn_stddivs.size(); i++) { 91 | bn_gammas[i] *= 1.0f / std::sqrt(bn_stddivs[i] + kEpsilon); 92 | bn_means[i] -= biases[i]; 93 | } 94 | 95 | auto outputs = biases.size(); 96 | 97 | // We can treat the [inputs, filter_size, filter_size] dimensions as one. 98 | auto inputs = weights.size() / outputs; 99 | 100 | for (auto o = size_t{0}; o < outputs; o++) { 101 | for (auto c = size_t{0}; c < inputs; c++) { 102 | weights[o * inputs + c] *= bn_gammas[o]; 103 | } 104 | 105 | biases[o] = -bn_gammas[o] * bn_means[o] + bn_betas[o]; 106 | } 107 | 108 | // Batch norm weights are not needed anymore. 109 | bn_stddivs.clear(); 110 | bn_means.clear(); 111 | bn_betas.clear(); 112 | bn_gammas.clear(); 113 | } 114 | } // namespace lczero 115 | -------------------------------------------------------------------------------- /src/neural/network_legacy.h: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 
namespace lczero {

// DEPRECATED! DEPRECATED! DEPRECATED! DEPRECATED! DEPRECATED! DEPRECATED!!!
// Legacy structure describing network weights.
// Please try to migrate away from this structure; do not add anything new
// to it.
//
// NOTE(review): in this view of the file the template arguments of the
// std::vector uses below are missing (text between angle brackets appears to
// have been lost); the constructors in network_legacy.cc store float values
// in Vec and Residual objects in `residual` - confirm against the real file.

struct LegacyWeights {
  // Converts the protobuf weights message into this structure.
  explicit LegacyWeights(const pblczero::Weights& weights);

  using Vec = std::vector;

  // One convolution block. The constructor (network_legacy.cc) folds batch
  // norm into `weights` and `biases` and then clears the bn_* vectors.
  struct ConvBlock {
    explicit ConvBlock(const pblczero::Weights::ConvBlock& block);

    Vec weights;
    Vec biases;
    Vec bn_gammas;
    Vec bn_betas;
    Vec bn_means;
    Vec bn_stddivs;
  };

  // Squeeze-and-excitation unit: two weight/bias pairs.
  struct SEunit {
    explicit SEunit(const pblczero::Weights::SEunit& se);
    Vec w1;
    Vec b1;
    Vec w2;
    Vec b2;
  };

  // One block of the residual tower.
  struct Residual {
    explicit Residual(const pblczero::Weights::Residual& residual);
    ConvBlock conv1;
    ConvBlock conv2;
    SEunit se;
    // Copied from the message's has_se(); tells whether `se` is meaningful.
    bool has_se;
  };

  // Input convnet.
  ConvBlock input;

  // Residual tower.
  std::vector residual;

  // Policy head
  // Extra convolution for AZ-style policy head
  ConvBlock policy1;
  ConvBlock policy;
  Vec ip_pol_w;
  Vec ip_pol_b;

  // Value head
  ConvBlock value;
  Vec ip1_val_w;
  Vec ip1_val_b;
  Vec ip2_val_w;
  Vec ip2_val_b;
};

}  // namespace lczero
26 | */ 27 | 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include "neural/factory.h" 34 | #include "utils/hashcat.h" 35 | 36 | namespace lczero { 37 | namespace { 38 | 39 | class RandomNetworkComputation : public NetworkComputation { 40 | public: 41 | RandomNetworkComputation(int delay, int seed, bool uniform_mode) 42 | : delay_ms_(delay), seed_(seed), uniform_mode_(uniform_mode) {} 43 | 44 | void AddInput(InputPlanes&& input) override { 45 | std::uint64_t hash = seed_; 46 | for (const auto& plane : input) { 47 | hash = HashCat({hash, plane.mask}); 48 | std::uint32_t tmp; 49 | std::memcpy(&tmp, &plane.value, sizeof(float)); 50 | const std::uint64_t value_hash = tmp; 51 | hash = HashCat({hash, value_hash}); 52 | } 53 | inputs_.push_back(hash); 54 | } 55 | 56 | void ComputeBlocking() override { 57 | if (delay_ms_) { 58 | std::this_thread::sleep_for(std::chrono::milliseconds(delay_ms_)); 59 | } 60 | } 61 | 62 | int GetBatchSize() const override { return inputs_.size(); } 63 | 64 | float GetQVal(int sample) const override { 65 | if (uniform_mode_) return 0.0f; 66 | return (int(inputs_[sample] % 200000) - 100000) / 100000.0; 67 | } 68 | 69 | float GetDVal(int sample) const override { 70 | if (uniform_mode_) return 0.0f; 71 | // Maximum D value is 1 - abs(Q) for W, D, L to be in range [0.0, 1.0]. 72 | float q = GetQVal(sample); 73 | float max_d = 1.0f - std::fabs(q); 74 | // Hash in arbitrary constant to make D return different value from Q. 
75 | float d = max_d * (HashCat({inputs_[sample], 1234}) % 10000) / 10000.0; 76 | return d; 77 | } 78 | 79 | float GetPVal(int sample, int move_id) const override { 80 | if (uniform_mode_) return 1.0f; 81 | return (HashCat({inputs_[sample], static_cast(move_id)}) % 82 | 10000) / 83 | 10000.0; 84 | } 85 | 86 | private: 87 | std::vector inputs_; 88 | int delay_ms_ = 0; 89 | int seed_ = 0; 90 | bool uniform_mode_ = false; 91 | }; 92 | 93 | class RandomNetwork : public Network { 94 | public: 95 | RandomNetwork(const OptionsDict& options) 96 | : delay_ms_(options.GetOrDefault("delay", 0)), 97 | seed_(options.GetOrDefault("seed", 0)), 98 | uniform_mode_(options.GetOrDefault("uniform", false)) {} 99 | std::unique_ptr NewComputation() override { 100 | return std::make_unique(delay_ms_, seed_, uniform_mode_); 101 | } 102 | 103 | private: 104 | int delay_ms_ = 0; 105 | int seed_ = 0; 106 | bool uniform_mode_ = false; 107 | }; 108 | } // namespace 109 | 110 | std::unique_ptr MakeRandomNetwork(const WeightsFile& /*weights*/, 111 | const OptionsDict& options) { 112 | return std::make_unique(options); 113 | } 114 | 115 | REGISTER_NETWORK("random", MakeRandomNetwork, -900) 116 | 117 | } // namespace lczero 118 | -------------------------------------------------------------------------------- /src/neural/network_rr.cc: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 17 | 18 | Additional permission under GNU GPL version 3 section 7 19 | 20 | If you modify this Program, or any covered work, by linking or 21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA 22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a 23 | modified version of those libraries), containing parts covered by the 24 | terms of the respective license agreement, the licensors of this 25 | Program grant you additional permission to convey the resulting work. 26 | */ 27 | 28 | #include "neural/factory.h" 29 | 30 | #include 31 | #include 32 | #include 33 | #include "utils/exception.h" 34 | 35 | namespace lczero { 36 | namespace { 37 | 38 | class RoundRobinNetwork : public Network { 39 | public: 40 | RoundRobinNetwork(const WeightsFile& weights, const OptionsDict& options) { 41 | const auto parents = options.ListSubdicts(); 42 | if (parents.empty()) { 43 | // If options are empty, or multiplexer configured in root object, 44 | // initialize on root object and default backend. 
45 | auto backends = NetworkFactory::Get()->GetBackendsList(); 46 | AddBackend(backends[0], weights, options); 47 | } 48 | 49 | for (const auto& name : parents) { 50 | AddBackend(name, weights, options.GetSubdict(name)); 51 | } 52 | } 53 | 54 | void AddBackend(const std::string& name, const WeightsFile& weights, 55 | const OptionsDict& opts) { 56 | const std::string backend = opts.GetOrDefault("backend", name); 57 | 58 | networks_.emplace_back( 59 | NetworkFactory::Get()->Create(backend, weights, opts)); 60 | } 61 | 62 | std::unique_ptr NewComputation() override { 63 | const long long val = ++counter_; 64 | return networks_[val % networks_.size()]->NewComputation(); 65 | } 66 | 67 | ~RoundRobinNetwork() {} 68 | 69 | private: 70 | std::vector> networks_; 71 | std::atomic counter_; 72 | }; 73 | 74 | std::unique_ptr MakeRoundRobinNetwork(const WeightsFile& weights, 75 | const OptionsDict& options) { 76 | return std::make_unique(weights, options); 77 | } 78 | 79 | REGISTER_NETWORK("roundrobin", MakeRoundRobinNetwork, -999) 80 | 81 | } // namespace 82 | } // namespace lczero 83 | -------------------------------------------------------------------------------- /src/neural/network_st_batch.cc: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. 
If not, see . 17 | 18 | Additional permission under GNU GPL version 3 section 7 19 | 20 | If you modify this Program, or any covered work, by linking or 21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA 22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a 23 | modified version of those libraries), containing parts covered by the 24 | terms of the respective license agreement, the licensors of this 25 | Program grant you additional permission to convey the resulting work. 26 | */ 27 | 28 | #include "neural/network_st_batch.h" 29 | 30 | #include 31 | 32 | namespace lczero { 33 | 34 | SingleThreadBatchingNetwork::SingleThreadBatchingNetwork( 35 | std::unique_ptr parent) 36 | : parent_(std::move(parent)) {} 37 | 38 | std::unique_ptr 39 | SingleThreadBatchingNetwork::NewComputation() { 40 | ++computations_pending_; 41 | return std::make_unique(this); 42 | } 43 | 44 | void SingleThreadBatchingNetwork::Reset() { 45 | assert(computations_pending_ == 0); 46 | parent_computation_ = parent_->NewComputation(); 47 | } 48 | 49 | SingleThreadBatchingNetworkComputation::SingleThreadBatchingNetworkComputation( 50 | SingleThreadBatchingNetwork* network) 51 | : network_(network), 52 | start_idx_(network_->parent_computation_->GetBatchSize()) {} 53 | 54 | void SingleThreadBatchingNetworkComputation::AddInput(InputPlanes&& input) { 55 | assert(start_idx_ + batch_size_ == 56 | network_->parent_computation_->GetBatchSize()); 57 | ++batch_size_; 58 | network_->parent_computation_->AddInput(std::move(input)); 59 | } 60 | 61 | void SingleThreadBatchingNetworkComputation::ComputeBlocking() { 62 | if (--network_->computations_pending_ == 0) 63 | network_->parent_computation_->ComputeBlocking(); 64 | } 65 | 66 | float SingleThreadBatchingNetworkComputation::GetQVal(int sample) const { 67 | return network_->parent_computation_->GetQVal(sample - start_idx_); 68 | } 69 | 70 | float SingleThreadBatchingNetworkComputation::GetDVal(int sample) const { 71 | 
return network_->parent_computation_->GetDVal(sample - start_idx_); 72 | } 73 | 74 | float SingleThreadBatchingNetworkComputation::GetPVal(int sample, 75 | int move_id) const { 76 | return network_->parent_computation_->GetPVal(sample - start_idx_, move_id); 77 | } 78 | 79 | } // namespace lczero 80 | -------------------------------------------------------------------------------- /src/neural/network_st_batch.h: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 17 | 18 | Additional permission under GNU GPL version 3 section 7 19 | 20 | If you modify this Program, or any covered work, by linking or 21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA 22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a 23 | modified version of those libraries), containing parts covered by the 24 | terms of the respective license agreement, the licensors of this 25 | Program grant you additional permission to convey the resulting work. 26 | */ 27 | 28 | #pragma once 29 | 30 | #include "neural/network.h" 31 | 32 | namespace lczero { 33 | 34 | // This is a network that helps to combine batches from multiple games running 35 | // is a single thread. Not thread safe. 
// This is a network that helps to combine batches from multiple games running
// in a single thread. Not thread safe.
// Usage:
//   network.Reset();  // Creates new parent computation
//   computations = []
//   multiple times:
//     x = network.NewComputation()
//     computations += x
//     x.AddInput();
//     x.AddInput();
//     x.AddInput();
//     ...
//   for x in computations:
//     x.ComputeBlocking()  // Only last call actually computes, and they are
//                          // computed together in one batch.
//   for x in computations:
//     use(x)
//
// NOTE(review): in this view the template arguments of std::unique_ptr are
// missing (text between angle brackets appears to have been lost) - confirm
// against the real file.
class SingleThreadBatchingNetwork : public Network {
 public:
  // Takes ownership of the wrapped network.
  SingleThreadBatchingNetwork(std::unique_ptr parent);
  // Returns a computation that adds its samples to the shared parent batch.
  std::unique_ptr NewComputation() override;

  // Start a fresh batch.
  void Reset();

 private:
  // Wrapped network that performs the real evaluation.
  std::unique_ptr parent_;
  // Shared batch, replaced on every Reset().
  std::unique_ptr parent_computation_;
  // Computations handed out that have not called ComputeBlocking() yet.
  int computations_pending_ = 0;
  friend class SingleThreadBatchingNetworkComputation;
};

// A slice of the shared parent batch belonging to one caller.
class SingleThreadBatchingNetworkComputation : public NetworkComputation {
 public:
  SingleThreadBatchingNetworkComputation(SingleThreadBatchingNetwork* network);

  // Adds a sample to the parent batch.
  void AddInput(InputPlanes&& input) override;
  // May not actually compute immediately. Instead computes when all
  // computations of the network called this.
  void ComputeBlocking() override;
  // Returns how many times AddInput() was called.
  int GetBatchSize() const override { return batch_size_; }
  // Returns Q value of @sample.
  float GetQVal(int sample) const override;
  // Returns D value of @sample.
  float GetDVal(int sample) const override;
  // Returns P value @move_id of @sample.
  float GetPVal(int sample, int move_id) const override;

 private:
  // Owning network; not owned by this object.
  SingleThreadBatchingNetwork* const network_;
  // Size of the parent batch when this computation was created.
  int start_idx_;
  // Number of samples added through this computation.
  int batch_size_ = 0;
};

}  // namespace lczero
class OpenCL_Network;

// Per-instance OpenCL state: a command queue, the kernel objects and the
// device buffers declared below, plus the layer routines that operate on
// them. `forward` is the only public operation besides construction.
//
// NOTE(review): in this view the element types of the std::vector uses are
// missing (text between angle brackets appears to have been lost), so the
// actual types of `forward`'s arguments and of weight_slice_t cannot be
// stated here - confirm against the real file.
class OpenCLBuffers {
  friend class OpenCL;
  friend class OpenCL_Network;

 public:
  // Binds the buffers to the network description `opencl_net`.
  OpenCLBuffers(const OpenCL_Network& opencl_net);

  // Runs the network on `input` for `batch_size` samples and writes the
  // results into `output_pol` and `output_val`.
  // NOTE(review): presumably policy-head and value-head outputs
  // respectively - confirm in OpenCLBuffers.cc.
  void forward(const std::vector& input, std::vector& output_pol,
               std::vector& output_val, const int batch_size);

 private:
  // Iterator into a sequence of per-layer weight objects.
  using weight_slice_t = std::vector::const_iterator;

  // 3x3 convolution step.
  // NOTE(review): bufferV / bufferM and the *_transform flags suggest a
  // Winograd-style transform pipeline - confirm in the implementation.
  void convolve3(int channels, int outputs, cl::Buffer& bufferIn,
                 cl::Buffer& bufferOut, cl::Buffer& bufferV,
                 cl::Buffer& bufferM, weight_slice_t weights,
                 cl::Buffer* bufferResidual, weight_slice_t biases,
                 bool skip_in_transform, bool fuse_in_transform,
                 bool store_inout, bool relu, int batch_size);

  // 1x1 convolution step.
  void convolve1(int channels, int outputs, cl::Buffer& bufferInput,
                 cl::Buffer& bufferOutput, cl::Buffer& bufferMerge,
                 weight_slice_t weights, weight_slice_t biases, int batch_size);

  // Fully connected layer from `inputs` to `outputs` values per sample.
  void innerproduct(cl::Buffer& input, weight_slice_t weights,
                    weight_slice_t biases, cl::Buffer& output, const int inputs,
                    const int outputs, const int relu, int batch_size);

  // Squeeze-and-excitation step.
  void squeeze_excitation(int channels, int fc_outputs, cl::Buffer& bufferIn,
                          cl::Buffer& bufferTemp1, cl::Buffer& bufferTemp2,
                          weight_slice_t weights, cl::Buffer& bufferResidual,
                          int batch_size);

  // Policy-map step.
  // NOTE(review): presumably gathers `input` into `output` via `indices` -
  // confirm against the policymap kernel.
  void policymap(int N, const cl::Buffer& input, cl::Buffer& output,
                 const cl::Buffer& indices, int inputSize, int usedSize,
                 int outputSize);

  const OpenCL_Network& m_opencl_net;
  const OpenCL& m_opencl;

  // Queue and kernel objects used by the routines above.
  cl::CommandQueue m_commandqueue;
  cl::Kernel m_convolve1_kernel;
  cl::Kernel m_merge_kernel;
  cl::Kernel m_in_transform_kernel;
  cl::Kernel m_sgemm_kernel;
  cl::Kernel m_sgemv_kernel;
  cl::Kernel m_out_transform_bn_kernel;
  cl::Kernel m_out_transform_bn_in_kernel;
  cl::Kernel m_global_avg_pooling_kernel;
  cl::Kernel m_apply_se_kernel;
  cl::Kernel m_policymap_kernel;
  // Device buffers.
  cl::Buffer m_inBuffer;
  cl::Buffer m_inBuffer2;
  cl::Buffer m_VBuffer;
  cl::Buffer m_MBuffer;
  cl::Buffer m_pool_buffer;
  cl::Buffer m_pinnedOutBuffer_pol;
  cl::Buffer m_pinnedOutBuffer_val;
};
20 | */ 21 | 22 | #pragma once 23 | 24 | struct OpenCLParams { 25 | int gpuId = -1; 26 | 27 | bool tune_only = false; 28 | bool force_tune = false; 29 | bool tune_exhaustive = false; 30 | int tune_batch_size = 1; 31 | }; 32 | -------------------------------------------------------------------------------- /src/neural/opencl/OpenCLTuner.h: -------------------------------------------------------------------------------- 1 | /* 2 | Originally from the Leela Zero project. 3 | Copyright (C) 2017 Gian-Carlo Pascutto 4 | 5 | This file is part of Leela Chess Zero. 6 | Copyright (C) 2018 The LCZero Authors 7 | 8 | Leela Chess is free software: you can redistribute it and/or modify 9 | it under the terms of the GNU General Public License as published by 10 | the Free Software Foundation, either version 3 of the License, or 11 | (at your option) any later version. 12 | 13 | Leela Chess is distributed in the hope that it will be useful, 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | GNU General Public License for more details. 17 | 18 | You should have received a copy of the GNU General Public License 19 | along with Leela Chess. If not, see . 
20 | */ 21 | 22 | #pragma once 23 | 24 | #include 25 | #include 26 | #include 27 | 28 | #include "OpenCLParams.h" 29 | 30 | using Configurations = std::pair>; 31 | using TuneParameters = std::map; 32 | 33 | class OpenCL; 34 | 35 | class Tuner { 36 | OpenCL& m_opencl; 37 | const OpenCLParams& m_params; 38 | cl::Context m_context; 39 | cl::Device m_device; 40 | 41 | public: 42 | std::string tune_sgemm(const int m, const int n, const int k, 43 | const int batch_size, const int runs = 4); 44 | std::string load_sgemm_tuners(const int m, const int n, const int k, 45 | const int batch_size); 46 | 47 | static constexpr auto TUNER_VERSION = 0; 48 | Tuner(OpenCL& opencl, const OpenCLParams& params, cl::Context context, 49 | cl::Device device) 50 | : m_opencl(opencl), 51 | m_params(params), 52 | m_context(context), 53 | m_device(device) {} 54 | 55 | private: 56 | void store_sgemm_tuners(const int m, const int n, const int k, 57 | const int batch_size, std::string tuners); 58 | bool valid_config_sgemm(TuneParameters p, bool exhaustive); 59 | std::string parameters_to_defines(const TuneParameters& p); 60 | std::string parameters_to_string(const TuneParameters& p); 61 | TuneParameters get_parameters_by_int(const std::vector& opts, 62 | const int n); 63 | std::string sgemm_tuners_from_line(std::string line, const int m, const int n, 64 | const int k, const int batch_size); 65 | }; 66 | -------------------------------------------------------------------------------- /src/neural/opencl/README.md: -------------------------------------------------------------------------------- 1 | The files in this directory comprise the OpenCL backend of Lc0. 2 | 3 | ## License 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 
9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 17 | 18 | **The source files of this directory are not covered by any additional 19 | permission.** 20 | 21 | 22 | -------------------------------------------------------------------------------- /src/neural/opencl/clblast_level3/xgemm_batched.opencl: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // This file contains the batched version of the non-direct GEMM kernel. See part 1 for information 11 | // about the non-batched version of the kernel. 12 | // 13 | // ================================================================================================= 14 | 15 | // Enables loading of this file using the C++ pre-processor's #include (C++11 standard raw string 16 | // literal). Comment-out this line for syntax-highlighting when developing. 17 | R"( 18 | 19 | // ================================================================================================= 20 | 21 | // Main entry point of the kernel. This is the regular full version. 
22 | __kernel __attribute__((reqd_work_group_size(MDIMC, NDIMC, 1))) 23 | void XgemmBatched(const int kSizeM, const int kSizeN, const int kSizeK, 24 | const __global realM* restrict agm, 25 | const __global realN* restrict bgm, 26 | __global realM* restrict cgm) { 27 | const int batch = get_group_id(2); 28 | 29 | // Sets the offsets 30 | const int a_offset = kSizeM*kSizeK*batch; 31 | const int b_offset = kSizeK*kSizeN*batch; 32 | const int c_offset = kSizeM*kSizeN*batch; 33 | const __global realM* restrict agm_ = &agm[a_offset / VWM]; 34 | const __global realN* restrict bgm_ = &bgm[b_offset / VWN]; 35 | __global realM* restrict cgm_ = &cgm[c_offset / VWM]; 36 | 37 | // Allocates workgroup-private memory (local memory) 38 | #if SA == 1 39 | __local realM alm[KWG * MWG/VWM]; 40 | #endif 41 | #if SB == 1 42 | __local realN blm[KWG * NWG/VWN]; 43 | #endif 44 | 45 | // Computes the matrix-multiplication and stores the result in global memory 46 | #if SA == 1 && SB == 1 47 | XgemmBody(kSizeM, kSizeN, kSizeK, agm_, bgm_, cgm_, alm, blm); 48 | #elif SA == 1 49 | XgemmBody(kSizeM, kSizeN, kSizeK, agm_, bgm_, cgm_, alm); 50 | #elif SB == 1 51 | XgemmBody(kSizeM, kSizeN, kSizeK, agm_, bgm_, cgm_, blm); 52 | #else 53 | XgemmBody(kSizeM, kSizeN, kSizeK, agm_, bgm_, cgm_); 54 | #endif 55 | } 56 | 57 | // ================================================================================================= 58 | 59 | // End of the C++11 raw string literal 60 | )" 61 | 62 | // ================================================================================================= 63 | -------------------------------------------------------------------------------- /src/neural/opencl/clblast_level3/xgemm_part2.opencl: -------------------------------------------------------------------------------- 1 | 2 | // ================================================================================================= 3 | // This file is part of the CLBlast project. 
The project is licensed under Apache Version 2.0. This 4 | // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 | // width of 100 characters per line. 6 | // 7 | // Author(s): 8 | // Cedric Nugteren 9 | // 10 | // This is part 2 of 4 of the GEMM kernel. See part 1 for more information. 11 | // 12 | // ================================================================================================= 13 | 14 | // Enables loading of this file using the C++ pre-processor's #include (C++11 standard raw string 15 | // literal). Comment-out this line for syntax-highlighting when developing. 16 | R"( 17 | 18 | // ================================================================================================= 19 | 20 | // The vectorised multiply-add function 21 | INLINE_FUNC realM MultiplyAddVector(realM cvec, const realM avec, const real bval) { 22 | #if USE_VECTOR_MAD == 1 23 | cvec += avec * bval; 24 | #else 25 | #if VWM == 1 26 | MultiplyAdd(cvec, avec, bval); 27 | #elif VWM == 2 28 | MultiplyAdd(cvec.x , avec.x, bval); 29 | MultiplyAdd(cvec.y , avec.y, bval); 30 | #elif VWM == 4 31 | MultiplyAdd(cvec.x , avec.x, bval); 32 | MultiplyAdd(cvec.y , avec.y, bval); 33 | MultiplyAdd(cvec.z , avec.z, bval); 34 | MultiplyAdd(cvec.w , avec.w, bval); 35 | #elif VWM == 8 36 | MultiplyAdd(cvec.s0, avec.s0, bval); 37 | MultiplyAdd(cvec.s1, avec.s1, bval); 38 | MultiplyAdd(cvec.s2, avec.s2, bval); 39 | MultiplyAdd(cvec.s3, avec.s3, bval); 40 | MultiplyAdd(cvec.s4, avec.s4, bval); 41 | MultiplyAdd(cvec.s5, avec.s5, bval); 42 | MultiplyAdd(cvec.s6, avec.s6, bval); 43 | MultiplyAdd(cvec.s7, avec.s7, bval); 44 | #elif VWM == 16 45 | MultiplyAdd(cvec.s0, avec.s0, bval); 46 | MultiplyAdd(cvec.s1, avec.s1, bval); 47 | MultiplyAdd(cvec.s2, avec.s2, bval); 48 | MultiplyAdd(cvec.s3, avec.s3, bval); 49 | MultiplyAdd(cvec.s4, avec.s4, bval); 50 | MultiplyAdd(cvec.s5, avec.s5, bval); 51 | MultiplyAdd(cvec.s6, avec.s6, bval); 52 | MultiplyAdd(cvec.s7, 
avec.s7, bval); 53 | MultiplyAdd(cvec.s8, avec.s8, bval); 54 | MultiplyAdd(cvec.s9, avec.s9, bval); 55 | MultiplyAdd(cvec.sA, avec.sA, bval); 56 | MultiplyAdd(cvec.sB, avec.sB, bval); 57 | MultiplyAdd(cvec.sC, avec.sC, bval); 58 | MultiplyAdd(cvec.sD, avec.sD, bval); 59 | MultiplyAdd(cvec.sE, avec.sE, bval); 60 | MultiplyAdd(cvec.sF, avec.sF, bval); 61 | #endif 62 | #endif 63 | return cvec; 64 | } 65 | 66 | // ================================================================================================= 67 | 68 | // Merges the results in Cpm with the global array in Cgm. 69 | INLINE_FUNC void StoreResults(__global realM* cgm, realM cpm[NWI*MWI/VWM], const int kSizeM) { 70 | #pragma unroll 71 | for (int _ni = 0; _ni < NWI; _ni += 1) { 72 | #pragma unroll 73 | for (int _mi = 0; _mi < MWI/VWM; _mi += 1) { 74 | #if STRM == 0 75 | int mg = _mi + get_local_id(0)*(MWI/VWM); 76 | #elif STRM == 1 77 | int mg = get_local_id(0) + _mi*MDIMC; 78 | #endif 79 | #if STRN == 0 80 | int ng = _ni + get_local_id(1)*NWI; 81 | #elif STRN == 1 82 | int ng = _ni%VWN + get_local_id(1)*VWN + (_ni/VWN)*VWN*NDIMC; 83 | #endif 84 | int idm = mg + GetGroupID0() * (MWG/VWM); 85 | int idn = ng + GetGroupID1() * NWG; 86 | int index = idn*(kSizeM/VWM) + idm; 87 | 88 | cgm[index] = cpm[_ni * (MWI/VWM) + _mi]; 89 | 90 | } 91 | } 92 | } 93 | 94 | // ================================================================================================= 95 | 96 | // End of the C++11 raw string literal 97 | )" 98 | 99 | // ================================================================================================= 100 | -------------------------------------------------------------------------------- /src/neural/opencl/clsource/config.opencl: -------------------------------------------------------------------------------- 1 | // Enables loading of this file using the C++ pre-processor's #include (C++11 standard raw string 2 | // literal). Comment-out this line for syntax-highlighting when developing. 
R"(

// Element type used by every kernel in this directory, plus the matching
// load/store helpers (plain indexing for float).
typedef float net_t;
#define vload_net_t(offset,p) ((p)[(offset)])
#define vstore_net_t(data,offset,p) (((p)[(offset)])=(data))

// Board geometry: 8x8 squares per plane.
#define BOARD_SIZE 8
#define BOARD_SQUARES (BOARD_SIZE*BOARD_SIZE)

// End of the C++11 raw string literal
)"

// ---- /src/neural/opencl/clsource/policymap.opencl ----
// Enables loading of this file using the C++ pre-processor's #include (C++11 standard raw string
// literal). Comment-out this line for syntax-highlighting when developing.

R"(
// Scatters the first `usedSize` values of every input row into the output row
// of the same batch entry, using `indices` as the destination table:
//
//   output[n * outputSize + indices[i]] = input[n * inputSize + i]
//
// One work-item handles one (n, i) pair; the flat global id is split as
// n = tid / usedSize, i = tid % usedSize.
//
//   input      - N rows of `inputSize` values
//   output     - N rows of `outputSize` values; entries that no index maps to
//                are left untouched by this kernel
//   indices    - `usedSize` destination columns; a negative entry means
//                "drop this input value"
//   N          - number of rows (work-items with n >= N do nothing)
__kernel void policymap(
    __global const net_t * restrict input,
    __global net_t * restrict output,
    __global const short * restrict indices,
    const int N,
    const int inputSize,
    const int usedSize,
    const int outputSize) {

  const int tid = get_global_id(0);

  const int n = tid / usedSize;
  const int i = tid % usedSize;

  // The global size may be rounded up past N * usedSize.
  if (n >= N) return;

  const int j = indices[i];

  // Negative entries are unmapped. An entry >= outputSize would land in the
  // next row (or past the buffer), so it is ignored as well.
  if (j >= 0 && j < outputSize) {
    output[n * outputSize + j] = input[n * inputSize + i];
  }
}
// End of the C++11 raw string literal
)"

// ---- /src/neural/opencl/clsource/se.opencl ----
// Enables loading of this file using the C++ pre-processor's #include (C++11 standard raw string
// literal). Comment-out this line for syntax-highlighting when developing.
R"(
// Reduces every BOARD_SIZE x BOARD_SIZE plane of `in` to its mean value.
//
// Global dimension 0 is the board column, global dimension 1 the plane index
// `c` (compared against `channels`). Each work-item sums one column; the
// work-item with local id 0 then adds the column sums and writes out[c].
//
// NOTE(review): the reduction reads row_acc[0..BOARD_SIZE-1], so it assumes the
// kernel is enqueued with a work-group of exactly BOARD_SIZE items along
// dimension 0 and one plane per group - confirm against the host enqueue call.
__kernel void global_avg_pooling(
    const int channels,
    __global const net_t * restrict in,
    __global net_t * restrict out) {

  const int col = get_global_id(0);  // column
  const int c = get_global_id(1);    // channel

  const int lid = get_local_id(0);

  // One partial (column) sum per work-item of the group.
  __local net_t row_acc[BOARD_SIZE];

  const int in_range = (c < channels) && (col < BOARD_SIZE);

  if (in_range) {
    net_t acc = 0.0f;

    for (int i = 0; i < BOARD_SIZE; i++) {
      acc += vload_net_t(c * BOARD_SQUARES + i * BOARD_SIZE + col, in);
    }
    row_acc[lid] = acc;
  }

  // Every work-item of the group has to reach the barrier, so it stays
  // outside the range check.
  barrier(CLK_LOCAL_MEM_FENCE);

  // Only a work-item that passed the range check may publish a result:
  // otherwise row_acc was never written and out[c] would be out of bounds.
  if (lid == 0 && in_range) {
    net_t acc = 0.0f;
    for (int i = 0; i < BOARD_SIZE; i++) {
      acc += row_acc[i];
    }
    acc = acc / BOARD_SQUARES;
    vstore_net_t(acc, c, out);
  }
}

// Applies the squeeze-and-excitation scale/shift, adds the residual and clamps
// at zero, writing the result back into `residual`:
//
//   residual[idx] = max(0, sigmoid(gamma) * input[idx] + residual[idx] + beta)
//
// Global dimension 0 is the board column, global dimension 1 the flattened
// (batch, channel) plane index `c`. `fc_out` is read at c + batch * channels
// for gamma and `channels` entries further for beta, i.e. it holds
// 2 * channels values per batch entry (gammas first, then betas).
__kernel void apply_se(
    const int channels,
    const int batch_size,
    __global const net_t * restrict input,
    __global net_t * restrict residual,
    __constant const net_t * restrict fc_out) {

  const int col = get_global_id(0);  // column
  const int c = get_global_id(1);    // channel

  const int batch = c / channels;

  if (c < batch_size * channels && col < BOARD_SIZE) {
    net_t gamma = vload_net_t(c + batch * channels, fc_out);
    gamma = 1.0f / (1.0f + exp(-gamma));  // Sigmoid
    const net_t beta = vload_net_t(c + batch * channels + channels, fc_out);

    // Each work-item walks one column of the plane.
    for (int i = 0; i < BOARD_SIZE; i++) {
      const int idx = c * BOARD_SQUARES + i * BOARD_SIZE + col;
      const net_t in = vload_net_t(idx, input);
      const net_t res = vload_net_t(idx, residual);

      net_t val = gamma * in + res + beta;

      val = val > 0.0f ? val : 0.0f;  // ReLU

      vstore_net_t(val, idx, residual);
    }
  }
}
// End of the C++11 raw string literal
)"
namespace lczero {
namespace {
constexpr int kWidth = 8;
constexpr int kHeight = 8;
// Number of values in one channel plane. size_t so that it can be compared
// with and multiplied by the size_t loop counters without sign conversion.
constexpr size_t kSquares = kWidth * kHeight;
}  // namespace

// Writes softmax(input[0..size)) to output[0..size).
//
// The maximum input is subtracted before exponentiating, so the largest
// exponent is exp(0) == 1 and large inputs cannot overflow.
// `input` and `output` must each hold `size` floats. An empty range
// (size == 0) is a no-op and the pointers are not touched.
void SoftmaxActivation(const size_t size, const float* input, float* output) {
  // std::max_element() returns the end iterator for an empty range;
  // dereferencing it would be undefined behaviour.
  if (size == 0) return;

  const float alpha = *std::max_element(input, input + size);

  float denom = 0.0f;
  for (size_t i = 0; i < size; i++) {
    const float val = std::exp(input[i] - alpha);
    output[i] = val;
    denom += val;
  }
  for (size_t i = 0; i < size; i++) {
    output[i] = output[i] / denom;
  }
}

// In-place bias add, optional residual add and optional ReLU.
//
// `data` is indexed as [batch][channel][square] with kSquares values per
// plane. For every value:
//
//   data = (eltwise ? eltwise + data : data) + biases[channel]
//   if (relu) data = max(data, 0)
//
// `biases` holds one value per channel and is shared by all batch entries.
// `eltwise`, when not null, has the same layout as `data`.
void BiasResidualRelu(const size_t batch_size, const size_t channels,
                      float* data, const float* biases, const float* eltwise,
                      const bool relu) {
  for (size_t i = 0; i < batch_size; i++) {
    for (size_t c = 0; c < channels; ++c) {
      const float bias = biases[c];
      float* arr = &data[c * kSquares];
      const float* res = eltwise != nullptr ? &eltwise[c * kSquares] : nullptr;

      for (size_t b = 0; b < kSquares; b++) {
        // Same evaluation order as before: (residual + value) + bias.
        float val = res != nullptr ? res[b] + arr[b] + bias : arr[b] + bias;
        if (relu) {
          val = val > 0 ? val : 0;
        }
        arr[b] = val;
      }
    }
    // Advance to the next batch entry.
    data += channels * kSquares;
    if (eltwise != nullptr) eltwise += channels * kSquares;
  }
}
}  // namespace lczero

// ---- /src/neural/shared/activation.h ----
namespace lczero {

// Softmax activation
void SoftmaxActivation(const size_t size, const float* input, float* output);

// Adds the per-channel bias (and the residual `eltwise`, if given) to `data`
// in place, optionally followed by ReLU.
void BiasResidualRelu(const size_t batch_size, const size_t channels,
                      float* data, const float* biases,
                      const float* eltwise = nullptr,
                      const bool relu = true);
}  // namespace lczero
namespace lczero {
namespace {

// Side length and element count of one Winograd tile. size_t, because both
// are only ever used in size_t index arithmetic and loop bounds.
constexpr size_t kWinogradAlpha = 4;
constexpr size_t kWinogradTile = kWinogradAlpha * kWinogradAlpha;

}  // namespace

// Copies U, indexed [xi][nu][channel][output], into a zero-filled matrix with
// the same index order but `channels_pad` x `outputs_pad` inner dimensions.
// Entries outside the original `channels` x `outputs` range stay zero.
std::vector<float> WinogradFilterZeropadU(const std::vector<float>& U,
                                          const size_t outputs,
                                          const size_t channels,
                                          const size_t outputs_pad,
                                          const size_t channels_pad) {
  // Value-initialised, i.e. filled with zeroes.
  auto Upad = std::vector<float>(kWinogradTile * outputs_pad * channels_pad);

  for (size_t o = 0; o < outputs; o++) {
    for (size_t c = 0; c < channels; c++) {
      for (size_t xi = 0; xi < kWinogradAlpha; xi++) {
        for (size_t nu = 0; nu < kWinogradAlpha; nu++) {
          Upad[xi * (kWinogradAlpha * outputs_pad * channels_pad) +
               nu * (outputs_pad * channels_pad) + c * outputs_pad + o] =
              U[xi * (kWinogradAlpha * outputs * channels) +
                nu * (outputs * channels) + c * outputs + o];
        }
      }
    }
  }
  return Upad;
}

// F(2x2, 3x3) Winograd filter transformation
// transpose(G.dot(f).dot(G.transpose()))
// U matrix is transposed for better memory layout in SGEMM
//
// `f` is indexed [output][channel][3][3]; the result is indexed
// [xi][nu][channel][output] and holds kWinogradTile * outputs * channels
// values.
std::vector<float> WinogradFilterTransformF(const std::vector<float>& f,
                                            const size_t outputs,
                                            const size_t channels) {
  auto U = std::vector<float>(kWinogradTile * outputs * channels);
  // G is the 4x3 transform matrix, stored row-major.
  const std::array<float, 12> G = {1.0f, 0.0f,  0.0f, 0.5f, 0.5f, 0.5f,
                                   0.5f, -0.5f, 0.5f, 0.0f, 0.0f, 1.0f};
  // temp = G * f for the current 3x3 filter (4x3, row-major).
  std::array<float, 12> temp{};

  for (size_t o = 0; o < outputs; o++) {
    for (size_t c = 0; c < channels; c++) {
      for (size_t i = 0; i < kWinogradAlpha; i++) {
        for (size_t j = 0; j < 3; j++) {
          auto acc = 0.0f;
          for (size_t k = 0; k < 3; k++) {
            acc += G[i * 3 + k] * f[o * channels * 9 + c * 9 + k * 3 + j];
          }
          temp[i * 3 + j] = acc;
        }
      }

      // U = temp * G^T, written directly in the transposed layout.
      for (size_t xi = 0; xi < kWinogradAlpha; xi++) {
        for (size_t nu = 0; nu < kWinogradAlpha; nu++) {
          auto acc = 0.0f;
          for (size_t k = 0; k < 3; k++) {
            acc += temp[xi * 3 + k] * G[nu * 3 + k];
          }
          U[xi * (kWinogradAlpha * outputs * channels) +
            nu * (outputs * channels) + c * outputs + o] = acc;
        }
      }
    }
  }
  return U;
}

}  // namespace lczero

// ---- /src/neural/shared/winograd_filter.h ----
namespace lczero {

// Here are BLAS-free methods to setup the filter
// for the 3x3 winograd convolution algorithm.
//
// Ref:
//
// Fast Algorithms for Convolutional Neural Networks
// https://arxiv.org/abs/1509.09308
//
// https://ai.intel.com/winograd/
// https://ai.intel.com/winograd-2/

// Convolution filter for 3x3 Winograd algorithm

// Create the zero-padded U matrix.
std::vector<float> WinogradFilterZeropadU(const std::vector<float>& U,
                                          const size_t outputs,
                                          const size_t channels,
                                          const size_t outputs_pad,
                                          const size_t channels_pad);

// Create the filter transform matrix.
std::vector<float> WinogradFilterTransformF(const std::vector<float>& f,
                                            const size_t outputs,
                                            const size_t channels);

}  // namespace lczero
26 | */ 27 | 28 | #include "neural/writer.h" 29 | 30 | #include 31 | #include 32 | #include "utils/commandline.h" 33 | #include "utils/exception.h" 34 | #include "utils/filesystem.h" 35 | #include "utils/random.h" 36 | 37 | namespace lczero { 38 | 39 | TrainingDataWriter::TrainingDataWriter(int game_id) { 40 | static std::string directory = 41 | CommandLine::BinaryDirectory() + "/data-" + Random::Get().GetString(12); 42 | // It's fine if it already exists. 43 | CreateDirectory(directory.c_str()); 44 | 45 | std::ostringstream oss; 46 | oss << directory << '/' << "game_" << std::setfill('0') << std::setw(6) 47 | << game_id << ".gz"; 48 | 49 | filename_ = oss.str(); 50 | fout_ = gzopen(filename_.c_str(), "wb"); 51 | if (!fout_) throw Exception("Cannot create gzip file " + filename_); 52 | } 53 | 54 | void TrainingDataWriter::WriteChunk(const V4TrainingData& data) { 55 | auto bytes_written = 56 | gzwrite(fout_, reinterpret_cast(&data), sizeof(data)); 57 | if (bytes_written != sizeof(data)) { 58 | throw Exception("Unable to write into " + filename_); 59 | } 60 | } 61 | 62 | void TrainingDataWriter::Finalize() { 63 | gzclose(fout_); 64 | fout_ = nullptr; 65 | } 66 | 67 | } // namespace lczero 68 | -------------------------------------------------------------------------------- /src/neural/writer.h: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 17 | 18 | Additional permission under GNU GPL version 3 section 7 19 | 20 | If you modify this Program, or any covered work, by linking or 21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA 22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a 23 | modified version of those libraries), containing parts covered by the 24 | terms of the respective license agreement, the licensors of this 25 | Program grant you additional permission to convey the resulting work. 26 | */ 27 | 28 | #include 29 | #include 30 | #include "utils/cppattributes.h" 31 | 32 | #pragma once 33 | 34 | namespace lczero { 35 | 36 | #pragma pack(push, 1) 37 | 38 | struct V4TrainingData { 39 | uint32_t version; 40 | float probabilities[1858]; 41 | uint64_t planes[104]; 42 | uint8_t castling_us_ooo; 43 | uint8_t castling_us_oo; 44 | uint8_t castling_them_ooo; 45 | uint8_t castling_them_oo; 46 | uint8_t side_to_move; 47 | uint8_t rule50_count; 48 | uint8_t move_count; 49 | int8_t result; 50 | float root_q; 51 | float best_q; 52 | float root_d; 53 | float best_d; 54 | } PACKED_STRUCT; 55 | static_assert(sizeof(V4TrainingData) == 8292, "Wrong struct size"); 56 | 57 | #pragma pack(pop) 58 | 59 | class TrainingDataWriter { 60 | public: 61 | // Creates a new file to write in data directory. It will has @game_id 62 | // somewhere in the filename. 63 | TrainingDataWriter(int game_id); 64 | 65 | ~TrainingDataWriter() { 66 | if (fout_) Finalize(); 67 | } 68 | 69 | // Writes a chunk. 70 | void WriteChunk(const V4TrainingData& data); 71 | 72 | // Flushes file and closes it. 73 | void Finalize(); 74 | 75 | // Gets full filename of the file written. 
76 | std::string GetFileName() const { return filename_; } 77 | 78 | private: 79 | std::string filename_; 80 | gzFile fout_; 81 | }; 82 | 83 | } // namespace lczero 84 | -------------------------------------------------------------------------------- /src/selfplay/game.h: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 17 | 18 | Additional permission under GNU GPL version 3 section 7 19 | 20 | If you modify this Program, or any covered work, by linking or 21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA 22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a 23 | modified version of those libraries), containing parts covered by the 24 | terms of the respective license agreement, the licensors of this 25 | Program grant you additional permission to convey the resulting work. 
namespace lczero {

// Search limits used for selfplay moves: everything SearchLimits carries plus
// a movetime field.
struct SelfPlayLimits : SearchLimits {
  // Movetime
  // NOTE(review): the unit is not visible in this header (presumably
  // milliseconds) -- confirm against the code that fills it in.
  std::int64_t movetime;
};

// Everything one side of a selfplay game needs. All pointers are non-owning
// from the point of view of this struct; two players may share the same
// network/cache (see the SelfPlayGame constructor comment).
struct PlayerOptions {
  // Network to use by the player.
  Network* network;
  // Callback when player moves.
  BestMoveInfo::Callback best_move_callback;
  // Callback when player outputs info.
  ThinkingInfo::Callback info_callback;
  // NNcache to use.
  NNCache* cache;
  // User options dictionary.
  const OptionsDict* uci_options;
  // Limits to use for every move.
  SelfPlayLimits search_limits;
};

// Plays a single game vs itself.
class SelfPlayGame {
 public:
  // Player options may point to the same network/cache/etc.
  // If shared_tree is true, search tree is reused between players.
  // (useful for training games). Otherwise the tree is separate for black
  // and white (useful i.e. when they use different networks).
  SelfPlayGame(PlayerOptions player1, PlayerOptions player2, bool shared_tree);

  // Populate command line options that it uses.
  static void PopulateUciParams(OptionsParser* options);

  // Starts the game and blocks until the game is finished.
  // white_threads / black_threads are per-side thread counts; `training` and
  // `enable_resign` are mode switches whose exact effect is defined in the
  // implementation file, not here.
  void Play(int white_threads, int black_threads, bool training,
            bool enable_resign = true);
  // Aborts the game currently played, doesn't matter if it's synchronous or
  // not.
  void Abort();

  // Writes training data to a file.
  void WriteTrainingData(TrainingDataWriter* writer) const;

  // Returns the stored result; stays GameResult::UNDECIDED until something
  // assigns game_result_.
  GameResult GetGameResult() const { return game_result_; }
  // Returns the moves of the game.
  // NOTE(review): the element type of the returned vector is not visible in
  // this copy of the header.
  std::vector GetMoves() const;
  // Gets the eval which required the biggest swing up to get the final outcome.
  // Eval is the expected outcome in the range 0<->1.
  float GetWorstEvalForWinnerOrDraw() const;

 private:
  // options_[0] is for white player, [1] for black.
  PlayerOptions options_[2];
  // Node tree for player1 and player2. If the tree is shared between players,
  // tree_[0] == tree_[1].
  std::shared_ptr tree_[2];

  // Search that is currently in progress. Stored in members so that Abort()
  // can stop it.
  std::unique_ptr search_;
  // Abort flag; starts out false.
  bool abort_ = false;
  GameResult game_result_ = GameResult::UNDECIDED;
  // Track minimum eval for each player so that GetWorstEvalForWinnerOrDraw()
  // can be calculated after end of game.
  float min_eval_[2] = {1.0f, 1.0f};
  // NOTE(review): presumably guards search_ / abort_ between Play() and
  // Abort(); the header does not say which members it protects.
  std::mutex mutex_;

  // Training data to send.
  std::vector training_data_;
};

}  // namespace lczero
namespace lczero {

// UCI-style command loop for selfplay mode. Derives from UciLoop and
// overrides its loop entry point and the start / uci / setoption command
// handlers.
class SelfPlayLoop : public UciLoop {
 public:
  SelfPlayLoop();
  ~SelfPlayLoop();

  // Entry point of the loop (overrides UciLoop::RunLoop).
  void RunLoop() override;
  // Handler for the "start" command.
  void CmdStart() override;
  // Handler for the "uci" command.
  void CmdUci() override;
  // Handler for "setoption": receives the option name, its value and a
  // context string.
  void CmdSetOption(const std::string& name, const std::string& value,
                    const std::string& context) override;

 private:
  // Reports information about a game to the client.
  void SendGameInfo(const GameInfo& move);
  // Reports tournament-level information to the client.
  void SendTournament(const TournamentInfo& info);

  // NOTE(review): presumably makes sure the option list has been emitted
  // before anything depends on it -- confirm in the implementation file.
  void EnsureOptionsSent();
  // Parser holding the options known to this loop.
  OptionsParser options_;

  // Tournament owned by this loop (empty until one is created).
  std::unique_ptr tournament_;
  // Thread owned by this loop.
  // NOTE(review): presumably the thread that runs the tournament -- confirm.
  std::unique_ptr thread_;
};

}  // namespace lczero
namespace lczero {

// Runs many selfplay games, possibly in parallel.
class SelfPlayTournament {
 public:
  // Takes the options dictionary and the four callbacks that are stored in
  // the *_callback_ members below.
  SelfPlayTournament(const OptionsDict& options,
                     BestMoveInfo::Callback best_move_info,
                     ThinkingInfo::Callback thinking_info,
                     GameInfo::Callback game_info,
                     TournamentInfo::Callback tournament_info);

  // Populate command line options that it uses.
  static void PopulateOptions(OptionsParser* options);

  // Starts worker threads and exits immediately.
  void StartAsync();

  // Starts tournament and waits until it finishes.
  void RunBlocking();

  // Blocks until all worker threads finish.
  void Wait();

  // Tells worker threads to finish ASAP. Does not block.
  void Abort();

  // If there are ongoing games, aborts and waits.
  ~SelfPlayTournament();

 private:
  // Body of one worker thread.
  void Worker();
  // Plays the game with the given id.
  void PlayOneGame(int game_id);

  Mutex mutex_;
  // Whether next game will be black for player1.
  bool next_game_black_ GUARDED_BY(mutex_) = false;
  // Number of games which already started.
  int games_count_ GUARDED_BY(mutex_) = 0;
  // Abort flag, guarded by mutex_; starts out false.
  bool abort_ GUARDED_BY(mutex_) = false;
  // Games in progress. Kept here so that Abort() can abort them. Stored as a
  // list and not a vector so that threads can keep iterators to their games
  // without worrying that they become invalid.
  std::list> games_ GUARDED_BY(mutex_);
  // Place to store tournament stats.
  TournamentInfo tournament_info_ GUARDED_BY(mutex_);

  // Protects the container of worker threads (separate from mutex_).
  Mutex threads_mutex_;
  std::vector threads_ GUARDED_BY(threads_mutex_);

  // All those are [0] for player1 and [1] for player2
  // Shared pointers for both players may point to the same object.
  std::shared_ptr networks_[2];
  std::shared_ptr cache_[2];
  const OptionsDict player_options_[2];
  SelfPlayLimits search_limits_[2];

  // Callbacks received in the constructor.
  BestMoveInfo::Callback best_move_callback_;
  ThinkingInfo::Callback info_callback_;
  GameInfo::Callback game_callback_;
  TournamentInfo::Callback tournament_callback_;
  // Immutable tournament settings.
  // NOTE(review): presumably read from the options dictionary in the
  // constructor; kThreads is indexed per player like the arrays above --
  // confirm in the implementation file.
  const int kThreads[2];
  const int kTotalGames;
  const bool kShareTree;
  const size_t kParallelism;
  const bool kTraining;
  const float kResignPlaythrough;
};

}  // namespace lczero
namespace lczero {

// Returns the zero-based index of the least significant set bit of |value|.
// |value| must be non-zero: neither the MSVC intrinsics nor
// __builtin_ctzll() define a result for 0.
inline unsigned long GetLowestBit(std::uint64_t value) {
#if defined(_MSC_VER) && defined(_WIN64)
  unsigned long result;
  _BitScanForward64(&result, value);
  return result;
#elif defined(_MSC_VER)
  // 32-bit MSVC has no 64-bit scan: look at the low half first, then at the
  // high half (offset by 32).
  unsigned long result;
  if (value & 0xFFFFFFFF) {
    _BitScanForward(&result, static_cast<unsigned long>(value & 0xFFFFFFFF));
  } else {
    _BitScanForward(&result, static_cast<unsigned long>(value >> 32));
    result += 32;
  }
  return result;
#else
  return __builtin_ctzll(value);
#endif
}

// Iterates over all set bits of the value, lower to upper. The value of
// dereferenced iterator is bit number (lower to upper, 0 based), converted
// to T.
template <typename T>
class BitIterator {
 public:
  BitIterator(std::uint64_t value) : value_(value) {}
  // Two iterators are equal when the same bits remain to be visited; the end
  // iterator is the one with no bits left (value 0).
  bool operator!=(const BitIterator& other) const {
    return value_ != other.value_;
  }

  // Clears the lowest set bit, i.e. moves on to the next higher set bit.
  void operator++() { value_ &= (value_ - 1); }
  // Index of the lowest remaining set bit. Must not be called on the end
  // iterator (see GetLowestBit).
  T operator*() const { return GetLowestBit(value_); }

 private:
  std::uint64_t value_;
};

// Range adaptor for range-based for loops:
//   for (auto bit : IterateBits(mask)) { ... }
class IterateBits {
 public:
  IterateBits(std::uint64_t value) : value_(value) {}
  BitIterator<int> begin() const { return value_; }
  BitIterator<int> end() const { return 0; }

 private:
  std::uint64_t value_;
};

}  // namespace lczero
17 | 18 | Additional permission under GNU GPL version 3 section 7 19 | 20 | If you modify this Program, or any covered work, by linking or 21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA 22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a 23 | modified version of those libraries), containing parts covered by the 24 | terms of the respective license agreement, the licensors of this 25 | Program grant you additional permission to convey the resulting work. 26 | */ 27 | 28 | #include "utils/commandline.h" 29 | #include "utils/logging.h" 30 | 31 | namespace lczero { 32 | 33 | std::string CommandLine::binary_; 34 | std::vector CommandLine::arguments_; 35 | std::vector> CommandLine::modes_; 36 | 37 | void CommandLine::Init(int argc, const char** argv) { 38 | binary_ = argv[0]; 39 | arguments_.clear(); 40 | std::ostringstream params; 41 | for (int i = 1; i < argc; ++i) { 42 | params << ' ' << argv[i]; 43 | arguments_.push_back(argv[i]); 44 | } 45 | LOGFILE << "Command line: " << binary_ << params.str(); 46 | } 47 | 48 | bool CommandLine::ConsumeCommand(const std::string& command) { 49 | if (arguments_.empty()) return false; 50 | if (arguments_[0] != command) return false; 51 | arguments_.erase(arguments_.begin()); 52 | return true; 53 | } 54 | 55 | void CommandLine::RegisterMode(const std::string& mode, 56 | const std::string& description) { 57 | modes_.emplace_back(mode, description); 58 | } 59 | 60 | std::string CommandLine::BinaryDirectory() { 61 | std::string path = binary_; 62 | const auto pos = path.find_last_of("\\/"); 63 | if (pos == std::string::npos) { 64 | return "."; 65 | } 66 | path.resize(pos); 67 | return path; 68 | } 69 | 70 | } // namespace lczero 71 | -------------------------------------------------------------------------------- /src/utils/commandline.h: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 
namespace lczero {

// Process-wide holder of the command line. The class cannot be instantiated
// (constructor is deleted); all state lives in static members.
class CommandLine {
 public:
  CommandLine() = delete;

  // This function must be called before any other.
  static void Init(int argc, const char** argv);

  // Name of the executable filename that was run.
  static const std::string& BinaryName() { return binary_; }

  // Directory where the binary is run. Without trailing slash.
  static std::string BinaryDirectory();

  // If the first command line parameter is @command, remove it and return
  // true. Otherwise return false.
  static bool ConsumeCommand(const std::string& command);

  // Command line arguments.
  static const std::vector& Arguments() { return arguments_; }

  // Registers a mode name together with its description; the pairs are
  // available through GetModes().
  static void RegisterMode(const std::string& mode,
                           const std::string& description);

  // Returns everything registered through RegisterMode().
  static const std::vector>& GetModes() {
    return modes_;
  }

 private:
  static std::string binary_;
  static std::vector arguments_;
  static std::vector> modes_;
};

}  // namespace lczero
namespace lczero {

class OptionsParser;

// Static-only access to the arguments read from a configuration file. The
// class cannot be instantiated (constructor is deleted).
class ConfigFile {
 public:
  ConfigFile() = delete;

  // This function must be called after PopulateOptions.
  // NOTE(review): the bool result presumably reports success -- confirm in
  // the implementation file.
  static bool Init(OptionsParser* options);

  // Returns the command line arguments from the config file.
  static const std::vector& Arguments() { return arguments_; }

  // Add the config file parameter to the options dictionary.
  static void PopulateOptions(OptionsParser* options);

 private:
  // Parses the config file into the arguments_ vector.
  static bool ParseFile(const std::string& filename, OptionsParser* options);

  // NOTE(review): presumably extracts the config file name from the given
  // argument list -- confirm in the implementation file.
  static std::string ProcessConfigFlag(const std::vector& args);

  // Arguments collected by ParseFile().
  static std::vector arguments_;
};

}  // namespace lczero
// Enable thread safety attributes only with clang.
// The attributes can be safely erased when compiling with other compilers.
#if defined(__clang__) && (!defined(SWIG))
#define ATTRIBUTE__(x) __attribute__((x))
#else
#define ATTRIBUTE__(x) // no-op
#endif

// Wrappers around the clang thread safety analysis attributes. Every macro
// below expands through ATTRIBUTE__, so all of them vanish when the compiler
// is not clang (or when SWIG is defined).

// Attributes for types: a lockable capability and a scoped (RAII-style) lock.
#define CAPABILITY(x) ATTRIBUTE__(capability(x))
#define SCOPED_CAPABILITY ATTRIBUTE__(scoped_lockable)
// Attributes for data members: which capability protects the member itself
// (GUARDED_BY) or the data it points to (PT_GUARDED_BY).
#define GUARDED_BY(x) ATTRIBUTE__(guarded_by(x))
#define PT_GUARDED_BY(x) ATTRIBUTE__(pt_guarded_by(x))
// Lock ordering declarations.
#define ACQUIRED_BEFORE(...) ATTRIBUTE__(acquired_before(__VA_ARGS__))
#define ACQUIRED_AFTER(...) ATTRIBUTE__(acquired_after(__VA_ARGS__))
// Attributes for functions: capabilities required, acquired, released or
// excluded by a call.
#define REQUIRES(...) ATTRIBUTE__(requires_capability(__VA_ARGS__))
#define REQUIRES_SHARED(...) \
  ATTRIBUTE__(requires_shared_capability(__VA_ARGS__))
#define ACQUIRE(...) ATTRIBUTE__(acquire_capability(__VA_ARGS__))
#define ACQUIRE_SHARED(...) ATTRIBUTE__(acquire_shared_capability(__VA_ARGS__))
#define RELEASE(...) ATTRIBUTE__(release_capability(__VA_ARGS__))
#define RELEASE_SHARED(...) ATTRIBUTE__(release_shared_capability(__VA_ARGS__))
#define TRY_ACQUIRE(...) ATTRIBUTE__(try_acquire_capability(__VA_ARGS__))
#define TRY_ACQUIRE_SHARED(...) \
  ATTRIBUTE__(try_acquire_shared_capability(__VA_ARGS__))
#define EXCLUDES(...) ATTRIBUTE__(locks_excluded(__VA_ARGS__))
#define ASSERT_CAPABILITY(x) ATTRIBUTE__(assert_capability(x))
#define ASSERT_SHARED_CAPABILITY(x) ATTRIBUTE__(assert_shared_capability(x))
#define RETURN_CAPABILITY(x) ATTRIBUTE__(lock_returned(x))
// NOTE(review): unlike the analysis-only attributes above, `packed` changes
// struct layout. Because this also goes through ATTRIBUTE__, it expands to
// nothing on non-clang compilers, so a struct marked PACKED_STRUCT would not
// be packed there -- confirm that no on-disk/on-wire layout relies on it.
#define PACKED_STRUCT ATTRIBUTE__(packed)

// Turns the analysis off for a single function.
#define NO_THREAD_SAFETY_ANALYSIS ATTRIBUTE__(no_thread_safety_analysis)
36 | class Exception : public std::runtime_error { 37 | public: 38 | Exception(const std::string& what) : std::runtime_error(what) { 39 | LOGFILE << "Exception: " << what; 40 | } 41 | }; 42 | 43 | } // namespace lczero 44 | -------------------------------------------------------------------------------- /src/utils/fastmath.h: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018-2019 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 17 | 18 | Additional permission under GNU GPL version 3 section 7 19 | 20 | If you modify this Program, or any covered work, by linking or 21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA 22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a 23 | modified version of those libraries), containing parts covered by the 24 | terms of the respective license agreement, the licensors of this 25 | Program grant you additional permission to convey the resulting work. 26 | */ 27 | 28 | #pragma once 29 | 30 | #include 31 | 32 | namespace lczero { 33 | // These stunts are performed by trained professionals, do not try this at home. 34 | 35 | // Fast approximate log2(x). Does no range checking. 
namespace lczero {
// These stunts are performed by trained professionals, do not try this at home.

// Fast approximate log2(x). Does no range checking.
// The approximation used here is log2(2^N*(1+f)) ~ N+f*(1.342671-0.342671*f)
// where N is the integer and f the fractional part, f>=0.
inline float FastLog2(const float a) {
  // Reinterpret the float as its raw 32-bit pattern.
  std::uint32_t bits;
  std::memcpy(&bits, &a, sizeof(float));
  // Everything above the 23 mantissa bits (biased exponent, plus the sign bit
  // if it is set).
  const std::uint32_t biased_exponent = bits >> 23;
  // Keep the mantissa and force the exponent field to 0x7f, which yields a
  // float in [1, 2).
  bits = (bits & 0x7fffff) | (0x7f << 23);
  float mantissa;
  std::memcpy(&mantissa, &bits, sizeof(float));
  return mantissa * (2.028011f - 0.342671f * mantissa) - 128.68534f +
         biased_exponent;
}

// Fast approximate 2^x. Does only limited range checking.
// The approximation used here is 2^(N+f) ~ 2^N*(1+f*(0.656366+0.343634*f))
// where N is the integer and f the fractional part, f>=0.
inline float FastPow2(const float a) {
  // The only range check: inputs below -126 return 0.
  if (a < -126) return 0.0f;
  const std::int32_t whole = static_cast<std::int32_t>(std::floor(a));
  float result = a - whole;
  result = 1.0f + result * (0.656366f + 0.343634f * result);
  // Add the integer part straight into the exponent field of the float.
  std::int32_t bits;
  std::memcpy(&bits, &result, sizeof(float));
  bits += static_cast<std::int32_t>(static_cast<std::uint32_t>(whole) << 23);
  std::memcpy(&result, &bits, sizeof(float));
  return result;
}

// Fast approximate ln(x). Does no range checking.
// Computed as ln(2) * FastLog2(x).
inline float FastLog(const float a) {
  return FastLog2(a) * 0.6931471805599453f;
}

}  // namespace lczero
namespace lczero {

// Creates directory at a given path. Throws exception if cannot.
// Returns silently if already exists.
void CreateDirectory(const std::string& path);

// Returns the names of the regular files in this directory. The names are
// bare directory entries, NOT prefixed with |directory|: both the POSIX and
// the Win32 implementation store the entry name only.
// Silently returns empty vector on error.
std::vector GetFileList(const std::string& directory);

// Returns size of a file. Throws exception if file doesn't exist.
uint64_t GetFileSize(const std::string& filename);

// Returns modification time of a file. Throws exception if file doesn't exist.
// NOTE(review): the unit differs per platform. The POSIX implementation
// returns the seconds field of the stat() modification time, while the Win32
// implementation returns the raw 64-bit FILETIME value. Only compare values
// obtained on the same platform.
time_t GetFileTime(const std::string& filename);

}  // namespace lczero
9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 17 | 18 | Additional permission under GNU GPL version 3 section 7 19 | 20 | If you modify this Program, or any covered work, by linking or 21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA 22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a 23 | modified version of those libraries), containing parts covered by the 24 | terms of the respective license agreement, the licensors of this 25 | Program grant you additional permission to convey the resulting work. 26 | */ 27 | 28 | #include "utils/exception.h" 29 | #include "utils/filesystem.h" 30 | 31 | #include 32 | #include 33 | #include 34 | 35 | namespace lczero { 36 | 37 | void CreateDirectory(const std::string& path) { 38 | if (mkdir(path.c_str(), 0777) < 0 && errno != EEXIST) { 39 | throw Exception("Cannot create directory: " + path); 40 | } 41 | } 42 | 43 | std::vector GetFileList(const std::string& directory) { 44 | std::vector result; 45 | DIR* dir = opendir(directory.c_str()); 46 | if (!dir) return result; 47 | while (auto* entry = readdir(dir)) { 48 | bool exists = false; 49 | switch (entry->d_type) { 50 | case DT_REG: 51 | exists = true; 52 | break; 53 | case DT_LNK: 54 | // check that the soft link actually points to a regular file. 
55 | const std::string filename = directory + "/" + entry->d_name; 56 | struct stat s; 57 | exists = 58 | stat(filename.c_str(), &s) == 0 && (s.st_mode & S_IFMT) == S_IFREG; 59 | break; 60 | } 61 | if (exists) result.push_back(entry->d_name); 62 | } 63 | closedir(dir); 64 | return result; 65 | } 66 | 67 | uint64_t GetFileSize(const std::string& filename) { 68 | struct stat s; 69 | if (stat(filename.c_str(), &s) < 0) { 70 | throw Exception("Cannot stat file: " + filename); 71 | } 72 | return s.st_size; 73 | } 74 | 75 | time_t GetFileTime(const std::string& filename) { 76 | struct stat s; 77 | if (stat(filename.c_str(), &s) < 0) { 78 | throw Exception("Cannot stat file: " + filename); 79 | } 80 | #ifdef __APPLE__ 81 | return s.st_mtimespec.tv_sec; 82 | #else 83 | return s.st_mtim.tv_sec; 84 | #endif 85 | } 86 | 87 | } // namespace lczero 88 | -------------------------------------------------------------------------------- /src/utils/filesystem.win32.cc: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 
17 | 18 | Additional permission under GNU GPL version 3 section 7 19 | 20 | If you modify this Program, or any covered work, by linking or 21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA 22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a 23 | modified version of those libraries), containing parts covered by the 24 | terms of the respective license agreement, the licensors of this 25 | Program grant you additional permission to convey the resulting work. 26 | */ 27 | 28 | #include "utils/exception.h" 29 | #include "utils/filesystem.h" 30 | 31 | #include 32 | #undef CreateDirectory 33 | 34 | namespace lczero { 35 | 36 | void CreateDirectory(const std::string& path) { 37 | if (CreateDirectoryA(path.c_str(), nullptr)) return; 38 | if (GetLastError() != ERROR_ALREADY_EXISTS) { 39 | throw Exception("Cannot create directory: " + path); 40 | } 41 | } 42 | 43 | std::vector GetFileList(const std::string& directory) { 44 | std::vector result; 45 | WIN32_FIND_DATAA dir; 46 | const auto handle = FindFirstFileA((directory + "\\*").c_str(), &dir); 47 | if (handle == INVALID_HANDLE_VALUE) return result; 48 | do { 49 | if ((dir.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) == 0) { 50 | result.emplace_back(dir.cFileName); 51 | } 52 | } while (FindNextFile(handle, &dir) != 0); 53 | FindClose(handle); 54 | return result; 55 | } 56 | 57 | uint64_t GetFileSize(const std::string& filename) { 58 | WIN32_FILE_ATTRIBUTE_DATA s; 59 | if (!GetFileAttributesExA(filename.c_str(), GetFileExInfoStandard, &s)) { 60 | throw Exception("Cannot stat file: " + filename); 61 | } 62 | return (static_cast(s.nFileSizeHigh) << 32) + s.nFileSizeLow; 63 | } 64 | 65 | time_t GetFileTime(const std::string& filename) { 66 | WIN32_FILE_ATTRIBUTE_DATA s; 67 | if (!GetFileAttributesExA(filename.c_str(), GetFileExInfoStandard, &s)) { 68 | throw Exception("Cannot stat file: " + filename); 69 | } 70 | return (static_cast(s.ftLastWriteTime.dwHighDateTime) << 32) + 71 | 
s.ftLastWriteTime.dwLowDateTime; 72 | } 73 | 74 | } // namespace lczero 75 | -------------------------------------------------------------------------------- /src/utils/hashcat.h: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 17 | 18 | Additional permission under GNU GPL version 3 section 7 19 | 20 | If you modify this Program, or any covered work, by linking or 21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA 22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a 23 | modified version of those libraries), containing parts covered by the 24 | terms of the respective license agreement, the licensors of this 25 | Program grant you additional permission to convey the resulting work. 26 | */ 27 | 28 | #include 29 | #include 30 | 31 | #pragma once 32 | namespace lczero { 33 | 34 | // Tries to scramble @val. 35 | inline uint64_t Hash(uint64_t val) { 36 | return 0xfad0d7f2fbb059f1ULL * (val + 0xbaad41cdcb839961ULL) + 37 | 0x7acec0050bf82f43ULL * ((val >> 31) + 0xd571b3a92b1b2755ULL); 38 | } 39 | 40 | // Appends value to a hash. 
41 | inline uint64_t HashCat(uint64_t hash, uint64_t x) { 42 | hash ^= 0x299799adf0d95defULL + Hash(x) + (hash << 6) + (hash >> 2); 43 | return hash; 44 | } 45 | 46 | // Combines 64-bit values into concatenated hash. 47 | inline uint64_t HashCat(std::initializer_list args) { 48 | uint64_t hash = 0; 49 | for (uint64_t x : args) hash = HashCat(hash, x); 50 | return hash; 51 | } 52 | 53 | } // namespace lczero 54 | -------------------------------------------------------------------------------- /src/utils/hashcat_test.cc: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 17 | 18 | Additional permission under GNU GPL version 3 section 7 19 | 20 | If you modify this Program, or any covered work, by linking or 21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA 22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a 23 | modified version of those libraries), containing parts covered by the 24 | terms of the respective license agreement, the licensors of this 25 | Program grant you additional permission to convey the resulting work. 
26 | */ 27 | 28 | #include "utils/hashcat.h" 29 | #include 30 | 31 | namespace lczero { 32 | 33 | TEST(HashCat, TestCollision) { 34 | uint64_t hash1 = HashCat({0x8000000010500000, 0x4000080000002000, 35 | 0x8000000000002000, 0x4000000000000000}); 36 | uint64_t hash2 = HashCat({0x4000000010500000, 0x1000080000002000, 37 | 0x4000000000002000, 0x1000000000000000}); 38 | EXPECT_NE(hash1, hash2); 39 | } 40 | 41 | } // namespace lczero 42 | 43 | int main(int argc, char** argv) { 44 | ::testing::InitGoogleTest(&argc, argv); 45 | return RUN_ALL_TESTS(); 46 | } 47 | -------------------------------------------------------------------------------- /src/utils/histogram.cc: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 17 | 18 | Additional permission under GNU GPL version 3 section 7 19 | 20 | If you modify this Program, or any covered work, by linking or 21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA 22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a 23 | modified version of those libraries), containing parts covered by the 24 | terms of the respective license agreement, the licensors of this 25 | Program grant you additional permission to convey the resulting work. 
26 | */ 27 | 28 | #include "utils/histogram.h" 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | namespace lczero { 37 | 38 | namespace { 39 | void Print(const std::string& what) { std::cerr << what; } 40 | 41 | void PrintAligned(const std::string& what, int aligned) { 42 | std::cerr << std::right << std::setw(aligned) << what; 43 | } 44 | 45 | std::string Format(const std::string& format, double value) { 46 | static const int kMaxBufferSize = 32; 47 | char buffer[kMaxBufferSize]; 48 | const int len = snprintf(buffer, kMaxBufferSize, format.c_str(), value); 49 | return std::string(buffer, buffer + len); 50 | } 51 | } // namespace 52 | 53 | Histogram::Histogram() 54 | : Histogram(kDefaultMinExp, kDefaultMaxExp, kDefaultMinorScales) {} 55 | 56 | Histogram::Histogram(int min_exp, int max_exp, int minor_scales) 57 | : min_exp_(min_exp), 58 | max_exp_(max_exp), 59 | minor_scales_(minor_scales), 60 | major_scales_(max_exp_ - min_exp_ + 1), 61 | total_scales_(major_scales_ * minor_scales_), 62 | buckets_(total_scales_ + 4) { 63 | Clear(); 64 | } 65 | 66 | void Histogram::Clear() { 67 | std::fill(buckets_.begin(), buckets_.end(), 0); 68 | total_ = 0; 69 | max_ = 0; 70 | } 71 | 72 | void Histogram::Add(double value) { 73 | const int index = GetIndex(std::abs(value)); 74 | const int count = ++buckets_[index]; 75 | total_++; 76 | if (count > max_) max_ = count; 77 | } 78 | 79 | void Histogram::Dump() const { 80 | const double ymax = 0.02 + max_ / (double)total_; 81 | for (int i = 0; i < 100; i++) { 82 | const double yscale = 1 - i * 0.01; 83 | if (yscale > ymax) continue; 84 | const bool scale = i % 5 == 0; 85 | if (scale) { 86 | PrintAligned(Format("%.2g", yscale), 5); 87 | Print(" +"); 88 | } else { 89 | Print(" |"); 90 | } 91 | const double ymin = (99 - i) * 0.01; 92 | for (size_t j = 0; j < buckets_.size(); j++) { 93 | const double val = buckets_[j] / (double)total_; 94 | if (val > ymin) { 95 | Print("#"); 96 | } else { 97 | 
Print(" "); 98 | } 99 | } 100 | if (scale) { 101 | Print("+"); 102 | } else { 103 | Print("|"); 104 | } 105 | Print("\n"); 106 | } 107 | Print(" +"); 108 | for (int j = 0; j <= major_scales_; j++) { 109 | const int size = j == 0 ? 5 : minor_scales_; 110 | for (int k = 0; k < size - 1; k++) Print("-"); 111 | Print("+"); 112 | } 113 | Print("\n"); 114 | Print(" -inf"); 115 | for (int j = 0; j < major_scales_; j++) { 116 | const int size = j == 0 ? 5 : minor_scales_; 117 | Print(" "); 118 | PrintAligned(Format("%g", min_exp_ + j), size - 1); 119 | } 120 | Print(" "); 121 | PrintAligned("+inf", minor_scales_ - 2); 122 | Print(" \n"); 123 | } 124 | 125 | int Histogram::GetIndex(double val) const { 126 | if (val <= 0) return 0; 127 | const double log10 = std::log10(val); 128 | // 2: -15 : -15.1 ... -14.9 2 ... 3 129 | // 1: -15.3 ... -15.1 130 | // 0: -15.5 ... -15.3 0 ... 1 131 | const int index = 132 | static_cast(std::floor(2.5 + minor_scales_ * (log10 - min_exp_))); 133 | if (index < 0) return 0; 134 | if (index >= total_scales_) return total_scales_ + 3; 135 | return index + 2; 136 | } 137 | 138 | } // namespace lczero 139 | -------------------------------------------------------------------------------- /src/utils/histogram.h: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 
namespace lczero {

// Histogram with a logarithmic x-axis, rendered as an ASCII chart:
//
//   0.50 +
//        |
//        |
//   0.40 +
//
//        ....
//
//   0.10 +
//        |#
//        |#  ##                      #|
//        |# # # ####    #    #   #   #|
//   0.00 +----+----+----+----+- ... -+----+
//
//      -inf  -15  -14  -13  -12       5  inf
//
// The y-axis is the fraction of all samples that fell into a bucket; the
// x-axis is the decimal exponent of the sample magnitude.
class Histogram {
 public:
  // Creates a histogram with default scales.
  Histogram();

  // Creates a histogram from 10^min_exp to 10^max_exp
  // with minor_scales spacing.
  Histogram(int min_exp, int max_exp, int minor_scales);

  // Forgets all samples added so far.
  void Clear();

  // Adds a sample.
  void Add(double value);

  // Dumps the histogram to stderr.
  void Dump() const;

 private:
  // Returns the index of the bucket that @val belongs to.
  int GetIndex(double val) const;

  static constexpr int kDefaultMinExp = -15;
  static constexpr int kDefaultMaxExp = 5;
  static constexpr int kDefaultMinorScales = 5;

  // NOTE: the declaration order below is significant. The constructor
  // initializes major_scales_, total_scales_ and buckets_ from the members
  // declared before them.
  const int min_exp_;
  const int max_exp_;
  const int minor_scales_;
  const int major_scales_;
  const int total_scales_;
  // Per-bucket sample counts.
  std::vector<double> buckets_;
  // Number of samples added since the last Clear().
  double total_;
  // Largest count held by any single bucket.
  double max_;
};

}  // namespace lczero
26 | */ 27 | 28 | #include "utils/logging.h" 29 | #include 30 | #include 31 | #include 32 | 33 | namespace lczero { 34 | 35 | namespace { 36 | size_t kBufferSizeLines = 200; 37 | const char* kStderrFilename = ""; 38 | } // namespace 39 | 40 | Logging& Logging::Get() { 41 | static Logging logging; 42 | return logging; 43 | } 44 | 45 | void Logging::WriteLineRaw(const std::string& line) { 46 | Mutex::Lock lock_(mutex_); 47 | if (filename_.empty()) { 48 | buffer_.push_back(line); 49 | if (buffer_.size() > kBufferSizeLines) buffer_.pop_front(); 50 | } else { 51 | auto& file = (filename_ == kStderrFilename) ? std::cerr : file_; 52 | file << line << std::endl; 53 | } 54 | } 55 | 56 | void Logging::SetFilename(const std::string& filename) { 57 | Mutex::Lock lock_(mutex_); 58 | if (filename_ == filename) return; 59 | filename_ = filename; 60 | if (filename.empty() || filename == kStderrFilename) { 61 | file_.close(); 62 | } 63 | if (filename.empty()) return; 64 | if (filename != kStderrFilename) file_.open(filename, std::ios_base::app); 65 | auto& file = (filename == kStderrFilename) ? std::cerr : file_; 66 | file << "\n\n============= Log started. 
=============" << std::endl; 67 | for (const auto& line : buffer_) file << line << std::endl; 68 | buffer_.clear(); 69 | } 70 | 71 | LogMessage::LogMessage(const char* file, int line) { 72 | *this << FormatTime(std::chrono::system_clock::now()) << ' ' 73 | << std::setfill(' ') << std::this_thread::get_id() << std::setfill('0') 74 | << ' ' << file << ':' << line << "] "; 75 | } 76 | 77 | LogMessage::~LogMessage() { Logging::Get().WriteLineRaw(str()); } 78 | 79 | StderrLogMessage::StderrLogMessage(const char* file, int line) 80 | : log_(file, line) {} 81 | 82 | StderrLogMessage::~StderrLogMessage() { 83 | std::cerr << str() << std::endl; 84 | log_ << str(); 85 | } 86 | 87 | std::chrono::time_point SteadyClockToSystemClock( 88 | std::chrono::time_point time) { 89 | return std::chrono::system_clock::now() + 90 | std::chrono::duration_cast( 91 | time - std::chrono::steady_clock::now()); 92 | } 93 | 94 | std::string FormatTime( 95 | std::chrono::time_point time) { 96 | std::ostringstream ss; 97 | using namespace std::chrono; 98 | const auto us = 99 | duration_cast(time.time_since_epoch()).count() % 1000000; 100 | auto timer = std::chrono::system_clock::to_time_t(time); 101 | ss << std::put_time(std::localtime(&timer), "%m%d %T") << '.' 102 | << std::setfill('0') << std::setw(6) << us; 103 | return ss.str(); 104 | } 105 | 106 | } // namespace lczero -------------------------------------------------------------------------------- /src/utils/logging.h: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 
9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 17 | 18 | Additional permission under GNU GPL version 3 section 7 19 | 20 | If you modify this Program, or any covered work, by linking or 21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA 22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a 23 | modified version of those libraries), containing parts covered by the 24 | terms of the respective license agreement, the licensors of this 25 | Program grant you additional permission to convey the resulting work. 26 | */ 27 | 28 | #pragma once 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | #include "utils/mutex.h" 37 | 38 | namespace lczero { 39 | 40 | class Logging { 41 | public: 42 | static Logging& Get(); 43 | 44 | // Sets the name of the log. Empty name disables logging. 45 | void SetFilename(const std::string& filename); 46 | 47 | private: 48 | // Writes line to the log, and appends new line character. 
49 | void WriteLineRaw(const std::string& line); 50 | 51 | Mutex mutex_; 52 | std::string filename_ GUARDED_BY(mutex_); 53 | std::ofstream file_ GUARDED_BY(mutex_); 54 | std::deque buffer_ GUARDED_BY(mutex_); 55 | 56 | Logging() = default; 57 | friend class LogMessage; 58 | }; 59 | 60 | class LogMessage : public std::ostringstream { 61 | public: 62 | LogMessage(const char* file, int line); 63 | ~LogMessage(); 64 | }; 65 | 66 | class StderrLogMessage : public std::ostringstream { 67 | public: 68 | StderrLogMessage(const char* file, int line); 69 | ~StderrLogMessage(); 70 | 71 | private: 72 | LogMessage log_; 73 | }; 74 | 75 | std::chrono::time_point SteadyClockToSystemClock( 76 | std::chrono::time_point time); 77 | 78 | std::string FormatTime(std::chrono::time_point time); 79 | } // namespace lczero 80 | 81 | #define LOGFILE ::lczero::LogMessage(__FILE__, __LINE__) 82 | #define CERR ::lczero::StderrLogMessage(__FILE__, __LINE__) -------------------------------------------------------------------------------- /src/utils/mutex.h: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 
17 | 18 | Additional permission under GNU GPL version 3 section 7 19 | 20 | If you modify this Program, or any covered work, by linking or 21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA 22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a 23 | modified version of those libraries), containing parts covered by the 24 | terms of the respective license agreement, the licensors of this 25 | Program grant you additional permission to convey the resulting work. 26 | */ 27 | 28 | #pragma once 29 | 30 | #include 31 | #include 32 | #include 33 | #include "utils/cppattributes.h" 34 | 35 | namespace lczero { 36 | 37 | // Implementation of reader-preferenced shared mutex. Based on fair shared 38 | // mutex. 39 | class CAPABILITY("mutex") RpSharedMutex { 40 | public: 41 | RpSharedMutex() : waiting_readers_(0) {} 42 | 43 | void lock() ACQUIRE() { 44 | while (true) { 45 | mutex_.lock(); 46 | if (waiting_readers_ == 0) return; 47 | mutex_.unlock(); 48 | } 49 | } 50 | void unlock() RELEASE() { mutex_.unlock(); } 51 | void lock_shared() ACQUIRE_SHARED() { 52 | ++waiting_readers_; 53 | mutex_.lock_shared(); 54 | } 55 | void unlock_shared() RELEASE_SHARED() { 56 | --waiting_readers_; 57 | mutex_.unlock_shared(); 58 | } 59 | 60 | private: 61 | std::shared_timed_mutex mutex_; 62 | std::atomic waiting_readers_; 63 | }; 64 | 65 | // std::mutex wrapper for clang thread safety annotation. 66 | class CAPABILITY("mutex") Mutex { 67 | public: 68 | // std::unique_lock wrapper. 
69 | class SCOPED_CAPABILITY Lock { 70 | public: 71 | Lock(Mutex& m) ACQUIRE(m) : lock_(m.get_raw()) {} 72 | ~Lock() RELEASE() {} 73 | std::unique_lock& get_raw() { return lock_; } 74 | 75 | private: 76 | std::unique_lock lock_; 77 | }; 78 | 79 | void lock() ACQUIRE() { mutex_.lock(); } 80 | void unlock() RELEASE() { mutex_.unlock(); } 81 | std::mutex& get_raw() { return mutex_; } 82 | 83 | private: 84 | std::mutex mutex_; 85 | }; 86 | 87 | // std::shared_mutex wrapper for clang thread safety annotation. 88 | class CAPABILITY("mutex") SharedMutex { 89 | public: 90 | // std::unique_lock wrapper. 91 | class SCOPED_CAPABILITY Lock { 92 | public: 93 | Lock(SharedMutex& m) ACQUIRE(m) : lock_(m.get_raw()) {} 94 | ~Lock() RELEASE() {} 95 | 96 | private: 97 | std::unique_lock lock_; 98 | }; 99 | 100 | // std::shared_lock wrapper. 101 | class SCOPED_CAPABILITY SharedLock { 102 | public: 103 | SharedLock(SharedMutex& m) ACQUIRE_SHARED(m) : lock_(m.get_raw()) {} 104 | ~SharedLock() RELEASE() {} 105 | 106 | private: 107 | std::shared_lock lock_; 108 | }; 109 | 110 | void lock() ACQUIRE() { mutex_.lock(); } 111 | void unlock() RELEASE() { mutex_.unlock(); } 112 | void lock_shared() ACQUIRE_SHARED() { mutex_.lock_shared(); } 113 | void unlock_shared() RELEASE_SHARED() { mutex_.unlock_shared(); } 114 | 115 | std::shared_timed_mutex& get_raw() { return mutex_; } 116 | 117 | private: 118 | std::shared_timed_mutex mutex_; 119 | }; 120 | 121 | } // namespace lczero 122 | -------------------------------------------------------------------------------- /src/utils/optional.h: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 
namespace lczero {

// Very poor-man implementation of std::optional. It literally cannot do
// anything, but it's enough for our use case.
//
// T must be default-constructible and copy-assignable: the wrapped value
// always exists as a member and is merely flagged as present or absent.
template <class T>
class optional {
 public:
  // True when a value has been assigned and not reset since.
  operator bool() const { return has_value_; }
  // Access to the stored value. Only meaningful while a value is present.
  constexpr const T& operator*() const& { return value_; }
  constexpr const T* operator->() const& { return &value_; }
  // Stores a copy of @value and marks the optional as engaged.
  optional<T>& operator=(const T& value) {
    value_ = value;
    has_value_ = true;
    return *this;
  }
  // Marks the optional as empty. The previously stored object is kept alive
  // but is no longer reported.
  void reset() { has_value_ = false; }
  // Returns the stored value, or @def when empty.
  T value_or(const T& def) const { return has_value_ ? value_ : def; }

 private:
  // Value-initialized so that copying an empty optional of a trivial type
  // (e.g. optional<int>) never reads an indeterminate value.
  T value_{};
  bool has_value_ = false;
};

}  // namespace lczero
17 | */ 18 | 19 | #include "utils/optionsparser.h" 20 | #include 21 | #include 22 | 23 | namespace lczero { 24 | 25 | TEST(OptionsParser, CheckInvalidOption) { 26 | OptionsParser options; 27 | const OptionId id{"this-is-a-valid-option", "this-is-a-valid-option", "help", 28 | 'a'}; 29 | options.Add(id) = ""; 30 | EXPECT_NO_THROW( 31 | options.SetUciOption("this-is-a-valid-option", "valid-value")); 32 | EXPECT_THROW(options.SetUciOption("this-is-an-invalid-option", "0"), 33 | Exception); 34 | } 35 | 36 | TEST(OptionsParser, IntOptionCheckValueConstraints) { 37 | OptionsParser options; 38 | const OptionId id{"int-test-a", "int-test-a", "help", 'a'}; 39 | options.Add(id, 25, 75) = 50; 40 | 41 | EXPECT_NO_THROW(options.SetUciOption("int-test-a", "25")); 42 | EXPECT_NO_THROW(options.SetUciOption("int-test-a", "50")); 43 | EXPECT_NO_THROW(options.SetUciOption("int-test-a", "75")); 44 | EXPECT_THROW(options.SetUciOption("int-test-a", "0"), Exception); 45 | EXPECT_THROW(options.SetUciOption("int-test-a", "100"), Exception); 46 | } 47 | 48 | TEST(OptionsParser, FloatOptionCheckValueConstraints) { 49 | OptionsParser options; 50 | const OptionId id{"float-test-a", "float-test-a", "help", 'a'}; 51 | options.Add(id, 25.0f, 75.0f) = 50.0f; 52 | 53 | EXPECT_NO_THROW(options.SetUciOption("float-test-a", "25.0")); 54 | EXPECT_NO_THROW(options.SetUciOption("float-test-a", "50.0")); 55 | EXPECT_NO_THROW(options.SetUciOption("float-test-a", "75.0")); 56 | EXPECT_THROW(options.SetUciOption("float-test-a", "0.0"), Exception); 57 | EXPECT_THROW(options.SetUciOption("float-test-a", "100.0"), Exception); 58 | } 59 | 60 | TEST(OptionsParser, BoolOptionsCheckValueConstraints) { 61 | OptionsParser options; 62 | const OptionId id{"bool-test-a", "bool-test-a", "help", 'a'}; 63 | options.Add(id) = false; 64 | 65 | EXPECT_NO_THROW(options.SetUciOption("bool-test-a", "true")); 66 | EXPECT_NO_THROW(options.SetUciOption("bool-test-a", "false")); 67 | EXPECT_THROW(options.SetUciOption("bool-test-a", 
"leela"), Exception); 68 | } 69 | 70 | TEST(OptionsParser, ChoiceOptionCheckValueConstraints) { 71 | OptionsParser options; 72 | const OptionId id{"choice-test-a", "choice-test-a", "help", 'a'}; 73 | std::vector choices; 74 | choices.push_back("choice-a"); 75 | choices.push_back("choice-b"); 76 | choices.push_back("choice-c"); 77 | options.Add(id, choices) = "choice-a"; 78 | 79 | EXPECT_NO_THROW(options.SetUciOption("choice-test-a", "choice-a")); 80 | EXPECT_NO_THROW(options.SetUciOption("choice-test-a", "choice-b")); 81 | EXPECT_NO_THROW(options.SetUciOption("choice-test-a", "choice-c")); 82 | EXPECT_THROW(options.SetUciOption("choice-test-a", "choice-d"), Exception); 83 | } 84 | 85 | } // namespace lczero 86 | 87 | int main(int argc, char** argv) { 88 | ::testing::InitGoogleTest(&argc, argv); 89 | return RUN_ALL_TESTS(); 90 | } 91 | -------------------------------------------------------------------------------- /src/utils/random.cc: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 
17 | 18 | Additional permission under GNU GPL version 3 section 7 19 | 20 | If you modify this Program, or any covered work, by linking or 21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA 22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a 23 | modified version of those libraries), containing parts covered by the 24 | terms of the respective license agreement, the licensors of this 25 | Program grant you additional permission to convey the resulting work. 26 | */ 27 | 28 | #include "random.h" 29 | #include 30 | 31 | namespace lczero { 32 | 33 | Random::Random() : gen_(std::random_device()()) {} 34 | 35 | Random& Random::Get() { 36 | static Random rand; 37 | return rand; 38 | } 39 | 40 | int Random::GetInt(int min, int max) { 41 | Mutex::Lock lock(mutex_); 42 | std::uniform_int_distribution<> dist(min, max); 43 | return dist(gen_); 44 | } 45 | 46 | bool Random::GetBool() { return GetInt(0, 1) != 0; } 47 | 48 | double Random::GetDouble(double maxval) { 49 | Mutex::Lock lock(mutex_); 50 | std::uniform_real_distribution<> dist(0.0, maxval); 51 | return dist(gen_); 52 | } 53 | 54 | float Random::GetFloat(float maxval) { 55 | Mutex::Lock lock(mutex_); 56 | std::uniform_real_distribution<> dist(0.0, maxval); 57 | return dist(gen_); 58 | } 59 | 60 | std::string Random::GetString(int length) { 61 | std::string result; 62 | for (int i = 0; i < length; ++i) { 63 | result += 'a' + GetInt(0, 25); 64 | } 65 | return result; 66 | } 67 | 68 | double Random::GetGamma(double alpha, double beta) { 69 | Mutex::Lock lock(mutex_); 70 | std::gamma_distribution dist(alpha, beta); 71 | return dist(gen_); 72 | } 73 | 74 | } // namespace lczero 75 | -------------------------------------------------------------------------------- /src/utils/random.h: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 
namespace lczero {

// Shared pseudo-random number source built on std::mt19937. The generator is
// annotated as guarded by a mutex.
class Random {
 public:
  // Returns the shared instance.
  static Random& Get();
  // Returns a random double; max_val bounds the range starting at 0.
  double GetDouble(double max_val);
  // Returns a random float; max_val bounds the range starting at 0.
  float GetFloat(float max_val);
  // Returns a gamma-distributed random value for the given parameters.
  double GetGamma(double alpha, double beta);
  // Both sides are included.
  int GetInt(int min, int max);
  // Returns a random string of the given length.
  std::string GetString(int length);
  // Returns a random boolean.
  bool GetBool();

 private:
  // Instances are not constructible from outside the class.
  Random();

  Mutex mutex_;
  std::mt19937 gen_ GUARDED_BY(mutex_);
};

}  // namespace lczero
35 | template 36 | class SmallArray { 37 | public: 38 | SmallArray() = delete; 39 | SmallArray(size_t size) : size_(size), data_(std::make_unique(size)) {} 40 | SmallArray(SmallArray&&); // TODO implement when needed 41 | T& operator[](int idx) { return data_[idx]; } 42 | const T& operator[](int idx) const { return data_[idx]; } 43 | int size() const { return size_; } 44 | 45 | private: 46 | unsigned char size_; 47 | std::unique_ptr data_; 48 | }; 49 | 50 | } // namespace lczero 51 | -------------------------------------------------------------------------------- /src/utils/string.cc: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 17 | 18 | Additional permission under GNU GPL version 3 section 7 19 | 20 | If you modify this Program, or any covered work, by linking or 21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA 22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a 23 | modified version of those libraries), containing parts covered by the 24 | terms of the respective license agreement, the licensors of this 25 | Program grant you additional permission to convey the resulting work. 
26 | */ 27 | 28 | #include "utils/string.h" 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | namespace lczero { 36 | 37 | std::string StrJoin(const std::vector& strings, 38 | const std::string& delim) { 39 | std::string res; 40 | for (const auto& str : strings) { 41 | if (!res.empty()) res += delim; 42 | res += str; 43 | } 44 | return res; 45 | } 46 | 47 | std::vector StrSplitAtWhitespace(const std::string& str) { 48 | std::vector result; 49 | std::istringstream iss(str); 50 | std::string tmp; 51 | while (iss >> tmp) result.emplace_back(std::move(tmp)); 52 | return result; 53 | } 54 | 55 | std::vector StrSplit(const std::string& str, 56 | const std::string& delim) { 57 | std::vector result; 58 | for (std::string::size_type pos = 0, next = 0; pos != std::string::npos; 59 | pos = next) { 60 | next = str.find(delim, pos); 61 | result.push_back(str.substr(pos, next - pos)); 62 | if (next != std::string::npos) next += delim.size(); 63 | } 64 | return result; 65 | } 66 | 67 | std::vector ParseIntList(const std::string& str) { 68 | std::vector result; 69 | for (const auto& x : StrSplit(str, ",")) { 70 | result.push_back(std::stoi(x)); 71 | } 72 | return result; 73 | } 74 | 75 | std::string LeftTrim(std::string str) { 76 | const auto it = std::find_if(str.begin(), str.end(), 77 | [](int ch) { return !std::isspace(ch); }); 78 | str.erase(str.begin(), it); 79 | return str; 80 | } 81 | 82 | std::string RightTrim(std::string str) { 83 | auto it = std::find_if(str.rbegin(), str.rend(), 84 | [](int ch) { return !std::isspace(ch); }); 85 | str.erase(it.base(), str.end()); 86 | return str; 87 | } 88 | 89 | std::string Trim(std::string str) { 90 | return LeftTrim(RightTrim(std::move(str))); 91 | } 92 | 93 | bool StringsEqualIgnoreCase(const std::string& a, const std::string& b) { 94 | return std::equal(a.begin(), a.end(), b.begin(), b.end(), [](char a, char b) { 95 | return std::tolower(a) == std::tolower(b); 96 | }); 97 | } 98 | 99 | std::vector 
FlowText(const std::string& src, size_t width) { 100 | std::vector result; 101 | auto paragraphs = StrSplit(src, "\n"); 102 | for (const auto& paragraph : paragraphs) { 103 | result.emplace_back(); 104 | auto words = StrSplit(paragraph, " "); 105 | for (const auto& word : words) { 106 | if (result.back().empty()) { 107 | // First word in line, always add. 108 | } else if (result.back().size() + word.size() + 1 > width) { 109 | // The line doesn't have space for a new word. 110 | result.emplace_back(); 111 | } else { 112 | // Appending to the current line. 113 | result.back() += " "; 114 | } 115 | result.back() += word; 116 | } 117 | } 118 | return result; 119 | } 120 | 121 | } // namespace lczero 122 | -------------------------------------------------------------------------------- /src/utils/string.h: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 
17 | 18 | Additional permission under GNU GPL version 3 section 7 19 | 20 | If you modify this Program, or any covered work, by linking or 21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA 22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a 23 | modified version of those libraries), containing parts covered by the 24 | terms of the respective license agreement, the licensors of this 25 | Program grant you additional permission to convey the resulting work. 26 | */ 27 | 28 | #pragma once 29 | 30 | #include 31 | #include 32 | 33 | namespace lczero { 34 | 35 | // Joins strings using @delim as delimiter. 36 | std::string StrJoin(const std::vector& strings, 37 | const std::string& delim = " "); 38 | 39 | // Splits strings at whitespace. 40 | std::vector StrSplitAtWhitespace(const std::string& str); 41 | 42 | // Split string by delimiter. 43 | std::vector StrSplit(const std::string& str, 44 | const std::string& delim); 45 | 46 | // Parses comma-separated list of integers. 47 | std::vector ParseIntList(const std::string& str); 48 | 49 | // Trims a string of whitespace from the start. 50 | std::string LeftTrim(std::string str); 51 | 52 | // Trims a string of whitespace from the end. 53 | std::string RightTrim(std::string str); 54 | 55 | // Trims a string of whitespace from both ends. 56 | std::string Trim(std::string str); 57 | 58 | // Returns whether strings are equal, ignoring case. 59 | bool StringsEqualIgnoreCase(const std::string& a, const std::string& b); 60 | 61 | // Flow text into lines of width up to @width. 62 | std::vector FlowText(const std::string& src, size_t width); 63 | 64 | } // namespace lczero 65 | -------------------------------------------------------------------------------- /src/utils/transpose.cc: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 
3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 17 | 18 | Additional permission under GNU GPL version 3 section 7 19 | 20 | If you modify this Program, or any covered work, by linking or 21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA 22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a 23 | modified version of those libraries), containing parts covered by the 24 | terms of the respective license agreement, the licensors of this 25 | Program grant you additional permission to convey the resulting work. 
26 | */ 27 | 28 | #include "utils/transpose.h" 29 | #include 30 | 31 | namespace lczero { 32 | void TransposeTensor(const std::vector& dims, std::vector order, 33 | const std::vector from, float* to) { 34 | if (order.empty()) { 35 | for (size_t i = 0; i < dims.size(); ++i) 36 | order.push_back(dims.size() - i - 1); 37 | } 38 | std::vector cur_idx(dims.size()); 39 | for (size_t _ = 0; _ < from.size(); ++_) { 40 | size_t from_idx = 0; 41 | for (int i : order) { 42 | from_idx *= dims[i]; 43 | from_idx += cur_idx[i]; 44 | } 45 | *to++ = from[from_idx]; 46 | for (int i = static_cast(dims.size()) - 1; i >= 0; --i) { 47 | if (++cur_idx[i] == dims[i]) { 48 | cur_idx[i] = 0; 49 | } else { 50 | break; 51 | } 52 | } 53 | } 54 | } 55 | } // namespace lczero 56 | -------------------------------------------------------------------------------- /src/utils/transpose.h: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 
17 | 18 | Additional permission under GNU GPL version 3 section 7 19 | 20 | If you modify this Program, or any covered work, by linking or 21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA 22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a 23 | modified version of those libraries), containing parts covered by the 24 | terms of the respective license agreement, the licensors of this 25 | Program grant you additional permission to convey the resulting work. 26 | */ 27 | 28 | #pragma once 29 | 30 | #include 31 | 32 | namespace lczero { 33 | 34 | // Transposes flattened tensor from @from into @to. @to must have space for 35 | // from.size() elements. 36 | // @dims -- Dimensions of @from tensor. For example, {120, 60, 3, 3} 37 | // @order -- New-to-old dimension index mapping. For example {3, 2, 0, 1} 38 | void TransposeTensor(const std::vector& dims, std::vector order, 39 | const std::vector from, float* to); 40 | 41 | } // namespace lczero 42 | -------------------------------------------------------------------------------- /src/utils/weights_adapter.cc: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 
17 | 18 | Additional permission under GNU GPL version 3 section 7 19 | 20 | If you modify this Program, or any covered work, by linking or 21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA 22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a 23 | modified version of those libraries), containing parts covered by the 24 | terms of the respective license agreement, the licensors of this 25 | Program grant you additional permission to convey the resulting work. 26 | */ 27 | 28 | #include "src/utils/weights_adapter.h" 29 | 30 | namespace lczero { 31 | float LayerAdapter::Iterator::ExtractValue(const uint16_t* ptr, 32 | const LayerAdapter* adapter) { 33 | return *ptr / static_cast(0xffff) * adapter->range_ + adapter->min_; 34 | } 35 | 36 | LayerAdapter::LayerAdapter(const pblczero::Weights_Layer& layer) 37 | : data_(reinterpret_cast(layer.params().data())), 38 | size_(layer.params().size() / sizeof(uint16_t)), 39 | min_(layer.min_val()), 40 | range_(layer.max_val() - min_) {} 41 | 42 | std::vector LayerAdapter::as_vector() const { 43 | return std::vector(begin(), end()); 44 | } 45 | float LayerAdapter::Iterator::operator*() const { 46 | return ExtractValue(data_, adapter_); 47 | } 48 | float LayerAdapter::Iterator::operator[](size_t idx) const { 49 | return ExtractValue(data_ + idx, adapter_); 50 | } 51 | 52 | } // namespace lczero -------------------------------------------------------------------------------- /src/utils/weights_adapter.h: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 
9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 17 | 18 | Additional permission under GNU GPL version 3 section 7 19 | 20 | If you modify this Program, or any covered work, by linking or 21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA 22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a 23 | modified version of those libraries), containing parts covered by the 24 | terms of the respective license agreement, the licensors of this 25 | Program grant you additional permission to convey the resulting work. 26 | */ 27 | 28 | #include 29 | #include 30 | #include "proto/net.pb.h" 31 | 32 | namespace lczero { 33 | 34 | class LayerAdapter { 35 | public: 36 | class Iterator 37 | : public std::iterator { 38 | public: 39 | Iterator() = default; 40 | Iterator(const Iterator& other) = default; 41 | 42 | float operator*() const; 43 | float operator[](size_t idx) const; 44 | bool operator==(const LayerAdapter::Iterator& other) const { 45 | return data_ == other.data_; 46 | } 47 | bool operator!=(const LayerAdapter::Iterator& other) const { 48 | return data_ != other.data_; 49 | } 50 | Iterator& operator++() { 51 | ++data_; 52 | return *this; 53 | } 54 | Iterator& operator--() { 55 | --data_; 56 | return *this; 57 | } 58 | ptrdiff_t operator-(const Iterator& other) const { 59 | return data_ - other.data_; 60 | } 61 | 62 | // TODO(crem) implement other iterator functions when they are needed. 
63 | 64 | private: 65 | friend class LayerAdapter; 66 | Iterator(const LayerAdapter* adapter, const uint16_t* ptr) 67 | : adapter_(adapter), data_(ptr) {} 68 | static float ExtractValue(const uint16_t* ptr, const LayerAdapter* adapter); 69 | 70 | const LayerAdapter* adapter_ = nullptr; 71 | const uint16_t* data_ = nullptr; 72 | }; 73 | 74 | LayerAdapter(const pblczero::Weights_Layer& layer); 75 | std::vector as_vector() const; 76 | size_t size() const { return size_; } 77 | float operator[](size_t idx) const { return begin()[idx]; } 78 | Iterator begin() const { return {this, data_}; } 79 | Iterator end() const { return {this, data_ + size_}; } 80 | 81 | private: 82 | const uint16_t* data_ = nullptr; 83 | const size_t size_ = 0; 84 | const float min_; 85 | const float range_; 86 | }; 87 | 88 | } // namespace lczero 89 | -------------------------------------------------------------------------------- /src/version.cc: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 
17 | 18 | Additional permission under GNU GPL version 3 section 7 19 | 20 | If you modify this Program, or any covered work, by linking or 21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA 22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a 23 | modified version of those libraries), containing parts covered by the 24 | terms of the respective license agreement, the licensors of this 25 | Program grant you additional permission to convey the resulting work. 26 | */ 27 | #include "version.h" 28 | 29 | std::uint32_t GetVersionInt(int major, int minor, int patch) { 30 | return major * 1000000 + minor * 1000 + patch; 31 | } 32 | 33 | std::string GetVersionStr(int major, int minor, int patch, 34 | const std::string& postfix) { 35 | auto v = std::to_string(major) + "." + std::to_string(minor) + "." + 36 | std::to_string(patch); 37 | if (postfix.empty()) return v; 38 | return v + "-" + postfix; 39 | } 40 | -------------------------------------------------------------------------------- /src/version.h: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Leela Chess Zero. 3 | Copyright (C) 2018 The LCZero Authors 4 | 5 | Leela Chess is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | Leela Chess is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with Leela Chess. If not, see . 
17 | 18 | Additional permission under GNU GPL version 3 section 7 19 | 20 | If you modify this Program, or any covered work, by linking or 21 | combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA 22 | Toolkit and the NVIDIA CUDA Deep Neural Network library (or a 23 | modified version of those libraries), containing parts covered by the 24 | terms of the respective license agreement, the licensors of this 25 | Program grant you additional permission to convey the resulting work. 26 | */ 27 | #pragma once 28 | 29 | // Versioning is performed according to the standard at 30 | // Creating a new version should be performed using scripts/bumpversion.py. 31 | 32 | #include 33 | #include "version.inc" 34 | 35 | std::uint32_t GetVersionInt(int major = LC0_VERSION_MAJOR, 36 | int minor = LC0_VERSION_MINOR, 37 | int patch = LC0_VERSION_PATCH); 38 | 39 | std::string GetVersionStr(int major = LC0_VERSION_MAJOR, 40 | int minor = LC0_VERSION_MINOR, 41 | int patch = LC0_VERSION_PATCH, 42 | const std::string& postfix = LC0_VERSION_POSTFIX); 43 | -------------------------------------------------------------------------------- /src/version.inc: -------------------------------------------------------------------------------- 1 | #define LC0_VERSION_MAJOR 0 2 | #define LC0_VERSION_MINOR 21 3 | #define LC0_VERSION_PATCH 0 4 | #define LC0_VERSION_POSTFIX "fish-v0.8" 5 | -------------------------------------------------------------------------------- /subprojects/gtest.wrap: -------------------------------------------------------------------------------- 1 | [wrap-file] 2 | directory = googletest-release-1.8.0 3 | 4 | source_url = https://github.com/google/googletest/archive/release-1.8.0.zip 5 | source_filename = gtest-1.8.0.zip 6 | source_hash = f3ed3b58511efd272eb074a3a6d6fb79d7c2e6a0e374323d1e6bcbcc1ef141bf 7 | 8 | patch_url = https://wrapdb.mesonbuild.com/v1/projects/gtest/1.8.0/5/get_zip 9 | patch_filename = gtest-1.8.0-5-wrap.zip 10 | patch_hash = 
7eeaede4aa2610a403313b74e04baf91ccfbaef03203d8f56312e22df1834ec5 11 | -------------------------------------------------------------------------------- /subprojects/protobuf-3.6.0.wrap: -------------------------------------------------------------------------------- 1 | [wrap-file] 2 | directory = protobuf-3.6.0 3 | 4 | source_url = https://github.com/protocolbuffers/protobuf/releases/download/v3.6.0/protobuf-all-3.6.0.tar.gz 5 | source_filename = protobuf-all-3.6.0.tar.gz 6 | source_hash = 1532154addf85080330fdd037949d4653dfce16550df5c70ea0cd212d8aff3af 7 | 8 | patch_url = https://github.com/borg323/protobuf/releases/download/3.6.0/protobuf-3.6.0-wrap.zip 9 | patch_filename = protobuf-3.6.0-wrap.zip 10 | patch_hash = a14730d2e3702c4a0d7b3f05a380ec6b2c0b138a5b00539705b5c3a8df9885e3 11 | -------------------------------------------------------------------------------- /subprojects/protobuf.wrap: -------------------------------------------------------------------------------- 1 | [wrap-file] 2 | directory = protobuf-3.5.1 3 | 4 | source_url = https://github.com/google/protobuf/releases/download/v3.5.1/protobuf-all-3.5.1.tar.gz 5 | source_filename = protobuf-all-3.5.1.tar.gz 6 | source_hash = 72d43863f58567a9ea2054671fdb667867f9cf7865df623c7be630978ff97dff 7 | 8 | patch_url = https://github.com/borg323/protobuf/releases/download/3.5.1-2w/protobuf-3.5.1-2w-wrap.zip 9 | patch_filename = protobuf-3.5.1-2w-wrap.zip 10 | patch_hash = 5185ae7252941e252b075d3f845768296b079516f9f6feb0bd3ae63de7e9a52e 11 | -------------------------------------------------------------------------------- /subprojects/zlib.wrap: -------------------------------------------------------------------------------- 1 | [wrap-file] 2 | directory = zlib-1.2.11 3 | 4 | source_url = http://zlib.net/fossils/zlib-1.2.11.tar.gz 5 | source_filename = zlib-1.2.11.tar.gz 6 | source_hash = c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1 7 | 8 | patch_url = 
https://wrapdb.mesonbuild.com/v1/projects/zlib/1.2.11/3/get_zip 9 | patch_filename = zlib-1.2.11-3-wrap.zip 10 | patch_hash = f07dc491ab3d05daf00632a0591e2ae61b470615b5b73bcf9b3f061fff65cff0 -------------------------------------------------------------------------------- /tensorflow.md: -------------------------------------------------------------------------------- 1 | To build with tensorflow under linux you need to install Tensorflow_cc from 2 | . Either release v1.9.0, v1.12.0 or 3 | v1.13.0. Tensorflow_cc requires a specific version of protobuf, which constrains 4 | the build. Release v1.9.0 works out of the box, since the default protobuf 5 | subproject (v3.5.1) is compatible and is used instead of a system installed 6 | version. In contrast release v1.12.0 needs protobuf v3.6.0 and release v1.13.0 7 | is built with protobuf 3.6.1 but also works with 3.6.0. For those versions 8 | `-Dprotobuf-3-6-0=true` should be added to the build command line. Note that 9 | this protobuf version has issues with static builds and crashes so is not 10 | recommended for normal use. The crashes look very similar to: 11 | * 12 | * 13 | -------------------------------------------------------------------------------- /windows_build.md: -------------------------------------------------------------------------------- 1 | ## Windows BLAS/OpenCL 2 | 3 | 0. [Install Microsoft Visual Studio](https://visualstudio.microsoft.com/). For VS2017 make sure the 4 | option "Desktop development with C++" is installed (you can add it later if not). 5 | 6 | 1. [Install git for windows](https://git-scm.com/download/win) - this can be used to get lc0 but is also 7 | needed for meson. 8 | 9 | 2. Install a BLAS library. This can be either OpenBLAS or Intel MKL. 
10 | * For [OpenBLAS go here](http://www.openblas.net/), you need a binary package with a filename of the 11 | form `OpenBLAS-version-Win64-int32.zip`, they are not available for all versions, which you just unpack 12 | at a location of your choice (but not inside the lc0 directory). 13 | * For [Intel MKL go here](https://software.intel.com/en-us/mkl), where you need to register. After 14 | installation don't forget to run `mklvars.bat intel64` to set up the paths to the dlls. 15 | 16 | 3. For OpenCL you also need to install OpenCL developer libraries. 17 | * For AMD cards the AMD APP SDK 3.0 seems to be the appropriate one, to be installed after the card drivers. 18 | This is not currently available on the AMD website, but links to a signed installer are available in the 19 | [AMD community forum](https://community.amd.com/thread/222855). 20 | * For NVIDIA cards you probably need the [CUDA toolkit](https://developer.nvidia.com/cuda-downloads). 21 | 22 | 4. [Install Python3](https://www.python.org/) - be sure to check the box to add python to the path. 23 | 24 | 5. Install Meson: `pip3 install --upgrade meson` 25 | 26 | 6. Edit `build-cl.cmd`: 27 | * If you use MSVS other than 2017 community edition (or if it's installed into a non-standard location) 28 | replace the path to vcvarsall.bat and MSBuild.exe. If you can't find vcvarsall.bat on VS2017, you 29 | need to install the option "Desktop development with C++". Some example paths are in comments. 30 | * In `--backend 2017` replace 2017 with the correct MSVS version. 31 | * Set the BLAS (and optionally OpenCL) library `include` and `lib` directories in the appropriate 32 | variables. 33 | - For OpenBLAS, they are `openblas_include` and `openblas_libdirs`. 34 | - For Intel MKL, they are `mkl_include` and `mkl_libdirs`. The `lib` directory typically ends in 35 | `\lib\intel64`. 36 | - For OpenCL, they are `opencl_libdirs` and `opencl_include`.
The include directory is the one with 37 | the `CL` directory containing `opencl.h`, not directly the one containing `opencl.h`. 38 | 39 | 7. Run `build-cl.cmd`. It will generate MSVS project and pause. 40 | 41 | 8. Hit `Enter` to build it. 42 | 43 | 9. Resulting binary will be `build/lc0.exe` 44 | 45 | Alternatively you can 46 | 47 | 8. open generated solution `build/lc0.sln` in Visual Studio and build yourself. 48 | 49 | 50 | --------------------------------------------------------------------------------