├── .gitignore ├── README.md ├── build.jai ├── eval.jai ├── linux.jai ├── macos.jai ├── movegen.jai ├── nnue_avx2.jai ├── nnue_cpu.jai ├── nnue_probe.jai ├── nnue_sse.jai ├── resources ├── AnonymousPro.ttf ├── capture.wav ├── chess_pieces.png ├── move.wav ├── nn-04cf2b4ed1da.nnue └── settings_icon.png ├── search.jai ├── uci.jai ├── ui.jai └── windows.jai /.gitignore: -------------------------------------------------------------------------------- 1 | .build/ 2 | chess 3 | ceij 4 | jai 5 | berserk 6 | stockfish 7 | koivisto 8 | Chess 9 | Ceij 10 | Jai 11 | Berserk 12 | Stockfish 13 | Koivisto 14 | *.pdb 15 | *.nnue 16 | *.pdf 17 | *.exe 18 | * 19 | resources/ 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Chess Engine in Jai 2 | 3 | This Chess Engine in Jai is a hobby project designed to find bugs and benchmark the Jai Compiler. 4 | You can find video demos of this Chess Engine [here](https://www.youtube.com/watch?v=2OvE0I_rdpI&list=PL2fmKE0pL4IyET-eKbbBPw_i9IHN1QmFZ&index=1) 5 | 6 | ## How to Build: 7 | This project includes both a Chess Graphical User Interface and a UCI compatible Chess Engine. 8 | 9 | Type `jai build.jai` to build both the UI and AI on `release` mode 10 | 11 | Type `jai build.jai - ui` to build the GUI. 12 | 13 | Type `jai build.jai - ai` to build the AI. 14 | 15 | Type `jai build.jai - ui ai` to build both the GUI and the AI. 16 | 17 | Type `jai build.jai - release` to build an optimized build. 18 | 19 | Type `jai build.jai - release ai nnue_probe` to build the engine using Daniel Shawul's NNUE-Probe library. This C library is entirely optional, and is used to provide the best SIMD code for unsupported computers (e.g. AVX512, ARM NEON, MMX, etc.) 20 | 21 | The build script will try to detect the CPU you are using, and pick either `AVX2`, `SSE`, or `cpu` automatically based on your processor. 
Send the flags `avx2`, `sse`, or `cpu` to manually toggle what you want. `cpu` means running with no SIMD support. Because Neural Networks are based on matrix multiplication, SIMD is needed to obtain the best performance. 22 | 23 | ## Code Organization 24 | This code is divided up into the following files: 25 | * `build.jai` is a build script for both the User Interface (GUI) and the AI engine. 26 | * `eval.jai` contains chess evaluation parameters 27 | * `movegen.jai` contains the `Chess` structs and Piece definitions 28 | * `search.jai` is the main `Chess` search function. This file contains code for Multi-threaded Parallel Search 29 | * `uci.jai` contains the `main` function for the AI. This file parses the Universal Chess Interface protocol for the engine 30 | * `ui.jai` is the `main` function for the UI. This file handles the User Interface with `Simp` and `GetRect` 31 | * `linux.jai` is code specific to the Linux OS. 32 | * `windows.jai` is code specific to the Windows OS. 33 | * `macos.jai` is code specific to Mac OS. 34 | * the `resources` directory contains the following: 35 | * Fonts 36 | * Sound Effects 37 | * Chess Piece Picture 38 | * Neural Network Model for AI 39 | * The Efficiently Updatable Neural Network Code is organized as follows: 40 | * `nnue_avx2.jai` contains code for AVX2 processors. 41 | * `nnue_sse.jai` contains code for SSE processors. 42 | * `nnue_probe.jai` contains code for interacting with the NNUE-Probe C library by Daniel Shawul. This can be used to get the best optimization for SIMD architectures unsupported by the existing Jai code (e.g. AVX512 SIMD, ARM NEON SIMD, X86-64 MMX SIMD, etc.) 43 | * `nnue_cpu.jai` is the default fallback code with no SIMD. Because it uses no SIMD whatsoever, it can run on any platform. 
44 | 45 | ## User Interface Features 46 | 47 | * Drag and drop behavior to move pieces 48 | * Click to move pieces 49 | * Legal move generation and detection 50 | * Flip the board using the `X` Key 51 | * New game using the `Ctrl+N` Key 52 | * Undo move using the `Ctrl+Z` Key 53 | * Redo move using the `Ctrl+Y` Key 54 | * Implements UCI (Universal Chess Interface) Protocol to communicate with any UCI compatible chess engine (e.g. Stockfish, RubiChess) 55 | * Can do Human vs Computer, Computer vs Human, Human vs Human, and Computer vs Computer. 56 | * Parses and loads FEN strings 57 | * Engine vs. Engine Implemented (e.g. Stockfish vs Komodo) 58 | * Multi-engine support 59 | * UCI Engine Options 60 | * Console command-line 61 | * Windows, Mac, and Linux support 62 | * Chess Clock with Increment 63 | * Blindfold Mode 64 | * Highlight Squares with Right Click 65 | * Draw Arrows on the Board with Right Click 66 | * Display Engine Lines 67 | * Dark Background/Light Background Theme 68 | 69 | ## Chess Engine in Jai 70 | * Estimated elo: 3100 71 | * UCI protocol 72 | 73 | ### Chess Engine Options 74 | * Clear Hash - clears out the transposition table 75 | * Transposition Table Memory Size (in MB) - adjusts the size of the transposition table in megabytes 76 | * Number of Threads - adjusts the number of threads used by the engine. Increasing the number of threads should increase the playing strength of the engine 77 | * Multiple Principal Variation Lines - detects multiple PV lines. Set the PV to 1 for the best playing strength (the default) 78 | * Difficulty Levels 1 to 8 - adjusts the difficulty of the engine. 
79 | 80 | ### Parallel Search 81 | * Lazy Shared Memory Processing Parallel Search 82 | * Uses the `Thread_Group` Jai Module to implement threading 83 | 84 | ### Board Representation 85 | * 8x8 Board 86 | * Legal/Pseudo Legal Move Generation 87 | * Staged Move Generation 88 | * Bitboards with Little Endian Rank-File Mapping 89 | * Fancy Magic Bitboards with Parallel Bit Extract 90 | * Kogge-Stone Algorithm 91 | * Move generator generates 324 million positions per second 92 | * Moves encoded as 16-bit integers 93 | 94 | ### Search 95 | * Negamax Search with Alpha-Beta Pruning 96 | * Principal Variation Search 97 | * Iterative Deepening 98 | * Aspiration Window Search 99 | * Internal Iterative Deepening 100 | 101 | ### Pruning and Reductions 102 | * Null Move Pruning w/ Verification 103 | * Low Depth SEE Pruning 104 | * Reverse Futility Pruning/Static Move Pruning 105 | * Delta Pruning 106 | * SEE Quiescence Search Pruning 107 | * Razoring 108 | * Mate Distance Pruning 109 | * History Leaf Reduction/Pruning 110 | * Prob Cut 111 | * Late Move Reduction/Pruning 112 | 113 | ### Extensions 114 | * Singular Search Extensions 115 | * Check Extensions 116 | * Recapture Extensions 117 | 118 | ### Transposition Table 119 | * 16-byte Transposition Table Hash Entries 120 | * 3-fold repetition & Fifty-move Rule 121 | * Zobrist Hashing w/ Incremental Update 122 | * Transposition Table Probing in Non PV Nodes 123 | * Prefetch Transposition Table Entries 124 | * Depth-Preferred Replacement Scheme 125 | * Aging 126 | 127 | ### Move Ordering 128 | * MVV-LVA (Most Valuable Victim, Least Valuable Attacker) Implementation 129 | * Transposition Table Hash Move 130 | * Killer Moves Move Ordering 131 | * History Moves Move Ordering 132 | * Tactical History Move Ordering 133 | * Countermove and Follow Up History Move Ordering 134 | * Relative History Move Ordering 135 | 136 | ### Evaluation 137 | * Efficiently Updatable Neural Networks with Incremental Update 138 | * Simple Mop-Up Endgame 
Evaluation 139 | * NNUE Stockfish 12 HalfKP Architecture 140 | * Supports the following CPU Architectures: 141 | * AVX2 142 | * SSE 143 | * CPU w/o SIMD 144 | 145 | ### Credits 146 | * Thank you to Jai Beta Users Patrik Smělý and Don Swet (github.com/cookednick) for testing the Chess program on Mac OS. 147 | * Thank you to Maksim Korzh, author of the BBC chess engine. His work provided the main inspiration for this project. Here is a link to the [BBC Chess Engine](https://github.com/maksimKorzh/bbc). 148 | * Thank you to Daniel Shawul for translating Stockfish NNUE into C. His work was the basis for my NNUE implementation. [nnue-probe](https://github.com/dshawul/nnue-probe) 149 | * Thank you to Jonathan Blow, for allowing me access to the Jai Compiler Beta. [Jai-Community-Wiki](https://github.com/Jai-Community/Jai-Community-Library) 150 | * [Berserk Chess Engine](https://github.com/jhonnold/berserk) was a great chess engine I learned a lot from. The code is well-organized and well-written. I highly recommend looking through Jay Honnold's work if you want to learn how to take your chess engine to the next level. 151 | * [The Stockfish PyTorch NNUE Guide](https://github.com/glinscott/nnue-pytorch/blob/master/docs/nnue.md) is an incredibly detailed look at how NNUE works. I recommend reading it over and over again to get a full grasp about what is going on. 
--------------------------------------------------------------------------------
/build.jai:
--------------------------------------------------------------------------------
#import "Basic";
#import "Compiler";
#import "Machine_X64";
#import "String";
#import "File";
#import "File_Utilities";
#import "Process";

ui_exe_name :: "chess";
ai_exe_name :: "ceij";
author      :: "Daniel Tan";

// Adds every source file of the UCI chess engine to workspace `w`.
//   w        - workspace created by build_debug/build_release.
//   cpu_info - CPU feature info of the build machine; consulted only when `type` is .auto.
//   type     - which NNUE implementation to compile in. .auto picks avx2, then sse, then cpu,
//              based on the features reported in `cpu_info`.
// Also injects a generated `uci_response` string constant into the workspace.
add_chess_engine_files :: (w: Workspace, cpu_info: *Cpu_X86, type: NNUE_Type) {

    // Builds the source text of a `uci_response :: #string END ... END` declaration
    // containing the engine id, the author and the selected NNUE type.
    create_uci_message :: () -> string #expand {
        NEWLINE := ifx OS == .WINDOWS "\r\n" else "\n";
        builder: String_Builder;
        print_to_builder(*builder, "uci_response :: #string END%1", NEWLINE);
        print_to_builder(*builder, "id name %1%2", ai_exe_name, NEWLINE);
        print_to_builder(*builder, "id author %1%2", author, NEWLINE);
        print_to_builder(*builder, "info string neural network type %1%2", type, NEWLINE);
        print_to_builder(*builder, "END%1", NEWLINE);
        return builder_to_string(*builder);
    }

    // OS specific code.
    #if OS == .LINUX {
        add_build_file("linux.jai", w);
    } else #if OS == .WINDOWS {
        add_build_file("windows.jai", w);
    } else #if OS == .MACOS {
        add_build_file("macos.jai", w);
    }

    // cross platform layer.
    add_build_file("uci.jai", w);
    add_build_file("search.jai", w);
    add_build_file("eval.jai", w);
    add_build_file("movegen.jai", w);

    // determine which NNUE SIMD to build.
    if type == .auto {
        // automatically add based on whether the CPU has AVX features or not.
        print("Detecting CPU instruction set.\n");
        if check_feature(cpu_info.feature_leaves, .AVX2) {
            type = .avx2;
        } else if check_feature(cpu_info.feature_leaves, .SSE) {
            type = .sse;
        } else {
            type = .cpu;
        }
    }

    if #complete type == {
    case .auto;
        // .auto was resolved to a concrete type above.
        assert(false);
    case .cpu;
        print("Building Chess Engine with NNUE CPU\n");
        add_build_file("nnue_cpu.jai", w);
    case .sse;
        print("Building Chess Engine with NNUE SSE\n");
        add_build_file("nnue_sse.jai", w);
    case .avx2;
        print("Building Chess Engine with NNUE AVX2\n");
        add_build_file("nnue_avx2.jai", w);
    case .nnue_probe;
        print("Building Chess Engine with NNUE Probe\n");

        // I only tested this on Linux. I believe nnue-probe only works on Linux only,
        // and cannot run on windows/mac, but I could be wrong...
        library: string;
        location: string;
        if OS == .LINUX {
            library  = "libnnueprobe.so";
            location = "nnue-probe/src/libnnueprobe.so";
        } else if OS == .WINDOWS {
            library  = "libnnueprobe.dll";
            location = "nnue-probe/src/libnnueprobe.dll";
        } else if OS == .MACOS {
            library  = "libnnueprobe.dylib";
            location = "nnue-probe/src/libnnueprobe.dylib";
        }

        // Fetch and build the C library only when it is not already next to the build script.
        // NOTE(review): the results of run_command/file_move are not checked here.
        if !file_exists(library) {
            if !file_exists("nnue-probe") {
                print("Downloading nnue-probe library by Daniel Shawul.\n");
                run_command("git", "clone", "https://github.com/dshawul/nnue-probe");
            }

            run_command("make", "--directory=nnue-probe/src");
            file_move(location, library);
        }

        add_build_file("nnue_probe.jai", w);
    case;
        assert(false, "Invalid build script.\n");
    }

    uci_message := create_uci_message();
    add_build_string(uci_message, w);
}

// Adds every source file of the graphical user interface to workspace `w`.
add_ui_files :: (w: Workspace) {
    // OS specific code.
    #if OS == .LINUX {
        add_build_file("linux.jai", w);
    } else #if OS == .WINDOWS {
        add_build_file("windows.jai", w);
    } else #if OS == .MACOS {
        add_build_file("macos.jai", w);
    }

    // cross platform layer.
    add_build_file("ui.jai", w);
    add_build_file("movegen.jai", w);
}

// Build entry point. Recognized compile-time arguments:
//   ui, ai                      - select which executables to build,
//   debug, release              - select the optimization level,
//   avx2, sse, cpu, nnue_probe  - force an NNUE implementation (default: auto-detect).
// With no arguments at all, both executables are built in release mode.
#run {
    cpu_info := get_cpu_info();
    defer set_build_options_dc(.{do_output=false});
    target_options := get_build_options();
    args := target_options.compile_time_command_line;
    flags: CompileFlags = 0;
    nnue_type: NNUE_Type = .auto;
    target_requested := false;  // becomes true once "ui" or "ai" is seen.
    if args.count == 0 then {
        flags = CompileFlags.ui | .ai | .release;
    }

    for arg: args {
        if arg == {
        case "ui";
            flags |= .ui;
            target_requested = true;
        case "ai";
            flags |= .ai;
            target_requested = true;
        case "debug";
            flags &= ~.release;
        case "release";
            flags |= .release;
        case "avx2";
            nnue_type = .avx2;
        case "sse";
            nnue_type = .sse;
        case "cpu";
            nnue_type = .cpu;
        case "nnue_probe";
            nnue_type = .nnue_probe;
        case;
            // Previously unknown arguments were silently dropped; tell the user instead.
            print("Warning: ignoring unrecognized build argument \"%\".\n", arg);
        }
    }

    // Arguments such as `release` or `avx2` do not name a target. Without this default,
    // `jai build.jai - release` (documented in the README) would build nothing at all.
    if !target_requested {
        flags |= CompileFlags.ui | .ai;
    }

    if flags & .ai {
        w: Workspace;
        if flags & .release {
            w = build_release(*cpu_info, ai_exe_name);
        } else {
            w = build_debug(*cpu_info, ai_exe_name);
        }
        add_chess_engine_files(w, *cpu_info, nnue_type);
        print("Building Chess Engine [%]\n", ai_exe_name);
    }

    if flags & .ui {
        w: Workspace;
        if flags & .release {
            w = build_release(*cpu_info, ui_exe_name);
        } else {
            w = build_debug(*cpu_info, ui_exe_name);
        }
        add_ui_files(w);
        print("Building Chess User Interface [%]\n", ui_exe_name);
    }
}

// What to build and how.
CompileFlags :: enum_flags {
    release;
    ui;
    ai;
}

// Which NNUE implementation file is added to the engine workspace.
NNUE_Type :: enum {
    auto;
    cpu;
    sse;
    avx2;
    nnue_probe;
}

// Creates a workspace named `executable_name` configured for a debug build
// (X64 backend, stack traces on) and injects the move generator feature constants.
build_debug :: (cpu_info: *Cpu_X86, executable_name: string) -> Workspace {
    w := compiler_create_workspace(executable_name);
    options := get_build_options(w);
    options.output_executable_name = executable_name;
    set_optimization(*options, .DEBUG);
    options.backend = .X64;
    options.stack_trace = true;
    set_build_options(options, w);
    movegen_features(cpu_info, w);
    print("Creating Debug Build for: %\n", executable_name);
    return w;
}

// Creates a workspace named `executable_name` configured for an optimized build
// (split modules off, stack traces off) and injects the move generator feature constants.
build_release :: (cpu_info: *Cpu_X86, executable_name: string) -> Workspace {
    w := compiler_create_workspace(executable_name);
    options := get_build_options(w);
    options.output_executable_name = executable_name;
    set_optimization(*options, .OPTIMIZED);
    options.llvm_options.enable_split_modules = false;
    options.stack_trace = false;
    set_build_options(options, w);
    movegen_features(cpu_info, w);
    print("Creating Release Build for: %\n", executable_name);
    return w;
}

// Injects the constants ENABLE_BLSR and FANCY_MAGIC_BITBOARDS into workspace `w`,
// set from whether the build machine reports BMI1 and BMI2 respectively.
// The generated declarations are also echoed to the build output.
movegen_features :: (cpu_info: *Cpu_X86, w: Workspace) {
    enable_blsr := check_feature(cpu_info.feature_leaves, .BMI1) == true;
    code := tprint("ENABLE_BLSR :: %;\n", enable_blsr);
    print(code);
    add_build_string(code, w);

    enable_fancy_magic_bitboards := check_feature(cpu_info.feature_leaves, .BMI2) == true;
    code = tprint("FANCY_MAGIC_BITBOARDS :: %;\n", enable_fancy_magic_bitboards);
    print(code);
    add_build_string(code, w);
}

--------------------------------------------------------------------------------
/eval.jai:
--------------------------------------------------------------------------------
// for NNUE resources: https://hxim.github.io/Stockfish-Evaluation-Guide/

// Evaluates a bare board (no search history) with the neural network.
uci_evaluate :: (chess: *Chess) -> int #expand {
    return nnue_evaluate_board(chess);
}

// Static evaluation used by the search.
//   chess - the game state to evaluate.
//   fifty - fifty-move-rule counter; the network score is scaled by (100 - fifty) / 200,
//           so it shrinks linearly as the counter grows.
// Positions recognized by trivial_evaluation return its score unscaled.
evaluate :: (chess: *ChessGame, fifty: int) -> int {
    TF, score := trivial_evaluation(chess, fifty);
    if TF return score;
    eval := nnue_evaluate(chess);
    return (eval * (100-fifty)) / 200;
}

#scope_file


// We use trivial evaluation to deal with endgame positions.
// Returns (true, score) when one side has no pieces besides its king and the other side
// has at least one piece that is not a pawn or a knight; otherwise returns (false, 0).
// The score is from the point of view of the side to move.
// NOTE(review): a lone bishop also satisfies the "not a pawn or knight" test and is
// scored as winning - confirm this is intended.
// NOTE(review): `fifty` is not used in this procedure.
trivial_evaluation :: (using chess: *Chess, fifty: int) -> bool, int {

    WIN_SCORE :: 5_000;

    // Score for the winning side: WIN_SCORE plus bonuses for material, advanced pawns,
    // kings being close together, and the weak king standing near the edge.
    eval_winning_position :: (strong_king: u64, q: u64, r: u64, b: u64, n: u64, p: u64, weak_king: u64) -> int {

        // 90 for a corner square, decreasing towards the centre of the board.
        push_to_edge :: (sq: int) -> int {
            r := sq / 8;
            f := sq % 8;
            fd := min(f, 7-f);
            rd := min(r, 7-r);
            return 90 - (7 * fd * fd / 2 + 7 * rd * rd / 2);
        }

        // Bonus per pawn, indexed by the pawn's bit index in `p`.
        // presumably laid out from black's point of view (white pawns are bit-reversed
        // by the caller) - TODO confirm against the bitboard square mapping.
        eg_pawn_table :: int.[
              0,   0,   0,   0,   0,   0,   0,   0,
            750, 750, 750, 750, 750, 750, 750, 750,
            550, 550, 550, 550, 550, 550, 550, 550,
            250, 250, 250, 250, 250, 250, 250, 250,
            150, 150, 150, 150, 150, 150, 150, 150,
             50,  50,  50,  50,  50,  50,  50,  50,
             25,  25,  25,  25,  25,  25,  25,  25,
              0,   0,   0,   0,   0,   0,   0,   0,
        ];

        win := bit_scan_forward(strong_king);
        los := bit_scan_forward(weak_king);

        winx := win / 8;
        winy := win % 8;

        losx := los / 8;
        losy := los % 8;

        // Manhattan distance between the two kings; closer kings score higher.
        distance := abs(winx - losx) + abs(winy - losy);
        push_close := 140 - 20 * distance;
        edge := push_to_edge(los);
        mat := popcount(q)*900 + popcount(r)*500 + popcount(b)*350 + popcount(n)*300;
        pawn_points := 0;
        while p {
            i := bit_scan_forward(p);
            pawn_points += eg_pawn_table[i];
            p &= p-1;  // clear the lowest set bit.
        }

        return WIN_SCORE + push_close + edge + mat + pawn_points;
    }

    // used for trivial checkmates/draws.
    w_pieces := w_queen|w_rook|w_bishop|w_knight|w_pawn;
    b_pieces := b_queen|b_rook|b_bishop|b_knight|b_pawn;
    if b_pieces == 0 {
        if w_pieces & (~(w_pawn|w_knight)) {
            side := ifx turn == Turn.WHITE then 1 else -1;
            eval := eval_winning_position(w_king, q=w_queen, r=w_rook, b=w_bishop, n=w_knight, p=bit_reverse64(w_pawn), b_king) * side;
            return true, eval;
        }
    }

    if w_pieces == 0 {
        if b_pieces & (~(b_pawn|b_knight)) {
            side := ifx turn == Turn.WHITE then -1 else 1;
            eval := eval_winning_position(b_king, q=b_queen, r=b_rook, b=b_bishop, n=b_knight, p=b_pawn, w_king) * side;
            return true, eval;
        }
    }
    return false, 0;
}

#import "File";
#import "String";

--------------------------------------------------------------------------------
/linux.jai:
--------------------------------------------------------------------------------
// contains linux OS specific code.

EXE :: ""; // nothing.
NEWLINE :: "\n";

OS :: struct {}

// defines the AI uci message loop.
// Iteration macro that feeds the UCI message loop: puts stdin into non-blocking mode,
// then repeatedly reads a chunk of input and runs `loop_body` once per newline-terminated
// line, with `it` bound to the line (without the newline). `break` inside the body
// leaves the whole loop.
// NOTE(review): text after the last newline of a chunk is dropped, so a command split
// across two reads is lost.
getline :: (os: *OS, loop_body: Code, flags: For_Flags) #expand {
    nonblock_stdin :: () #expand {
        flags := fcntl(STDIN_FILENO, F_GETFL, 0) | O_NONBLOCK;
        fcntl(STDIN_FILENO, F_SETFL, flags);
    }
    nonblock_stdin();
    `it_index := 0;
    while outer := true {
        stopping = false;
        memset(buffera.data, 0, size_of(type_of(buffera)));
        memset(bufferb.data, 0, size_of(type_of(bufferb)));
        bytes_read := read(STDIN_FILENO, buffera.data, buffera.count-1);
        // < 0: nothing available yet (or an error); == 0: end of file.
        // Sleep in both cases so that a closed stdin does not turn this into a busy loop.
        if bytes_read <= 0 {
            sleep_milliseconds(25);
            continue;
        }

        messages := to_string(buffera.data, bytes_read);
        while messages {
            found, `it, rest := split_from_left(messages, #char "\n");
            if !found break;
            #insert (break=break outer) loop_body;
            messages = rest;
        }
    }
}

// Polled from the search. Makes the *calling* procedure `return 0` when the search must
// stop: a stop is already pending, the node budget is exhausted, the move time is used
// up, or a "stop" command arrived on stdin. The budget, clock and stdin are only looked
// at when the low 13 bits of `nodes` are all set.
//   main_thread - only the main thread checks the clock and stdin.
//   nodes       - node counter of the caller.
//   maxnodes    - node budget.
//   time_begin  - start time in seconds, on the same clock as seconds_since_init().
//   movetime    - time budget in milliseconds.
read_input :: (main_thread: bool, nodes: int, maxnodes: int, time_begin: float64, movetime: int) #expand {
    if stopping == true then
        `return 0;

    if (nodes & 8191) == 8191 {
        if nodes >= maxnodes {
            stopping = true;
            `return 0;
        }

        if main_thread == false
            return;

        // (the original re-tested `(nodes & 8191) == 8191` here; it is always true at this point.)
        time := seconds_since_init();
        elapsed_ms: int = xx (1000.0 * (time - time_begin));
        if elapsed_ms > movetime {
            stopping = true;
            `return 0;
        }

        bytes_read := read(STDIN_FILENO, bufferb.data, bufferb.count-1);
        if bytes_read > 0 {
            str := to_string(bufferb.data, bytes_read-1); // -1 the '\n' char
            while str {
                found, msg, rest := split_from_left(str, #char "\n");
                // Compare one line at a time. Comparing the whole remaining buffer (as
                // before) missed every command that was followed by another one in the
                // same read, e.g. "isready\nstop".
                line := ifx found then msg else str;

                if equal(line, "isready") {
                    print("readyok\n");
                }

                if equal(line, "stop") {
                    stopping = true;
                    `return 0;
                }

                if equal(line, "quit") {
                    exit(0);
                }

                str = rest;
                if !found break;
            }
        }
    }
}


// True once a stop has been requested (see read_input).
stop :: () -> bool #expand {
    return stopping == true;
}

#scope_file
buffera: [4096] u8;
bufferb: [4096] u8;
stopping: bool = false;

#import "POSIX";
#import "Basic";
#import "String";
#import "System"; // For get_path_of_running_executable.

--------------------------------------------------------------------------------
/macos.jai:
--------------------------------------------------------------------------------
// contains mac OS specific code.
// this is just copy/paste the linux code as macos.

EXE :: ""; // nothing.
NEWLINE :: "\n";

OS :: struct {}

// defines the AI uci message loop.
// Iteration macro: puts stdin into non-blocking mode, then repeatedly reads a chunk of
// input and runs `loop_body` once per newline-terminated line, with `it` bound to the
// line (without the newline). `break` inside the body leaves the whole loop.
// NOTE(review): text after the last newline of a chunk is dropped, so a command split
// across two reads is lost.
getline :: (os: *OS, loop_body: Code, flags: For_Flags) #expand {

    nonblock_stdin :: () #expand {
        flags := fcntl(STDIN_FILENO, F_GETFL, 0) | O_NONBLOCK;
        fcntl(STDIN_FILENO, F_SETFL, flags);
    }

    nonblock_stdin();
    `it_index := 0;
    while outer := true {
        stopping = false;
        memset(buffera.data, 0, size_of(type_of(buffera)));
        memset(bufferb.data, 0, size_of(type_of(bufferb)));
        bytes_read := read(STDIN_FILENO, buffera.data, buffera.count-1);
        // < 0: nothing available yet (or an error); == 0: end of file.
        // Sleep in both cases so that a closed stdin does not turn this into a busy loop.
        if bytes_read <= 0 {
            sleep_milliseconds(25);
            continue;
        }

        messages := to_string(buffera.data, bytes_read);
        while messages {
            found, `it, rest := split_from_left(messages, #char "\n");
            if !found break;
            #insert (break=break outer) loop_body;
            messages = rest;
        }
    }
}

// Polled from the search. Makes the *calling* procedure `return 0` when the search must
// stop: a stop is already pending, the node budget is exhausted, the move time is used
// up, or a "stop" command arrived on stdin. The budget, clock and stdin are only looked
// at when the low 13 bits of `nodes` are all set.
//   main_thread - only the main thread checks the clock and stdin.
//   nodes       - node counter of the caller.
//   maxnodes    - node budget.
//   time_begin  - start time in seconds, on the same clock as get_time().
//   movetime    - time budget in milliseconds.
read_input :: (main_thread: bool, nodes: int, maxnodes: int, time_begin: float64, movetime: int) #expand {
    if stopping == true then
        `return 0;

    if (nodes & 8191) == 8191 {
        if nodes >= maxnodes {
            stopping = true;
            `return 0;
        }

        if main_thread == false
            return;

        // (the original re-tested `(nodes & 8191) == 8191` here; it is always true at this point.)
        // NOTE(review): linux.jai calls seconds_since_init() here - confirm both use the same clock.
        time := get_time();
        elapsed_ms: int = xx (1000.0 * (time - time_begin));
        if elapsed_ms > movetime {
            stopping = true;
            `return 0;
        }

        bytes_read := read(STDIN_FILENO, bufferb.data, bufferb.count-1);
        if bytes_read > 0 {
            str := to_string(bufferb.data, bytes_read-1); // -1 the '\n' char
            while str {
                found, msg, rest := split_from_left(str, #char "\n");
                // Compare one line at a time. Comparing the whole remaining buffer (as
                // before) missed every command that was followed by another one in the
                // same read, e.g. "isready\nstop".
                line := ifx found then msg else str;

                if equal(line, "isready") {
                    print("readyok\n");
                }

                if equal(line, "stop") {
                    stopping = true;
                    `return 0;
                }

                if equal(line, "quit") {
                    exit(0);
                }

                str = rest;
                if !found break;
            }
        }
    }
}


// True once a stop has been requested (see read_input).
stop :: () -> bool #expand {
    return stopping == true;
}

#scope_file
buffera: [4096] u8;
bufferb: [4096] u8;
stopping: bool = false;

#import "POSIX";
#import "Basic";
#import "String";
#import "System"; // For get_path_of_running_executable.

--------------------------------------------------------------------------------
/nnue_avx2.jai:
--------------------------------------------------------------------------------
// Loads the default network while compiling; the build fails if it cannot be initialized.
#run {
    nnue_default :: "resources/nn-04cf2b4ed1da.nnue";
    if nnue_init(nnue_default) {
        print("NNUE % initialized\n", nnue_default);
    } else {
        assert(false, "Error. Neural Network is not initialized.\n");
    }
}

nnue_startup :: () #expand {} // initialization is done at compile time.
11 | 12 | nnue_init :: (file_name: string) -> bool { 13 | 14 | read_hidden_weights :: (weight: []s8, dims: int, d: *s8) -> *s8 { 15 | 16 | wt_idx :: (r: u32, c: u32, dims: int) -> u32 { 17 | if dims > 32 { 18 | b: u32 = c & 0x18; 19 | b = (b << 1) | (b >> 1); 20 | c = xx ((c & ~0x18) | (b & 0x18)); 21 | } 22 | return c * 32 + r; 23 | } 24 | 25 | i := 0; 26 | for r: 0..cast(u32)31 { 27 | for c: 0..cast(u32)(dims-1) { 28 | index := wt_idx(r, c, dims); 29 | weight[index] = d.*; 30 | d += 1; 31 | } 32 | } 33 | 34 | return d; 35 | } 36 | 37 | read_output_weights :: (weight: []s8, data: *s8) { 38 | for i: 0..31 { 39 | weight[i] = << data; 40 | data += 1; 41 | } 42 | } 43 | 44 | permute_biases :: (biases: *s32) #expand { 45 | rdi := biases; 46 | // translated from godbolt's clang -O3 assembly language output. 47 | #asm AVX { 48 | movdqa.x xmm0: vec, [rdi+16]; 49 | movdqa.x xmm1: vec, [rdi+32]; 50 | movdqa.x xmm2: vec, [rdi+48]; 51 | movdqa.x xmm3: vec, [rdi+64]; 52 | movdqa.x xmm4: vec, [rdi+80]; 53 | movdqa.x xmm5: vec, [rdi+96]; 54 | 55 | movdqa.x [rdi+16], xmm3; 56 | movdqa.x [rdi+32], xmm0; 57 | movdqa.x [rdi+48], xmm4; 58 | movdqa.x [rdi+64], xmm1; 59 | movdqa.x [rdi+80], xmm5; 60 | movdqa.x [rdi+96], xmm2; 61 | } 62 | } 63 | verify_file :: (buffer: [] u8) -> bool { 64 | if buffer.count != 21022697 then 65 | return false; 66 | d := buffer.data; 67 | if < int { 147 | a_nnue: [3] *NNUEdata; 148 | a_nnue[0] = null; 149 | a_nnue[1] = null; 150 | a_nnue[2] = null; 151 | 152 | i := 0; 153 | while i<3 && chess.ply >= i { 154 | a_nnue[i] = *chess.nnue[chess.ply - i]; 155 | i += 1; 156 | } 157 | 158 | using chess.chess; 159 | return nnue_evaluate_pos(chess, a_nnue); 160 | } 161 | 162 | nnue_evaluate_board :: (chess: Chess) -> int { 163 | nnue: NNUEdata #align 32; 164 | nnue.accumulator.computedAccumulation = 0; 165 | nnue_data: [3] *NNUEdata; 166 | nnue_data[0] = *nnue; 167 | nnue_data[1] = null; 168 | nnue_data[2] = null; 169 | return nnue_evaluate_pos(*chess, nnue_data); 
170 | } 171 | 172 | DirtyPiece :: struct { 173 | dirtyNum: s32; 174 | pc : [3] s32; 175 | from : [3] s32; 176 | to : [3] s32; 177 | } 178 | 179 | Accumulator :: struct { 180 | padding: [1088] u8; 181 | #place padding; 182 | accumulation: [2][256] s16 #align 64; 183 | computedAccumulation: s32; 184 | } 185 | 186 | NNUEdata :: struct { 187 | padding: [1152] u8; 188 | #place padding; 189 | accumulator: Accumulator; 190 | dirtyPiece: DirtyPiece; 191 | } 192 | 193 | #scope_file 194 | NNUE_Model :: struct { 195 | // features: 196 | ft_biases: [kHalfDimensions] s16 #align 64; 197 | ft_weights: [kHalfDimensions*FtInDims] s16 #align 64; 198 | 199 | // weights: 200 | hidden1_weights: [64*512] s8 #align 64; 201 | hidden2_weights: [64*32] s8 #align 64; 202 | output_weights: [1*32] s8 #align 64; 203 | 204 | // biases: 205 | hidden1_biases: [32] s32 #align 64; 206 | hidden2_biases: [32] s32 #align 64; 207 | output_biases : [1] s32 #align 64; 208 | } 209 | 210 | using #no_reset nnue_model: NNUE_Model #align 64; 211 | 212 | // dimensions 213 | kHalfDimensions :: 256; 214 | FtInDims :: 64*PS_END; // 63 * 641 215 | FtOutDims :: kHalfDimensions*2; 216 | NnueVersion : u32 : 0x7AF32F16; 217 | TransformerStart :: 3*4 + 177; 218 | NetworkStart :: TransformerStart+4 + 2*256 + 2*256*64*641; 219 | 220 | IndexList :: struct { 221 | size: s32; 222 | values: [30] s32; 223 | } 224 | 225 | nnue_evaluate_pos :: (chess: *Chess, nnue: [3] *NNUEdata) -> s32 { 226 | Data :: struct { 227 | input: [FtOutDims] s8; 228 | hidden1_out: [32] s8; 229 | hidden2_out: [32] s8; 230 | } 231 | 232 | input_mask: [FtOutDims / (8 * size_of(u32)) ] u32; 233 | hidden1_mask: [8 / size_of(u32)] u32; 234 | using data: Data #align 32; 235 | 236 | transform(chess, nnue, *input[0], *input_mask[0]); 237 | affine_txfm(*input[0], *hidden1_out[0], FtOutDims, 32, *hidden1_biases[0], *hidden1_weights[0], *input_mask[0], *hidden1_mask[0], true); 238 | affine_txfm(*hidden1_out[0], *hidden2_out[0], 32, 32, *hidden2_biases[0], 
*hidden2_weights[0], *hidden1_mask[0], null, false); 239 | out_value := inline affine_propagate(*hidden2_out[0], output_biases[0], *output_weights[0]); 240 | FV_SCALE :: 16; 241 | return out_value / FV_SCALE; 242 | } 243 | 244 | m256 :: union { 245 | i8x32 : [32] s8; 246 | i16x16: [16] s16; 247 | i32x8 : [8] s32; 248 | i64x4 : [4] s64; 249 | } 250 | 251 | update_accumulator :: (chess: *Chess, nnue: [3] *NNUEdata) -> bool { 252 | 253 | acc_if :: inline (prevAcc: **Accumulator, nnue: *NNUEdata) -> bool { 254 | if !nnue then 255 | return true; 256 | prevAcc.* = *nnue.accumulator; 257 | return !prevAcc.*.computedAccumulation; 258 | } 259 | 260 | accumulator := *nnue[0].accumulator; 261 | if accumulator.computedAccumulation then 262 | return true; 263 | prevAcc: *Accumulator = null; 264 | if acc_if(*prevAcc, nnue[1]) && acc_if(*prevAcc, nnue[2]) then 265 | return false; 266 | removed_indices: [2] IndexList; 267 | added_indices: [2] IndexList; 268 | reset: [2] bool; 269 | removed_indices[0].size = 0; 270 | removed_indices[1].size = 0; 271 | added_indices[0].size = 0; 272 | added_indices[1].size = 0; 273 | append_changed_indices(chess, nnue, removed_indices, added_indices, reset); 274 | for c: 0..1 { 275 | accTile := *accumulator.accumulation[c][0]; 276 | r := reset[c]; 277 | tile := ifx r then *ft_biases[0] else *prevAcc.accumulation[c][0]; 278 | #asm AVX, AVX2 { 279 | movdqa.y ymm0: vec, [tile + 0x000]; 280 | movdqa.y ymm1: vec, [tile + 0x020]; 281 | movdqa.y ymm2: vec, [tile + 0x040]; 282 | movdqa.y ymm3: vec, [tile + 0x060]; 283 | movdqa.y ymm4: vec, [tile + 0x080]; 284 | movdqa.y ymm5: vec, [tile + 0x0a0]; 285 | movdqa.y ymm6: vec, [tile + 0x0c0]; 286 | movdqa.y ymm7: vec, [tile + 0x0e0]; 287 | movdqa.y ymm8: vec, [tile + 0x100]; 288 | movdqa.y ymm9: vec, [tile + 0x120]; 289 | movdqa.y ymm10: vec, [tile + 0x140]; 290 | movdqa.y ymm11: vec, [tile + 0x160]; 291 | movdqa.y ymm12: vec, [tile + 0x180]; 292 | movdqa.y ymm13: vec, [tile + 0x1a0]; 293 | movdqa.y ymm14: vec, 
[tile + 0x1c0]; 294 | movdqa.y ymm15: vec, [tile + 0x1e0]; 295 | } 296 | if r == false { 297 | // Difference calculation for the deactivated features 298 | for k: 0..removed_indices[c].size-1 { 299 | index := removed_indices[c].values[k] * kHalfDimensions; 300 | subtile := *ft_weights[index]; 301 | #asm AVX, AVX2 { 302 | psubw.y ymm0, ymm0, [subtile + 0x000]; 303 | psubw.y ymm1, ymm1, [subtile + 0x020]; 304 | psubw.y ymm2, ymm2, [subtile + 0x040]; 305 | psubw.y ymm3, ymm3, [subtile + 0x060]; 306 | psubw.y ymm4, ymm4, [subtile + 0x080]; 307 | psubw.y ymm5, ymm5, [subtile + 0x0a0]; 308 | psubw.y ymm6, ymm6, [subtile + 0x0c0]; 309 | psubw.y ymm7, ymm7, [subtile + 0x0e0]; 310 | psubw.y ymm8, ymm8, [subtile + 0x100]; 311 | psubw.y ymm9, ymm9, [subtile + 0x120]; 312 | psubw.y ymm10, ymm10, [subtile + 0x140]; 313 | psubw.y ymm11, ymm11, [subtile + 0x160]; 314 | psubw.y ymm12, ymm12, [subtile + 0x180]; 315 | psubw.y ymm13, ymm13, [subtile + 0x1a0]; 316 | psubw.y ymm14, ymm14, [subtile + 0x1c0]; 317 | psubw.y ymm15, ymm15, [subtile + 0x1e0]; 318 | } 319 | } 320 | } 321 | 322 | // Difference calculation for the activated features 323 | for k: 0..added_indices[c].size-1 { 324 | index := added_indices[c].values[k] * kHalfDimensions; 325 | addtile := *ft_weights[index]; 326 | #asm AVX, AVX2 { 327 | paddw.y ymm0, ymm0, [addtile + 0x000]; 328 | paddw.y ymm1, ymm1, [addtile + 0x020]; 329 | paddw.y ymm2, ymm2, [addtile + 0x040]; 330 | paddw.y ymm3, ymm3, [addtile + 0x060]; 331 | paddw.y ymm4, ymm4, [addtile + 0x080]; 332 | paddw.y ymm5, ymm5, [addtile + 0x0a0]; 333 | paddw.y ymm6, ymm6, [addtile + 0x0c0]; 334 | paddw.y ymm7, ymm7, [addtile + 0x0e0]; 335 | paddw.y ymm8, ymm8, [addtile + 0x100]; 336 | paddw.y ymm9, ymm9, [addtile + 0x120]; 337 | paddw.y ymm10, ymm10, [addtile + 0x140]; 338 | paddw.y ymm11, ymm11, [addtile + 0x160]; 339 | paddw.y ymm12, ymm12, [addtile + 0x180]; 340 | paddw.y ymm13, ymm13, [addtile + 0x1a0]; 341 | paddw.y ymm14, ymm14, [addtile + 0x1c0]; 342 | paddw.y 
ymm15, ymm15, [addtile + 0x1e0]; 343 | } 344 | } 345 | 346 | #asm AVX, AVX2 { 347 | movdqa.y [accTile + 0x000],ymm0; 348 | movdqa.y [accTile + 0x020],ymm1; 349 | movdqa.y [accTile + 0x040],ymm2; 350 | movdqa.y [accTile + 0x060],ymm3; 351 | movdqa.y [accTile + 0x080],ymm4; 352 | movdqa.y [accTile + 0x0a0],ymm5; 353 | movdqa.y [accTile + 0x0c0],ymm6; 354 | movdqa.y [accTile + 0x0e0],ymm7; 355 | movdqa.y [accTile + 0x100],ymm8; 356 | movdqa.y [accTile + 0x120],ymm9; 357 | movdqa.y [accTile + 0x140],ymm10; 358 | movdqa.y [accTile + 0x160],ymm11; 359 | movdqa.y [accTile + 0x180],ymm12; 360 | movdqa.y [accTile + 0x1a0],ymm13; 361 | movdqa.y [accTile + 0x1c0],ymm14; 362 | movdqa.y [accTile + 0x1e0],ymm15; 363 | } 364 | } 365 | 366 | accumulator.computedAccumulation = 1; 367 | return true; 368 | } 369 | // Rebuilds nnue[0]'s accumulator from scratch: for each perspective c it starts from ft_biases, adds the ft_weights row of every active feature, stores the result and sets computedAccumulation. 370 | refresh_accumulator :: (chess: *Chess, nnue: [3] *NNUEdata) { 371 | accumulator := *(nnue[0].accumulator); 372 | activeIndices: [2] IndexList; 373 | activeIndices[0].size = 0; 374 | activeIndices[1].size = 0; 375 | append_active_indices(chess, activeIndices); 376 | 377 | biases := *ft_biases[0]; 378 | for c: 0..1 { 379 | // Load the biases into 16 registers (16 x 32 bytes = 256 s16 values) as the starting sums. 380 | acc := *accumulator.accumulation[c][0]; 381 | #asm AVX, AVX2 { 382 | movdqa.y ymm0: vec, [biases + 0x000]; 383 | movdqa.y ymm1: vec, [biases + 0x020]; 384 | movdqa.y ymm2: vec, [biases + 0x040]; 385 | movdqa.y ymm3: vec, [biases + 0x060]; 386 | movdqa.y ymm4: vec, [biases + 0x080]; 387 | movdqa.y ymm5: vec, [biases + 0x0a0]; 388 | movdqa.y ymm6: vec, [biases + 0x0c0]; 389 | movdqa.y ymm7: vec, [biases + 0x0e0]; 390 | movdqa.y ymm8: vec, [biases + 0x100]; 391 | movdqa.y ymm9: vec, [biases + 0x120]; 392 | movdqa.y ymm10: vec, [biases + 0x140]; 393 | movdqa.y ymm11: vec, [biases + 0x160]; 394 | movdqa.y ymm12: vec, [biases + 0x180]; 395 | movdqa.y ymm13: vec, [biases + 0x1a0]; 396 | movdqa.y ymm14: vec, [biases + 0x1c0]; 397 | movdqa.y ymm15: vec, [biases + 0x1e0]; 398 | } 399 | // Add the weight row of every active feature of this perspective. 400 | for k: 0..activeIndices[c].size-1 { 401 | index :=
kHalfDimensions * activeIndices[c].values[k]; 402 | tile_add := *ft_weights[index]; 403 | #asm AVX, AVX2 { 404 | paddw.y ymm0, ymm0, [tile_add + 0x000]; 405 | paddw.y ymm1, ymm1, [tile_add + 0x020]; 406 | paddw.y ymm2, ymm2, [tile_add + 0x040]; 407 | paddw.y ymm3, ymm3, [tile_add + 0x060]; 408 | paddw.y ymm4, ymm4, [tile_add + 0x080]; 409 | paddw.y ymm5, ymm5, [tile_add + 0x0a0]; 410 | paddw.y ymm6, ymm6, [tile_add + 0x0c0]; 411 | paddw.y ymm7, ymm7, [tile_add + 0x0e0]; 412 | paddw.y ymm8, ymm8, [tile_add + 0x100]; 413 | paddw.y ymm9, ymm9, [tile_add + 0x120]; 414 | paddw.y ymm10, ymm10, [tile_add + 0x140]; 415 | paddw.y ymm11, ymm11, [tile_add + 0x160]; 416 | paddw.y ymm12, ymm12, [tile_add + 0x180]; 417 | paddw.y ymm13, ymm13, [tile_add + 0x1a0]; 418 | paddw.y ymm14, ymm14, [tile_add + 0x1c0]; 419 | paddw.y ymm15, ymm15, [tile_add + 0x1e0]; 420 | } 421 | } 422 | #asm AVX, AVX2 { 423 | movdqa.y [acc + 0x000],ymm0; 424 | movdqa.y [acc + 0x020],ymm1; 425 | movdqa.y [acc + 0x040],ymm2; 426 | movdqa.y [acc + 0x060],ymm3; 427 | movdqa.y [acc + 0x080],ymm4; 428 | movdqa.y [acc + 0x0a0],ymm5; 429 | movdqa.y [acc + 0x0c0],ymm6; 430 | movdqa.y [acc + 0x0e0],ymm7; 431 | movdqa.y [acc + 0x100],ymm8; 432 | movdqa.y [acc + 0x120],ymm9; 433 | movdqa.y [acc + 0x140],ymm10; 434 | movdqa.y [acc + 0x160],ymm11; 435 | movdqa.y [acc + 0x180],ymm12; 436 | movdqa.y [acc + 0x1a0],ymm13; 437 | movdqa.y [acc + 0x1c0],ymm14; 438 | movdqa.y [acc + 0x1e0],ymm15; 439 | } 440 | } 441 | // Flag the accumulator as computed. 442 | accumulator.computedAccumulation = 1; 443 | } 444 | // Collects the active feature indices for both perspectives: active[0] is built around chess.w_king, active[1] around chess.b_king. 445 | append_active_indices :: (chess: *Chess, active: [] IndexList) { 446 | half_kp_append_active_indices(chess, chess.w_king, 0, *active[0]); 447 | half_kp_append_active_indices(chess, chess.b_king, 1, *active[1]); 448 | } 449 | // Fills removed/added with the feature changes for nnue[0]. When nnue[1] is already computed only nnue[0]'s dirty pieces are used; otherwise the dirty pieces of nnue[0] and nnue[1] are combined. reset[c] is set when a first dirty piece has code 1 (perspective 0) or 7 (perspective 1) - presumably that side's king - and then added[c] receives the full active list instead. 450 | append_changed_indices :: (chess: *Chess, nnue: [3] *NNUEdata, removed: [] IndexList, added: [] IndexList, reset: [] bool) { 451 | dp := *nnue[0].dirtyPiece; 452 | if nnue[1].accumulator.computedAccumulation then { 453 | {
454 | king := chess.w_king; 455 | ksq := cast(s32) bsf(king); 456 | reset[0] = dp.pc[0] == 1; 457 | if reset[0] then { 458 | half_kp_append_active_indices(chess, king, 0, *added[0]); 459 | } else { 460 | half_kp_append_changed_indices(ksq, 0, dp, *removed[0], *added[0]); 461 | } 462 | } 463 | { 464 | king := chess.b_king; 465 | ksq := cast(s32) bsf(king); 466 | reset[1] = dp.pc[0] == 7; 467 | if reset[1] then { 468 | half_kp_append_active_indices(chess, king, 1, *added[1]); 469 | } else { 470 | half_kp_append_changed_indices(ksq, 1, dp, *removed[1], *added[1]); 471 | } 472 | } 473 | } else { 474 | dp2 := *nnue[1].dirtyPiece; 475 | { 476 | king := chess.w_king; 477 | ksq := cast(s32) bsf(king); 478 | reset[0] = dp.pc[0] == 1 || dp2.pc[0] == 1; 479 | if reset[0] then { 480 | half_kp_append_active_indices(chess, king, 0, *added[0]); 481 | } else { 482 | half_kp_append_changed_indices(ksq, 0, dp, *removed[0], *added[0]); 483 | half_kp_append_changed_indices(ksq, 0, dp2, *removed[0], *added[0]); 484 | } 485 | } 486 | // Same two-step handling for perspective 1 (black king, piece code 7). 487 | { 488 | king := chess.b_king; 489 | ksq := cast(s32) bsf(king); 490 | reset[1] = dp.pc[0] == 7 || dp2.pc[0] == 7; 491 | if reset[1] then { 492 | half_kp_append_active_indices(chess, king, 1, *added[1]); 493 | } else { 494 | half_kp_append_changed_indices(ksq, 1, dp, *removed[1], *added[1]); 495 | half_kp_append_changed_indices(ksq, 1, dp2, *removed[1], *added[1]); 496 | } 497 | } 498 | } 499 | } 500 | // Appends one feature index to active for every occupied square except the two king squares, relative to the given king as seen from perspective c. No bounds check is made against the capacity of active.values. 501 | half_kp_append_active_indices :: (chess: *Chess, king: u64, c: s32, active: *IndexList) { 502 | ksq := cast(s32) bsf(king); 503 | ksq = orient(c, ksq) * PS_END; 504 | occupied := chess.occupied; 505 | kings := chess.w_king | chess.b_king; 506 | occupied ^= kings; 507 | while occupied { 508 | sq := cast(s32) bsf(occupied); 509 | occupied &= occupied - 1; 510 | pc := cast(s32) chess.pieces[sq]; 511 | active.values[active.size] = make_index(xx c, sq, pc, ksq); 512 | active.size += 1; 513 | } 514 | } 515 | // Bit index of the lowest set bit of value, via the x86 BSF instruction. NOTE(review): BSF leaves its destination undefined for a zero source - confirm callers never pass 0. 516 | bsf :: (value: u64) -> int #expand { 517
| result: int = 0; 518 | #asm { bsf.q result, value; } 519 | return result; 520 | } 521 | // Turns the entries of dp into feature changes for perspective c: every dirty piece other than codes 1 and 7 adds a removed index for its from square and an added index for its to square. A square value of 64 is skipped (it appears to mean "no square"). 522 | half_kp_append_changed_indices :: (ksq: s32, c: s32, dp: DirtyPiece, removed: *IndexList, added: *IndexList) { 523 | ksq = orient(c, ksq) * PS_END; 524 | num := dp.dirtyNum - 1; 525 | for i: 0..num { 526 | pc := dp.pc[i]; 527 | if pc == 1 || pc == 7 continue; 528 | from := dp.from[i]; 529 | to := dp.to[i]; 530 | if from != 64 then { 531 | removed.values[removed.size] = make_index(c, from, pc, ksq); 532 | removed.size += 1; 533 | } 534 | 535 | if to != 64 then { 536 | added.values[added.size] = make_index(c, to, pc, ksq); 537 | added.size += 1; 538 | } 539 | } 540 | } 541 | // Feature index = oriented square + piece plane offset + king offset; callers pass ksq already multiplied by PS_END. 542 | make_index :: (c: s32, s: s32, pc: s32, ksq: s32) -> s32 #expand { 543 | return orient(c, s) + PieceToIndex[c][pc] + ksq; 544 | } 545 | // Returns s unchanged for perspective 0 and s ^ 0x3F for any other perspective. 546 | orient :: (c: s32, s: s32) -> s32 { 547 | if c == 0 { 548 | return s; 549 | } else { 550 | return s ^ 0x3F; 551 | } 552 | } 553 | // Start offsets of the 64-entry plane of each piece type; PS_END (641) is the stride between king squares. 554 | PS_W_PAWN :: 1; 555 | PS_B_PAWN :: 1*64 + 1; 556 | PS_W_KNIGHT :: 2*64 + 1; 557 | PS_B_KNIGHT :: 3*64 + 1; 558 | PS_W_BISHOP :: 4*64 + 1; 559 | PS_B_BISHOP :: 5*64 + 1; 560 | PS_W_ROOK :: 6*64 + 1; 561 | PS_B_ROOK :: 7*64 + 1; 562 | PS_W_QUEEN :: 8*64 + 1; 563 | PS_B_QUEEN :: 9*64 + 1; 564 | PS_END :: 10*64 + 1; 565 | // Piece code -> plane offset, one row per perspective; row 1 swaps the white and black planes. Codes 0, 1, 7 and 13 map to 0. 566 | PieceToIndex: [2][14] s32 = .[ 567 | s32.[0, 0, PS_W_QUEEN, PS_W_ROOK, PS_W_BISHOP, PS_W_KNIGHT, PS_W_PAWN, 568 | 0, PS_B_QUEEN, PS_B_ROOK, PS_B_BISHOP, PS_B_KNIGHT, PS_B_PAWN, 0], 569 | s32.[ 0, 0, PS_B_QUEEN, PS_B_ROOK, PS_B_BISHOP, PS_B_KNIGHT, PS_B_PAWN, 570 | 0, PS_W_QUEEN, PS_W_ROOK, PS_W_BISHOP, PS_W_KNIGHT, PS_W_PAWN, 0] 571 | ]; 572 | // Brings nnue[0]'s accumulator up to date (incremental update, else full refresh), then packs both perspectives - side to move first - into s8 values at output and writes a bit mask of the bytes that are > 0 to out_mask. 573 | transform :: (chess: *Chess, nnue: [3] *NNUEdata, output: *s8, out_mask: *u32) { 574 | if !update_accumulator(chess, nnue) then 575 | refresh_accumulator(chess, nnue); 576 | accumulation: [][256] s16 = nnue[0].accumulator.accumulation; 577 | 578 | turn := chess.turn; 579 | for p: 0..1 { 580 | tile := *accumulation[turn][0]; 581 | #asm AVX, AVX2 { 582 | val:
gpr; 583 | pxor.y zeroes: vec, zeroes, zeroes; 584 | movdqa.y ymm0: vec, [tile + 0x000]; 585 | packsswb.y ymm0, ymm0, [tile + 0x020]; 586 | movdqa.y ymm1: vec, [tile + 0x040]; 587 | packsswb.y ymm1, ymm1, [tile + 0x060]; 588 | movdqa.y ymm2: vec, [tile + 0x080]; 589 | packsswb.y ymm2, ymm2, [tile + 0x0a0]; 590 | movdqa.y ymm3: vec, [tile + 0x0c0]; 591 | packsswb.y ymm3, ymm3, [tile + 0x0e0]; 592 | movdqa.y ymm4: vec, [tile + 0x100]; 593 | packsswb.y ymm4, ymm4, [tile + 0x120]; 594 | movdqa.y ymm5: vec, [tile + 0x140]; 595 | packsswb.y ymm5, ymm5, [tile + 0x160]; 596 | movdqa.y ymm6: vec, [tile + 0x180]; 597 | packsswb.y ymm6, ymm6, [tile + 0x1a0]; 598 | movdqa.y ymm7: vec, [tile + 0x1c0]; 599 | packsswb.y ymm7, ymm7, [tile + 0x1e0]; 600 | // Store the saturated s8 values, then build a mask of the bytes that are > 0 (32 mask bits per register). 601 | movdqa.y [output + 0x000], ymm0; 602 | movdqa.y [output + 0x020], ymm1; 603 | movdqa.y [output + 0x040], ymm2; 604 | movdqa.y [output + 0x060], ymm3; 605 | movdqa.y [output + 0x080], ymm4; 606 | movdqa.y [output + 0x0a0], ymm5; 607 | movdqa.y [output + 0x0c0], ymm6; 608 | movdqa.y [output + 0x0e0], ymm7; 609 | pcmpgtb.y ymm0, ymm0, zeroes; 610 | pcmpgtb.y ymm1, ymm1, zeroes; 611 | pcmpgtb.y ymm2, ymm2, zeroes; 612 | pcmpgtb.y ymm3, ymm3, zeroes; 613 | pcmpgtb.y ymm4, ymm4, zeroes; 614 | pcmpgtb.y ymm5, ymm5, zeroes; 615 | pcmpgtb.y ymm6, ymm6, zeroes; 616 | pcmpgtb.y ymm7, ymm7, zeroes; 617 | pmovmskb val, ymm0; 618 | mov.d [out_mask + 0x00], val; 619 | pmovmskb val, ymm1; 620 | mov.d [out_mask + 0x04], val; 621 | pmovmskb val, ymm2; 622 | mov.d [out_mask + 0x08], val; 623 | pmovmskb val, ymm3; 624 | mov.d [out_mask + 0x0c], val; 625 | pmovmskb val, ymm4; 626 | mov.d [out_mask + 0x10], val; 627 | pmovmskb val, ymm5; 628 | mov.d [out_mask + 0x14], val; 629 | pmovmskb val, ymm6; 630 | mov.d [out_mask + 0x18], val; 631 | pmovmskb val, ymm7; 632 | mov.d [out_mask + 0x1c], val; 633 | // Step to the other perspective's half: 0x100 output bytes and 0x20 mask bytes. 634 | add output, 0x100; 635 | add out_mask, 0x20; 636 | } 637 | 638 | turn ^= 1; 639 | } 640 | } 641 | // Sparse affine layer with 32 outputs: starting from biases, it accumulates weights * input for the inputs whose bit is set in in_mask (two inputs per iteration) and narrows the sums to s8. With pack8_and_calc_mask the s8 result is stored as-is and a "byte > 0" mask is written to out_mask; otherwise the result is clamped at zero before it is stored. 642 | affine_txfm :: (input: *s8, output:
*s8, inDims: u32, outDims: u32, biases: *s32, weights: *s8, in_mask: *u32, out_mask: *u32, pack8_and_calc_mask: bool) #expand { 643 | // next_idx sets idx to the next set bit of the input mask and clears that bit in v; it returns false once no set bit is left. With pack8_and_calc_mask the mask is walked one 64-bit word at a time until offset reaches inDims; otherwise only the first 64-bit word is examined. 644 | // mask2_t = u64 645 | next_idx :: () -> bool #expand { 646 | #if pack8_and_calc_mask{ 647 | while v == 0 { 648 | offset += 8 * size_of(u64); 649 | if offset >= inDims then 650 | return false; 651 | v = << cast(*u64)((cast(*s8)in_mask) + (offset/8)); 652 | } 653 | 654 | idx = offset + bsf(v); 655 | v &= v - 1; 656 | return true; 657 | } else { 658 | if v == 0 then { 659 | return false; 660 | } 661 | 662 | idx = bsf(v); 663 | v &= v - 1; 664 | return true; 665 | } 666 | } 667 | 668 | // Start the four s32 accumulators from the 32 biases (4 x 32 bytes); kZero is an all-zero register. 669 | #asm AVX, AVX2 { 670 | movdqa.y out_0: vec, [biases + 0]; 671 | movdqa.y out_1: vec, [biases + 32]; 672 | movdqa.y out_2: vec, [biases + 64]; 673 | movdqa.y out_3: vec, [biases + 96]; 674 | pxor.y kZero: vec, kZero, kZero; 675 | } 676 | 677 | // translated from => memcpy(&v, inMask, sizeof(mask2_t)); 678 | v := << cast(*u64)in_mask; 679 | idx: int = 0; 680 | offset: int = 0; 681 | while offset < inDims { 682 | if !next_idx() break; 683 | weights_data := *(cast(*m256)weights)[idx]; 684 | #asm AVX, AVX2 { 685 | // first = the 32 weights of input idx; second is declared here and filled in below.
686 | movdqa.y first: vec, [weights_data]; 687 | second: vec; 688 | } 689 | factor: s16 = input[idx]; 690 | if next_idx() { 691 | weights_data := *(cast(*m256)weights)[idx]; 692 | val: s16 = cast(s16) input[idx]; 693 | factor |= val << 8; 694 | #asm AVX, AVX2 { 695 | movdqa.y second, [weights_data]; 696 | } 697 | } else { 698 | #asm AVX, AVX2 { 699 | pxor.y second, second, second; 700 | } 701 | } 702 | // factor now holds two input bytes (low byte: first input, high byte: second input or 0), matching the byte-interleaved first/second weights below. 703 | #asm AVX, AVX2 { 704 | // __m256i mul = _mm256_set1_epi16(factor), prod, signs; 705 | // __m256i prod = _mm256_maddubs_epi16(mul, _mm256_unpacklo_epi8(first, second)); 706 | // __m256i signs = _mm256_cmpgt_epi16(kZero, prod); 707 | // out_0 = _mm256_add_epi32(out_0, _mm256_unpacklo_epi16(prod, signs)); 708 | // out_1 = _mm256_add_epi32(out_1, _mm256_unpackhi_epi16(prod, signs)); 709 | // prod = _mm256_maddubs_epi16(mul, _mm256_unpackhi_epi8(first, second)); 710 | // signs = _mm256_cmpgt_epi16(kZero, prod); 711 | // out_2 = _mm256_add_epi32(out_2, _mm256_unpacklo_epi16(prod, signs)); 712 | // out_3 = _mm256_add_epi32(out_3, _mm256_unpackhi_epi16(prod, signs)); 713 | // Instruction-level translation of the intrinsics above; the register named xmm1 is used as a 256-bit temporary. 714 | movd mul: vec, factor; 715 | pbroadcastw.y mul, mul; 716 | punpcklbw.y prod: vec, first, second; 717 | pmaddubsw.y prod, mul, prod; 718 | pcmpgtw.y signs: vec, kZero, prod; 719 | punpcklwd.y xmm1: vec, prod, signs; 720 | paddd.y out_0, out_0, xmm1; 721 | punpckhwd.y xmm1, prod, signs; 722 | paddd.y out_1, out_1, xmm1; 723 | punpckhbw.y xmm1, first, second; 724 | pmaddubsw.y prod, mul, xmm1; 725 | pcmpgtw.y signs, kZero, prod; 726 | punpcklwd.y xmm1, prod, signs; 727 | paddd.y out_2, out_2, xmm1; 728 | punpckhwd.y xmm1, prod, signs; 729 | paddd.y out_3, out_3, xmm1; 730 | } 731 | } 732 | // Narrow the four s32 accumulators down to a single register of s8 values. 733 | #asm AVX, AVX2 { 734 | // __m256i out16_0 = _mm256_srai_epi16(_mm256_packs_epi32(out_0, out_1), SHIFT); 735 | // __m256i out16_1 = _mm256_srai_epi16(_mm256_packs_epi32(out_2, out_3), SHIFT); 736 | // __m256i *outVec = (__m256i *)output; 737 | // outVec[0] = _mm256_packs_epi16(out16_0, out16_1); 738 | 739 |
packssdw.y out_0, out_0, out_1; 740 | packssdw.y out_1, out_2, out_3; 741 | psraw.y out_0, out_0, 6; 742 | psraw.y out_1, out_1, 6; 743 | packsswb.y out_0, out_0, out_1; 744 | } 745 | // Compile-time choice: store the s8 result together with its "byte > 0" mask, or clamp it at zero and store it. 746 | #if pack8_and_calc_mask then { 747 | #asm AVX, AVX2 { 748 | // outMask[0] = _mm256_movemask_epi8(_mm256_cmpgt_epi8(outVec[0], kZero)); 749 | movdqa.y [output], out_0; 750 | pcmpgtb.y out_0, out_0, kZero; 751 | pmovmskb reg: gpr, out_0; 752 | mov.d [out_mask], reg; 753 | } 754 | } else { 755 | #asm AVX, AVX2 { 756 | // outVec[0] = _mm256_max_epi8(outVec[0], kZero); 757 | pmaxsb.y out_0, out_0, kZero; 758 | movdqa.y [output], out_0; 759 | } 760 | } 761 | } 762 | // Output layer: dot product of the 32 bytes at input with the 32 bytes at weights, plus biases. 763 | affine_propagate :: (input: *s8, biases: s32, weights: *s8) -> s32 #expand { 764 | eax: s32 = ---; 765 | #asm AVX, AVX2 { 766 | // __m256i prod = _mm256_maddubs_epi16(iv[0], row[0]); 767 | // prod = _mm256_madd_epi16(prod, _mm256_set1_epi16(1)); 768 | // __m128i sum = _mm_add_epi32(_mm256_castsi256_si128(prod), _mm256_extracti128_si256(prod, 1)); 769 | // sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x1b)); 770 | // return _mm_cvtsi128_si32(sum) + _mm_extract_epi32(sum, 1) + biases[0]; 771 | 772 | mov eax, 1; 773 | movdqa.y prod: vec, [input]; 774 | pmaddubsw.y prod, prod, [weights]; 775 | movd xmm0: vec, eax; 776 | pbroadcastw xmm0, xmm0; // NOTE(review): no .y suffix here while the next pmaddwd.y reads 256 bits - confirm the broadcast also fills the upper 128 bits. 777 | pmaddwd.y prod, prod, xmm0; 778 | 779 | 780 | // Horizontal add: fold the upper 128 bits onto the lower, then combine the four s32 lanes. 781 | extracti128 xmm0, prod, 1; 782 | paddd.x sum: vec, prod, xmm0; 783 | pshufd xmm0, sum, 0x1b; 784 | paddd.x sum, sum, xmm0; 785 | movd eax, sum; 786 | pextrd val: gpr, sum, 1; 787 | add eax, val; 788 | add eax, biases; 789 | } 790 | 791 | return eax; 792 | } 793 | 794 | #import "Basic"; 795 | #import "File"; 796 | 797 | 798 | 799 | 800 | -------------------------------------------------------------------------------- /nnue_cpu.jai: -------------------------------------------------------------------------------- 1 | #run { 2 | nnue_default :: "resources/nn-04cf2b4ed1da.nnue"; 3 | if nnue_init(nnue_default) { 4 | print("NNUE
% initialized\n", nnue_default); 5 | } else { 6 | assert(false, "Error. Neural Network is not initialized.\n"); 7 | } 8 | } 9 | 10 | nnue_startup :: () #expand {} // initialization is done at compile time. 11 | 12 | nnue_init :: (file_name: string) -> bool { 13 | 14 | verify_file :: (buffer: [] u8) -> bool { 15 | if buffer.count != 21022697 then 16 | return false; 17 | d := buffer.data; 18 | if < *s8 { 32 | 33 | wt_idx :: (r: int, c: int, dims: int) -> int { 34 | return c * 32 + r; 35 | } 36 | i := 0; 37 | for r: 0..31 { 38 | for c: 0..dims-1 { 39 | index := wt_idx(r, c, dims); 40 | weight[index] = < int { 120 | a_nnue: [3] *NNUEdata; 121 | a_nnue[0] = null; 122 | a_nnue[1] = null; 123 | a_nnue[2] = null; 124 | 125 | i := 0; 126 | while i<3 && chess.ply >= i { 127 | a_nnue[i] = *chess.nnue[chess.ply - i]; 128 | i += 1; 129 | } 130 | 131 | using chess.chess; 132 | return nnue_evaluate_pos(chess, a_nnue); 133 | } 134 | 135 | nnue_evaluate_board :: (chess: Chess) -> int { 136 | nnue: NNUEdata; 137 | nnue.accumulator.computedAccumulation = 0; 138 | nnue_data: [3] *NNUEdata; 139 | nnue_data[0] = *nnue; 140 | nnue_data[1] = null; 141 | nnue_data[2] = null; 142 | return nnue_evaluate_pos(*chess, nnue_data); 143 | } 144 | 145 | DirtyPiece :: struct { 146 | dirtyNum: s32; 147 | pc : [3] s32; 148 | from : [3] s32; 149 | to : [3] s32; 150 | } 151 | 152 | Accumulator :: struct { 153 | padding: [1088] u8; 154 | #place padding; 155 | 156 | accumulation: [2][256] s16 #align 64; 157 | computedAccumulation: s32; 158 | } 159 | 160 | NNUEdata :: struct { 161 | padding: [1152] u8; 162 | #place padding; 163 | 164 | accumulator: Accumulator; 165 | dirtyPiece: DirtyPiece; 166 | } 167 | 168 | #scope_file 169 | NNUE_Model :: struct { 170 | // features: 171 | ft_biases: [kHalfDimensions] s16 #align 64; 172 | ft_weights: [kHalfDimensions*FtInDims] s16 #align 64; 173 | 174 | // weights: 175 | hidden1_weights: [64*512] s8 #align 64; 176 | hidden2_weights: [64*32] s8 #align 64; 177 | 
output_weights: [1*32] s8 #align 64; 178 | 179 | // biases: 180 | hidden1_biases: [32] s32 #align 64; 181 | hidden2_biases: [32] s32 #align 64; 182 | output_biases : [1] s32 #align 64; 183 | } 184 | // Single file-scope instance of the network parameters; `using` makes its fields (ft_biases, ft_weights, ...) directly accessible. 185 | #no_reset nnue_model: NNUE_Model #align 64; 186 | using nnue_model; 187 | 188 | // dimensions 189 | kHalfDimensions :: 256; 190 | FtInDims :: 64*PS_END; // 64 * 641 191 | FtOutDims :: kHalfDimensions*2; 192 | NnueVersion : u32 : 0x7AF32F16; 193 | TransformerStart :: 3*4 + 177; 194 | NetworkStart :: TransformerStart+4 + 2*256 + 2*256*64*641; 195 | // Fixed-capacity list of feature indices; size is the number of used entries in values. 196 | IndexList :: struct { 197 | size: s32; 198 | values: [30] s32; 199 | } 200 | // Evaluates the position: feature transform, two affine layers with 32 outputs each, then the output layer; the result is divided by FV_SCALE. 201 | nnue_evaluate_pos :: (chess: *Chess, nnue: [3] *NNUEdata) -> s32 { 202 | input_mask: [FtOutDims / (8 * size_of(u32)) ] u32 #align 8; 203 | hidden1_mask: [8 / size_of(u32)] u32 #align 8; 204 | FV_SCALE :: 16; 205 | input: [FtOutDims] s8 #align 16; 206 | hidden1_out: [32] s8 #align 16; 207 | hidden2_out: [32] s8 #align 16; 208 | transform(chess, nnue, *input[0], *input_mask[0]); 209 | affine_txfm(*input[0], *hidden1_out[0], FtOutDims, 32, *hidden1_biases[0], *hidden1_weights[0], *input_mask[0], *hidden1_mask[0], true); 210 | affine_txfm(*hidden1_out[0], *hidden2_out[0], 32, 32, *hidden2_biases[0], *hidden2_weights[0], *hidden1_mask[0], null, false); 211 | out_value := inline affine_propagate(*hidden2_out[0], output_biases[0], *output_weights[0]); 212 | return out_value / FV_SCALE; 213 | } 214 | // Incrementally updates nnue[0]'s accumulator from the nearest computed accumulator among nnue[1] and nnue[2]. Returns true when nnue[0] is (now) computed, false when neither predecessor is usable. 215 | update_accumulator :: (chess: *Chess, nnue: [3] *NNUEdata) -> bool { 216 | // acc_if points prevAcc at nnue's accumulator and returns true when that entry is null or not yet computed. 217 | acc_if :: inline (prevAcc: **Accumulator, nnue: *NNUEdata) -> bool { 218 | if !nnue then 219 | return true; 220 | prevAcc.* = *nnue.accumulator; 221 | return !prevAcc.*.computedAccumulation; 222 | } 223 | 224 | accumulator := *nnue[0].accumulator; 225 | if accumulator.computedAccumulation then 226 | return true; 227 | prevAcc: *Accumulator = null; 228 | if acc_if(*prevAcc, nnue[1]) && acc_if(*prevAcc, nnue[2]) then 229 | return false; 230 | removed_indices: [2]
IndexList; 231 | added_indices: [2] IndexList; 232 | reset: [2] bool; 233 | removed_indices[0].size = 0; 234 | removed_indices[1].size = 0; 235 | added_indices[0].size = 0; 236 | added_indices[1].size = 0; 237 | append_changed_indices(chess, nnue, removed_indices, added_indices, reset); 238 | // Per perspective: start from the biases (reset) or from the previous accumulator, subtract the removed features, then add the added ones. 239 | for c: 0..1 { 240 | if reset[c] then { 241 | memcpy(accumulator.accumulation[c].data, ft_biases.data, kHalfDimensions * size_of(s16)); 242 | } else { 243 | memcpy(accumulator.accumulation[c].data, prevAcc.accumulation[c].data, kHalfDimensions * size_of(s16)); 244 | // Difference calculation for the deactivated features 245 | for k: 0..removed_indices[c].size-1 { 246 | index := removed_indices[c].values[k]; 247 | offset := kHalfDimensions * index; 248 | for j: 0..kHalfDimensions-1 { 249 | accumulator.accumulation[c][j] -= ft_weights[offset + j]; 250 | } 251 | } 252 | } 253 | 254 | // Difference calculation for the activated features 255 | for k: 0..added_indices[c].size-1 { 256 | index := added_indices[c].values[k]; 257 | offset := kHalfDimensions * index; 258 | for j: 0..kHalfDimensions-1 { 259 | accumulator.accumulation[c][j] += ft_weights[offset + j]; 260 | } 261 | } 262 | } 263 | 264 | accumulator.computedAccumulation = 1; 265 | return true; 266 | } 267 | // Rebuilds nnue[0]'s accumulator from scratch: biases plus the weight row of every active feature, for both perspectives, then sets computedAccumulation. 268 | refresh_accumulator :: (chess: *Chess, nnue: [3] *NNUEdata) { 269 | accumulator := *(nnue[0].accumulator); 270 | activeIndices: [2] IndexList; 271 | activeIndices[0].size = 0; 272 | activeIndices[1].size = 0; 273 | append_active_indices(chess, activeIndices); 274 | for c: 0..1 { 275 | memcpy(accumulator.accumulation[c].data, ft_biases.data, kHalfDimensions * size_of(s16)); 276 | for k: 0..activeIndices[c].size-1 { 277 | index := activeIndices[c].values[k]; 278 | offset := kHalfDimensions * index; 279 | for j: 0..kHalfDimensions-1 { 280 | accumulator.accumulation[c][j] += ft_weights[offset + j]; 281 | } 282 | } 283 | } 284 | accumulator.computedAccumulation = 1; 285 | } 286 | // Collects the active feature indices for both perspectives: active[0] around the white king, active[1] around the black king. 287 | append_active_indices ::
(chess: *Chess, active: [] IndexList) { 288 | half_kp_append_active_indices(chess, chess.w_king, 0, *active[0]); 289 | half_kp_append_active_indices(chess, chess.b_king, 1, *active[1]); 290 | } 291 | // Fills removed/added with the feature changes for nnue[0]. When nnue[1] is already computed only nnue[0]'s dirty pieces are used; otherwise the dirty pieces of nnue[0] and nnue[1] are combined. reset[c] is set when a first dirty piece has code 1 (perspective 0) or 7 (perspective 1) - presumably that side's king - and then added[c] receives the full active list instead. 292 | append_changed_indices :: (chess: *Chess, nnue: [3] *NNUEdata, removed: [] IndexList, added: [] IndexList, reset: [] bool) { 293 | dp := *nnue[0].dirtyPiece; 294 | if nnue[1].accumulator.computedAccumulation then { 295 | { 296 | king := chess.w_king; 297 | ksq := cast(s32) bsf(king); 298 | reset[0] = dp.pc[0] == 1; 299 | if reset[0] then { 300 | half_kp_append_active_indices(chess, king, 0, *added[0]); 301 | } else { 302 | half_kp_append_changed_indices(ksq, 0, dp, *removed[0], *added[0]); 303 | } 304 | } 305 | { 306 | king := chess.b_king; 307 | ksq := cast(s32) bsf(king); 308 | reset[1] = dp.pc[0] == 7; 309 | if reset[1] then { 310 | half_kp_append_active_indices(chess, king, 1, *added[1]); 311 | } else { 312 | half_kp_append_changed_indices(ksq, 1, dp, *removed[1], *added[1]); 313 | } 314 | } 315 | } else { 316 | dp2 := *nnue[1].dirtyPiece; 317 | { 318 | king := chess.w_king; 319 | ksq := cast(s32) bsf(king); 320 | reset[0] = dp.pc[0] == 1 || dp2.pc[0] == 1; 321 | if reset[0] then { 322 | half_kp_append_active_indices(chess, king, 0, *added[0]); 323 | } else { 324 | half_kp_append_changed_indices(ksq, 0, dp, *removed[0], *added[0]); 325 | half_kp_append_changed_indices(ksq, 0, dp2, *removed[0], *added[0]); 326 | } 327 | } 328 | // Same two-step handling for perspective 1 (black king, piece code 7). 329 | { 330 | king := chess.b_king; 331 | ksq := cast(s32) bsf(king); 332 | reset[1] = dp.pc[0] == 7 || dp2.pc[0] == 7; 333 | if reset[1] then { 334 | half_kp_append_active_indices(chess, king, 1, *added[1]); 335 | } else { 336 | half_kp_append_changed_indices(ksq, 1, dp, *removed[1], *added[1]); 337 | half_kp_append_changed_indices(ksq, 1, dp2, *removed[1], *added[1]); 338 | } 339 | } 340 | } 341 | } 342 | // Appends one feature index to active for every occupied square except the two king squares, relative to the given king as seen from perspective c. 343 | half_kp_append_active_indices :: (chess: *Chess, king: u64, c: s32, active: *IndexList) { 344 | ksq := cast(s32) bsf(king);
345 | ksq = orient(c, ksq) * PS_END; 346 | occupied := chess.occupied; 347 | kings := chess.w_king | chess.b_king; 348 | occupied ^= kings; 349 | while occupied { 350 | sq := cast(s32) bsf(occupied); 351 | occupied &= occupied - 1; 352 | pc := cast(s32) chess.pieces[sq]; 353 | active.values[active.size] = make_index(xx c, sq, pc, ksq); 354 | active.size += 1; 355 | } 356 | } 357 | // Bit index of the lowest set bit of value, via the x86 BSF instruction. NOTE(review): BSF leaves its destination undefined for a zero source - confirm callers never pass 0. 358 | bsf :: (value: u64) -> int #expand { 359 | result: int = 0; 360 | #asm { bsf.q result, value; } 361 | return result; 362 | } 363 | // Turns the entries of dp into feature changes for perspective c: every dirty piece other than codes 1 and 7 adds a removed index for its from square and an added index for its to square. A square value of 64 is skipped (it appears to mean "no square"). 364 | half_kp_append_changed_indices :: (ksq: s32, c: s32, dp: DirtyPiece, removed: *IndexList, added: *IndexList) { 365 | ksq = orient(c, ksq) * PS_END; 366 | num := dp.dirtyNum - 1; 367 | for i: 0..num { 368 | pc := dp.pc[i]; 369 | if pc == 1 || pc == 7 continue; 370 | from := dp.from[i]; 371 | to := dp.to[i]; 372 | if from != 64 then { 373 | removed.values[removed.size] = make_index(c, from, pc, ksq); 374 | removed.size += 1; 375 | } 376 | 377 | if to != 64 then { 378 | added.values[added.size] = make_index(c, to, pc, ksq); 379 | added.size += 1; 380 | } 381 | } 382 | } 383 | // Feature index = oriented square + piece plane offset + king offset; callers pass ksq already multiplied by PS_END. 384 | make_index :: (c: s32, s: s32, pc: s32, ksq: s32) -> s32 #expand { 385 | return orient(c, s) + PieceToIndex[c][pc] + ksq; 386 | } 387 | // Returns s unchanged for perspective 0 and s ^ 0x3F for any other perspective. 388 | orient :: (c: s32, s: s32) -> s32 #expand { 389 | if c == 0 then { 390 | return s; 391 | } else { 392 | return s ^ 0x3F; 393 | } 394 | } 395 | // Start offsets of the 64-entry plane of each piece type; PS_END (641) is the stride between king squares. 396 | PS_W_PAWN :: 1; 397 | PS_B_PAWN :: 1*64 + 1; 398 | PS_W_KNIGHT :: 2*64 + 1; 399 | PS_B_KNIGHT :: 3*64 + 1; 400 | PS_W_BISHOP :: 4*64 + 1; 401 | PS_B_BISHOP :: 5*64 + 1; 402 | PS_W_ROOK :: 6*64 + 1; 403 | PS_B_ROOK :: 7*64 + 1; 404 | PS_W_QUEEN :: 8*64 + 1; 405 | PS_B_QUEEN :: 9*64 + 1; 406 | PS_END :: 10*64 + 1; 407 | // Piece code -> plane offset, one row per perspective; row 1 swaps the white and black planes. 408 | PieceToIndex: [2][14] s32 = .[ 409 | s32.[0, 0, PS_W_QUEEN, PS_W_ROOK, PS_W_BISHOP, PS_W_KNIGHT, PS_W_PAWN, 410 | 0, PS_B_QUEEN, PS_B_ROOK, PS_B_BISHOP, PS_B_KNIGHT, PS_B_PAWN, 0], 411 | s32.[ 0, 0, PS_B_QUEEN, PS_B_ROOK, PS_B_BISHOP, PS_B_KNIGHT, PS_B_PAWN, 412 | 0,
PS_W_QUEEN, PS_W_ROOK, PS_W_BISHOP, PS_W_KNIGHT, PS_W_PAWN, 0] 413 | ]; 414 | // Brings nnue[0]'s accumulator up to date (incremental update, else full refresh), then writes both perspectives - side to move first - to output, each value clamped to 0..127. out_mask is not written by this backend. 415 | transform :: (chess: *Chess, nnue: [3] *NNUEdata, output: *s8, out_mask: *u32) { 416 | if !update_accumulator(chess, nnue) then 417 | refresh_accumulator(chess, nnue); 418 | accumulation: [][256] s16 = nnue[0].accumulator.accumulation; 419 | offset := 0; 420 | p := chess.turn; 421 | for 0..1 { 422 | for i: 0..kHalfDimensions-1 { 423 | sum: s16 = accumulation[p][i]; 424 | output[offset + i] = cast(s8) clamp(sum, 0, 127); 425 | } 426 | offset += kHalfDimensions; 427 | p ^= 1; 428 | } 429 | } 430 | // Dense affine layer: tmp = biases + sum(input[idx] * weights row idx), then each sum is shifted right by 6 and clamped to 0..127. in_mask, out_mask and pack8_and_calc_mask are accepted but not used here. 431 | affine_txfm :: (input: *s8, output: *void, inDims: u32, $outDims: u32, biases: *s32, weights: *s8, in_mask: *u32, out_mask: *u32, pack8_and_calc_mask: bool) { 432 | 433 | tmp: [outDims] s32; 434 | for i: 0..outDims-1 { 435 | tmp[i] = biases[i]; 436 | } 437 | // Inputs equal to zero contribute nothing and are skipped. 438 | for idx: 0..inDims-1 { 439 | factor: s32 = input[idx]; 440 | if factor { 441 | for i: 0..outDims-1 { 442 | tmp[i] += factor * weights[outDims * idx + i]; 443 | } 444 | } 445 | } 446 | 447 | outVec := cast(*s8)output; 448 | for i: 0..outDims-1 { 449 | outVec[i] = cast(s8) clamp(tmp[i] >> 6, 0, 127); 450 | } 451 | } 452 | // Output layer: biases plus the dot product of the first 32 inputs and weights. 453 | affine_propagate :: (input: *s8, biases: s32, weights: *s8) -> s32 { 454 | sum := biases; 455 | for j: 0..31 { 456 | sum += cast(s32)weights[j] * cast(s32)input[j]; 457 | } 458 | return sum; 459 | } 460 | 461 | #import "Basic"; 462 | #import "File"; 463 | -------------------------------------------------------------------------------- /nnue_probe.jai: -------------------------------------------------------------------------------- 1 | // These are handcrafted bindings for Daniel Shawul's NNUE-Probe Library. 2 | // NNUE-Probe can be used to get an optimized SIMD Matrix Multiplication 3 | // for computer architectures that the current Jai Chess Engine does not 4 | // support with the inline assembly. 5 | // e.g. AVX512, MMX, ARM NEON, etc.
6 | // This library is completely optional, and is not a necessary component 7 | // of the Chess Engine. 8 | 9 | nnue_probe :: #library "libnnueprobe"; 10 | 11 | nnue_evaluate :: (player: s32, pieces: *s32, squares: *s32) -> s32 #foreign nnue_probe; 12 | nnue_evaluate_incremental :: (player: s32, pieces: *s32, squares: *s32, nnue: **NNUEdata) -> s32 #foreign nnue_probe; 13 | nnue_init :: (file_name: *u8) -> bool #foreign nnue_probe; 14 | // Full evaluation: builds the piece list for the position and passes it to NNUE-Probe's nnue_evaluate. 15 | nnue_evaluate_board :: (chess: *Chess) -> int { 16 | pieces, squares := initialize_piece_list(chess); 17 | return nnue_evaluate(xx chess.turn, *pieces[0], *squares[0]); 18 | } 19 | // Incremental evaluation: also passes pointers to the NNUEdata of the current ply and of up to two earlier plies (entries stay null when ply is too small). 20 | nnue_evaluate :: (chess: *ChessGame) -> int { 21 | pieces, squares := initialize_piece_list(chess); 22 | a_nnue: [3] *NNUEdata; 23 | a_nnue[0] = null; 24 | a_nnue[1] = null; 25 | a_nnue[2] = null; 26 | 27 | i := 0; 28 | while i<3 && chess.ply >= i { 29 | a_nnue[i] = chess.nnue.data + chess.ply - i; 30 | i += 1; 31 | } 32 | 33 | return nnue_evaluate_incremental(xx chess.turn, *pieces[0], *squares[0], *a_nnue[0]); 34 | } 35 | // Builds the piece/square arrays handed to NNUE-Probe: slot 0 is the white king, slot 1 the black king, then every other occupied square in bit order, followed by a 0 terminator. 36 | initialize_piece_list :: (chess: *Chess) -> [33] s32, [33] s32 { 37 | pieces : [33] s32; 38 | squares: [33] s32; 39 | pieces[0] = xx Piece.W_KING; 40 | squares[0] = xx bit_scan_forward(chess.w_king); 41 | pieces[1] = xx Piece.B_KING; // was W_KING: slot 1 carries the black king's square, and NNUE-Probe documents pieces[1] as the black king 42 | squares[1] = xx bit_scan_forward(chess.b_king); 43 | // Remaining pieces: every occupied square except the two king squares. 44 | index := 2; 45 | occupied := chess.occupied & ~(chess.w_king | chess.b_king); 46 | while occupied { 47 | sq := cast(s32) bit_scan_forward(occupied); 48 | piece := cast(s32) chess.pieces[sq]; 49 | pieces[index] = piece; 50 | squares[index] = sq; 51 | index += 1; 52 | occupied &= occupied - 1; 53 | } 54 | // 0 terminator after the last piece. 55 | pieces[index] = 0; 56 | squares[index] = 0; 57 | 58 | return pieces, squares; 59 | } 60 | 61 | nnue_startup :: () #expand { // loads the network file through NNUE-Probe's nnue_init when this macro is expanded and run. 62 | #import "File_Utilities"; 63 | 64 | nnue_default :: "resources/nn-04cf2b4ed1da.nnue"; 65 | if !file_exists(nnue_default) { 66 | print("Error.
% does not exist", nnue_default); 67 | `return; 68 | } 69 | nnue_init(nnue_default); 70 | } 71 | // Record of up to three piece changes: piece code, from square and to square per entry; dirtyNum is the number of entries in use. 72 | DirtyPiece :: struct { 73 | dirtyNum: s32; 74 | pc : [3] s32; 75 | from : [3] s32; 76 | to : [3] s32; 77 | } 78 | // Feature-transformer sums for both perspectives plus a "computed" flag. The fields are #place'd over a 1088-byte padding array - presumably to pin the struct size to the C library's layout; confirm against NNUE-Probe's header. 79 | Accumulator :: struct { 80 | padding: [1088] u8; 81 | #place padding; 82 | accumulation: [2][256] s16 #align 64; 83 | computedAccumulation: s32; 84 | } 85 | // Accumulator plus dirty-piece record, #place'd over a 1152-byte padding array. 86 | NNUEdata :: struct { 87 | padding: [1152] u8; 88 | #place padding; 89 | accumulator: Accumulator; 90 | dirtyPiece: DirtyPiece; 91 | } 92 | -------------------------------------------------------------------------------- /nnue_sse.jai: -------------------------------------------------------------------------------- 1 | #run { 2 | nnue_default :: "resources/nn-04cf2b4ed1da.nnue"; 3 | if nnue_init(nnue_default) { 4 | print("NNUE % initialized\n", nnue_default); 5 | } else { 6 | assert(false, "Error. Neural Network is not initialized.\n"); 7 | } 8 | } 9 | 10 | nnue_startup :: () #expand {} // initialization is done at compile time.
11 | 12 | nnue_init :: (file_name: string) -> bool { 13 | verify_file :: (buffer: [] u8) -> bool { 14 | if buffer.count != 21022697 then 15 | return false; 16 | d := buffer.data; 17 | if < *s8 { 31 | 32 | wt_idx :: (r: int, c: int, dims: int) -> int { 33 | return c * 32 + r; 34 | } 35 | 36 | i := 0; 37 | for r: 0..31 { 38 | for c: 0..dims-1 { 39 | index := wt_idx(r, c, dims); 40 | weight[index] = < int { 119 | a_nnue: [3] *NNUEdata; 120 | a_nnue[0] = null; 121 | a_nnue[1] = null; 122 | a_nnue[2] = null; 123 | 124 | i := 0; 125 | while i<3 && chess.ply >= i { 126 | a_nnue[i] = *chess.nnue[chess.ply - i]; 127 | i += 1; 128 | } 129 | 130 | using chess.chess; 131 | return nnue_evaluate_pos(chess, a_nnue); 132 | } 133 | 134 | nnue_evaluate_board :: (chess: Chess) -> int { 135 | nnue: NNUEdata #align 32; 136 | nnue.accumulator.computedAccumulation = 0; 137 | nnue_data: [3] *NNUEdata; 138 | nnue_data[0] = *nnue; 139 | nnue_data[1] = null; 140 | nnue_data[2] = null; 141 | return nnue_evaluate_pos(*chess, nnue_data); 142 | } 143 | 144 | DirtyPiece :: struct { 145 | dirtyNum: s32; 146 | pc : [3] s32; 147 | from : [3] s32; 148 | to : [3] s32; 149 | } 150 | 151 | Accumulator :: struct { 152 | padding: [1088] u8; 153 | #place padding; 154 | 155 | accumulation: [2][256] s16 #align 64; 156 | computedAccumulation: s32; 157 | } 158 | 159 | NNUEdata :: struct { 160 | padding: [1152] u8; 161 | #place padding; 162 | 163 | accumulator: Accumulator; 164 | dirtyPiece: DirtyPiece; 165 | } 166 | 167 | #scope_file 168 | NNUE_Model :: struct { 169 | // features: 170 | ft_biases: [kHalfDimensions] s16 #align 64; 171 | ft_weights: [kHalfDimensions*FtInDims] s16 #align 64; 172 | 173 | // weights: 174 | hidden1_weights: [64*512] s8 #align 64; 175 | hidden2_weights: [64*32] s8 #align 64; 176 | output_weights: [1*32] s8 #align 64; 177 | 178 | // biases: 179 | hidden1_biases: [32] s32 #align 64; 180 | hidden2_biases: [32] s32 #align 64; 181 | output_biases : [1] s32 #align 64; 182 | } 183 | 184 | 
// The one network instance used by every procedure in this file.
// `using` lets the code below write ft_biases / hidden1_weights / ... directly.
#no_reset nnue_model: NNUE_Model #align 64;
using nnue_model;

// Network dimensions and file-layout constants.
kHalfDimensions  :: 256;
FtInDims         :: 64*PS_END; // 64 king squares * 641 (PS_END) piece-square slots
FtOutDims        :: kHalfDimensions*2;
NnueVersion      : u32 : 0x7AF32F16;
TransformerStart :: 3*4 + 177;
NetworkStart     :: TransformerStart+4 + 2*256 + 2*256*64*641;

Position :: struct {
  player:  s32;
  pieces:  *s32;
  squares: *s32;
  nnue:    [3] *NNUEdata;
}

// Small fixed-capacity list of feature indices (size = number of valid entries).
IndexList :: struct {
  size:   s32;
  values: [30] s32;
}

// Runs the whole network for one position and returns the scaled score.
//   chess : position to evaluate.
//   nnue  : nnue[0] is the entry for the current position; nnue[1] / nnue[2]
//           are the entries of the one / two previous plies, or null.
nnue_evaluate_pos :: (chess: *Chess, nnue: [3] *NNUEdata) -> s32 {
  input_mask: [FtOutDims / (8 * size_of(u32)) ] u32 #align 8;
  hidden1_mask: [8 / size_of(u32)] u32 #align 8;
  FV_SCALE :: 16;
  input: [FtOutDims] s8 #align 16;
  hidden1_out: [32] s8 #align 16;
  hidden2_out: [32] s8 #align 16;

  // feature transformer -> hidden layer 1 -> hidden layer 2 -> output neuron
  transform(chess, nnue, *input[0], *input_mask[0]);
  affine_txfm(*input[0], *hidden1_out[0], FtOutDims, *hidden1_biases[0], *hidden1_weights[0]);
  affine_txfm(*hidden1_out[0], *hidden2_out[0], 32, *hidden2_biases[0], *hidden2_weights[0]);
  raw_score := inline affine_propagate(*hidden2_out[0], output_biases[0], *output_weights[0]);
  return raw_score / FV_SCALE;
}

// Tries to bring nnue[0]'s accumulator up to date incrementally, starting
// from the accumulator of one of the two previous plies.
// Returns true when nnue[0] holds a valid accumulator afterwards, false when
// neither previous accumulator is usable (the caller must do a full refresh).
update_accumulator :: (chess: *Chess, nnue: [3] *NNUEdata) -> bool {

  // Points prevAcc at `nnue`'s accumulator (if any) and returns true when
  // that accumulator can NOT be used as a starting point.
  acc_if :: inline (prevAcc: **Accumulator, nnue: *NNUEdata) -> bool {
    if !nnue then
      return true;
    prevAcc.* = *nnue.accumulator;
    return !prevAcc.*.computedAccumulation;
  }

  accumulator := *nnue[0].accumulator;
  if accumulator.computedAccumulation then
    return true;

  prevAcc: *Accumulator = null;
  if acc_if(*prevAcc, nnue[1]) && acc_if(*prevAcc, nnue[2]) then
    return false;

  removed_indices: [2] IndexList;
  added_indices: [2] IndexList;
  reset: [2] bool;
  for side: 0..1 {
    removed_indices[side].size = 0;
    added_indices[side].size = 0;
  }
  append_changed_indices(chess, nnue, removed_indices, added_indices, reset);

  for c: 0..1 {
    // When this perspective's king moved, rebuild from the biases instead of
    // patching the previous accumulator.
    full_reset := reset[c];

    // 256 s16 values per perspective, processed as two tiles of 128
    // (16 xmm registers * 8 lanes).
    accindex := 0;
    while accindex < 256 {
      acc_tile := *accumulator.accumulation[c][accindex];
      copy_tile := ifx full_reset then *ft_biases[accindex] else *prevAcc.accumulation[c][accindex];

      // load the starting tile
      #asm SSE {
        movdqa.x xmm0:  vec, [copy_tile + 0x00];
        movdqa.x xmm1:  vec, [copy_tile + 0x10];
        movdqa.x xmm2:  vec, [copy_tile + 0x20];
        movdqa.x xmm3:  vec, [copy_tile + 0x30];
        movdqa.x xmm4:  vec, [copy_tile + 0x40];
        movdqa.x xmm5:  vec, [copy_tile + 0x50];
        movdqa.x xmm6:  vec, [copy_tile + 0x60];
        movdqa.x xmm7:  vec, [copy_tile + 0x70];
        movdqa.x xmm8:  vec, [copy_tile + 0x80];
        movdqa.x xmm9:  vec, [copy_tile + 0x90];
        movdqa.x xmm10: vec, [copy_tile + 0xa0];
        movdqa.x xmm11: vec, [copy_tile + 0xb0];
        movdqa.x xmm12: vec, [copy_tile + 0xc0];
        movdqa.x xmm13: vec, [copy_tile + 0xd0];
        movdqa.x xmm14: vec, [copy_tile + 0xe0];
        movdqa.x xmm15: vec, [copy_tile + 0xf0];
      }

      if !full_reset {
        // subtract the weight columns of the features that were switched off
        for k: 0..removed_indices[c].size-1 {
          index := removed_indices[c].values[k] * kHalfDimensions;
          sub_tile := *ft_weights[index + accindex];
          #asm SSE {
            psubw.x xmm0,  [sub_tile + 0x00];
            psubw.x xmm1,  [sub_tile + 0x10];
            psubw.x xmm2,  [sub_tile + 0x20];
            psubw.x xmm3,  [sub_tile + 0x30];
            psubw.x xmm4,  [sub_tile + 0x40];
            psubw.x xmm5,  [sub_tile + 0x50];
            psubw.x xmm6,  [sub_tile + 0x60];
            psubw.x xmm7,  [sub_tile + 0x70];
            psubw.x xmm8,  [sub_tile + 0x80];
            psubw.x xmm9,  [sub_tile + 0x90];
            psubw.x xmm10, [sub_tile + 0xa0];
            psubw.x xmm11, [sub_tile + 0xb0];
            psubw.x xmm12, [sub_tile + 0xc0];
            psubw.x xmm13, [sub_tile + 0xd0];
            psubw.x xmm14, [sub_tile + 0xe0];
            psubw.x xmm15, [sub_tile + 0xf0];
          }
        }
      }

      // add the weight columns of the features that were switched on
      for k: 0..added_indices[c].size-1 {
        index := added_indices[c].values[k] * kHalfDimensions;
        add_tile := *ft_weights[index + accindex];
        #asm SSE {
          paddw.x xmm0,  [add_tile + 0x00];
          paddw.x xmm1,  [add_tile + 0x10];
          paddw.x xmm2,  [add_tile + 0x20];
          paddw.x xmm3,  [add_tile + 0x30];
          paddw.x xmm4,  [add_tile + 0x40];
          paddw.x xmm5,  [add_tile + 0x50];
          paddw.x xmm6,  [add_tile + 0x60];
          paddw.x xmm7,  [add_tile + 0x70];
          paddw.x xmm8,  [add_tile + 0x80];
          paddw.x xmm9,  [add_tile + 0x90];
          paddw.x xmm10, [add_tile + 0xa0];
          paddw.x xmm11, [add_tile + 0xb0];
          paddw.x xmm12, [add_tile + 0xc0];
          paddw.x xmm13, [add_tile + 0xd0];
          paddw.x xmm14, [add_tile + 0xe0];
          paddw.x xmm15, [add_tile + 0xf0];
        }
      }

      // write the finished tile into the current accumulator
      #asm SSE {
        movdqa.x [acc_tile + 0x00], xmm0;
        movdqa.x [acc_tile + 0x10], xmm1;
        movdqa.x [acc_tile + 0x20], xmm2;
        movdqa.x [acc_tile + 0x30], xmm3;
        movdqa.x [acc_tile + 0x40], xmm4;
        movdqa.x [acc_tile + 0x50], xmm5;
        movdqa.x [acc_tile + 0x60], xmm6;
        movdqa.x [acc_tile + 0x70], xmm7;
        movdqa.x [acc_tile + 0x80], xmm8;
        movdqa.x [acc_tile + 0x90], xmm9;
        movdqa.x [acc_tile + 0xa0], xmm10;
        movdqa.x [acc_tile + 0xb0], xmm11;
        movdqa.x [acc_tile + 0xc0], xmm12;
        movdqa.x [acc_tile + 0xd0], xmm13;
        movdqa.x [acc_tile + 0xe0], xmm14;
        movdqa.x [acc_tile + 0xf0], xmm15;
      }
      accindex += 128;
    }
  }

  accumulator.computedAccumulation = 1;
  return true;
}

// Rebuilds nnue[0]'s accumulator from scratch: feature-transformer biases plus
// the weight column of every active feature, for both perspectives.
refresh_accumulator :: (chess: *Chess, nnue: [3] *NNUEdata) {
  accumulator := *(nnue[0].accumulator);
  activeIndices: [2] IndexList;
  activeIndices[0].size = 0;
  activeIndices[1].size = 0;
  append_active_indices(chess, activeIndices);

  for c: 0..1 {
    accindex := 0;
    while accindex < 256 {
      tile := *ft_biases[accindex];
      acc_tile := *accumulator.accumulation[c][accindex];

      // start from the biases
      #asm SSE {
        movdqa.x xmm0:  vec, [tile + 0x00];
        movdqa.x xmm1:  vec, [tile + 0x10];
        movdqa.x xmm2:  vec, [tile + 0x20];
        movdqa.x xmm3:  vec, [tile + 0x30];
        movdqa.x xmm4:  vec, [tile + 0x40];
        movdqa.x xmm5:  vec, [tile + 0x50];
        movdqa.x xmm6:  vec, [tile + 0x60];
        movdqa.x xmm7:  vec, [tile + 0x70];
        movdqa.x xmm8:  vec, [tile + 0x80];
        movdqa.x xmm9:  vec, [tile + 0x90];
        movdqa.x xmm10: vec, [tile + 0xa0];
        movdqa.x xmm11: vec, [tile + 0xb0];
        movdqa.x xmm12: vec, [tile + 0xc0];
        movdqa.x xmm13: vec, [tile + 0xd0];
        movdqa.x xmm14: vec, [tile + 0xe0];
        movdqa.x xmm15: vec, [tile + 0xf0];
      }

      // accumulate every active feature
      for k: 0..activeIndices[c].size-1 {
        offset := kHalfDimensions * activeIndices[c].values[k];
        add_tile := *ft_weights[offset + accindex];
        #asm SSE {
          paddw.x xmm0,  [add_tile + 0x00];
          paddw.x xmm1,  [add_tile + 0x10];
          paddw.x xmm2,  [add_tile + 0x20];
          paddw.x xmm3,  [add_tile + 0x30];
          paddw.x xmm4,  [add_tile + 0x40];
          paddw.x xmm5,  [add_tile + 0x50];
          paddw.x xmm6,  [add_tile + 0x60];
          paddw.x xmm7,  [add_tile + 0x70];
          paddw.x xmm8,  [add_tile + 0x80];
          paddw.x xmm9,  [add_tile + 0x90];
          paddw.x xmm10, [add_tile + 0xa0];
          paddw.x xmm11, [add_tile + 0xb0];
          paddw.x xmm12, [add_tile + 0xc0];
          paddw.x xmm13, [add_tile + 0xd0];
          paddw.x xmm14, [add_tile + 0xe0];
          paddw.x xmm15, [add_tile + 0xf0];
        }
      }

      #asm SSE {
        movdqa.x [acc_tile + 0x00], xmm0;
        movdqa.x [acc_tile + 0x10], xmm1;
        movdqa.x [acc_tile + 0x20], xmm2;
        movdqa.x [acc_tile + 0x30], xmm3;
        movdqa.x [acc_tile + 0x40], xmm4;
        movdqa.x [acc_tile + 0x50], xmm5;
        movdqa.x [acc_tile + 0x60], xmm6;
        movdqa.x [acc_tile + 0x70], xmm7;
        movdqa.x [acc_tile + 0x80], xmm8;
        movdqa.x [acc_tile + 0x90], xmm9;
        movdqa.x [acc_tile + 0xa0], xmm10;
        movdqa.x [acc_tile + 0xb0], xmm11;
        movdqa.x [acc_tile + 0xc0], xmm12;
        movdqa.x [acc_tile + 0xd0], xmm13;
        movdqa.x [acc_tile + 0xe0], xmm14;
        movdqa.x [acc_tile + 0xf0], xmm15;
      }
      accindex += 128;
    }
  }

  accumulator.computedAccumulation = 1;
}

// Fills active[0] (white perspective) and active[1] (black perspective) with
// the feature index of every non-king piece on the board.
append_active_indices :: (chess: *Chess, active: []IndexList) {
  half_kp_append_active_indices(chess, chess.w_king, 0, *active[0]);
  half_kp_append_active_indices(chess, chess.b_king, 1, *active[1]);
}

// Works out, per perspective, which features changed between the previous
// usable accumulator and the current position.
//   reset[c]   : set to true when perspective c's own king moved; in that case
//                added[c] receives the full active list and removed[c] is untouched.
//   removed[c] / added[c] : otherwise receive the per-piece differences.
// When nnue[1]'s accumulator is not computed, the dirty pieces of both
// nnue[0] and nnue[1] are applied (two plies of changes).
append_changed_indices :: (chess: *Chess, nnue: [3] *NNUEdata, removed: [] IndexList, added:[] IndexList, reset: [] bool) {
  W_KING_PC :: 1;  // piece code compared against pc[0] for a white king move
  B_KING_PC :: 7;  // piece code compared against pc[0] for a black king move

  latest := *nnue[0].dirtyPiece;
  w_king := chess.w_king;
  b_king := chess.b_king;

  if nnue[1].accumulator.computedAccumulation {
    // previous ply is usable: a single set of dirty pieces to apply.
    reset[0] = latest.pc[0] == W_KING_PC;
    if reset[0] {
      half_kp_append_active_indices(chess, w_king, 0, *added[0]);
    } else {
      half_kp_append_changed_indices(cast(s32) bsf(w_king), 0, latest, *removed[0], *added[0]);
    }

    reset[1] = latest.pc[0] == B_KING_PC;
    if reset[1] {
      half_kp_append_active_indices(chess, b_king, 1, *added[1]);
    } else {
      half_kp_append_changed_indices(cast(s32) bsf(b_king), 1, latest, *removed[1], *added[1]);
    }
  } else {
    // starting two plies back: apply this ply's and the previous ply's changes.
    older := *nnue[1].dirtyPiece;

    reset[0] = latest.pc[0] == W_KING_PC || older.pc[0] == W_KING_PC;
    if reset[0] {
      half_kp_append_active_indices(chess, w_king, 0, *added[0]);
    } else {
      w_ksq := cast(s32) bsf(w_king);
      half_kp_append_changed_indices(w_ksq, 0, latest, *removed[0], *added[0]);
      half_kp_append_changed_indices(w_ksq, 0, older, *removed[0], *added[0]);
    }

    reset[1] = latest.pc[0] == B_KING_PC || older.pc[0] == B_KING_PC;
    if reset[1] {
      half_kp_append_active_indices(chess, b_king, 1, *added[1]);
    } else {
      b_ksq := cast(s32) bsf(b_king);
      half_kp_append_changed_indices(b_ksq, 1, latest, *removed[1], *added[1]);
      half_kp_append_changed_indices(b_ksq, 1, older, *removed[1], *added[1]);
    }
  }
}

// Appends the feature index of every occupied non-king square, as seen from
// perspective `c` whose king bitboard is `king`.
half_kp_append_active_indices :: (chess: *Chess, king: u64, c: s32, active: *IndexList) {
  ksq := cast(s32) bsf(king);
  king_base := orient(c, ksq) * PS_END;

  // every occupied square except the two kings
  remaining := chess.occupied ^ (chess.w_king | chess.b_king);
  while remaining {
    sq := cast(s32) bsf(remaining);
    remaining &= remaining - 1;   // clear the lowest set bit
    pc := cast(s32) chess.pieces[sq];
    active.values[active.size] = make_index(c, sq, pc, king_base);
    active.size += 1;
  }
}

// Bit-scan-forward on a 64-bit value (index of the lowest set bit).
// `result` starts at 0 before the instruction runs.
bsf :: (value: u64) -> int #expand {
  result: int = 0;
  #asm { bsf.q result, value; }
  return result;
}

// Translates one DirtyPiece record into removed / added feature indices for
// perspective `c` with king square `ksq`. King entries (pc 1 or 7) are
// skipped; a from/to value of 64 means "no square" on that side of the move.
half_kp_append_changed_indices :: (ksq: s32, c: s32, dp: DirtyPiece, removed: *IndexList, added: *IndexList) {
  king_base := orient(c, ksq) * PS_END;
  for i: 0..dp.dirtyNum-1 {
    pc := dp.pc[i];
    if pc == 1 || pc == 7 continue;

    from := dp.from[i];
    if from != 64 {
      removed.values[removed.size] = make_index(c, from, pc, king_base);
      removed.size += 1;
    }

    to := dp.to[i];
    if to != 64 {
      added.values[added.size] = make_index(c, to, pc, king_base);
      added.size += 1;
    }
  }
}

// Feature index = oriented square + piece offset for this perspective + king base.
make_index :: (c: s32, s: s32, pc: s32, ksq: s32) -> s32 #expand {
  return orient(c, s) + PieceToIndex[c][pc] + ksq;
}

// Perspective 0 keeps the square as is; any other perspective flips it (s ^ 0x3F).
orient :: (c: s32, s: s32) -> s32 #expand {
  if c == 0 then {
    return s;
  } else {
    return s ^ 0x3F;
  }
}

PS_W_PAWN :: 1;
PS_B_PAWN :: 1*64 + 1;
PS_W_KNIGHT :: 2*64 + 1;
PS_B_KNIGHT :: 3*64 + 1;
PS_W_BISHOP :: 4*64 + 1;
PS_B_BISHOP :: 5*64 + 1;
PS_W_ROOK   :: 6*64 + 1;
PS_B_ROOK   :: 7*64 + 1;
PS_W_QUEEN  :: 8*64 + 1;
PS_B_QUEEN  :: 9*64 + 1;
PS_END      :: 10*64 + 1;

// PieceToIndex[perspective][piece code] -> base offset of that piece's 64-square
// slot. Row 1 is row 0 with the colours swapped. Entries that are 0 (codes
// 0, 1, 7, 13) have no slot; codes 1 and 7 are the ones this file treats as kings.
PieceToIndex: [2][14] s32 = .[
  s32.[0, 0, PS_W_QUEEN, PS_W_ROOK, PS_W_BISHOP, PS_W_KNIGHT, PS_W_PAWN,
       0, PS_B_QUEEN, PS_B_ROOK, PS_B_BISHOP, PS_B_KNIGHT, PS_B_PAWN, 0],
  s32.[ 0, 0, PS_B_QUEEN, PS_B_ROOK, PS_B_BISHOP, PS_B_KNIGHT, PS_B_PAWN,
       0, PS_W_QUEEN, PS_W_ROOK, PS_W_BISHOP, PS_W_KNIGHT, PS_W_PAWN, 0]
];

// Feature transformer: makes sure nnue[0]'s accumulator is valid, then packs
// it into FtOutDims (512) signed bytes at `output`, side to move first.
//   out_mask : accepted for interface compatibility; not written here.
// The packing saturates to [-128, 127]; negative values are NOT clamped to 0
// here - affine_txfm only consumes inputs that are > 0.
transform :: (chess: *Chess, nnue: [3] *NNUEdata, output: *s8, out_mask: *u32) {
  if !update_accumulator(chess, nnue) then
    refresh_accumulator(chess, nnue);
  accumulation: [][256] s16 = nnue[0].accumulator.accumulation;

  // 128 bit xmm register
  // 8 bit data
  // 128 / 8 => 16 numbers at a time.
  // 256 numbers total / 16 numbers per SIMD = 16 times.
  data := *output[0];
  turn := chess.turn;
  for p: 0..1 {
    accindex := 0;
    while accindex < 256 {
      accum := *accumulation[turn][accindex];
      #asm SSE {
        xmm0: vec; xmm1: vec; xmm2: vec; xmm3: vec;
        xmm4: vec; xmm5: vec; xmm6: vec; xmm7: vec;
        // each pair: 16 s16 values -> 16 saturated s8 values
        movaps.x   xmm0, [accum + 0x00];
        packsswb.x xmm0, [accum + 0x10];
        movaps.x   xmm1, [accum + 0x20];
        packsswb.x xmm1, [accum + 0x30];
        movaps.x   xmm2, [accum + 0x40];
        packsswb.x xmm2, [accum + 0x50];
        movaps.x   xmm3, [accum + 0x60];
        packsswb.x xmm3, [accum + 0x70];
        movaps.x   xmm4, [accum + 0x80];
        packsswb.x xmm4, [accum + 0x90];
        movaps.x   xmm5, [accum + 0xa0];
        packsswb.x xmm5, [accum + 0xb0];
        movaps.x   xmm6, [accum + 0xc0];
        packsswb.x xmm6, [accum + 0xd0];
        movaps.x   xmm7, [accum + 0xe0];
        packsswb.x xmm7, [accum + 0xf0];
        movups.x [data + 0x00], xmm0;
        movups.x [data + 0x10], xmm1;
        movups.x [data + 0x20], xmm2;
        movups.x [data + 0x30], xmm3;
        movups.x [data + 0x40], xmm4;
        movups.x [data + 0x50], xmm5;
        movups.x [data + 0x60], xmm6;
        movups.x [data + 0x70], xmm7;
        add data, 0x80;
      }
      accindex += 128;
    }

    // second pass handles the other perspective
    turn ^= 1;
  }
}

// One hidden layer: output[0..31] = clamp((biases + sum over positive inputs
// of input[i] * weights[i*32 .. i*32+31]) >> 6, 0, 127).
//   input   : inDims signed bytes; only entries > 0 contribute.
//   output  : receives exactly 32 bytes.
//   inDims  : number of inputs; consumed 16 at a time.
//   biases  : 32 s32 values.
//   weights : 32 consecutive s8 weights per input.
affine_txfm :: (input: *s8, output: *void, inDims: u32, biases: *s32, weights: *s8) #expand {

  // GCC -O3 "optimized" output
  // terrible scrabbled eggs output, but faster than CPU w/o SIMD
  // the SSE code is a bit difficult to translate.
  //
  // Effect: tmp[j] += factor * weights[j] for j in 0..31.
  sse_simd :: (factor: s32, tmp: *s32, weights: *s8) #expand {
    edx := factor;
    rsi := tmp;
    rdi := weights;
    #asm {
      movdqa.x xmm1:, [rdi];
      pxor.x xmm6:, xmm6;
      pxor.x xmm7:, xmm7;
      movd xmm5:, edx;
      movdqa.x xmm2:, xmm6;
      movdqa.x xmm9:, xmm7;
      pshufd.x xmm0:, xmm5, 0;
      pcmpgtb.x xmm2, xmm1;
      movdqa.x xmm3:, xmm1;
      movdqa.x xmm5, xmm0;
      psrlq.x xmm5, 32;
      movdqa.x xmm10:, xmm7;
      punpcklbw.x xmm3, xmm2;
      punpckhbw.x xmm1, xmm2;
      pcmpgtw.x xmm9, xmm3;
      pcmpgtw.x xmm10, xmm1;
      movdqa.x xmm2, xmm3;
      punpckhwd.x xmm2, xmm9;
      movdqa.x xmm4:, xmm2;
      psrlq.x xmm2, 32;
      pmuludq.x xmm4, xmm0;
      pmuludq.x xmm2, xmm5;
      pshufd.x xmm4, xmm4, 8;
      pshufd.x xmm2, xmm2, 8;
      punpckldq.x xmm4, xmm2;
      movdqu.x xmm2, [rsi+16];
      paddd.x xmm4, xmm2;
      movdqa.x xmm2, xmm1;
      punpckhwd.x xmm1, xmm10;
      punpcklwd.x xmm2, xmm10;
      movdqa.x xmm10, xmm1;
      movups.x [rsi+16], xmm4;
      movdqa.x xmm8:, xmm2;
      psrlq.x xmm2, 32;
      pmuludq.x xmm8, xmm0;
      pmuludq.x xmm2, xmm5;
      pmuludq.x xmm10, xmm0;
      pshufd xmm2, xmm2, 8;
      pshufd xmm8, xmm8, 8;
      punpckldq.x xmm8, xmm2;
      movdqu.x xmm2, [rsi+32];
      paddd.x xmm8, xmm2;
      movdqa.x xmm2, xmm1;
      pshufd xmm1, xmm10, 8;
      psrlq xmm2, 32;
      movups.x [rsi+32], xmm8;
      movdqa.x xmm8, xmm7;
      pmuludq xmm2, xmm5;
      pshufd xmm2, xmm2, 8;
      punpckldq xmm1, xmm2;
      movdqu.x xmm2, [rsi+48];
      paddd xmm2, xmm1;
      movdqa xmm1, xmm3;
      punpcklwd xmm1, xmm9;
      movups.x [rsi+48], xmm2;
      movdqa xmm3, xmm1;
      psrlq xmm1, 32;
      pmuludq xmm3, xmm0;
      pmuludq xmm1, xmm5;
      pshufd xmm3, xmm3, 8;
      pshufd xmm1, xmm1, 8;
      punpckldq xmm3, xmm1;
      movdqu.x xmm1, [rsi];
      paddd.x xmm1, xmm3;
      movdqu.x xmm3, [rsi+80];
      movups.x [rsi], xmm1;
      movdqa.x xmm1, [rdi+16];
      pcmpgtb xmm6, xmm1;
      movdqa xmm2, xmm6;
      movdqa xmm6, xmm1;
      punpcklbw xmm6, xmm2;
      punpckhbw xmm1, xmm2;
      pcmpgtw xmm8, xmm6;
      pcmpgtw xmm7, xmm1;
      movdqa xmm2, xmm6;
      punpckhwd xmm2, xmm8;
      movdqa xmm4, xmm2;
      psrlq xmm2, 32;
      pmuludq xmm4, xmm0;
      pmuludq xmm2, xmm5;
      pshufd xmm4, xmm4, 8;
      pshufd xmm2, xmm2, 8;
      punpckldq xmm4, xmm2;
      movdqa xmm2, xmm1;
      punpckhwd xmm1, xmm7;
      punpcklwd xmm2, xmm7;
      paddd xmm4, xmm3;
      movdqa xmm7, xmm1;
      movdqa xmm3, xmm2;
      psrlq xmm2, 32;
      movups.x [rsi+80], xmm4;
      pmuludq xmm3, xmm0;
      pmuludq xmm2, xmm5;
      pmuludq xmm7, xmm0;
      pshufd xmm2, xmm2, 8;
      pshufd xmm3, xmm3, 8;
      punpckldq xmm3, xmm2;
      movdqu.x xmm2, [rsi+96];
      paddd xmm3, xmm2;
      movdqa xmm2, xmm1;
      pshufd xmm1, xmm7, 8;
      psrlq xmm2, 32;
      movups.x [rsi+96], xmm3;
      pmuludq xmm2, xmm5;
      pshufd xmm2, xmm2, 8;
      punpckldq xmm1, xmm2;
      movdqu.x xmm2, [rsi+112];
      paddd xmm2, xmm1;
      movdqa xmm1, xmm6;
      movdqu.x xmm6, [rsi+64];
      punpcklwd xmm1, xmm8;
      movups [rsi+112], xmm2;
      pmuludq xmm0, xmm1;
      psrlq xmm1, 32;
      pmuludq xmm1, xmm5;
      pshufd xmm0, xmm0, 8;
      pshufd xmm1, xmm1, 8;
      punpckldq xmm0, xmm1;
      paddd xmm0, xmm6;
      movups [rsi+64], xmm0;
    }
  }


  // 32-bit accumulators, seeded with the biases.
  tmp: [32] s32;
  memcpy(*tmp[0], *biases[0], size_of(s32) * 32);

  offset := 0;
  mask: u32 = 0;
  input_pointer := input;
  #asm SSE {
    pxor.x zeroes: vec, zeroes;
  }

  while offset < inDims {
    // bit i of mask is set when input[offset + i] > 0
    #asm SSE, SSE2 {
      movups.x xmm0: vec, [input_pointer];
      pcmpgtb.x xmm0, zeroes;
      pmovmskb.x mask, xmm0;
      add input_pointer, 16;
    }

    // visit only the positive inputs of this group of 16
    while mask {
      idx: int;
      #asm SSE {
        bsf idx, mask;
        add idx, offset;
      }
      factor: s32 = input[idx];
      index := idx << 5; // idx * 32.
      sse_simd(factor, *tmp[0], *weights[index]);
      mask &= mask - 1;
    }

    offset += 16;
  }


  #asm SSE2 {
    mov.d reg: gpr, 0x00_7f_00_7f;
    movd xmm_127: vec, reg;
    pshufd.x xmm_127, xmm_127, 0;      // 127 in every 16-bit lane
    pxor.x xmm_000: vec, xmm_000;
  }

  // Shift / clamp / narrow 4 accumulators per iteration -> 4 output bytes.
  outVec := output;
  tmp_data := tmp.data;
  for #v2 < 0..7 {
    #asm SSE {
      movups.x xmm_relu: vec, [tmp_data];
      packssdw.x xmm_relu, xmm_000;
      psraw.x xmm_relu, 6;
      pmaxsw.x xmm_relu, xmm_000;
      pminsw.x xmm_relu, xmm_127;
      packsswb.x xmm_relu, xmm_000;
      // Only the low 4 bytes of xmm_relu are results. Store exactly those:
      // a 16-byte store at outVec + 4*k would run 12 bytes past the 32-byte
      // output buffer on the last iterations.
      movd lane: gpr, xmm_relu;
      mov.d [outVec], lane;
      add tmp_data, 0x10;
      add outVec, 0x04;
    }
  }
}

// Output neuron: biases + dot product of 32 input bytes with 32 weight bytes.
// pmaddubsw reads `input` as unsigned bytes and `weights` as signed bytes.
affine_propagate :: (input: *s8, biases: s32, weights: *s8) -> s32 #expand {
  eax: s32 = 0x0001_0001;   // two 16-bit ones, broadcast below for pmaddwd
  #asm SSE, SSE2, SSE3, SSE4_1 {
    movups.x xmm0: vec, [input];
    movups.x xmm1: vec, [input + 0x10];
    pmaddubsw.x xmm0, [weights];
    pmaddubsw.x xmm1, [weights + 0x10];
    movd ones_xmm: vec, eax;
    pshufd ones_xmm, ones_xmm, 0x0;
    pmaddwd.x xmm0, ones_xmm;
    pmaddwd.x xmm1, ones_xmm;
    paddd.x xmm0, xmm1;
    // horizontal sum of the four 32-bit lanes
    pshufd xmm1, xmm0, 0x1b;
    paddd.x xmm0, xmm1;
    movd eax, xmm0;
    pextrd val: gpr, xmm0, 1;
    add eax, val;
    add eax, biases;
  }

  return eax;
}

#import "Basic";
#import "File";

--------------------------------------------------------------------------------
/resources/AnonymousPro.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danieltan1517/chess-jai/cdb560927f4a9cc02ccd91480f24f47f59609114/resources/AnonymousPro.ttf
--------------------------------------------------------------------------------
/resources/capture.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danieltan1517/chess-jai/cdb560927f4a9cc02ccd91480f24f47f59609114/resources/capture.wav
--------------------------------------------------------------------------------
/resources/chess_pieces.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danieltan1517/chess-jai/cdb560927f4a9cc02ccd91480f24f47f59609114/resources/chess_pieces.png
--------------------------------------------------------------------------------
/resources/move.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danieltan1517/chess-jai/cdb560927f4a9cc02ccd91480f24f47f59609114/resources/move.wav
--------------------------------------------------------------------------------
/resources/nn-04cf2b4ed1da.nnue:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danieltan1517/chess-jai/cdb560927f4a9cc02ccd91480f24f47f59609114/resources/nn-04cf2b4ed1da.nnue
--------------------------------------------------------------------------------
/resources/settings_icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danieltan1517/chess-jai/cdb560927f4a9cc02ccd91480f24f47f59609114/resources/settings_icon.png
--------------------------------------------------------------------------------
/search.jai:
--------------------------------------------------------------------------------
#import "Math";
#import "Basic";
#import "Thread";

// Helper threads for lazy SMP, and one private ChessGame per helper.
lazy_smp_threads: Thread_Group;
num_threads: int = 0;          // number of helper threads (the caller's thread is not counted)
chess_work: [] ChessGame;

// Shuts the helper threads down, if any were created.
free_threads :: () {
  if num_threads > 0
    shutdown(*lazy_smp_threads);
}

// Entry point for a search request: runs `search` on the calling thread and,
// at maximum difficulty only, on every helper thread as well.
// Returns the move chosen by the calling thread's search.
uci_search :: (chess: *ChessGame) -> bestmove: Move16 {
  time_begin = seconds_since_init();
  nodes_searched = 0;

  // Number of work items actually handed to the helpers. Waiting on
  // num_threads instead would spin forever whenever helpers exist but no
  // work was queued (difficulty != 8).
  work_remaining := 0;

  // helpers are only used on max difficulty: give each one its own copy
  // of the position and queue it.
  if difficulty == 8 {
    for *chess_work {
      copy_chessgame(it, chess);
      add_work(*lazy_smp_threads, it);
      work_remaining += 1;
    }
    // NOTE(review): start() runs on every search at this difficulty - confirm
    // Thread_Group tolerates repeated start() calls.
    start(*lazy_smp_threads);
  }

  // the calling thread searches too.
  bestmove := search(chess);

  // make sure work is finished before we report bestmove.
  while work_remaining > 0 {
    results := get_completed_work(*lazy_smp_threads);
    work_remaining -= results.count;
    reset_temporary_storage();
  }

  return bestmove;
}

// Sets the total number of searching threads. One of them is the caller's
// thread, so num_wanted - 1 helpers are created; values below 1 are treated as 1.
set_threads :: (num_wanted: int) {
  // never hand a negative thread count to init / NewArray
  helpers := max(num_wanted - 1, 0);
  if num_threads == helpers then
    return;
  if num_threads > 0 {
    shutdown(*lazy_smp_threads);
  }

  // initialize the threads
  num_threads = helpers;
  init(*lazy_smp_threads, cast(s32)num_threads, search_thread, false);
  lazy_smp_threads.logging = false;

  // release the previous per-thread games, then allocate one per helper
  for *chess_work {
    free_chess_game(it);
  }
  if chess_work.count
    array_free(chess_work);

  chess_work = NewArray(num_threads, ChessGame, alignment=64);
  for *chess_work {
    it.main_thread = false;
    initialize_chess_game_memory(it);
  }
}

set_multi_pv :: (number: int) {
  multi_pv = number;
}

// Sets the playing strength, 1 (weakest) .. 8 (full strength).
// The value is clamped because other procedures index fixed tables with
// difficulty-1.
set_difficulty :: (number: int) {
  difficulty = max(1, min(number, 8));
}

difficulty: int = 8;

// Thread_Group work procedure: `work` is one of the ChessGame entries queued
// by uci_search.
search_thread :: (group: *Thread_Group, thread: *Thread, work: *void) -> Thread_Continue_Status {
  chess := cast(*ChessGame) work;
  search(chess);
  return .CONTINUE;
}

// main search function. iterative deepening.
85 | search :: (chess: *ChessGame) -> bestmove: Move16 { 86 | 87 | print_stats :: (depth: int, line: int) #expand { 88 | if chess.main_thread == false return; 89 | 90 | builder: String_Builder; 91 | builder.allocator = temp; 92 | t_ms := cast(int)(`time_taken*1000.0); // convert secs to ms 93 | nps := cast(int)(nodes_searched/(`time_taken+0.0001)); 94 | print_to_builder(*builder, "info depth % seldepth % ", depth, chess.maxply); 95 | if score >= -INF+20 && score <= INF-20 then { 96 | print_to_builder(*builder, "nodes % time % score cp % nps % multipv % pv", nodes_searched, t_ms, score, nps, line); 97 | } else { 98 | mate := INF - abs(score); 99 | mate = (mate/2) + (mate & 1); 100 | print_to_builder(*builder, "nodes % time % score mate % nps % multipv % pv", nodes_searched, t_ms, mate, nps, line); 101 | } 102 | count := chess.history.pv_table[0].move_count - 1; 103 | for i: 0..count { 104 | move := chess.history.pv_table[0][i]; 105 | if move == Move16.Quiet break; // do not print out NULL moves. 106 | append(*builder, #char " "); 107 | append(*builder, move); 108 | } 109 | str := builder_to_string(*builder,, allocator=__temporary_allocator); 110 | print("%1%2", str, NEWLINE); 111 | } 112 | 113 | clear_history(chess); 114 | if chess.maxdepth == -1 then 115 | chess.maxdepth = S64_MAX; // set maxdepth arbitarily high to make it loop infinitely. 116 | if chess.maxnodes == -1 then 117 | chess.maxnodes = S64_MAX; // set maxnodes arbitarily high to make it loop infinitely. 118 | if chess.movetime == -1 then 119 | chess.movetime = S64_MAX; // set movetime arbitarily high to make it loop infinitely. 120 | // this is basically a fancy wrapper around negamax, negamax does all the work, this just sets everything up. 
121 | score : int; 122 | alpha: int = -INF+1; 123 | beta: int = INF-1; 124 | delta := 50; 125 | cur_depth := 1; 126 | 127 | maxdepth := set_maxdepth(chess.maxdepth); 128 | maxnodes := chess.maxnodes; 129 | 130 | root_moves: MoveQueue; 131 | best_score := -INF; 132 | best_move := Move16.Quiet; 133 | 134 | num_multi_pv := get_num_multipv(chess); 135 | 136 | while outer_loop := cur_depth <= maxdepth { 137 | line := 1; 138 | root_moves.count = 0; 139 | while line <= num_multi_pv { 140 | if cur_depth <= 3 then { 141 | alpha = -INF; 142 | beta = INF; 143 | } 144 | reset_temporary_storage(); 145 | chess.ply = 0; 146 | chess.node_state = NodeState.NULL | NodeState.SSE; 147 | chess.maxply = 0; 148 | chess.depth = cur_depth; 149 | score = negamax_root(chess, *root_moves, line, cur_depth, alpha, beta, chess.fifty); 150 | time_taken := seconds_since_init() - time_begin; 151 | if stop() || nodes_searched >= maxnodes then 152 | break outer_loop; 153 | 154 | if score <= alpha then { 155 | if chess.main_thread == true 156 | print("info depth %1 lowerbound %2%3", cur_depth, score, NEWLINE); 157 | alpha = max(score-delta, -INF); 158 | delta += delta + delta / 5; 159 | continue; 160 | } 161 | 162 | if score >= beta then { 163 | if chess.main_thread == true 164 | print("info depth %1 upperbound %2%3", cur_depth, score, NEWLINE); 165 | beta = min(score+delta, INF); 166 | delta += delta + delta / 5; 167 | continue; 168 | } 169 | 170 | alpha = max(score - delta, -INF); 171 | beta = min(score + delta, INF); 172 | 173 | chess.score = cast, no_check(s16)score; 174 | 175 | if line == 1 { 176 | best_score = cast(s16) score; 177 | best_move = get_bestmove(chess); 178 | } else { 179 | if score > best_score then { 180 | best_score = cast(s16) score; 181 | best_move = get_bestmove(chess); 182 | } 183 | } 184 | 185 | print_stats(cur_depth, line); 186 | line += 1; 187 | } 188 | 189 | cur_depth += 1; 190 | } 191 | 192 | search_age += 1; 193 | return pick_move(best_move, best_score, num_multi_pv, 
*root_moves); 194 | 195 | } 196 | 197 | count_moves :: (chess: *Chess) -> int { 198 | moves: Moves(true); 199 | generate_moves(chess, *moves); 200 | return moves.count; 201 | } 202 | 203 | set_maxdepth :: (maxdepth: int) -> int { 204 | DEPTH_DIFFICULTY :: int.[1, 2, 3, 4, 5, 6, 9]; 205 | if difficulty == 8 then { 206 | return min(maxdepth, 1000); 207 | } else { 208 | return DEPTH_DIFFICULTY[difficulty-1]; 209 | } 210 | } 211 | 212 | pick_move :: (bestmove: Move16, topscore: s32, nummultipv: int, moves: *MoveQueue) -> Move16 { 213 | 214 | ERROR :: int.[512, 256, 175, 128, 75, 50, 20]; 215 | BLUNDERING :: u64.[50, 25, 25, 10, 10, 5, 2]; 216 | 217 | if difficulty == 8 then { 218 | // only looking for best move. 219 | return bestmove; 220 | } else { 221 | 222 | // pick randomly between the different suboptimal moves. 223 | // used to vary the computer difficulty and create 224 | // "different difficulty levels" for the 225 | // chess engine. 226 | // the greater the margin_error, the greater the chance of the computer making a 227 | // mistake. 228 | 229 | margin_error := ERROR[difficulty-1]; 230 | print("info string difficulty %1%2", difficulty, NEWLINE); 231 | 232 | if (random_u64(*move_rng) % 100) < BLUNDERING[difficulty-1] { 233 | // give the computer a random chance to blunder, increase error margin. 
234 | margin_error += 256; 235 | print("info string blunder move%1", NEWLINE); 236 | } 237 | 238 | move := bestmove; 239 | i := 0; 240 | while i < moves.count { 241 | score: int = moves.array[i].priority; 242 | margin: int = cast(int)random_u64(*move_rng) % margin_error; 243 | score += margin; 244 | if score > topscore { 245 | move = moves.array[i].move; 246 | } 247 | i += 1; 248 | } 249 | 250 | return move; 251 | } 252 | } 253 | 254 | move_rng: PRNG = ---; 255 | 256 | get_num_multipv :: (chess: *Chess) -> int { 257 | 258 | LINES :: int.[8, 5, 4, 3, 3, 3, 2, 1]; 259 | num_moves := count_moves(chess); 260 | num_multi_pv := multi_pv; 261 | if difficulty != 8 { 262 | num_multi_pv = max(LINES[difficulty-1], num_multi_pv); 263 | } 264 | 265 | num_multi_pv = min(num_moves, num_multi_pv); 266 | return num_multi_pv; 267 | } 268 | 269 | initialize_move_randomness :: () { 270 | now := current_time_consensus(); 271 | move_rng.seed = cast, no_check(u64)now.low ^ cast, no_check(u64)now.high; 272 | } 273 | 274 | negamax_root :: (chess: *ChessGame, root_moves: *MoveQueue, multipv: int, depth: int, alpha: int, beta: int, fifty: int) -> int { 275 | 276 | root_multipv :: (move: Move16, moves: *MoveQueue) -> bool { 277 | i := 0; 278 | while i> 4); 350 | } 351 | eval = -negamax_zw(chess, depth - R - 1, -alpha-1, -alpha, next_fifty); 352 | } 353 | 354 | // search the move again that has failed to be proved to be bad with normal alpha beta score bounds 355 | if eval > alpha then { 356 | eval = -negamax_pv(chess, depth-1, -beta, -alpha, next_fifty); 357 | } 358 | } 359 | 360 | unmake_move(chess, move, cap, castling); 361 | chess.en_passant = ep; 362 | if eval > best_score then { 363 | best_score = eval; 364 | if best_score > alpha { 365 | using chess.history; 366 | hash_flag = TFLAGS.EXACT; 367 | bmove = move; 368 | pv_table[ply][0] = move; 369 | move_count := pv_table[ply+1].move_count; 370 | memcpy(*pv_table[ply][1], *pv_table[ply+1][0], move_count * size_of(Move16)); 371 | 
// (tail of negamax_root; its opening lines precede this span)
                pv_table[ply].move_count = move_count + 1;
                alpha = best_score;

                if best_score >= beta then {
                    // non-captures == Quiet|Pawn Push Moves
                    if flags <= Move16.Double_Pawn_Push then {
                        update_quiet(chess, move, depth, quiet_moves, num_quiets, cap_moves, num_captures);
                    }
                    tt_store(chess.hash, best_score, TFLAGS.BETA, depth, move);
                    return best_score;
                }
            }
        }
        moves_searched += 1;
    }

    // No move was iterated: score by whether the side to move is in check.
    if move_count == 0 then
        return ifx in_check(chess) then -INF+ply else 0+ply;

    if hash_flag == TFLAGS.EXACT && get_move16_flag(bmove) > Move16.Double_Pawn_Push then
        update_tactics(chess, bmove, depth, cap_moves, num_captures, quiet_moves, num_quiets);

    tt_store(chess.hash, best_score, hash_flag, depth, bmove);
    append(root_moves, cast(s16)best_score, bmove);
    return best_score;
}

// Full-window node of the search.
//
// Iterates moves through the `pv` picker. The first move searched (or a move
// that received an extension of 1) is searched recursively with the full
// (-beta, -alpha) window; every other move is first searched by negamax_zw
// with a null window around alpha and re-searched with the full window only
// if that result exceeds alpha.
//
// Parameters:
//   chess  - game state; moves are made and unmade on it in place.
//   depth  - remaining depth; at depth <= 0 the result comes from quiescene().
//   alpha, beta - search window.
//   fifty  - fifty-move counter value for this node.
// Returns the best score found. The result is also written to the
// transposition table via tt_store.
negamax_pv :: (chess: *ChessGame, depth: int, alpha: int, beta: int, fifty: int) -> int {
    ply := chess.ply;
    if fifty >= 100 then
        return 0+ply;

    // mate distance pruning
    alpha = max(alpha,-INF+ply);
    beta = min(beta, INF-ply);
    if alpha >= beta then {
        return alpha;
    }

    if is_draw(chess, fifty) then
        return 0+ply;

    if depth <= 0 then
        return quiescene(chess, alpha, beta, fifty);

    // cannot trust that tt_entry will remain the same.
    // it can be overwritten by accident due to hash collisions
    // esp. during multi-threading. need to copy out the ttentry.

    // tt probe.
    tthit, ttentry := tt_probe_negamax(chess, ply, chess.hash, alpha, beta, depth);
    ttmove := chess.history.pv_table[ply][0];
    hash_move := ifx tthit then ttentry.ttmove else Move16.Quiet;

    // On every exit path, clear the next ply's PV line and killer moves.
    defer {
        using chess.history;
        memset(*pv_table[ply+1], 0, size_of(PV_Line));
        killer_moves[ply+1][1] = 0;
        killer_moves[ply+1][0] = 0;
    }

    // internal iterative deepening.
    // (As written this reduces the depth by one when neither a PV-table move
    // nor a hash move is available at depth > 8.)
    if depth > 8 && ttmove == 0 && hash_move == 0 then
        depth -= 1;

    // NOTE: we make eval really high,
    // so we will look at checks much more
    // carefully, when improving=1, more branches to look at.
    eval := INF;
    if tthit == true then
        eval = ttentry.score;
    else if !in_check(chess) then
        eval = cast(s16) evaluate(chess, fifty);

    // Push this node's eval for get_improving(); popped again on exit.
    array_add(*chess.eval, eval);
    defer pop(*chess.eval);

    ep := chess.en_passant;
    hash_flag := TFLAGS.ALPHA;
    // Moves tried at this node, split by kind, for the history updates below.
    cap_moves: [32] Move16;
    quiet_moves: [64] Move16;
    num_captures := 0;
    num_quiets := 0;
    moves_searched := 0;

    improving := get_improving(chess);
    late_move_prune := (3 + (depth*depth)) / (2-improving);

    tt_capture: bool = false;
    tt_quiet: bool = false;
    // Destination square of the most recent move in chess.moves, or -1 at ply 0.
    prev_to :: (chess: ChessGame) -> int {
        if chess.ply >= 1 {
            count := chess.moves.count;
            prev_move1 := chess.moves[count-1].mov16;
            to := get_move16_to(prev_move1);
            return to;
        }
        return -1;
    }

    move_count := 0;
    bmove := Move16.Quiet;
    pto := prev_to(chess);

    best_score: int = -INF;
    score_margin := -80_000 * depth;
    for :pv move, move_score : chess {
        move_count += 1;
        // Skip the excluded move at the ply where a singular-extension
        // verification search is in progress.
        if (chess.node_state & NodeState.SSE) == 0 && ply == chess.exply && move == chess.excluded_move then {
            continue;
        }

        flags, from, to := decode_move16(move);
        pfrom := piece_at(chess, from);
        att := piece_at(chess, from);
        vic := piece_at(chess, to);

        // piece
        if chess.depth>6 && moves_searched>0 {
            // Late Move Pruning. Search the Root Plies Exhaustively
            if flags <= Move16.Double_Pawn_Push && (moves_searched > late_move_prune) then
                break;
            if flags <= Move16.Double_Pawn_Push && move_score < score_margin then
                break;
            if depth < 6 && flags == Move16.Capture && PVALUE[att] > PVALUE[vic] && see(chess,move, -250*depth) then
                continue;
            else if depth < 6 && flags <= Move16.Double_Pawn_Push && see(chess,move, -90*depth*depth) then
                continue;
        }

        ext := 0;
        if chess.node_state & NodeState.SSE && depth>=7 && ply < chess.depth && move == hash_move && tthit && cast(int)ttentry.depth >= depth-3
            && ttentry.flag == TFLAGS.ALPHA && abs(alpha) < (INF-50) {
            if to == pto && flags >= Move16.Capture {
                // recapture extension.
                ext = 1;
                tt_capture = true;
                tt_quiet = false;
            } else {
                // singular extension:
                // if one move is better than all the rest, then we consider this singular
                // "singular" is determined by checking all other moves at a shallow depth on a nullwindow
                singular_beta := ttentry.score - depth;
                chess.excluded_move = move;
                chess.exply = ply;
                // The SSE bit is toggled off for the verification search and
                // restored right after it.
                chess.node_state ^= NodeState.SSE;
                score := negamax_zw(chess, depth/2-1, singular_beta-1, singular_beta, fifty);
                chess.node_state ^= NodeState.SSE;
                chess.excluded_move = Move16.Quiet;
                chess.exply = INF;
                if score < singular_beta then {
                    ext = 1;
                    tt_capture = flags > Move16.Double_Pawn_Push;
                    tt_quiet = flags <= Move16.Double_Pawn_Push;
                } else if singular_beta >= beta then {
                    return singular_beta;
                } else if ttentry.score >= beta then {
                    ext = -2;
                } else if ttentry.score <= alpha && ttentry.score <= score {
                    ext = -1;
                }
            }
        }

        if flags <= Move16.Double_Pawn_Push {
            // add quiet move.
            quiet_moves[num_quiets] = move;
            num_quiets += 1;
        } else {
            // add capture move.
            cap_moves[num_captures] = move;
            num_captures += 1;
        }

        // make move.
        next_fifty := fifty_move(chess, move, fifty);
        cap, castling := make_move(chess, move);
        eval := 0;
        if ext==1 || moves_searched == 0 {
            eval = -negamax_pv(chess, depth+ext-1, -beta, -alpha, next_fifty);
        } else {
            if in_check(chess) {
                // do not reduce depth on check.
                eval = -negamax_zw(chess, depth, -alpha-1, -alpha, next_fifty);
            } else if flags > Move16.Double_Pawn_Push {
                // captures. promotions.
                R := 1 - 4 * move_score / (abs(move_score) + 24576);
                R = max(0, R);
                eval = -negamax_zw(chess, depth+ext-R-1, -alpha-1, -alpha, next_fifty);
            } else if num_quiets > 3 && flags <= Move16.Double_Pawn_Push {
                // Late Move Reduction when not in check and not giving check.
                R := LateMoveReduction[min(depth,63)][min(moves_searched,63)];
                R -= improving;
                if chess.probcut then {
                    R += 2;
                }

                R -= 1 + 15 / (3 + depth);

                // increase reduction if capture.
                if tt_capture then {
                    R += 1;
                }

                // decrease reduction if quiet.
                if tt_quiet then {
                    R -= 2;
                }

                R -= move_score / 40_000;
                R = max(0, R);
                eval = -negamax_zw(chess, depth+ext-R-1, -alpha-1, -alpha, next_fifty);
            } else {
                // do normal search.
                eval = -negamax_zw(chess, depth+ext-1, -alpha-1, -alpha, next_fifty);
            }

            // search the move again that has failed to be proved to be bad with normal alpha beta score bounds
            if eval > alpha then
                eval = -negamax_pv(chess, depth+ext-1, -beta, -alpha, next_fifty);
        }

        unmake_move(chess, move, cap, castling);
        chess.en_passant = ep;
        if eval > best_score then {
            best_score = eval;
            bmove = move;
            if best_score > alpha {
                using chess.history;
                hash_flag = TFLAGS.EXACT;
                // Record this move as the head of the line for this ply and
                // append the line stored for the next ply behind it.
                pv_table[ply][0] = move;
                move_count := pv_table[ply+1].move_count;
                memcpy(*pv_table[ply][1], *pv_table[ply+1][0], move_count * size_of(Move16));
                pv_table[ply].move_count = move_count + 1;
                alpha = eval;

                if best_score >= beta then {
                    // non-captures == Quiet|Pawn Push Moves
                    if flags <= Move16.Double_Pawn_Push
                        update_quiet(chess, move, depth, quiet_moves, num_quiets, cap_moves, num_captures);
                    tt_store(chess.hash, best_score, TFLAGS.BETA, depth, move);
                    return best_score;
                }
            }
        }
        moves_searched += 1;
    }

    // No move was iterated: score by whether the side to move is in check.
    if move_count == 0 then
        return ifx in_check(chess) then -INF+ply else 0+ply;

    if hash_flag == TFLAGS.EXACT && get_move16_flag(bmove) > Move16.Double_Pawn_Push then
        update_tactics(chess, bmove, depth, cap_moves, num_captures, quiet_moves, num_quiets);

    tt_store(chess.hash, best_score, hash_flag, depth, bmove);
    return best_score;

}

// History update for a best move that is not a quiet move (callers in this
// file invoke it when the best move's flag is above Double_Pawn_Push).
// Lowers the capture-history entry of every other recorded capture, lowers the
// history and counter-history entries of every recorded quiet move, and raises
// the capture-history entry of `move`. The step size is depth*depth capped at 128.
update_tactics :: (chess: *ChessGame, move: Move16, depth: int, cap_moves: []Move16, num_captures: int, quiet_moves: []Move16, num_quiets: int) {
    ply := chess.ply;
    incr : s32 = min(cast(s32)(depth*depth), 128);
    prev1, to1 := get_prev_move(chess, 1);
    prev2, to2 := get_prev_move(chess, 2);
    prev4, to4 := get_prev_move(chess, 4);

    using chess.history;
    for i: 0..num_captures-1 {
// (continuation of update_tactics: body of its loop over the recorded captures)
        prev_move := cap_moves[i];
        if prev_move != move {
            from := get_move16_from(prev_move);
            p := piece_at(chess, from);
            to := get_move16_to(prev_move);
            opp := norm(piece_at(chess, to));
            incr_history(*cap_history[p][to][opp], -incr);
        }
    }

    // decrement all quiet moves which < alpha.
    for i: 0..num_quiets-1 {
        quiet_move := quiet_moves[i];
        from := get_move16_from(quiet_move);
        p := piece_at(chess, from);
        to := get_move16_to(quiet_move);
        incr_history(*history_moves[p][to], -incr);
        incr_history(*counter_history[prev1][to1][p][to], -incr);
        incr_history(*counter_history[prev2][to2][p][to], -incr);
        incr_history(*counter_history[prev4][to4][p][to], -incr);
    }

    // Reward the best move itself in the capture history.
    from := get_move16_from(move);
    p := piece_at(chess, from);
    to := get_move16_to(move);
    opp := norm(piece_at(chess, to));
    incr_history(*cap_history[p][to][opp], incr);
}

// for-expansion used as `for :pv move, move_score : chess`.
//
// Yields moves in stages, refilling move_queue only when it runs empty:
//   PV       - the caller's hash_move if legal, otherwise the caller's ttmove if legal
//   CAPTURES - generate_tactics output minus ttmove/hash_move, ordered by sort_captures
//   KILLERS  - the two killer moves stored for the caller's ply, if legal
//   QUIETS   - generate_quiets output minus ttmove/hash_move/killers, ordered by sort_quiets
//   END      - leaves the loop
// The loop also ends as soon as stop() returns true.
//
// Names prefixed with a backtick (hash_move, ttmove, ply) are read from the
// scope of the code that uses the loop. `it` is the move and `it_index` is the
// queue priority it was popped with.
pv :: (chess: *ChessGame, body: Code, f: For_Flags) #expand {
    // main principal variation search move picker.
    stage: enum {PV; CAPTURES; KILLERS; QUIETS; END; } = .PV;
    move_queue: MoveQueue;
    // Filled in the CAPTURES stage by generate_attacks and reused for quiets.
    unsafe: u64 = 0;
    check: u64 = 0;
    turn := chess.turn;
    while outer_loop := !stop() {
        while move_queue.count <= 0 {
            if stage == {
                case .PV;
                    stage = .CAPTURES;
                    if is_legal(chess, `hash_move) then {
                        append(*move_queue, 0x7FFF, `hash_move);
                    } else if is_legal(chess, `ttmove) {
                        append(*move_queue, 0x7FFF, `ttmove);
                    }
                case .CAPTURES;
                    stage = .KILLERS;
                    unsafe, check = generate_attacks(chess);
                    generate_tactics(unsafe, check, chess, *move_queue);
                    filter_capture_pv(*move_queue, `ttmove, `hash_move);
                    sort_captures(*move_queue, chess);
                case .KILLERS;
                    using chess.history;
                    stage = .QUIETS;
                    move := killer_moves[`ply][0];
                    if is_legal(chess, move) {
                        append(*move_queue, 0x7FFF, move);
                    }

                    move = killer_moves[`ply][1];
                    if is_legal(chess, move) {
                        append(*move_queue, 0x7FFF-1, move);
                    }
                case .QUIETS;
                    using chess.history;
                    stage = .END;
                    generate_quiets(unsafe, check, chess, *move_queue);
                    kill1 := killer_moves[`ply][0];
                    kill2 := killer_moves[`ply][1];
                    filter_quiet_pv(*move_queue, `ttmove, `hash_move, kill1, kill2);
                    sort_quiets(*move_queue, chess);
                case .END;
                    break outer_loop; // terminate loop
            }
        }
        `it, priority := pop(*move_queue);
        `it_index := cast(int)priority;
        #insert body;
    }
}

pvcut :: (chess: *ChessGame, body: Code, f: For_Flags) #expand {
    // main principal variation search move picker.
// (body of the pvcut move picker; its header is on the preceding line)
    // Same staging as the full picker, but only two refills happen here: the
    // PV stage (hash move, else PV-table move) and the CAPTURES stage, which
    // goes straight to END afterwards.
    stage: enum {PV; CAPTURES; KILLERS; QUIETS; END; } = .PV;
    move_queue: MoveQueue;
    unsafe: u64 = 0;
    check: u64 = 0;
    turn := chess.turn;
    while outer_loop := !stop() {
        while move_queue.count <= 0 {
            if stage == {
                case .PV;
                    stage = .CAPTURES;
                    if is_legal(chess, `hash_move) then {
                        append(*move_queue, 0x7FFF, `hash_move);
                    } else if is_legal(chess, `ttmove) {
                        append(*move_queue, 0x7FFF, `ttmove);
                    }
                case .CAPTURES;
                    stage = .END;
                    unsafe, check = generate_attacks(chess);
                    generate_tactics(unsafe, check, chess, *move_queue);
                    filter_capture_pv(*move_queue, `ttmove, `hash_move);
                    sort_captures(*move_queue, chess);
                case .END;
                    break outer_loop; // terminate loop
            }
        }
        `it, priority := pop(*move_queue);
        `it_index := cast(int)priority;
        #insert body;
    }
}

// Drops every queue entry whose move equals `pv` or `hash`.
// Removal overwrites the entry with the last one and shrinks the count, so the
// order of the remaining entries is not preserved.
filter_capture_pv :: (move_queue: *MoveQueue, pv: Move16, hash: Move16) {
    index := 0;
    while index < move_queue.count {
        candidate := move_queue.array[index].move;
        if candidate != pv && candidate != hash {
            index += 1;
            continue;
        }

        // Swap-remove; the slot is examined again on the next pass because it
        // now holds what used to be the last entry.
        move_queue.count -= 1;
        move_queue.array[index] = move_queue.array[move_queue.count];
    }
}

// Drops every queue entry whose move equals `pv`, `hash`, `kill1` or `kill2`.
// Same swap-remove scheme as filter_capture_pv.
filter_quiet_pv :: (move_queue: *MoveQueue, pv: Move16, hash: Move16, kill1: Move16, kill2: Move16) {
    index := 0;
    while index < move_queue.count {
        candidate := move_queue.array[index].move;
        keep := candidate != pv && candidate != hash && candidate != kill1 && candidate != kill2;
        if keep {
            index += 1;
            continue;
        }

        move_queue.count -= 1;
        move_queue.array[index] = move_queue.array[move_queue.count];
    }
}

// Assigns an ordering priority to every entry of a queue of tactical moves
// and then rebuilds the heap over the queue.
sort_captures :: (move_queue: *MoveQueue, chess: *ChessGame) {

    // Priority of one tactical move, by move flag:
    //   plain capture      - mvv_lva(victim, attacker) + capture history
    //   en passant         - mvv_lva of pawn takes pawn + capture history (victim index 0)
    //   queen promotion    - 20000 + capture history (victim index 0)
    //   other promotions,
    //   castling           - capture history (victim index 0)
    //   promotion captures - capture history for the captured piece
    // Any other flag trips the assert.
    score_capture :: (flag: Move16, from: int, to: int, chess: *ChessGame) -> s32 {
        using chess.history;
        if flag == {
            case .Capture;
                victim := norm(piece_at(chess, to));
                attacker := piece_at(chess, from);
                return mvv_lva(victim, attacker) + cap_history[attacker][to][victim];

            case .Ep_Capture;
                attacker := piece_at(chess, from);
                return mvv_lva(.W_PAWN, .W_PAWN) + cap_history[attacker][to][0];

            case .Queen_Promotion;
                attacker := piece_at(chess, from);
                return 20000 + cap_history[attacker][to][0];

            case .Knight_Promotion; #through;
            case .Bishop_Promotion; #through;
            case .Rook_Promotion;   #through;
            case .King_Castle;      #through;
            case .Queen_Castle;
                attacker := piece_at(chess, from);
                return cap_history[attacker][to][0];

            case .Knight_Promotion_Capture; #through;
            case .Bishop_Promotion_Capture; #through;
            case .Rook_Promotion_Capture;   #through;
            case .Queen_Promotion_Capture;
                victim := norm(piece_at(chess, to));
                attacker := piece_at(chess, from);
                return cap_history[attacker][to][victim];
        }

        assert(false);
        return 0;
    }

    for slot: 0..move_queue.count-1 {
        kind, origin, target := decode_move16(move_queue.array[slot].move);
        move_queue.array[slot].priority = score_capture(kind, origin, target, chess);
    }
    construct_heap(move_queue);
}

sort_quiets :: (move_queue: *MoveQueue, chess: *ChessGame) {

    score_quiets :: (flag: Move16, from: int, to: int, chess: *ChessGame, counter:
[][64] s32, follow1: [][64] s32, follow2: [][64] s32) -> s32 {
        // Weighted sum: plain history + 2x counter + follow1 + follow2/2.
        piece := piece_at(chess, from);
        return chess.history.history_moves[piece][to] + (counter[piece][to] * 2) + follow1[piece][to] + (follow2[piece][to] / 2);
    }


    // History tables looked up relative to the moves 1, 2 and 4 back.
    counter: [][64] s32 = get_countermove_history(chess, 1);
    follow1: [][64] s32 = get_countermove_history(chess, 2);
    follow2: [][64] s32 = get_countermove_history(chess, 4);

    for i: 0..move_queue.count-1 {
        move := move_queue.array[i].move;
        flags, from, to := decode_move16(move);
        priority := score_quiets(flags, from, to, chess, counter, follow1, follow2);
        move_queue.array[i].priority = priority;
    }
    construct_heap(move_queue);

}

// Search node used for null-window probes.
//
// Order of work, as written below:
//   1. read_input() poll, fifty-move / draw checks, mate distance pruning.
//   2. Transposition probe; tt_prune may return from this function directly.
//   3. At depth <= 0 the result comes from quiescene().
//   4. When not in check: reverse futility pruning, razoring, null-move
//      pruning and prob cut, each of which can return early.
//   5. Move loop over the `pv` picker with late-move pruning, SEE pruning,
//      recapture/singular extensions and late-move reductions. A move whose
//      score reaches beta is stored with TFLAGS.BETA and returned immediately.
//   6. Otherwise the best score is stored with .ALPHA and returned.
//
// Parameters mirror negamax_pv: game state (mutated in place by make/unmake),
// remaining depth, window bounds, and the fifty-move counter.
negamax_zw :: (chess: *ChessGame, depth: int, alpha: int, beta: int, fifty: int) -> int {
    read_input(chess.main_thread, nodes_searched, chess.maxnodes, time_begin, chess.movetime);
    ply := chess.ply;
    if ply && fifty >= 100 then
        return 0+ply;

    if is_draw(chess, fifty) then
        return 0+ply;

    // mate distance pruning
    alpha = max(alpha,-INF+ply);
    beta = min(beta, INF-ply);
    if alpha >= beta then {
        return alpha;
    }

    // cannot trust that tt_entry will remain the same.
    // it can be overwritten by accident due to hash collisions
    // esp. during multi-threading. need to copy out the ttentry.

    // tt probe.
    tthit, ttentry := tt_probe_negamax(chess, ply, chess.hash, alpha, beta, depth);
    // Macro: may `return` from negamax_zw when the stored entry is usable.
    tt_prune(tthit, ttentry, ply, false, alpha, beta, depth);

    if depth <= 0 then
        return quiescene(chess, alpha, beta, fifty);

    ttmove := chess.history.pv_table[ply][0];
    hash_move := ifx tthit then ttentry.ttmove else Move16.Quiet;

    // On every exit path, clear the next ply's PV line and killer moves.
    defer {
        using chess.history;
        memset(*pv_table[ply+1], 0, size_of(PV_Line));
        killer_moves[ply+1][1] = 0;
        killer_moves[ply+1][0] = 0;
    }

    // NOTE: we make eval really high,
    // so we will look at checks much more
    // carefully, when improving=1, more branches to look at.
    array_add(*chess.eval, INF);
    defer pop(*chess.eval);
    if !in_check(chess) {
        eval := ifx tthit then ttentry.score else evaluate(chess, fifty);
        // Replace the INF placeholder pushed above with the real value.
        chess.eval[chess.eval.count-1] = cast(s16) eval;
        improving := get_improving(chess);

        // reverse futility pruning. stockfish has margin at 214*depth
        margin := 125 *(depth-improving);
        if depth < 11 && (eval-margin) >= beta then {
            return eval;
        }

        // razoring.
        if depth <= 5 && eval + (200*depth) <= alpha {
            score := quiescene(chess, alpha, beta, fifty);
            if score <= alpha then {
                return score;
            }
        }

        // null move pruning w/ verification at higher depths.
        if depth > 3 && eval >= beta then {
            R := 4 + depth/3 + min((eval-beta) / 256, 3);
            ep := make_null_move(chess);
            score := -negamax_zw(chess, depth-R-1, -beta, -beta + 1, 0);
            unmake_null_move(chess, ep);

            // verification at higher depths.
            // (The verification result replaces the null-move score.)
            if depth>12 || zugzwang(chess) then {
                score = negamax_zw(chess, depth-R-1, beta-1, beta, 0);
            }

            if score >= beta then {
                return score;
            }
        }

        // prob cut.
        prob_beta := beta + 125 - 46 * improving;
        if depth > 5 && abs(beta) < 20_000 && !(tthit && ttentry.depth >= depth-3 && ttentry.score != 0 && ttentry.score < prob_beta) then {
            // chess.probcut is raised for the duration of this loop; the
            // late-move reduction code reads it.
            chess.probcut += 1;
            defer chess.probcut -= 1;
            ep := chess.en_passant;
            for :pvcut move, move_score: chess {
                flags, from, to := decode_move16(move);
                pfrom := piece_at(chess, from);

                // make move.
                cap, castling := make_move(chess, move);
                // Quiescence first; only a move that already reaches
                // prob_beta there gets the reduced-depth search.
                score := -quiescene(chess, -prob_beta, -prob_beta + 1, 0);
                if score >= prob_beta then
                    score = -negamax_zw(chess, depth-4, -prob_beta, -prob_beta + 1, 0);

                // unmake move.
                unmake_move(chess, move, cap, castling);
                chess.en_passant = ep;

                if score >= prob_beta then {
                    tt_store(chess.hash, score, TFLAGS.BETA, depth-4, move);
                    return score;
                }
            }
        }
    }

    ep := chess.en_passant;
    // Moves tried at this node, split by kind, for the history updates below.
    cap_moves: [32] Move16;
    quiet_moves: [64] Move16;
    num_captures := 0;
    num_quiets := 0;
    moves_searched := 0;
    // Destination square of the most recent move in chess.moves, or -1 at ply 0.
    prev_to :: (chess: ChessGame) -> int {
        if chess.ply >= 1 {
            count := chess.moves.count;
            prev_move1 := chess.moves[count-1].mov16;
            to := get_move16_to(prev_move1);
            return to;
        }
        return -1;
    }



    improving := get_improving(chess);
    late_move_prune := (3 + (depth*depth)) / (2-improving);
    tt_capture: bool = false;
    tt_quiet: bool = false;
    move_count := 0;
    bmove := Move16.Quiet;
    pto := prev_to(chess);
    best_score: int = -INF;
    score_margin := -80_000 * depth;
    for :pv move, move_score : chess {
        move_count += 1;
        // Skip the excluded move at the ply where a singular-extension
        // verification search is in progress.
        if (chess.node_state & NodeState.SSE) == 0 && ply == chess.exply && move == chess.excluded_move then {
            continue;
        }

        flags, from, to := decode_move16(move);
        pfrom := piece_at(chess, from);
        att := piece_at(chess, from);
        vic := piece_at(chess, to);

        // piece
        if chess.depth>6 && moves_searched>0 {
            // Late Move Pruning. Search the Root Plies Exhaustively
            if flags <= Move16.Double_Pawn_Push && (moves_searched > late_move_prune) then
                break;
            if flags <= Move16.Double_Pawn_Push && move_score < score_margin then
                break;
            if depth < 6 && flags == Move16.Capture && PVALUE[att] > PVALUE[vic] && see(chess, move, -250*depth) then
                continue;
            else if depth < 6 && flags <= Move16.Double_Pawn_Push && see(chess, move, -90*depth*depth) then
                continue;
        }

        ext := 0;
        if chess.node_state & NodeState.SSE && depth>=7 && ply < chess.depth && move == hash_move && tthit && cast(int)ttentry.depth >= depth-3
            && ttentry.flag == TFLAGS.ALPHA {
            if to == pto && flags >= Move16.Capture {
                // recapture extension.
                ext = 1;
                tt_capture = true;
                tt_quiet = false;
            } else {
                // singular extension:
                // if one move is better than all the rest, then we consider this singular
                // singular is determined by checking all other moves at a shallow depth on a nullwindow
                singular_beta := ttentry.score - depth;
                chess.excluded_move = move;
                chess.exply = ply;
                // The SSE bit is toggled off for the verification search and
                // restored right after it.
                chess.node_state ^= NodeState.SSE;
                score := negamax_zw(chess, depth/2-1, singular_beta-1, singular_beta, fifty);
                chess.node_state ^= NodeState.SSE;
                chess.excluded_move = Move16.Quiet;
                chess.exply = INF;
                if score < singular_beta then {
                    ext = 1;
                    tt_capture = flags > Move16.Double_Pawn_Push;
                    tt_quiet = flags <= Move16.Double_Pawn_Push;
                } else if singular_beta >= beta then {
                    return singular_beta;
                } else if ttentry.score >= beta then {
                    ext = -2;
                } else if ttentry.score <= alpha && ttentry.score <= score {
                    ext = -1;
                }
            }
        }

        if flags <= Move16.Double_Pawn_Push {
            // add quiet move.
            quiet_moves[num_quiets] = move;
            num_quiets += 1;
        } else {
            // add capture move.
            cap_moves[num_captures] = move;
            num_captures += 1;
        }

        // make move.
        next_fifty := fifty_move(chess, move, fifty);
        cap, castling := make_move(chess, move);
        eval := 0;
        if ext==1 || moves_searched == 0 {
            eval = -negamax_zw(chess, depth+ext-1, -beta, -alpha, next_fifty);
        } else {
            if in_check(chess) {
                // do not reduce depth on check.
                eval = -negamax_zw(chess, depth, -alpha-1, -alpha, next_fifty);
            } else if flags > Move16.Double_Pawn_Push {
                // captures. promotions.
                R := 1 - 4 * move_score / (abs(move_score) + 24576);
                R = max(0, R);
                eval = -negamax_zw(chess, depth+ext-R-1, -alpha-1, -alpha, next_fifty);
            } else if ply > 0 && num_quiets > 3 && flags <= Move16.Double_Pawn_Push {
                // Late Move Reduction when not in check and not giving check.
                R := LateMoveReduction[min(depth,63)][min(moves_searched,63)];
                R -= improving;
                if chess.probcut then {
                    R += 2;
                }

                // increase reduction if capture.
                if tt_capture then {
                    R += 1;
                }

                // decrease reduction if quiet.
                if tt_quiet then {
                    R -= 2;
                }

                R -= move_score / 40_000;
                R = max(0, R);
                eval = -negamax_zw(chess, depth+ext-R-1, -alpha-1, -alpha, next_fifty);
            } else {
                // do normal search.
                eval = -negamax_zw(chess, depth+ext-1, -alpha-1, -alpha, next_fifty);
            }
        }

        unmake_move(chess, move, cap, castling);
        chess.en_passant = ep;
        // Note the >= here: a later move with an equal score replaces bmove.
        if eval >= best_score {
            best_score = eval;
            bmove = move;
            if best_score >= beta then {
                using chess.history;
                // Record this move as the head of the line for this ply and
                // append the line stored for the next ply behind it.
                pv_table[ply][0] = move;
                move_count := pv_table[ply+1].move_count;
                memcpy(*pv_table[ply][1], *pv_table[ply+1][0], move_count * size_of(Move16));
                pv_table[ply].move_count = move_count + 1;

                // non-captures == Quiet|Pawn Push Moves
                if flags <= Move16.Double_Pawn_Push
                    update_quiet(chess, move, depth, quiet_moves, num_quiets, cap_moves, num_captures);
                tt_store(chess.hash, best_score, TFLAGS.BETA, depth, move);
                return best_score;
            }
        }

        moves_searched += 1;
    }

    // No move was iterated: score by whether the side to move is in check.
    if move_count == 0 then
        return ifx in_check(chess) then -INF+ply else 0+ply;
    tt_store(chess.hash, best_score, .ALPHA, depth, bmove);
    return best_score;
}

// History update for a quiet move that caused a cutoff (callers in this file
// invoke it when best_score >= beta and the move's flag is at most
// Double_Pawn_Push). Lowers the history entries of the other recorded quiet
// moves and of all recorded captures, raises the entries for `move`, and
// records `move` as a killer for the current ply. The step size is
// depth*depth capped at 512.
update_quiet :: (chess: *ChessGame, move: Move16, depth: int, quiet_moves: []Move16, num_quiets: int, cap_moves: []Move16, num_captures: int) {
    ply := chess.ply;
    incr : s32 = min(cast(s32) (depth*depth), 512);
    prev1, to1 := get_prev_move(chess, 1);
    prev2, to2 := get_prev_move(chess, 2);
    prev4, to4 := get_prev_move(chess, 4);

    using chess.history;
    // decrement all quiet moves which < alpha.
// (continuation of update_quiet: penalise the other recorded quiet moves)
    for i: 0..num_quiets-1 {
        prev_move := quiet_moves[i];
        if prev_move != move {
            from := get_move16_from(prev_move);
            p := piece_at(chess, from);
            to := get_move16_to(prev_move);
            // Counter-history weights: x2 for one move back, x1 for two
            // back, x1/2 for four back.
            incr_history(*history_moves[p][to], -incr);
            incr_history(*counter_history[prev1][to1][p][to], -incr*2);
            incr_history(*counter_history[prev2][to2][p][to], -incr);
            incr_history(*counter_history[prev4][to4][p][to], -incr/2);
        }
    }

    // Every recorded capture is penalised in the capture history.
    for i: 0..num_captures-1 {
        prev_move := cap_moves[i];
        from := get_move16_from(prev_move);
        p := piece_at(chess, from);
        to := get_move16_to(prev_move);
        opp := norm(piece_at(chess, to));
        incr_history(*cap_history[p][to][opp], -incr);
    }

    // update history heuristic
    p := piece_at(chess, get_move16_from(move));
    to := get_move16_to(move);
    incr_history(*history_moves[p][to], incr);
    incr_history(*counter_history[prev1][to1][p][to], incr*2);
    incr_history(*counter_history[prev2][to2][p][to], incr);
    incr_history(*counter_history[prev4][to4][p][to], incr/2);

    // Shift the killer slots unless `move` is already the first killer.
    if move != killer_moves[ply][0] then {
        killer_moves[ply][1] = killer_moves[ply][0];
        killer_moves[ply][0] = move;
    }
}

// NOTE(review): the text is damaged here. The listing jumps from original line
// 1183 to line 1187, so the body of incr_history and the opening of the
// following definition (apparently `norm`, judging by its call sites) are
// missing from this copy. The damaged lines are kept exactly as found.
incr_history :: (data: *s32, incr: s32) #expand {
< Piece #expand {
    if p >= Piece.B_KING
        p -= 7;
    return p;
}

// Transposition-table probe used by the negamax functions.
// While the NodeState.SSE bit is clear in chess.node_state, the probe is
// skipped at the ply equal to chess.exply and (false, zero-initialised entry)
// is returned; in every other case the result of tt_probe is passed through.
// The probe always uses chess.hash; the `hash` parameter is not read.
tt_probe_negamax :: (chess: ChessGame, ply: int, hash: u64, alpha: int, beta: int, depth: int) -> bool, TTData {
    tthit: bool = false;
    ttentry: TTData;

    if (chess.node_state & NodeState.SSE) == 0 {
        if ply != chess.exply then {
            tthit, ttentry = tt_probe(chess.hash, alpha, beta, depth);
        }
    } else {
        tthit, ttentry = tt_probe(chess.hash, alpha, beta, depth);
    }

    return tthit, ttentry;
}

// Looks up `hash` in the global transposition table.
// Returns (true, data) when the slot's stored key matches; in that case the
// slot's age is also refreshed to search_age. Returns (false, data of whatever
// occupies the slot) otherwise. alpha, beta and depth are accepted but not
// used by the lookup itself.
tt_probe :: (hash: u64, alpha: int, beta: int, depth: int) -> bool, TTData {
    key := hash % cast,no_check (u64) ttable.count;
    prefetch(*ttable[key], Prefetch_Hint.T0);
    // Work on a local copy of the slot.
    entry := ttable[key];
    // The stored key is the hash xor-ed with part of the entry's own payload,
    // so it is un-mixed the same way before comparing (see tt_store).
    if (entry.hash ^ (entry.padding & ~0xFF)) == hash then {
        // change the tt age.
        entry.age = search_age;
        memcpy(*ttable[key], *entry, size_of(TTEntry));
        return true, entry.data;
    }
    return false, entry.data;
}

// Macro: returns FROM THE CALLER with a stored score when the probe result can
// be used directly. Requires a hit, a non-PV node, ply > 0 and a stored depth
// of at least `depth`; then
//   EXACT entries return unconditionally,
//   ALPHA entries return when score <= alpha,
//   BETA  entries return when score >= beta.
// Otherwise execution continues after the call.
tt_prune :: (tthit: bool, ttentry: TTData, ply: int, pv_node: bool, alpha: int, beta: int, depth: int) #expand {
    // Scores within 20 of +/-INF are replaced by INF-ply / -INF+ply.
    ttscore :: (score: int) -> int #expand {
        if score > INF-20 then
            return INF-ply;
        else if score < -INF+20 then
            return -INF+ply;
        return score;
    }

    if tthit && !pv_node && ply>0 && ttentry.depth >= depth {
        score := ttscore(ttentry.score);

        flag := ttentry.flag;
        if flag == TFLAGS.EXACT then
            `return score;
        if flag == TFLAGS.ALPHA && score <= alpha then
            `return score;
        if flag == TFLAGS.BETA && score >= beta then
            `return score;
    }

}

// Writes a result into the global transposition table slot for `hash`.
// The slot is overwritten only when the new depth is greater than the stored
// depth or the stored entry's age differs from search_age.
// Note that score is narrowed to s16 and depth is truncated to u8.
tt_store :: (hash: u64, score: int, flag: TFLAGS, depth: int, ttmove: Move16) {
    key := hash % cast,no_check(u64) ttable.count;
    prefetch(*ttable[key], Prefetch_Hint.T0);
    entry: TTEntry;
    entry.score = cast (s16) score;
    entry.flag = flag;
    entry.depth = cast, trunc(u8) depth;
    entry.ttmove = ttmove;
    entry.age = search_age;
    // Mix part of the payload into the stored key; tt_probe undoes this.
    entry.hash = hash ^ (entry.padding & ~0xFF);

    ttentry := *ttable[key];
    if depth > ttentry.depth || ttentry.age != search_age then
        memcpy(ttentry, *entry, size_of(TTEntry));
}

// perft results are found here: https://www.chessprogramming.org/Perft_Results
// more perft results: http://www.rocechess.ch/perft.html
perft_all :: () {
    setup_perft_run :: ($fen: string, expected: ..int)
#expand { 1263 | print("Testing FEN=[%1]%2", fen, NEWLINE); 1264 | if equal(fen, "startpos") { 1265 | chess_startpos(*chess); 1266 | } else { 1267 | chess_fen(*chess, fen); 1268 | } 1269 | 1270 | for exp, depth: expected { 1271 | perft_run(depth+1, exp); 1272 | } 1273 | print("------------------------------------%1%1", NEWLINE); 1274 | } 1275 | 1276 | perft_run :: (depth: int, expected: int) #expand { 1277 | time_taken := seconds_since_init(); 1278 | nodes := perft(*chess, depth); 1279 | time_taken = seconds_since_init() - time_taken; 1280 | if nodes == expected { 1281 | print("perft successful at the depth of: %1, %2 nodes found, time taken: %3%4", depth, nodes, time_taken, NEWLINE); 1282 | } else { 1283 | print("FAILED at the depth of: %1, expected: %2, nodes found: %3, time taken: %4\n", depth, expected, nodes, time_taken, NEWLINE); 1284 | } 1285 | } 1286 | 1287 | 1288 | chess: Chess; 1289 | setup_perft_run("startpos", 1290 | 20, 1291 | 400, 1292 | 8_902, 1293 | 197_281, 1294 | 4_865_609, 1295 | 119_060_324); 1296 | // position 1: initial position 1297 | setup_perft_run(fen="rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1", 1298 | 20, 1299 | 400, 1300 | 8_902, 1301 | 197_281, 1302 | 4_865_609); 1303 | 1304 | // position 2: kiwipete by peter mckenzie 1305 | setup_perft_run(fen="r3k2r/p1ppqpb1/bn2pnp1/3PN3/1p2P3/2N2Q1p/PPPBBPPP/R3K2R w KQkq -", 1306 | 48, 1307 | 2_039, 1308 | 97_862, 1309 | 4_085_603, 1310 | 193_690_690); 1311 | //8_031_647_685); 1312 | 1313 | // position 3: en paesant bugs 1314 | setup_perft_run(fen="8/2p5/3p4/KP5r/1R3p1k/8/4P1P1/8 w - -", 1315 | 14, 1316 | 191, 1317 | 2_812, 1318 | 43_238, 1319 | 674_624, 1320 | 11_030_083, 1321 | 178_633_661); 1322 | //8_031_647_685); // <-- this takes way too long... 
// "Divide" variant of perft: for every move available in the given position,
// prints the from/to squares and the number of nodes found below that move by
// perft(depth-1), then prints the move count, the node total, the elapsed
// time and nodes per second.
//
//   chess - position to expand; every move is made and unmade in place
//   depth - plies to search; a depth <= 0 returns 1 without printing
//
// Returns the total number of nodes counted.
perft_divide :: (chess: *Chess, depth: int) -> int {
    started := seconds_since_init();
    if depth <= 0 then return 1;

    root_moves: Moves;
    generate_moves(chess, *root_moves);

    total_nodes := 0;
    moves_tried := 0;

    // make_move/unmake_move do not restore en_passant here, so it is saved
    // once and written back after every move.
    saved_ep := chess.en_passant;

    index := 0;
    while index < root_moves.count {
        move := root_moves.array[index];
        index += 1;

        cap, undo_flags, undo_hash := make_move(chess, move);
        subtree := perft(chess, depth-1);
        _, from, to := decode_move16(move);
        print("%1%2: %3 nodes%4", cast(serialized_bb)from, cast(serialized_bb)to, subtree, NEWLINE);

        total_nodes += subtree;
        unmake_move(chess, move, cap, undo_flags, undo_hash);
        chess.en_passant = saved_ep;
        moves_tried += 1;
    }

    print("moves from here: %1%2%2", moves_tried, NEWLINE);
    elapsed := seconds_since_init() - started;
    print("Total %1 nodes%3Time taken: %2%3", total_nodes, elapsed, NEWLINE);
    print("NPS: %1%2", cast(float)total_nodes / elapsed, NEWLINE);
    return total_nodes;
}
b_rook | b_bishop | b_knight; 1451 | return w != 0 || b != 0; 1452 | } 1453 | 1454 | is_promoting_pawn :: inline (using chess: *Chess) -> bool { 1455 | return (w_pawn & cast(u64)rank_7) != 0 || (b_pawn & cast(u64)rank_2) != 0; 1456 | } 1457 | 1458 | // a bad capture => true, a good capture => false 1459 | bad_capture :: (chess: *Chess, move: Move16, from: int, to: int) -> bool { 1460 | pfrom := cast(int) piece_at(chess, from); 1461 | pto := cast(int) piece_at(chess, to); 1462 | if PVALUE[pfrom] <= PVALUE[pto] then { 1463 | return false; 1464 | } 1465 | 1466 | return see(chess, move, 0); 1467 | } 1468 | ply := chess.ply; 1469 | pv_node := alpha != beta-1; 1470 | tthit, ttentry := tt_probe(chess.hash, alpha, beta, 0); 1471 | tt_prune(tthit, ttentry, ply, pv_node, alpha, beta, 0); 1472 | best_move := Move16.Quiet; 1473 | 1474 | ttmove := chess.history.pv_table[ply][0]; 1475 | if get_move16_flag(ttmove) < Move16.Capture || !is_legal(chess, ttmove) then { 1476 | ttmove = 0; 1477 | } 1478 | 1479 | hashmove := Move16.Quiet; 1480 | if tthit && ttmove == 0 && get_move16_flag(ttentry.ttmove) >= Move16.Capture then { 1481 | hashmove = ttentry.ttmove; 1482 | } 1483 | 1484 | defer { 1485 | using chess.history; 1486 | memset(*pv_table[ply+1], 0, size_of(PV_Line)); 1487 | killer_moves[ply+1][1] = 0; 1488 | killer_moves[ply+1][0] = 0; 1489 | } 1490 | 1491 | best_score: int = -INF; 1492 | hash_flag := TFLAGS.ALPHA; 1493 | pos_check := in_check(chess); 1494 | if !pos_check { 1495 | // no standing pat while in check 1496 | best_score = evaluate(chess, fifty); 1497 | if best_score >= beta then { 1498 | return best_score; 1499 | } 1500 | 1501 | // delta pruning: 1502 | if can_delta_prune(chess) { 1503 | if best_score < (alpha - 975) then { 1504 | return best_score; 1505 | } 1506 | } 1507 | 1508 | if alpha < best_score then { 1509 | // hash_flag = TFLAGS.EXACT; 1510 | alpha = best_score; 1511 | } 1512 | } 1513 | 1514 | // search captures only, captures negate the fifty move rule 1515 | 
// One principal-variation line: a fixed buffer of moves plus the number of
// entries in use. History.pv_table holds one of these per ply.
PV_Line :: struct {
    move_count: u16;          // how many leading entries of argmove are in use
    argmove:    [79] Move16;  // the moves of the line, best move first
}

// line[index] -> the move stored at `index`.
operator [] :: (p: PV_Line, index: int) -> Move16 {
    return p.argmove[index];
}

// line[index] = move
operator []= :: (p: *PV_Line, index: int, move: Move16) #expand {
    p.argmove[index] = move;
}

// *line[index] -> pointer to the slot at `index` (used as a memcpy target/source).
operator *[] :: (using p: *PV_Line, index: int) -> *Move16 #expand {
    return *argmove[index];
}
// performance test
// walks the move generation tree of strictly legal moves to count the leaf nodes
//
//   chess - position to expand; moves are made and unmade in place
//   depth - remaining plies to expand
//
// Returns the number of leaf nodes reached at `depth`.
perft :: (chess: *Chess, depth: int) -> int {
  // for-expansion used by `for :gen_moves` below: yields the moves produced by
  // generate_tactics first, then the moves produced by generate_quiets,
  // reusing one Moves buffer for both passes. `it_index` keeps counting across
  // the two passes.
  gen_moves :: (chess: *Chess, body: Code, f: For_Flags) #expand {
    moves: Moves;
    // the attack information is computed once and shared by both generators
    unsafe, check := generate_attacks(chess);
    generate_tactics(unsafe, check, chess, *moves);
    i := 0;
    `it := Move16.Quiet;
    `it_index := 0;
    while i < moves.count {
      it = moves[i];
      #insert body;
      i += 1;
      it_index += 1;
    }
    // empty the buffer before the second pass
    moves.count = 0;
    memset(*moves, 0, size_of(Moves));
    generate_quiets(unsafe, check, chess, *moves);
    i = 0;
    while i < moves.count {
      it = moves[i];
      #insert body;
      i += 1;
      it_index += 1;
    }
  }
  if depth == 0 then return 1;

  if depth == 1 then {
    // Moves(true) has no move array, so add_move only increments `count`:
    // at the last ply the moves are counted without being stored or played.
    moves: Moves(true);
    generate_moves(chess, *moves);
    return moves.count;
  }

  num_nodes := 0;
  // en_passant is restored by hand after every unmake_move
  ep := chess.en_passant;
  for :gen_moves mov: chess {
    cap, castling, hash := make_move(chess, mov);

    num_nodes += perft(chess, depth-1);

    unmake_move(chess, mov, cap, castling, hash);
    chess.en_passant = ep;
  }
  return num_nodes;

}
// most valuable victim, least valuable attacker.
// see: https://www.chessprogramming.org/MVV-LVA
//
//   victim   - piece being captured
//   attacker - piece making the capture
//
// Returns victim value - attacker value / 100 + 10000. The victim dominates
// the ordering, the attacker only breaks ties between equal victims, and the
// 10000 offset keeps every result positive (the smallest possible value is
// 0 - 90 + 10000).
mvv_lva :: (victim: Piece, attacker: Piece) -> score: s32 {
  // Built at compile time; the table is indexed by the Piece value.
  // White and black entries must mirror each other so that move ordering
  // does not depend on the side to move.
  piece_score :: #run -> [13] s32 {
    using Piece;
    array: [13] s32;
    array[cast(int)NONE     ] = 0;
    array[cast(int)W_KING   ] = 9000;
    array[cast(int)W_QUEEN  ] = 9000;
    array[cast(int)W_ROOK   ] = 5000;
    array[cast(int)W_BISHOP ] = 3500;
    array[cast(int)W_KNIGHT ] = 3250;
    array[cast(int)W_PAWN   ] = 1000;

    array[cast(int)B_KING   ] = 9000;
    array[cast(int)B_QUEEN  ] = 9000;
    array[cast(int)B_ROOK   ] = 5000;
    array[cast(int)B_BISHOP ] = 3500;
    array[cast(int)B_KNIGHT ] = 3250; // was 1000 (the pawn value); now matches W_KNIGHT
    array[cast(int)B_PAWN   ] = 1000;
    return array;
  }

  v := cast(int) victim;
  a := cast(int) attacker;
  vs := piece_score[v] - (piece_score[a]/100) + 10000;
  return vs;
}
// Zeroes every entry of the transposition table while keeping its allocation.
// Does nothing when the table has not been allocated yet; indexing element 0
// of an empty table would be out of bounds.
Clear_Hash :: () {
  if ttable.count <= 0 then return;
  memset(ttable.data, 0, size_of(TTEntry)*ttable.count);
}
bitboard: u64, piece: int #expand { 1760 | subset: u64 = 0; 1761 | pawns := get_pawn(chess, turn); 1762 | subset = attadef & pawns; 1763 | if subset then 1764 | return get_bit(subset), xx Piece.W_PAWN; 1765 | 1766 | knights := get_knight(chess, turn); 1767 | subset = attadef & knights; 1768 | if subset then 1769 | return get_bit(subset), xx Piece.W_KNIGHT; 1770 | 1771 | bishops := get_bishop(chess, turn); 1772 | subset = attadef & bishops; 1773 | if subset then 1774 | return get_bit(subset), xx Piece.W_BISHOP; 1775 | 1776 | rooks := get_rook(chess, turn); 1777 | subset = attadef & rooks; 1778 | if subset then 1779 | return get_bit(subset), xx Piece.W_ROOK; 1780 | 1781 | queens := get_queen(chess, turn); 1782 | subset = attadef & queens; 1783 | if subset then 1784 | return get_bit(subset), xx Piece.W_QUEEN; 1785 | 1786 | kings := get_king(chess, turn); 1787 | subset = attadef & kings; 1788 | if subset then 1789 | return get_bit(subset), xx Piece.W_KING; 1790 | 1791 | return 0, xx Piece.NONE; 1792 | } 1793 | if turn == Turn.WHITE { 1794 | bitboard, piece := least_valuable_piece(attadef, Turn.WHITE); 1795 | return bitboard, piece; 1796 | } else { 1797 | bitboard, piece := least_valuable_piece(attadef, Turn.BLACK); 1798 | return bitboard, piece; 1799 | } 1800 | } 1801 | 1802 | get_bit :: (bits: u64) -> u64 { 1803 | b: int = cast, no_check(int) bits; 1804 | b = -b; 1805 | return bits & (cast, no_check(u64)b); 1806 | } 1807 | 1808 | _, from, to := decode_move16(move); 1809 | target := cast(int) piece_at(chess, to); 1810 | apiece := cast(int) piece_at(chess, from); 1811 | 1812 | gain: [16] s16; 1813 | d := 0; 1814 | may_x_ray: u64 = chess.w_pawn | chess.w_bishop | chess.w_rook | chess.w_queen 1815 | | chess.b_pawn | chess.b_bishop | chess.b_rook | chess.b_queen; 1816 | from_set := cast,no_check(u64) 1 << from; 1817 | occupied := chess.occupied; 1818 | attadef := attacks_to(occupied, to); 1819 | gain[d] = PVALUE[target] - cast(s16)threshold; 1820 | if gain[d] < 0 then 1821 
// Copies the search-relevant state of `src` into `dest`.
//
// Copied:     the Chess board state, the ply/limit fields, the excluded-move
//             fields, probcut, fifty, and the moves / rtable / eval arrays.
// Not copied: history (skipped, since that is zero), the NNUE data,
//             node_state, main_thread and score.
copy_chessgame :: (dest: *ChessGame, src: *ChessGame) {
  // board state, copied wholesale
  memcpy(*dest.chess, *src.chess, size_of(Chess));

  // per-game dynamic arrays
  array_copy(*dest.moves,  src.moves);
  array_copy(*dest.rtable, src.rtable);
  array_copy(*dest.eval,   src.eval);

  // position counters
  dest.ply   = src.ply;
  dest.fifty = src.fifty;

  // search limits
  dest.maxply   = src.maxply;
  dest.depth    = src.depth;
  dest.maxdepth = src.maxdepth;
  dest.maxnodes = src.maxnodes;
  dest.movetime = src.movetime;

  // excluded-move / probcut state
  dest.excluded_move = src.excluded_move;
  dest.exply         = src.exply;
  dest.probcut       = src.probcut;
}
// Reports whether the current position should be scored as a draw.
//
//   c     - game state; reads the piece bitboards, the hash and rtable
//   fifty - how many of the most recent rtable entries to scan for repetition
//
// Returns true when
//   * only two pieces remain on the board, or
//   * at most three pieces remain and one of them is a bishop or knight, or
//   * the current hash appears at least twice among the last `fifty`
//     entries of rtable.
is_draw :: (using,except(fifty) c: *ChessGame, fifty: int) -> bool {
  count_p := popcount(c.occupied);
  if count_p <= 3 then {
    if count_p == 2 then {
      return true;
    }
    if (w_bishop|w_knight|b_bishop|b_knight) & c.occupied then {
      return true;
    }
  }

  hash_val := c.hash;
  repeat_count := 0;
  index := rtable.count-1;
  remaining := fifty;
  // `index >= 0` stops the scan at the start of the recorded history, so a
  // `fifty` larger than rtable.count can no longer index below zero.
  while remaining > 0 && index >= 0 {
    if rtable[index] == hash_val then {
      repeat_count += 1;
      if repeat_count >= 2 then
        return true;
    }
    index -= 1;
    remaining -= 1;
  }

  return false;
}
-> Piece, Castling { 1991 | 1992 | make_move_nnue :: (using c: *ChessGame, move: Move16) #expand { 1993 | dp := *nnue[ply+1].dirtyPiece; 1994 | nnue[ply+1].accumulator.computedAccumulation = 0; 1995 | dp.dirtyNum = 1; 1996 | // remove captured piece. 1997 | if flags == Move16.Capture || flags >= Move16.Knight_Promotion_Capture { 1998 | dp.dirtyNum = 2; 1999 | dp.pc[1] = xx piece_at(c, to); 2000 | dp.from[1] = xx to; 2001 | dp.to[1] = 64; 2002 | } else if flags == Move16.Ep_Capture { 2003 | epto := bit_scan_forward(ifx c.turn==Turn.WHITE chess.en_passant>>8 else chess.en_passant<<8); 2004 | dp.dirtyNum = 2; 2005 | dp.pc[1] = xx piece_at(c, epto); 2006 | dp.from[1] = xx epto; 2007 | dp.to[1] = 64; 2008 | } 2009 | 2010 | dp.pc[0] = xx piece_at(c, from); 2011 | dp.from[0] = xx from; 2012 | dp.to[0] = xx to; 2013 | 2014 | if flags >= Move16.Knight_Promotion then { 2015 | pic : s32 = xx Piece.NONE; 2016 | if flags == Move16.Knight_Promotion || flags == Move16.Knight_Promotion_Capture { 2017 | pic = xx (ifx turn == Turn.WHITE Piece.W_KNIGHT else Piece.B_KNIGHT); 2018 | } else if flags == Move16.Bishop_Promotion || flags == Move16.Bishop_Promotion_Capture { 2019 | pic = xx (ifx turn == Turn.WHITE Piece.W_BISHOP else Piece.B_BISHOP); 2020 | } else if flags == Move16.Rook_Promotion || flags == Move16.Rook_Promotion_Capture { 2021 | pic = xx (ifx turn == Turn.WHITE Piece.W_ROOK else Piece.B_ROOK); 2022 | } else if flags == Move16.Queen_Promotion || flags == Move16.Queen_Promotion_Capture { 2023 | pic = xx (ifx turn == Turn.WHITE Piece.W_QUEEN else Piece.B_QUEEN); 2024 | } 2025 | 2026 | dp.to[0] = 64; 2027 | dp.pc[dp.dirtyNum] = pic; 2028 | dp.from[dp.dirtyNum] = 64; 2029 | dp.to[dp.dirtyNum] = xx to; 2030 | dp.dirtyNum += 1; 2031 | } 2032 | 2033 | if flags == Move16.King_Castle { 2034 | pic: s32 = xx (ifx turn == Turn.WHITE Piece.W_ROOK else Piece.B_ROOK); 2035 | from_castle: s32 = xx (ifx turn == Turn.WHITE serialized_bb.h1 else serialized_bb.h8); 2036 | to_castle: s32 = xx 
// Looks up the move played `num` plies ago (1 = the most recent move).
//
// Returns the moved piece and its destination square, or 0, 0 when no such
// move is available.
get_prev_move :: (chess: *ChessGame, num: int) -> piece: int, to: int {
  // The original guard only compared `ply` with `num`, but the index below is
  // derived from moves.count. Check both, and reject num <= 0, so the lookup
  // can never fall outside the recorded move list.
  if num <= 0 || chess.ply < num || chess.moves.count < num return 0, 0;

  idx := chess.moves.count-num;
  prev_move := chess.moves[idx];
  piece, to := piece_to(prev_move);
  return piece, to;
}
// fetch and add.
// Increments the integer at `val` by one.
//
// On x64 this is done with a locked XADD so that concurrent increments from
// several search threads are not lost. A plain `xadd` without the lock prefix
// is a non-atomic read-modify-write across cores. The previous value that
// XADD leaves in `incr` is discarded.
fetch_and_add :: (val: *int) #expand {
  #if CPU == .X64 {
    #asm {
      mov incr: gpr, 1;
      lock_xadd.q [val], incr;
    }
  } else {
    // unknown cpu architecture. just default to simple incrementing of number.
    // this doesn't support multi-threading...but it should be okay-ish...
    val.* += 1;
  }
}
// Handles the UCI `position` command.
//
//   line  - the full command line, starting with "position "
//   chess - game to set up; its move/repetition/eval arrays are cleared first
//
// Accepts `position fen <fen> [moves ...]` and `position startpos [moves ...]`,
// then plays the listed moves in coordinate notation (e.g. e2e4, e7e8q).
//
// Returns the number of consecutive trailing moves that were quiet non-pawn
// moves. Returns 0 for an unknown sub-command or an invalid FEN; on an
// invalid FEN the start position is loaded instead.
// A malformed move token stops move parsing: the moves before it stay
// applied and the count reached so far is returned.
parse_position :: (line: string, chess: *ChessGame) -> fifty: int {
  clear(chess);
  fifty := 0;
  input := advance(line, 9);
  input = eat_spaces(input);
  if begins_with(input, "fen ") {
    input = advance(input, 4);
    input = eat_spaces(input);
    index := find_index_from_left(input, "moves");
    fen_string := ifx index != -1 then slice(input, 0, index) else input;
    if !chess_fen(chess, fen_string) {
      print("invalid fen %1%2", fen_string, NEWLINE);
      chess_startpos(chess);
      return 0;
    }

    if index != -1 {
      input = advance(input, index);
      input = advance(input, 5);
      input = eat_spaces(input);
    } else {
      input = advance(input, input.count);
    }

  } else if begins_with(input, "startpos") {
    input = advance(input, 8);
    input = eat_spaces(input);
    chess_startpos(chess);
    index := find_index_from_left(input, "moves");
    if index != -1 {
      input = advance(input, index);
      input = advance(input, 5);
      input = eat_spaces(input);
    } else {
      input = advance(input, input.count);
    }
  } else {
    // we don't know what the input is, so return
    return 0;
  }

  // we setup the position, now just to parse the moves
  while input {
    token := parse_token(*input);

    // A move needs at least four characters (from-square, to-square).
    // Shorter tokens used to be indexed out of bounds.
    if token.count < 4 {
      print("info string error unable to parse move [%1]%2", token, NEWLINE);
      break;
    }

    // Widen to int before subtracting so characters below 'a' / '1' become
    // negative instead of wrapping around.
    x1 := cast(int) token[0];  x1 -= #char "a";
    x2 := cast(int) token[1];  x2 -= #char "1";
    y1 := cast(int) token[2];  y1 -= #char "a";
    y2 := cast(int) token[3];  y2 -= #char "1";

    on_board := x1 >= 0 && x1 <= 7 && x2 >= 0 && x2 <= 7
             && y1 >= 0 && y1 <= 7 && y2 >= 0 && y2 <= 7;
    if !on_board {
      print("info string error unable to parse move [%1]%2", token, NEWLINE);
      break;
    }

    from := x2*8 + x1;
    to   := y2*8 + y1;

    promote := ifx token.count == 5 then token[4] else 0;
    move_type := get_move16_flag(chess, from, to, promote);
    move := to_move16(from, to, move_type);
    make_move(chess, move);
    // after the move the piece stands on `to`: only a quiet move by a
    // non-pawn extends the counter, everything else resets it.
    if (piece_at(chess,to) != Piece.W_PAWN && piece_at(chess,to) != Piece.B_PAWN) && move_type == Move16.Quiet {
      fifty += 1;
    } else {
      fifty = 0;
    }
  }

  return fifty;
}
// Handles the UCI "go" command.
//
// `line` is the whole command, starting with "go". "go perft <depth>" runs
// perft_divide and returns. Otherwise the sub-commands depth, nodes, movetime,
// wtime, btime, winc, binc and movestogo are read (each followed by an integer),
// the limits are stored on `chess`, uci_search is run and "bestmove ..." is printed.
// wtime/winc are only used when it is White's turn, btime/binc only when it is
// Black's. When a clock time was given without an explicit movetime, the move time
// is derived with time_management. `fifty` is stored in chess.fifty before searching.
//
// Any sub-command that is unknown or whose value does not parse is reported with an
// "info string error" line and no search is started.
go_search :: (line: string, chess: *ChessGame, fifty: int) {
    rest := line;
    parse_token(*rest); // discard the leading "go"
    token := parse_token(*rest);
    if equal(token, "perft") {
        perft_depth, ok := parse_int(*rest);
        if !ok return;
        perft_divide(chess, perft_depth);
        return;
    }

    // -1 means "not given".
    depth     := -1;
    nodes     := -1;
    movetime  := -1;
    movestogo := -1;

    time := -1;
    incr := 0;

    while rest {
        if token == {
            case "depth";
                value, ok := parse_go_value(*rest, token);
                if !ok return;
                depth = value;

            case "nodes";
                value, ok := parse_go_value(*rest, token);
                if !ok return;
                nodes = value;

            case "movetime";
                value, ok := parse_go_value(*rest, token);
                if !ok return;
                movetime = value;

            case "wtime";
                value, ok := parse_go_value(*rest, token);
                if !ok return;
                if chess.turn == .WHITE  time = value;

            case "btime";
                value, ok := parse_go_value(*rest, token);
                if !ok return;
                if chess.turn == .BLACK  time = value;

            case "winc";
                value, ok := parse_go_value(*rest, token);
                if !ok return;
                if chess.turn == .WHITE  incr = value;

            case "binc";
                value, ok := parse_go_value(*rest, token);
                if !ok return;
                if chess.turn == .BLACK  incr = value;

            case "movestogo";
                value, ok := parse_go_value(*rest, token);
                if !ok return;
                movestogo = value;

            case;
                // Report the sub-command that was not recognised.
                print("info string error unable to parse [%1]%2", token, NEWLINE);
                return;
        }

        token = parse_token(*rest);
        token = eat_spaces(token);
    }

    if time > -1 && movetime == -1 then {
        movetime = time_management(time, incr, movestogo, chess.ply);
    }

    chess.maxnodes = nodes;
    chess.movetime = movetime;
    chess.maxdepth = depth;
    chess.fifty    = fifty;
    mov := uci_search(chess);

    push_allocator(temp);
    str := to_string(mov);
    print("bestmove %1%2", str, NEWLINE);
}

// Reads the integer that follows the "go" sub-command `name` from `line`.
// On failure prints "info string error unable to parse <name>" and returns ok = false.
parse_go_value :: (line: *string, name: string) -> value: int, ok: bool {
    value, ok := parse_int(line);
    if !ok {
        print("info string error unable to parse %1%2", name, NEWLINE);
    }
    return value, ok;
}

// Handles the UCI "setoption name <name> [value <value>]" command.
//
// Recognised names and accepted ranges:
//   Clear Hash  - takes no value; clears the transposition table
//   Hash        - 1..2000, multiplied by 1_000_000 before being passed to init_ttable
//                 (presumably megabytes to bytes - confirm against init_ttable)
//   MultiPV     - 1..100
//   Threads     - 1..512
//   Difficulty  - 1..8
// Anything else, or a value that does not parse or is out of range, is reported with
// an "info string error" line and otherwise ignored.
set_option :: (line: string) {
    rest := line;
    parse_token(*rest); // discard "setoption"
    token, TF := parse_token(*rest);
    if TF == false || !equal(token, "name") {
        print("info string error. unable to parse setoption%", NEWLINE);
        return;
    }

    token = eat_spaces(rest);
    found, left, right := split_from_left(token, " value ");
    if left == {
        case "Clear Hash";
            if right {
                print("info string error. Clear Hash cannot be assigned a value%", NEWLINE);
                return;
            }

            print("info string Transposition Table Cleared%", NEWLINE);
            Clear_Hash();

        case "Hash";
            num, ok := parse_option_int(right, 1, 2000, "Hash");
            if !ok return;
            init_ttable(num * 1_000_000);

        case "MultiPV";
            num, ok := parse_option_int(right, 1, 100, "MultiPV");
            if !ok return;
            set_multi_pv(num);

        case "Threads";
            num, ok := parse_option_int(right, 1, 512, "Thread");
            if !ok return;
            set_threads(num);

        case "Difficulty";
            num, ok := parse_option_int(right, 1, 8, "Difficulty");
            if !ok return;
            set_difficulty(num);

        case;
            print("info string error. invalid name: [%1]%2", left, NEWLINE);
    }
}

// Parses `text` as an integer option value and checks that it lies in [lo, hi].
// Otherwise prints "info string error. Invalid <label> Value" and returns ok = false.
parse_option_int :: (text: string, lo: int, hi: int, label: string) -> value: int, ok: bool {
    remaining := text;
    value, ok := parse_int(*remaining);
    if !ok || value < lo || value > hi {
        print("info string error. Invalid %1 Value%2", label, NEWLINE);
        return 0, false;
    }
    return value, true;
}

// decides what the movetime is given the time/increment/other parameters in milliseconds
//
// The remaining clock time is divided by `movestogo` when that is positive, otherwise
// by max(60 - ply, 20). A non-positive `movestogo` (including the -1 "not given"
// marker) uses the second form, so a GUI sending "movestogo 0" cannot cause a
// division by zero. 75 is then subtracted from the share (presumably a safety margin
// for move overhead - confirm) and the increment is added. An increment larger than
// the remaining time is ignored. When the share is used up, only the increment minus
// 75 is returned, with a floor of 1.
time_management :: (time: int, incr: int, movestogo: int, ply: int) -> movetime: int {
    div := ifx movestogo > 0 then movestogo else max(60 - ply, 20);

    bonus := ifx incr > time then 0 else incr;

    budget := time / div;
    budget -= 75;

    // time up
    if budget < 0 {
        budget = 0;
        bonus -= 75;
        if bonus < 0 then {
            bonus = 1;
        }
    }

    return budget + bonus;
}


chess: ChessGame #align 64;

--------------------------------------------------------------------------------
/windows.jai:
--------------------------------------------------------------------------------
// contains windows OS specific code.


EXE :: ".exe"; // nothing.
NEWLINE :: "\r\n";


OS :: struct {
    none: void;
}

// defines the AI uci message loop.
// for-expansion that drives the UCI command loop: `for :getline input: os { ... }`.
//
// Reads stdin into `buffera`, splits what was read into lines and runs the loop body
// once per line with `it` bound to the line text (terminator removed). Lines may end
// in "\r\n" or "\n". `stopping` is cleared and both buffers are zeroed before every
// read. A `break` in the loop body leaves the whole expansion.
getline :: (os: *OS, loop_body: Code, flags: For_Flags) #expand {
    stdin = GetStdHandle(STD_INPUT_HANDLE);
    `it_index := 0;
    while outer := true {
        stopping = false;
        memset(buffera.data, 0, size_of(type_of(buffera)));
        memset(bufferb.data, 0, size_of(type_of(bufferb)));
        bytes_read: u32;
        if !ReadFile(stdin, buffera.data, buffera.count, *bytes_read, null) then {
            sleep_milliseconds(25);
            continue;
        }

        // A successful read of zero bytes carries no command; back off like the
        // failure path instead of re-reading in a tight loop.
        if bytes_read == 0 {
            sleep_milliseconds(25);
            continue;
        }

        messages := to_string(buffera.data, cast(int)bytes_read);
        while messages {
            // Split on "\n" and drop a trailing "\r" so both CRLF and LF input work,
            // including several LF-terminated commands arriving in one read.
            found, `it, rest := split_from_left(messages, "\n");
            if ends_with(it, "\r") then {
                it.count -= 1;
            }
            #insert (break=break outer) loop_body;
            messages = rest;
            if !found break;
        }
    }
}

// Polled from inside the search to decide whether it has to stop.
//
// Expands in the caller; the backticked `return 0` statements return 0 from the
// *calling* procedure, while a plain `return` only leaves this macro.
//
//  - If `stopping` is already set, the caller returns 0 immediately.
//  - All other checks run only when the low 13 bits of `nodes` are all set
//    (once every 8192 nodes).
//  - If `nodes` has reached `maxnodes`, `stopping` is set and the caller returns 0.
//  - Only the main thread goes further: it stops the search when more than
//    `movetime` milliseconds have passed since `time_begin` (in seconds, as returned
//    by seconds_since_init), and then checks stdin without blocking for the
//    commands "isready", "stop" and "quit".
read_input :: (main_thread: bool, nodes: int, maxnodes: int, time_begin: float64, movetime: int) #expand {
    if stopping == true then
        `return 0;

    if (nodes & 8191) != 8191
        return;

    if nodes >= maxnodes {
        stopping = true;
        `return 0;
    }

    if main_thread == false
        return;

    now := seconds_since_init();
    elapsed_ms: int = xx (1000.0 * (now - time_begin));
    if elapsed_ms > movetime {
        stopping = true;
        `return 0;
    }

    bytes_read: u32;
    bytes_available: u32;
    // Peek first so that ReadFile is only called when input is already waiting.
    success := PeekNamedPipe(stdin, null, 0, null, *bytes_available, null) != 0;
    if success && bytes_available {
        ReadFile(stdin, bufferb.data, cast(u32) bufferb.count, *bytes_read, null);
        pending := to_string(bufferb.data, bytes_read);
        while pending {
            // Compare each individual line (without its terminator), not the whole
            // remaining buffer.
            found, msg, rest := split_from_left(pending, "\n");
            if ends_with(msg, "\r") then {
                msg.count -= 1;
            }

            if equal(msg, "isready") {
                print("readyok\r\n");
            }

            if equal(msg, "stop") {
                stopping = true;
                `return 0;
            }

            if equal(msg, "quit") {
                exit(0);
            }

            pending = rest;
            if !found break;
        }
    }
}

// True once a stop has been requested for the current search, i.e. the file-scope
// `stopping` flag is set.
stop :: () -> bool #expand {
    return stopping;
}

#scope_file
// Input buffer filled by getline (the main command loop).
buffera: [4096] u8;
// Input buffer filled by read_input (polling while a search is running).
bufferb: [4096] u8;
// Set by read_input when the search must end; cleared by getline before each read.
stopping: bool = false;

// Standard input handle, fetched by getline.
stdin: HANDLE;


#import "Windows";
#import "Windows_Utf8";
#import "Basic";
#import "String";
#import "System"; // For get_path_of_running_executable.
--------------------------------------------------------------------------------