├── .clang-format ├── .clang-tidy ├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── LICENSE ├── README.md ├── dev ├── cmake │ ├── CompilerToolchain.cmake │ ├── GoogleBenchmark.cmake │ └── GoogleTest.cmake └── lint │ ├── autoformat.sh │ └── clang-tidy │ └── run-clang-tidy.py ├── doc └── presentation │ └── adhoc-2020-04-21 │ ├── benchmark.png │ ├── index.html │ └── remark-latest.min.js ├── include └── jitmap │ ├── bitset.h │ ├── container │ └── container.h │ ├── jitmap.h │ ├── query │ ├── compiler.h │ ├── expr.h │ ├── matcher.h │ ├── optimizer.h │ ├── parser.h │ ├── query.h │ ├── type_fwd.h │ └── type_traits.h │ ├── size.h │ ├── tiny.h │ └── util │ ├── aligned.h │ ├── compiler.h │ ├── exception.h │ ├── fmt.h │ ├── pimpl.h │ └── platform.h ├── src └── jitmap │ ├── CMakeLists.txt │ ├── jitmap.cc │ └── query │ ├── codegen.h │ ├── compiler.cc │ ├── expr.cc │ ├── matcher.cc │ ├── optimizer.cc │ ├── parser.cc │ ├── parser_internal.h │ └── query.cc ├── tests ├── BenchmarkCMakeLists.txt.in ├── CMakeLists.txt ├── GTestCMakeLists.txt.in ├── bitset_test.cc ├── jitmap_benchmark.cc ├── jitmap_test.cc ├── query │ ├── CMakeLists.txt │ ├── compiler_test.cc │ ├── expr_test.cc │ ├── matcher_test.cc │ ├── optimizer_test.cc │ ├── parser_test.cc │ └── query_test.cc └── query_test.h └── tools ├── CMakeLists.txt └── jitmap_ir.cc /.clang-format: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | --- 18 | BasedOnStyle: Google 19 | DerivePointerAlignment: false 20 | ColumnLimit: 90 21 | -------------------------------------------------------------------------------- /.clang-tidy: -------------------------------------------------------------------------------- 1 | Checks: 'bugprone-*,cert-*,-cert-err58-cpp,clang-analyzer-*,cppcoreguidelines-*,-cppcoreguidelines-special-member-functions,-cppcoreguidelines-owning-memory,google-*,-google-runtime-references,modernize-*,performance-*,readability-*,-readability-convert-member-functions-to-static' 2 | CheckOptions: 3 | - key: readability-braces-around-statements.ShortStatementLines 4 | value: '1' 5 | - key: readability-implicit-bool-conversion.AllowIntegerConditions 6 | value: '1' 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build* 2 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "vendor/googletest"] 2 | path = vendor/googletest 3 | url = git@github.com:google/googletest.git 4 | [submodule "vendor/benchmark"] 5 | path = vendor/benchmark 6 | url = git@github.com:google/benchmark.git 7 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright 2020 RStudio, 
Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | cmake_minimum_required(VERSION 3.0.0) # NOTE(review): CMAKE_CXX_STANDARD 17 needs CMake >= 3.8; raising the minimum also changes policy defaults, so confirm before bumping. 16 | project(jitmap) 17 | # Let include() find the project's helper modules under dev/cmake. 18 | set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/dev/cmake") 19 | # Require C++17 and write compile_commands.json into the build tree. 20 | set(CMAKE_CXX_STANDARD 17) 21 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 22 | set(CMAKE_EXPORT_COMPILE_COMMANDS ON) 23 | # User-configurable build switches. 24 | option(BUILD_TESTS "Build unit tests" ON) 25 | option(FORCE_COLORED_OUTPUT "Always produce ANSI-colored output (GNU/Clang only)." ON) 26 | # CompilerToolchain reads FORCE_COLORED_OUTPUT, so the options are declared before it is included. 27 | include(CompilerToolchain) 28 | find_package(LLVM 9 REQUIRED CONFIG) 29 | 30 | add_subdirectory(src/jitmap) 31 | add_subdirectory(tools) 32 | # GoogleTest and GoogleBenchmark resolve to the modules under dev/cmake (CMAKE_MODULE_PATH is searched first). 33 | if (BUILD_TESTS) 34 | enable_testing() 35 | include(GoogleTest) 36 | include(GoogleBenchmark) 37 | add_subdirectory(tests) 38 | endif() 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # jitmap: Jitted bitmaps 2 | 3 | jitmap is a small library providing an execution engine for logical binary 4 | expressions on bitmaps. Some examples where this is relevant: 5 | 6 | * In search engines, posting lists (sorted sequences of integers) are encoded 7 | with bitmaps. Evaluating a search query (logical expression on 8 | keywords) can be implemented with logical expression on bitmaps. 9 | 10 | * In columnar databases, selection vectors (index masks) are encoded with 11 | bitmaps, the results of predicate on column expressions. The bitmaps are then 12 | combined in a final bitmap. 13 | 14 | * In stream processing systems with rule engines, e.g. adtech bid requests 15 | filtering with campaign rules, bitmaps are used as a first-pass optimization 16 | to lower the number of (costly) rules to evaluate on each incoming event. 17 | 18 | jitmap compiles logical expressions into native functions with signature 19 | `void fn(const char**, char*)`. 
The functions are optimized to minimize memory 20 | transfers and use the fastest vector instruction set provided by the host. 21 | 22 | The following snippet shows an example of what jitmap achieves: 23 | 24 | ```C 25 | typedef void (*dense_eval_fn)(const char**, char*); 26 | 27 | // a, b, c, and output are pointers to bitmaps 28 | char *a, *b, *c, *output; 29 | // Note that for now, jitmap only supports static sized bitmaps. 30 | const char* inputs[3] = {a, b, c}; 31 | 32 | // Compile an expression returned as a function pointer. The function can be 33 | // called from any thread in the same address space and has a global lifetime. 34 | // The generated symbol will be exposed to gdb and linux's perf utility. 35 | const char* symbol_name = "a_and_b_and_c"; 36 | dense_eval_fn a_and_b_and_c = jitmap_compile(symbol_name, "a & b & c"); 37 | 38 | // The result of `a & b & c` will be stored in `output`, applied vertically 39 | // using vectorized instructions available on the host. 40 | a_and_b_and_c(inputs, output); 41 | ``` 42 | 43 | ## Logical expression language 44 | 45 | jitmap offers a small DSL to evaluate bitwise operations on bitmaps. 46 | The language supports variables (named bitmap), empty/full literals, and basic 47 | operators: not `!`, and `&`, or `|`, xor `^`. 48 | 49 | A query takes an expression and a list of bitmaps and executes the expression on 50 | the bitmaps resulting in a new bitmap. 51 | 52 | ### Supported expressions 53 | 54 | - Empty bitmap literal: `$0` 55 | - Full bitmap literal: `$1` 56 | - Variables (named bitmap): `[A-Za-z0-9_]+`, e.g. 
`country`, `color_red` 57 | - Not: `!e` 58 | - And: `e_1 & e_2` 59 | - Or: `e_1 | e_2` 60 | - Xor: `e_1 ^ e_2` 61 | 62 | ### Examples 63 | ``` 64 | # NOT(a) 65 | !a 66 | 67 | # a AND b 68 | a & b 69 | 70 | # 1 AND (a OR b) XOR c 71 | ($1 & (a | b) ^ c) 72 | ``` 73 | 74 | ## Developing/Debugging 75 | 76 | ### *jitmap-ir* tool 77 | 78 | The *jitmap-ir* command line utility takes an expression as its first input argument 79 | and dumps the generated LLVM IR to stdout. It is useful to debug and peek at 80 | the generated code. Using LLVM command line utilities, we can also look at the 81 | expected generated assembly for any platform. 82 | 83 | ```llvm 84 | # tools/jitmap-ir "(a & b) | (c & c) | (c ^ d) | (c & b) | (d ^ a)" 85 | ; ModuleID = 'jitmap_ir' 86 | source_filename = "jitmap_ir" 87 | target triple = "x86_64-pc-linux-gnu" 88 | 89 | ; Function Attrs: argmemonly 90 | define void @query(i32** nocapture readonly %inputs, i32* nocapture %output) #0 { 91 | entry: 92 | %bitmap_gep_0 = getelementptr inbounds i32*, i32** %inputs, i64 0 93 | %bitmap_0 = load i32*, i32** %bitmap_gep_0 94 | %bitmap_gep_1 = getelementptr inbounds i32*, i32** %inputs, i64 1 95 | %bitmap_1 = load i32*, i32** %bitmap_gep_1 96 | %bitmap_gep_2 = getelementptr inbounds i32*, i32** %inputs, i64 2 97 | %bitmap_2 = load i32*, i32** %bitmap_gep_2 98 | %bitmap_gep_3 = getelementptr inbounds i32*, i32** %inputs, i64 3 99 | %bitmap_3 = load i32*, i32** %bitmap_gep_3 100 | br label %loop 101 | 102 | loop: ; preds = %loop, %entry 103 | %i = phi i64 [ 0, %entry ], [ %next_i, %loop ] 104 | %gep_0 = getelementptr inbounds i32, i32* %bitmap_0, i64 %i 105 | %load_0 = load i32, i32* %gep_0 106 | %gep_1 = getelementptr inbounds i32, i32* %bitmap_1, i64 %i 107 | %load_1 = load i32, i32* %gep_1 108 | %gep_2 = getelementptr inbounds i32, i32* %bitmap_2, i64 %i 109 | %load_2 = load i32, i32* %gep_2 110 | %gep_3 = getelementptr inbounds i32, i32* %bitmap_3, i64 %i 111 | %load_3 = load i32, i32* %gep_3 112 | %0 = and i32 
%load_0, %load_1 113 | %1 = and i32 %load_2, %load_2 114 | %2 = or i32 %0, %1 115 | %3 = xor i32 %load_2, %load_3 116 | %4 = or i32 %2, %3 117 | %5 = and i32 %load_2, %load_1 118 | %6 = or i32 %4, %5 119 | %7 = xor i32 %load_3, %load_0 120 | %8 = or i32 %6, %7 121 | %gep_output = getelementptr inbounds i32, i32* %output, i64 %i 122 | store i32 %8, i32* %gep_output 123 | %next_i = add i64 %i, 1 124 | %exit_cond = icmp eq i64 %next_i, 2048 125 | br i1 %exit_cond, label %after_loop, label %loop 126 | 127 | after_loop: ; preds = %loop 128 | ret void 129 | } 130 | 131 | attributes #0 = { argmemonly } 132 | ``` 133 | 134 | We can then use LLVM's `opt` and `llc` to transform the IR into native assembly. 135 | 136 | ```objdump 137 | # tools/jitmap-ir "(a & b) | (c & c) | (c ^ d) | (c & b) | (d ^ a)" | llc -O3 138 | .text 139 | .file "jitmap_ir" 140 | .globl query # -- Begin function query 141 | .p2align 4, 0x90 142 | .type query,@function 143 | query: # @query 144 | .cfi_startproc 145 | # %bb.0: # %entry 146 | pushq %rbp 147 | .cfi_def_cfa_offset 16 148 | pushq %rbx 149 | .cfi_def_cfa_offset 24 150 | .cfi_offset %rbx, -24 151 | .cfi_offset %rbp, -16 152 | movq (%rdi), %r8 153 | movq 8(%rdi), %r9 154 | movq 16(%rdi), %r10 155 | movq 24(%rdi), %r11 156 | movq $-8192, %rax # imm = 0xE000 157 | .p2align 4, 0x90 158 | .LBB0_1: # %loop 159 | # =>This Inner Loop Header: Depth=1 160 | movl 8192(%r8,%rax), %ecx 161 | movl 8192(%r9,%rax), %edx 162 | movl 8192(%r10,%rax), %edi 163 | movl 8192(%r11,%rax), %ebx 164 | movl %edi, %ebp 165 | xorl %ebx, %ebp 166 | xorl %ecx, %ebx 167 | andl %edx, %ecx 168 | orl %edi, %ebp 169 | andl %edx, %edi 170 | orl %ebp, %edi 171 | orl %edi, %ebx 172 | orl %ecx, %ebx 173 | movl %ebx, 8192(%rsi,%rax) 174 | addq $4, %rax 175 | jne .LBB0_1 176 | # %bb.2: # %after_loop 177 | popq %rbx 178 | .cfi_def_cfa_offset 16 179 | popq %rbp 180 | .cfi_def_cfa_offset 8 181 | retq 182 | .Lfunc_end0: 183 | .size query, .Lfunc_end0-query 184 | .cfi_endproc 185 | # -- End 
function 186 | 187 | .section ".note.GNU-stack","",@progbits 188 | 189 | ``` 190 | 191 | This code is still not fully optimized, `opt` is used for this. 192 | 193 | ```objdump 194 | # tools/jitmap-ir "(a & b) | (c & c) | (c ^ d) | (c & b) | (d ^ a)" | opt -O3 -S -mcpu=core-avx2| llc -O3 195 | ninja: no work to do. 196 | .text 197 | .file "jitmap_ir" 198 | .section .rodata.cst8,"aM",@progbits,8 199 | .p2align 3 # -- Begin function query 200 | .LCPI0_0: 201 | .quad 8192 # 0x2000 202 | .LCPI0_1: 203 | .quad -9223372036854775808 # 0x8000000000000000 204 | .text 205 | .globl query 206 | .p2align 4, 0x90 207 | .type query,@function 208 | query: # @query 209 | # %bb.0: # %entry 210 | pushq %rbp 211 | pushq %r15 212 | pushq %r14 213 | pushq %r12 214 | pushq %rbx 215 | # ... 216 | # And the holy grail fully vectorized loop 217 | .LBB0_2: # %vector.body 218 | # =>This Inner Loop Header: Depth=1 219 | vmovdqu (%r14,%rbx), %ymm0 220 | vmovdqu 32(%r14,%rbx), %ymm1 221 | vmovdqu (%r12,%rbx), %ymm2 222 | vmovdqu 32(%r12,%rbx), %ymm3 223 | vmovdqu (%rdi,%rbx), %ymm4 224 | vmovdqu 32(%rdi,%rbx), %ymm5 225 | vpand (%r15,%rbx), %ymm0, %ymm6 226 | vpand 32(%r15,%rbx), %ymm1, %ymm7 227 | vpor %ymm2, %ymm6, %ymm6 228 | vpor %ymm3, %ymm7, %ymm7 229 | vpxor %ymm2, %ymm4, %ymm2 230 | vpxor %ymm3, %ymm5, %ymm3 231 | vpxor %ymm0, %ymm4, %ymm0 232 | vpor %ymm0, %ymm2, %ymm0 233 | vpor %ymm0, %ymm6, %ymm0 234 | vpxor %ymm1, %ymm5, %ymm1 235 | vpor %ymm1, %ymm3, %ymm1 236 | vpor %ymm1, %ymm7, %ymm1 237 | vmovdqu %ymm0, (%rsi,%rbx) 238 | vmovdqu %ymm1, 32(%rsi,%rbx) 239 | addq $64, %rbx 240 | cmpq $8192, %rbx # imm = 0x2000 241 | jne .LBB0_2 242 | .LBB0_5: # %after_loop 243 | popq %rbx 244 | popq %r12 245 | popq %r14 246 | popq %r15 247 | popq %rbp 248 | vzeroupper 249 | retq 250 | .Lfunc_end0: 251 | .size query, .Lfunc_end0-query 252 | # -- End function 253 | 254 | .section ".note.GNU-stack","",@progbits 255 | ``` 256 | 257 | ## Symbols with linux's perf 258 | 259 | By default, perf will not be 
able to recognize the generated functions since the 260 | symbols are not available statically. Luckily, perf has two mechanisms for JITs 261 | to register symbols. LLVM's JIT uses the jitdump [1] facility. At the time of 262 | writing this, one needs to patch perf with [2], see commit 077a9b7bd1 for more 263 | information. 264 | 265 | ``` 266 | # The `-k1` is required for jitdump to work. 267 | $ perf record -k1 jitmap_benchmark 268 | 269 | # By default, the output will be useless, since each instruction will be shown 270 | # instead of grouped by symbols. 271 | $ perf report --stdio 272 | ... 273 | # Overhead Command Shared Object Symbol 274 | # ........ ............... ................... .................................................................................... 275 | # 276 | 29.09% jitmap_benchmar jitmap_benchmark [.] jitmap::StaticBenchmark > 277 | 20.08% jitmap_benchmar jitmap_benchmark [.] jitmap::StaticBenchmark > 278 | 1.78% jitmap_benchmar [JIT] tid 24013 [.] 0x00007f628c6cb045 279 | 1.61% jitmap_benchmar [JIT] tid 24013 [.] 0x00007f628c6cb053 280 | 1.59% jitmap_benchmar [JIT] tid 24013 [.] 0x00007f628c6cb197 281 | 1.55% jitmap_benchmar [JIT] tid 24013 [.] 0x00007f628c6cb126 282 | 1.51% jitmap_benchmar [JIT] tid 24013 [.] 0x00007f628c6cb035 283 | 1.39% jitmap_benchmar [JIT] tid 24013 [.] 0x00007f628c6cb027 284 | 285 | # We must process the generated perf.data file by injecting symbol names 286 | $ perf inject --jit -i perf.data -o perf.jit.data && mv perf.jit.data perf.data 287 | $ perf report --stdio 288 | ... 289 | # Overhead Command Shared Object Symbol 290 | # ........ ............... ................... .................................................................................... 291 | # 292 | 29.09% jitmap_benchmar jitmap_benchmark [.] jitmap::StaticBenchmark > 293 | 20.08% jitmap_benchmar jitmap_benchmark [.] jitmap::StaticBenchmark > 294 | 6.48% jitmap_benchmar jitted-24013-16.so [.] 
and_2_popcount 295 | 6.46% jitmap_benchmar jitted-24013-32.so [.] and_4_popcount 296 | 6.42% jitmap_benchmar jitted-24013-46.so [.] and_8_popcount 297 | 6.19% jitmap_benchmar jitted-24013-77.so [.] and_4 298 | 6.19% jitmap_benchmar jitted-24013-61.so [.] and_2 299 | 4.59% jitmap_benchmar jitted-24013-91.so [.] and_8 300 | ``` 301 | 302 | [1] https://elixir.bootlin.com/linux/v4.10/source/tools/perf/Documentation/jitdump-specification.txt 303 | 304 | [2] https://lore.kernel.org/lkml/20191003105716.GB23291@krava/T/#u 305 | 306 | # TODO 307 | 308 | * Supports dynamic sized bitmaps 309 | * Implement roaring-bitmap-like compressed bitmaps 310 | * Get https://reviews.llvm.org/D67383 approved and merged to benefit from 311 | Tree-Height-Reduction pass. 312 | * Provide a C front-end api. 313 | -------------------------------------------------------------------------------- /dev/cmake/CompilerToolchain.cmake: -------------------------------------------------------------------------------- 1 | # Copyright 2020 RStudio, Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | if (FORCE_COLORED_OUTPUT) 16 | if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") 17 | add_compile_options (-fdiagnostics-color=always) 18 | elseif (CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$") # AppleClang takes the same flag as Clang 19 | add_compile_options (-fcolor-diagnostics) 20 | endif () 21 | endif () 22 | 23 | # Common warning flags; this module only defines the variable. 24 | set(CXX_WARNING_FLAGS -Werror -Wall) 25 | -------------------------------------------------------------------------------- /dev/cmake/GoogleBenchmark.cmake: -------------------------------------------------------------------------------- 1 | # Copyright 2020 RStudio, Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Download and unpack googlebenchmark at configure time 16 | configure_file(tests/BenchmarkCMakeLists.txt.in googlebenchmark-download/CMakeLists.txt) 17 | # Cache entry rather than a plain variable: the sub-project's option() may discard a plain one (CMP0077). 18 | set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "" FORCE) 19 | set(EXTRA_ARGS "-DBENCHMARK_ENABLE_TESTING=${BENCHMARK_ENABLE_TESTING}") 20 | 21 | execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" ${EXTRA_ARGS} . 22 | RESULT_VARIABLE result 23 | WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/googlebenchmark-download ) 24 | if(result) 25 | message(FATAL_ERROR "CMake step for googlebenchmark failed: ${result}") 26 | endif() 27 | execute_process(COMMAND ${CMAKE_COMMAND} --build . 
28 | RESULT_VARIABLE result 29 | WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/googlebenchmark-download ) 30 | if(result) 31 | message(FATAL_ERROR "Build step for googlebenchmark failed: ${result}") 32 | endif() 33 | 34 | # Add googlebenchmark directly to our build. This defines 35 | # the benchmark and benchmark_main targets. 36 | add_subdirectory(${PROJECT_SOURCE_DIR}/vendor/benchmark 37 | ${CMAKE_CURRENT_BINARY_DIR}/googlebenchmark-build 38 | EXCLUDE_FROM_ALL) 39 | 40 | # The benchmark/benchmark_main targets carry header search path 41 | # dependencies automatically when using CMake 2.8.11 or 42 | # later. Otherwise we have to add them here ourselves. 43 | if (CMAKE_VERSION VERSION_LESS 2.8.11) 44 | include_directories("${benchmark_SOURCE_DIR}/include") 45 | endif() 46 | -------------------------------------------------------------------------------- /dev/cmake/GoogleTest.cmake: -------------------------------------------------------------------------------- 1 | # Copyright 2020 RStudio, Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Download and unpack googletest at configure time 16 | configure_file(tests/GTestCMakeLists.txt.in googletest-download/CMakeLists.txt) 17 | execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" . 
18 | RESULT_VARIABLE result 19 | WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/googletest-download ) 20 | if(result) 21 | message(FATAL_ERROR "CMake step for googletest failed: ${result}") 22 | endif() 23 | execute_process(COMMAND ${CMAKE_COMMAND} --build . 24 | RESULT_VARIABLE result 25 | WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/googletest-download ) 26 | if(result) 27 | message(FATAL_ERROR "Build step for googletest failed: ${result}") 28 | endif() 29 | 30 | # Prevent overriding the parent project's compiler/linker 31 | # settings on Windows 32 | set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) 33 | 34 | # Add googletest directly to our build. This defines 35 | # the gtest and gtest_main targets. 36 | add_subdirectory(${PROJECT_SOURCE_DIR}/vendor/googletest 37 | ${CMAKE_CURRENT_BINARY_DIR}/googletest-build 38 | EXCLUDE_FROM_ALL) 39 | 40 | # The gtest/gtest_main targets carry header search path 41 | # dependencies automatically when using CMake 2.8.11 or 42 | # later. Otherwise we have to add them here ourselves. 43 | if (CMAKE_VERSION VERSION_LESS 2.8.11) 44 | include_directories("${gtest_SOURCE_DIR}/include") 45 | endif() 46 | -------------------------------------------------------------------------------- /dev/lint/autoformat.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Copyright 2020 RStudio, Inc. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | format() { 18 | find include src tests tools -type f -exec clang-format -i ${} 19 | } 20 | -------------------------------------------------------------------------------- /dev/lint/clang-tidy/run-clang-tidy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | #===- run-clang-tidy.py - Parallel clang-tidy runner ---------*- python -*--===# 4 | # 5 | # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 6 | # See https://llvm.org/LICENSE.txt for license information. 7 | # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 8 | # 9 | #===------------------------------------------------------------------------===# 10 | # FIXME: Integrate with clang-tidy-diff.py 11 | 12 | """ 13 | Parallel clang-tidy runner 14 | ========================== 15 | 16 | Runs clang-tidy over all files in a compilation database. Requires clang-tidy 17 | and clang-apply-replacements in $PATH. 18 | 19 | Example invocations. 20 | - Run clang-tidy on all files in the current working directory with a default 21 | set of checks and show warnings in the cpp files and all project headers. 22 | run-clang-tidy.py $PWD 23 | 24 | - Fix all header guards. 25 | run-clang-tidy.py -fix -checks=-*,llvm-header-guard 26 | 27 | - Fix all header guards included from clang-tidy and header guards 28 | for clang-tidy headers. 
29 | run-clang-tidy.py -fix -checks=-*,llvm-header-guard extra/clang-tidy \ 30 | -header-filter=extra/clang-tidy 31 | 32 | Compilation database setup: 33 | http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html 34 | """ 35 | 36 | from __future__ import print_function 37 | 38 | import argparse 39 | import glob 40 | import json 41 | import multiprocessing 42 | import os 43 | import re 44 | import shutil 45 | import subprocess 46 | import sys 47 | import tempfile 48 | import threading 49 | import traceback 50 | 51 | try: 52 | import yaml 53 | except ImportError: 54 | yaml = None 55 | 56 | is_py2 = sys.version[0] == '2' 57 | 58 | if is_py2: 59 | import Queue as queue 60 | else: 61 | import queue as queue 62 | 63 | def find_compilation_database(path): 64 | """Adjusts the directory until a compilation database is found.""" 65 | result = './' 66 | while not os.path.isfile(os.path.join(result, path)): 67 | if os.path.realpath(result) == '/': 68 | print('Error: could not find compilation database.') 69 | sys.exit(1) 70 | result += '../' 71 | return os.path.realpath(result) 72 | 73 | 74 | def make_absolute(f, directory): 75 | if os.path.isabs(f): 76 | return f 77 | return os.path.normpath(os.path.join(directory, f)) 78 | 79 | 80 | def get_tidy_invocation(f, clang_tidy_binary, checks, tmpdir, build_path, 81 | header_filter, extra_arg, extra_arg_before, quiet, 82 | config): 83 | """Gets a command line for clang-tidy.""" 84 | start = [clang_tidy_binary] 85 | if header_filter is not None: 86 | start.append('-header-filter=' + header_filter) 87 | if checks: 88 | start.append('-checks=' + checks) 89 | if tmpdir is not None: 90 | start.append('-export-fixes') 91 | # Get a temporary file. We immediately close the handle so clang-tidy can 92 | # overwrite it. 
93 | (handle, name) = tempfile.mkstemp(suffix='.yaml', dir=tmpdir) 94 | os.close(handle) 95 | start.append(name) 96 | for arg in extra_arg: 97 | start.append('-extra-arg=%s' % arg) 98 | for arg in extra_arg_before: 99 | start.append('-extra-arg-before=%s' % arg) 100 | start.append('-p=' + build_path) 101 | if quiet: 102 | start.append('-quiet') 103 | if config: 104 | start.append('-config=' + config) 105 | start.append(f) 106 | return start 107 | 108 | 109 | def merge_replacement_files(tmpdir, mergefile): 110 | """Merge all replacement files in a directory into a single file""" 111 | # The fixes suggested by clang-tidy >= 4.0.0 are given under 112 | # the top level key 'Diagnostics' in the output yaml files 113 | mergekey="Diagnostics" 114 | merged=[] 115 | for replacefile in glob.iglob(os.path.join(tmpdir, '*.yaml')): 116 | content = yaml.safe_load(open(replacefile, 'r')) 117 | if not content: 118 | continue # Skip empty files. 119 | merged.extend(content.get(mergekey, [])) 120 | 121 | if merged: 122 | # MainSourceFile: The key is required by the definition inside 123 | # include/clang/Tooling/ReplacementsYaml.h, but the value 124 | # is actually never used inside clang-apply-replacements, 125 | # so we set it to '' here. 126 | output = { 'MainSourceFile': '', mergekey: merged } 127 | with open(mergefile, 'w') as out: 128 | yaml.safe_dump(output, out) 129 | else: 130 | # Empty the file: 131 | open(mergefile, 'w').close() 132 | 133 | 134 | def check_clang_apply_replacements_binary(args): 135 | """Checks if invoking supplied clang-apply-replacements binary works.""" 136 | try: 137 | subprocess.check_call([args.clang_apply_replacements_binary, '--version']) 138 | except: 139 | print('Unable to run clang-apply-replacements. 
Is clang-apply-replacements ' 140 | 'binary correctly specified?', file=sys.stderr) 141 | traceback.print_exc() 142 | sys.exit(1) 143 | 144 | 145 | def apply_fixes(args, tmpdir): 146 | """Calls clang-apply-fixes on a given directory.""" 147 | invocation = [args.clang_apply_replacements_binary] 148 | if args.format: 149 | invocation.append('-format') 150 | if args.style: 151 | invocation.append('-style=' + args.style) 152 | invocation.append(tmpdir) 153 | subprocess.call(invocation) 154 | 155 | 156 | def run_tidy(args, tmpdir, build_path, queue, lock, failed_files): 157 | """Takes filenames out of queue and runs clang-tidy on them.""" 158 | while True: 159 | name = queue.get() 160 | invocation = get_tidy_invocation(name, args.clang_tidy_binary, args.checks, 161 | tmpdir, build_path, args.header_filter, 162 | args.extra_arg, args.extra_arg_before, 163 | args.quiet, args.config) 164 | 165 | proc = subprocess.Popen(invocation, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 166 | output, err = proc.communicate() 167 | if proc.returncode != 0: 168 | failed_files.append(name) 169 | with lock: 170 | sys.stdout.write(' '.join(invocation) + '\n' + output.decode('utf-8')) 171 | if len(err) > 0: 172 | sys.stdout.flush() 173 | sys.stderr.write(err.decode('utf-8')) 174 | queue.task_done() 175 | 176 | 177 | def main(): 178 | parser = argparse.ArgumentParser(description='Runs clang-tidy over all files ' 179 | 'in a compilation database. 
Requires ' 180 | 'clang-tidy and clang-apply-replacements in ' 181 | '$PATH.') 182 | parser.add_argument('-clang-tidy-binary', metavar='PATH', 183 | default='clang-tidy', 184 | help='path to clang-tidy binary') 185 | parser.add_argument('-clang-apply-replacements-binary', metavar='PATH', 186 | default='clang-apply-replacements', 187 | help='path to clang-apply-replacements binary') 188 | parser.add_argument('-checks', default=None, 189 | help='checks filter, when not specified, use clang-tidy ' 190 | 'default') 191 | parser.add_argument('-config', default=None, 192 | help='Specifies a configuration in YAML/JSON format: ' 193 | ' -config="{Checks: \'*\', ' 194 | ' CheckOptions: [{key: x, ' 195 | ' value: y}]}" ' 196 | 'When the value is empty, clang-tidy will ' 197 | 'attempt to find a file named .clang-tidy for ' 198 | 'each source file in its parent directories.') 199 | parser.add_argument('-header-filter', default=None, 200 | help='regular expression matching the names of the ' 201 | 'headers to output diagnostics from. 
Diagnostics from ' 202 | 'the main file of each translation unit are always ' 203 | 'displayed.') 204 | if yaml: 205 | parser.add_argument('-export-fixes', metavar='filename', dest='export_fixes', 206 | help='Create a yaml file to store suggested fixes in, ' 207 | 'which can be applied with clang-apply-replacements.') 208 | parser.add_argument('-j', type=int, default=0, 209 | help='number of tidy instances to be run in parallel.') 210 | parser.add_argument('files', nargs='*', default=['.*'], 211 | help='files to be processed (regex on path)') 212 | parser.add_argument('-fix', action='store_true', help='apply fix-its') 213 | parser.add_argument('-format', action='store_true', help='Reformat code ' 214 | 'after applying fixes') 215 | parser.add_argument('-style', default='file', help='The style of reformat ' 216 | 'code after applying fixes') 217 | parser.add_argument('-p', dest='build_path', 218 | help='Path used to read a compile command database.') 219 | parser.add_argument('-extra-arg', dest='extra_arg', 220 | action='append', default=[], 221 | help='Additional argument to append to the compiler ' 222 | 'command line.') 223 | parser.add_argument('-extra-arg-before', dest='extra_arg_before', 224 | action='append', default=[], 225 | help='Additional argument to prepend to the compiler ' 226 | 'command line.') 227 | parser.add_argument('-quiet', action='store_true', 228 | help='Run clang-tidy in quiet mode') 229 | args = parser.parse_args() 230 | 231 | db_path = 'compile_commands.json' 232 | 233 | if args.build_path is not None: 234 | build_path = args.build_path 235 | else: 236 | # Find our database 237 | build_path = find_compilation_database(db_path) 238 | 239 | try: 240 | invocation = [args.clang_tidy_binary, '-list-checks'] 241 | invocation.append('-p=' + build_path) 242 | if args.checks: 243 | invocation.append('-checks=' + args.checks) 244 | invocation.append('-') 245 | if args.quiet: 246 | # Even with -quiet we still want to check if we can call clang-tidy. 
247 | with open(os.devnull, 'w') as dev_null: 248 | subprocess.check_call(invocation, stdout=dev_null) 249 | else: 250 | subprocess.check_call(invocation) 251 | except: 252 | print("Unable to run clang-tidy.", file=sys.stderr) 253 | sys.exit(1) 254 | 255 | # Load the database and extract all files. 256 | database = json.load(open(os.path.join(build_path, db_path))) 257 | files = [make_absolute(entry['file'], entry['directory']) 258 | for entry in database] 259 | 260 | max_task = args.j 261 | if max_task == 0: 262 | max_task = multiprocessing.cpu_count() 263 | 264 | tmpdir = None 265 | if args.fix or (yaml and args.export_fixes): 266 | check_clang_apply_replacements_binary(args) 267 | tmpdir = tempfile.mkdtemp() 268 | 269 | # Build up a big regexy filter from all command line arguments. 270 | file_name_re = re.compile('|'.join(args.files)) 271 | print('|'.join(args.files)) 272 | 273 | return_code = 0 274 | try: 275 | # Spin up a bunch of tidy-launching threads. 276 | task_queue = queue.Queue(max_task) 277 | # List of files with a non-zero return code. 278 | failed_files = [] 279 | lock = threading.Lock() 280 | for _ in range(max_task): 281 | t = threading.Thread(target=run_tidy, 282 | args=(args, tmpdir, build_path, task_queue, lock, failed_files)) 283 | t.daemon = True 284 | t.start() 285 | 286 | # Fill the queue with files. 287 | for name in files: 288 | if file_name_re.search(name): 289 | task_queue.put(name) 290 | 291 | # Wait for all threads to be done. 292 | task_queue.join() 293 | if len(failed_files): 294 | return_code = 1 295 | 296 | except KeyboardInterrupt: 297 | # This is a sad hack. Unfortunately subprocess goes 298 | # bonkers with ctrl-c and we start forking merrily. 
299 | print('\nCtrl-C detected, goodbye.') 300 | if tmpdir: 301 | shutil.rmtree(tmpdir) 302 | os.kill(0, 9) 303 | 304 | if yaml and args.export_fixes: 305 | print('Writing fixes to ' + args.export_fixes + ' ...') 306 | try: 307 | merge_replacement_files(tmpdir, args.export_fixes) 308 | except: 309 | print('Error exporting fixes.\n', file=sys.stderr) 310 | traceback.print_exc() 311 | return_code=1 312 | 313 | if args.fix: 314 | print('Applying fixes ...') 315 | try: 316 | apply_fixes(args, tmpdir) 317 | except: 318 | print('Error applying fixes.\n', file=sys.stderr) 319 | traceback.print_exc() 320 | return_code=1 321 | 322 | if tmpdir: 323 | shutil.rmtree(tmpdir) 324 | sys.exit(return_code) 325 | 326 | if __name__ == '__main__': 327 | main() 328 | -------------------------------------------------------------------------------- /doc/presentation/adhoc-2020-04-21/benchmark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsaintjacques/jitmap/62892cda0f9283331cf25e0f988ae05e70fc7835/doc/presentation/adhoc-2020-04-21/benchmark.png -------------------------------------------------------------------------------- /doc/presentation/adhoc-2020-04-21/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | jitmap: Execution engine for bitmaps 5 | 6 | 18 | 19 | 20 | 219 | 221 | 226 | 227 | 228 | -------------------------------------------------------------------------------- /include/jitmap/bitset.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 RStudio, Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #pragma once 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | #include 24 | #include 25 | 26 | namespace jitmap { 27 | 28 | using BitsetWordType = uint64_t; 29 | 30 | // Bitset is a container similar to `std::bitset` but wrapping a data pointer. 31 | // The ownership/lifetime of the data pointer is defined by `Ptr` type. 32 | template 33 | class Bitset { 34 | public: 35 | using word_type = BitsetWordType; 36 | using storage_type = typename std::pointer_traits::element_type; 37 | 38 | template 39 | using enable_if_writable = std::enable_if_t::value, Ret>; 40 | 41 | // Indicate if the pointer is read-only. 42 | static constexpr bool storage_is_const = std::is_const::value; 43 | static constexpr size_t kBitsPerWord = sizeof(word_type) * CHAR_BIT; 44 | 45 | // Construct a bitset from a pointer. 46 | Bitset(Ptr data) : data_(std::move(data)) { JITMAP_PRE_NE(data_, nullptr); } 47 | 48 | // Return the capacity (in bits) of the bitset. 
49 | constexpr size_t size() const noexcept { return N; } 50 | 51 | template 52 | bool operator==(const Bitset& rhs) const { 53 | const auto& lhs_word = word(); 54 | const auto& rhs_word = rhs.word(); 55 | 56 | if (lhs_word == rhs_word) return true; 57 | 58 | for (size_t i = 0; i < size_words(); i++) { 59 | if (lhs_word[i] != rhs_word[i]) return false; 60 | } 61 | 62 | return true; 63 | } 64 | 65 | template 66 | bool operator!=(const Bitset& rhs) const { 67 | return !(*this == rhs); 68 | } 69 | 70 | // Accessors 71 | bool test(size_t i) const { 72 | if (i >= N) throw std::out_of_range("Can't access bit"); 73 | return operator[](i); 74 | } 75 | 76 | bool operator[](size_t i) const noexcept { 77 | return word()[i / sizeof(word_type)] & (1U << (i % sizeof(word_type))); 78 | } 79 | 80 | // Indicate if all bits are set. 81 | bool all() const noexcept { 82 | for (size_t i = 0; i < size_words(); i++) { 83 | if (word()[i] != std::numeric_limits::max()) return false; 84 | } 85 | 86 | return true; 87 | } 88 | 89 | // Indicate if at least one bit is set. 90 | bool any() const noexcept { 91 | for (size_t i = 0; i < size_words(); i++) { 92 | if (word()[i] != 0) return true; 93 | } 94 | 95 | return false; 96 | } 97 | 98 | // Indicate if no bit is set. 99 | bool none() const noexcept { 100 | for (size_t i = 0; i < size_words(); i++) { 101 | if (word()[i] != 0) return false; 102 | } 103 | 104 | return true; 105 | } 106 | 107 | // Count the number of set bits (ones). 108 | size_t count() const noexcept { 109 | size_t sum = 0; 110 | 111 | for (size_t i = 0; i < size_words(); i++) { 112 | sum += __builtin_popcountll(word()[i]); 113 | } 114 | 115 | return sum; 116 | } 117 | 118 | // Modifiers 119 | // 120 | // The in-place modifiers are enabled only if the storage pointer is not const. 
121 | 122 | // Perform binary AND 123 | template 124 | enable_if_writable&> operator&=( 125 | const Bitset& other) noexcept { 126 | for (size_t i = 0; i < size_words(); i++) { 127 | word()[i] &= other.word()[i]; 128 | } 129 | return *this; 130 | } 131 | 132 | // Perform binary OR 133 | template 134 | enable_if_writable&> operator|=( 135 | const Bitset& other) noexcept { 136 | for (size_t i = 0; i < size_words(); i++) { 137 | word()[i] |= other.word()[i]; 138 | } 139 | return *this; 140 | } 141 | 142 | // Perform binary XOR 143 | template 144 | enable_if_writable&> operator^=( 145 | const Bitset& other) noexcept { 146 | for (size_t i = 0; i < size_words(); i++) { 147 | word()[i] ^= other.word()[i]; 148 | } 149 | return *this; 150 | } 151 | 152 | // Perform binary NOT 153 | template 154 | enable_if_writable&> operator~() noexcept { 155 | for (size_t i = 0; i < size_words(); i++) { 156 | word()[i] = ~word()[i]; 157 | } 158 | return *this; 159 | } 160 | 161 | // Set all bits. 162 | template 163 | enable_if_writable set() noexcept { 164 | memset(word(), 0xFF, size() / CHAR_BIT); 165 | } 166 | 167 | /* TODO 168 | // Set a single bit. 169 | template 170 | enable_if_writable set(size_t i, bool value = true) {} 171 | */ 172 | 173 | // Clear all bits. 174 | template 175 | enable_if_writable reset() noexcept { 176 | memset(word(), 0, size() / CHAR_BIT); 177 | } 178 | 179 | /* TODO 180 | // Clear a single bit. 181 | template 182 | enable_if_writable reset(size_t i) {} 183 | */ 184 | 185 | // Flip all bits (perform binary NOT). 186 | template 187 | enable_if_writable flip() noexcept { 188 | this->operator~(); 189 | } 190 | 191 | /* TODO 192 | // Flip a single bit. 
193 | template 194 | enable_if_writable flip(size_t i) {} 195 | */ 196 | 197 | // Data pointers 198 | const char* data() const { return reinterpret_cast(&data_[0]); } 199 | 200 | template 201 | enable_if_writable data() { 202 | return reinterpret_cast(&data_[0]); 203 | } 204 | 205 | // Return the capacity (in words) of the bitset. 206 | constexpr size_t size_words() const noexcept { 207 | return N / (CHAR_BIT * sizeof(word_type)); 208 | } 209 | 210 | const word_type* word() const { return reinterpret_cast(&data_[0]); } 211 | 212 | template 213 | enable_if_writable word() { 214 | return reinterpret_cast(&data_[0]); 215 | } 216 | 217 | private: 218 | Ptr data_; 219 | 220 | static_assert(N % (kBitsPerWord) == 0, "Bitset size must be a multiple of word_type"); 221 | static_assert(N >= kBitsPerWord, "Bitset size must be greater than word_type"); 222 | 223 | // Friend itself of other template parameters, used for accessing `data_`. 224 | template 225 | friend class Bitset; 226 | }; 227 | 228 | // Create a bitset from a memory address. 
229 | template 230 | Bitset make_bitset(Ptr ptr) { 231 | return {std::move(ptr)}; 232 | } 233 | 234 | template 235 | struct DeleteAligned { 236 | void operator()(T* data) const { free(data); } 237 | }; 238 | 239 | template 240 | auto allocate_aligned(size_t alignment, size_t length) { 241 | T* raw = reinterpret_cast(aligned_alloc(alignment, sizeof(T) * length)); 242 | return std::unique_ptr>{raw}; 243 | } 244 | 245 | template 246 | using OwnedBitset = Bitset>>; 247 | 248 | template 249 | OwnedBitset make_owned_bitset() { 250 | constexpr size_t kNumberWords = N / (sizeof(BitsetWordType) * CHAR_BIT); 251 | return make_bitset(allocate_aligned(kCacheLineSize, kNumberWords)); 252 | } 253 | 254 | template 255 | OwnedBitset operator~(const Bitset& bitset) { 256 | auto owned = make_owned_bitset(); 257 | 258 | auto owned_word = owned.word(); 259 | auto bitset_word = bitset.word(); 260 | 261 | size_t remaining = owned.size_words(); 262 | while (remaining-- != 0UL) { 263 | *owned_word++ = ~(*bitset_word++); 264 | } 265 | 266 | return owned; 267 | } 268 | 269 | template 270 | OwnedBitset operator&(const Bitset& lhs, 271 | const Bitset& rhs) { 272 | auto owned = make_owned_bitset(); 273 | 274 | auto owned_word = owned.word(); 275 | auto lhs_word = lhs.word(); 276 | auto rhs_word = rhs.word(); 277 | 278 | size_t remaining = owned.size_words(); 279 | while (remaining-- != 0UL) { 280 | *owned_word++ = *lhs_word++ & *rhs_word++; 281 | } 282 | 283 | return owned; 284 | } 285 | 286 | template 287 | OwnedBitset operator|(const Bitset& lhs, 288 | const Bitset& rhs) { 289 | auto owned = make_owned_bitset(); 290 | 291 | auto owned_word = owned.word(); 292 | auto lhs_word = lhs.word(); 293 | auto rhs_word = rhs.word(); 294 | 295 | size_t remaining = owned.size_words(); 296 | while (remaining-- != 0UL) { 297 | *owned_word++ = *lhs_word++ | *rhs_word++; 298 | } 299 | 300 | return owned; 301 | } 302 | 303 | template 304 | OwnedBitset operator^(const Bitset& lhs, 305 | const Bitset& rhs) { 
306 | auto owned = make_owned_bitset(); 307 | 308 | auto owned_word = owned.word(); 309 | auto lhs_word = lhs.word(); 310 | auto rhs_word = rhs.word(); 311 | 312 | size_t remaining = owned.size_words(); 313 | while (remaining-- != 0UL) { 314 | *owned_word++ = *lhs_word++ ^ *rhs_word++; 315 | } 316 | 317 | return owned; 318 | } 319 | 320 | } // namespace jitmap 321 | -------------------------------------------------------------------------------- /include/jitmap/container/container.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 RStudio, Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #pragma once 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #include 23 | 24 | namespace jitmap { 25 | 26 | enum ContainerType : uint8_t { 27 | BITMAP = 0, 28 | ARRAY = 1, 29 | RUN_LENGTH = 2, 30 | EMPTY = 3, 31 | FULL = 4, 32 | }; 33 | 34 | constexpr size_t kBitsInProxy = 64U; 35 | using ProxyBitmap = std::bitset; 36 | 37 | class Statistics { 38 | public: 39 | Statistics() : Statistics(0UL, 0L) {} 40 | Statistics(ProxyBitmap proxy, int32_t count) 41 | : proxy_(std::move(proxy)), count_(count) {} 42 | 43 | static Statistics Empty() { 44 | return Statistics(0xFFFFFFFFFFFFFFFFULL, kBitsPerContainer); 45 | } 46 | static Statistics Full() { return Statistics(); }; 47 | 48 | bool all() const noexcept { return count_ == kBitsPerContainer; } 49 | bool full() const noexcept { return all(); } 50 | 51 | bool any() const noexcept { return proxy_.any(); } 52 | 53 | bool none() const noexcept { return proxy_.none(); } 54 | bool empty() const noexcept { return none(); } 55 | 56 | private: 57 | ProxyBitmap proxy_; 58 | int32_t count_ = 0UL; 59 | }; 60 | 61 | class Container { 62 | public: 63 | using index_type = uint16_t; 64 | 65 | Container() : statistics_(std::nullopt) {} 66 | Container(Statistics statistics) : statistics_(std::move(statistics)) {} 67 | 68 | size_t count() const noexcept { return statistics().all(); } 69 | 70 | bool all() const noexcept { return count() == kBitsPerContainer; } 71 | bool full() const noexcept { return all(); } 72 | 73 | bool any() const noexcept { return statistics().any(); } 74 | 75 | bool none() const noexcept { return statistics().none(); } 76 | bool empty() const noexcept { return none(); } 77 | 78 | bool has_statistics() const noexcept { return statistics_.has_value(); } 79 | const Statistics& statistics() const noexcept { 80 | if (!has_statistics()) statistics_ = ComputeStatistics(); 81 | return statistics_.value(); 82 | }; 83 | 84 | virtual bool operator[](index_type index) const noexcept = 0; 85 | 86 | 
private: 87 | virtual Statistics ComputeStatistics() const noexcept = 0; 88 | 89 | // Field is mutable because it is lazily computed. 90 | mutable std::optional statistics_; 91 | }; 92 | 93 | class EmptyContainer final : public Container { 94 | public: 95 | bool operator[](index_type index) const noexcept final { return false; } 96 | 97 | private: 98 | Statistics ComputeStatistics() const noexcept final { return Statistics::Empty(); } 99 | }; 100 | 101 | class FullContainer final : public Container { 102 | public: 103 | bool operator[](index_type index) const noexcept final { return true; } 104 | 105 | private: 106 | Statistics ComputeStatistics() const noexcept final { return Statistics::Full(); } 107 | }; 108 | 109 | template 110 | class BaseContainer : public Container { 111 | public: 112 | using SelfType = EffectiveType; 113 | static constexpr ContainerType type = Type; 114 | 115 | constexpr ContainerType container_type() const { return type; } 116 | }; 117 | 118 | using DenseBitset = std::bitset; 119 | 120 | class DenseContainer final : public BaseContainer { 121 | bool operator[](index_type index) const noexcept { return bitmap_[index]; } 122 | 123 | private: 124 | Statistics ComputeStatistics() const noexcept final { 125 | return {0, static_cast(bitmap_.count())}; 126 | } 127 | 128 | DenseBitset bitmap_; 129 | }; 130 | 131 | }; // namespace jitmap 132 | -------------------------------------------------------------------------------- /include/jitmap/jitmap.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 RStudio, Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #pragma once 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | 28 | namespace jitmap { 29 | 30 | enum ContainerType : uint8_t { 31 | BITMAP = 0, 32 | ARRAY = 1, 33 | RUN_LENGTH = 2, 34 | STATIC = 3, 35 | }; 36 | 37 | constexpr size_t kBitsInProxy = 64U; 38 | using ProxyBitmap = std::bitset; 39 | 40 | class Statistics { 41 | public: 42 | Statistics() : Statistics(0UL, 0L) {} 43 | Statistics(ProxyBitmap proxy, int32_t count) 44 | : proxy_(std::move(proxy)), count_(count) {} 45 | 46 | static Statistics Empty() { 47 | return Statistics(0xFFFFFFFFFFFFFFFFULL, kBitsPerContainer); 48 | } 49 | static Statistics Full() { return Statistics(); }; 50 | 51 | bool all() const noexcept { return count_ == kBitsPerContainer; } 52 | bool full() const noexcept { return all(); } 53 | 54 | bool any() const noexcept { return proxy_.any(); } 55 | 56 | bool none() const noexcept { return proxy_.none(); } 57 | bool empty() const noexcept { return none(); } 58 | 59 | private: 60 | ProxyBitmap proxy_; 61 | int32_t count_ = 0UL; 62 | }; 63 | 64 | class Container { 65 | public: 66 | using index_type = uint16_t; 67 | 68 | Container() : statistics_(std::nullopt) {} 69 | Container(Statistics statistics) : statistics_(std::move(statistics)) {} 70 | 71 | size_t count() const noexcept { return statistics().all(); } 72 | 73 | bool all() const noexcept { return count() == kBitsPerContainer; } 74 | bool full() const noexcept { return all(); } 75 | 76 | 
bool any() const noexcept { return statistics().any(); } 77 | 78 | bool none() const noexcept { return statistics().none(); } 79 | bool empty() const noexcept { return none(); } 80 | 81 | bool has_statistics() const noexcept { return statistics_.has_value(); } 82 | const Statistics& statistics() const noexcept { 83 | if (!has_statistics()) statistics_ = ComputeStatistics(); 84 | return statistics_.value(); 85 | }; 86 | 87 | virtual bool operator[](index_type index) const noexcept = 0; 88 | 89 | private: 90 | virtual Statistics ComputeStatistics() const noexcept = 0; 91 | 92 | // Field is mutable because it is lazily computed. 93 | mutable std::optional statistics_; 94 | }; 95 | 96 | class FullContainer final : public Container { 97 | public: 98 | bool operator[](index_type index) const noexcept final { return true; } 99 | 100 | private: 101 | Statistics ComputeStatistics() const noexcept final { return Statistics::Full(); } 102 | }; 103 | 104 | class EmptyContainer final : public Container { 105 | public: 106 | bool operator[](index_type index) const noexcept final { return false; } 107 | 108 | private: 109 | Statistics ComputeStatistics() const noexcept final { return Statistics::Empty(); } 110 | }; 111 | 112 | template 113 | class BaseContainer : public Container { 114 | public: 115 | using SelfType = EffectiveType; 116 | static constexpr ContainerType type = Type; 117 | 118 | constexpr ContainerType container_type() const { return type; } 119 | }; 120 | 121 | using DenseBitset = std::bitset; 122 | 123 | class DenseContainer final : public BaseContainer { 124 | bool operator[](index_type index) const noexcept { return bitmap_[index]; } 125 | 126 | private: 127 | Statistics ComputeStatistics() const noexcept final { 128 | return {0, static_cast(bitmap_.count())}; 129 | } 130 | 131 | DenseBitset bitmap_; 132 | }; 133 | 134 | class Bitmap { 135 | public: 136 | using index_type = uint64_t; 137 | using key_index_type = uint32_t; 138 | 139 | std::pair key(index_type 
index) const { 140 | return {index >> kLogBitsPerContainer, index & 0xFF}; 141 | } 142 | 143 | bool operator[](index_type index) const { 144 | auto [k, offset] = key(index); 145 | 146 | if (auto result = containers_.find(k); result != containers_.end()) 147 | return result->second->operator[](offset); 148 | 149 | return false; 150 | } 151 | 152 | private: 153 | std::unordered_map> containers_; 154 | }; 155 | 156 | }; // namespace jitmap 157 | -------------------------------------------------------------------------------- /include/jitmap/query/compiler.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 RStudio, Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #pragma once 16 | 17 | #include 18 | #include 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | namespace llvm { 26 | class LLVMContext; 27 | class Module; 28 | } // namespace llvm 29 | 30 | namespace jitmap { 31 | namespace query { 32 | 33 | class CompilerException : public Exception { 34 | public: 35 | using Exception::Exception; 36 | }; 37 | 38 | // Signature of generated functions 39 | typedef void (*DenseEvalFn)(const char**, char*); 40 | typedef int32_t (*DenseEvalPopCountFn)(const char**, char*); 41 | 42 | struct CompilerOptions { 43 | // Controls LLVM optimization level (-O0, -O1, -O2, -O3). Anything above 3 44 | // will be clamped to 3. 
45 | uint8_t optimization_level = 3; 46 | 47 | // CPU architecture to optimize for. This will dictate the "best" vector 48 | // instruction set to compile with. If unspecified or empty, llvm will 49 | // auto-detect the host cpu architecture. 50 | // 51 | // Invoke clang with `-mcpu=?` options to get a list of supported strings, e.g. 52 | // - core-avx-i 53 | // - core-avx2 54 | // - skylake-avx512 55 | std::string cpu = ""; 56 | }; 57 | 58 | class Expr; 59 | 60 | class JitEngineImpl; 61 | // The JitEngine class transforms IR queries into executable functions. 62 | class JitEngine : util::Pimpl { 63 | public: 64 | // Create a JitEngine. 65 | // 66 | // \param[in] options, see `CompilerOptions` documentation. 67 | // 68 | // \return a JitEngine 69 | static std::shared_ptr Make(CompilerOptions options = {}); 70 | 71 | // Compile a query expression into the module. 72 | // 73 | // Takes an expression, lowers it to LLVM's IR. Pass the IR module to the 74 | // internal jit engine which compiles it into assembly. Inject an executable 75 | // function symbol in the current process. See `Lookup` in order to retrieve 76 | // a function pointer to this symbol. 77 | // 78 | // \param[in] name, the query name, will be used in the generated symbol name. 79 | // The name must be unique with regards to previously 80 | // compiled queries. 81 | // \param[in] expr, the query expression. 82 | // 83 | // \throws CompilerException if any errors is encountered. 84 | void Compile(const std::string& name, const Expr& expression); 85 | 86 | // Lower a query expression to LLVM's IR representation. 87 | // 88 | // This method is used for debugging. An executable symbol is _not_ generated 89 | // with this function, see `Compile`. 90 | // 91 | // \param[in] name, the query name, will be used in the generated symbol name. 92 | // \param[in] expr, the query expression. 93 | // 94 | // \return the IR of the compiled function. 
95 | // 96 | // \throws CompilerException if any errors is encountered. 97 | std::string CompileIR(const std::string& name, const Expr& expression); 98 | 99 | // Lookup a query 100 | DenseEvalFn LookupUserQuery(const std::string& query_name); 101 | DenseEvalPopCountFn LookupUserPopCountQuery(const std::string& query_name); 102 | 103 | // Return the LLVM name for the host CPU. 104 | // 105 | // This is the string given to `-march/-mtune/-mcpu`. See 106 | // http://llvm.org/doxygen/Host_8h_source.html for more information. 107 | std::string GetTargetCPU() const; 108 | 109 | // Return the LLVM target triple for the host. 110 | // 111 | // The format is ARCHITECTURE-VENDOR-OPERATING_SYSTEM-ENVIRONMENT. See 112 | // http://llvm.org/doxygen/Triple_8h_source.html for more information. 113 | std::string GetTargetTriple() const; 114 | 115 | private: 116 | explicit JitEngine(CompilerOptions options); 117 | }; 118 | 119 | } // namespace query 120 | } // namespace jitmap 121 | -------------------------------------------------------------------------------- /include/jitmap/query/expr.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 RStudio, Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #pragma once 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #include 23 | 24 | namespace jitmap { 25 | namespace query { 26 | 27 | class ExprBuilder; 28 | 29 | class Expr { 30 | public: 31 | enum Type { 32 | // Literals 33 | EMPTY_LITERAL = 0, 34 | FULL_LITERAL, 35 | VARIABLE, 36 | 37 | // Operators 38 | NOT_OPERATOR, 39 | AND_OPERATOR, 40 | OR_OPERATOR, 41 | XOR_OPERATOR, 42 | }; 43 | 44 | Type type() const { return type_; } 45 | 46 | bool IsLiteral() const; 47 | bool IsVariable() const; 48 | bool IsOperator() const; 49 | bool IsUnaryOperator() const; 50 | bool IsBinaryOperator() const; 51 | 52 | template 53 | auto Visit(Visitor&& v) const; 54 | template 55 | auto Visit(Visitor&& v); 56 | 57 | // Convenience and debug operators 58 | bool operator==(const Expr& rhs) const; 59 | bool operator==(Expr* rhs) const { return *this == *rhs; } 60 | bool operator!=(const Expr& rhs) const { return !(*this == rhs); } 61 | std::string ToString() const; 62 | 63 | // Return all Reference expressions. 64 | std::vector Variables() const; 65 | 66 | // Copy the expression 67 | Expr* Copy(ExprBuilder* builder) const; 68 | 69 | virtual ~Expr() {} 70 | 71 | protected: 72 | explicit Expr(Type type) : type_(type) {} 73 | 74 | private: 75 | Type type_; 76 | 77 | // Use Copy() 78 | Expr(const Expr&) = delete; 79 | }; 80 | 81 | // The abstract base class exists such that the Expr class is template-free. 
82 | template 83 | class BaseExpr : public Expr { 84 | protected: 85 | BaseExpr() : Expr(T) {} 86 | }; 87 | 88 | class LiteralExpr {}; 89 | class OpExpr {}; 90 | 91 | class UnaryOpExpr : public OpExpr { 92 | public: 93 | explicit UnaryOpExpr(Expr* expr) : operand_(expr) {} 94 | 95 | Expr* operand() const { return operand_; } 96 | void set_operand(Expr* expr) { operand_ = expr; } 97 | 98 | private: 99 | Expr* operand_; 100 | }; 101 | 102 | class BinaryOpExpr : public OpExpr { 103 | public: 104 | BinaryOpExpr(Expr* lhs, Expr* rhs) : left_operand_(lhs), right_operand_(rhs) {} 105 | 106 | Expr* left_operand() const { return left_operand_; } 107 | void set_left_operand(Expr* left) { left_operand_ = left; } 108 | 109 | Expr* right_operand() const { return right_operand_; } 110 | void set_right_operand(Expr* right) { right_operand_ = right; } 111 | 112 | private: 113 | Expr* left_operand_; 114 | Expr* right_operand_; 115 | }; 116 | 117 | // Literal Expressions 118 | 119 | // Represents an empty bitmap (all bits cleared) 120 | class EmptyBitmapExpr final : public BaseExpr, public LiteralExpr {}; 121 | 122 | // Represents a full bitmap (all bits set) 123 | class FullBitmapExpr final : public BaseExpr, public LiteralExpr {}; 124 | 125 | // References a Bitmap by name 126 | class VariableExpr final : public BaseExpr { 127 | public: 128 | explicit VariableExpr(std::string name) : name_(std::move(name)) {} 129 | 130 | const std::string& value() const { return name_; } 131 | 132 | private: 133 | std::string name_; 134 | }; 135 | 136 | // Operators 137 | 138 | class NotOpExpr final : public BaseExpr, public UnaryOpExpr { 139 | public: 140 | using UnaryOpExpr::UnaryOpExpr; 141 | }; 142 | 143 | class AndOpExpr final : public BaseExpr, public BinaryOpExpr { 144 | public: 145 | using BinaryOpExpr::BinaryOpExpr; 146 | }; 147 | 148 | class OrOpExpr final : public BaseExpr, public BinaryOpExpr { 149 | public: 150 | using BinaryOpExpr::BinaryOpExpr; 151 | }; 152 | 153 | class XorOpExpr 
final : public BaseExpr, public BinaryOpExpr { 154 | public: 155 | using BinaryOpExpr::BinaryOpExpr; 156 | }; 157 | 158 | class ExprBuilder { 159 | public: 160 | Expr* EmptyBitmap() { 161 | static EmptyBitmapExpr empty; 162 | return ∅ 163 | } 164 | 165 | Expr* FullBitmap() { 166 | static FullBitmapExpr full; 167 | return &full; 168 | } 169 | 170 | Expr* Var(const std::string& name) { return Build(name); } 171 | Expr* Var(std::string_view name) { return Build(std::string(name)); } 172 | 173 | Expr* Not(Expr* expr) { return Build(expr); } 174 | 175 | Expr* And(Expr* lhs, Expr* rhs) { return Build(lhs, rhs); } 176 | 177 | Expr* Or(Expr* lhs, Expr* rhs) { return Build(lhs, rhs); } 178 | 179 | Expr* Xor(Expr* lhs, Expr* rhs) { return Build(lhs, rhs); } 180 | 181 | private: 182 | template 183 | Expr* Build(Args&&... args) { 184 | return expressions_.emplace_back(std::make_unique(std::forward(args)...)) 185 | .get(); 186 | } 187 | 188 | std::vector> expressions_; 189 | }; 190 | 191 | // TODO: Refactor to support various input types (instead of `const Expr&`). 
192 | template 193 | auto Expr::Visit(Visitor&& v) const { 194 | switch (type()) { 195 | case EMPTY_LITERAL: 196 | return v(dynamic_cast(this)); 197 | case FULL_LITERAL: 198 | return v(dynamic_cast(this)); 199 | case VARIABLE: 200 | return v(dynamic_cast(this)); 201 | case NOT_OPERATOR: 202 | return v(dynamic_cast(this)); 203 | case AND_OPERATOR: 204 | return v(dynamic_cast(this)); 205 | case OR_OPERATOR: 206 | return v(dynamic_cast(this)); 207 | case XOR_OPERATOR: 208 | return v(dynamic_cast(this)); 209 | } 210 | 211 | throw Exception("Unknown type: ", type()); 212 | } 213 | 214 | template 215 | auto Expr::Visit(Visitor&& v) { 216 | switch (type()) { 217 | case EMPTY_LITERAL: 218 | return v(dynamic_cast(this)); 219 | case FULL_LITERAL: 220 | return v(dynamic_cast(this)); 221 | case VARIABLE: 222 | return v(dynamic_cast(this)); 223 | case NOT_OPERATOR: 224 | return v(dynamic_cast(this)); 225 | case AND_OPERATOR: 226 | return v(dynamic_cast(this)); 227 | case OR_OPERATOR: 228 | return v(dynamic_cast(this)); 229 | case XOR_OPERATOR: 230 | return v(dynamic_cast(this)); 231 | } 232 | 233 | throw Exception("Unknown type: ", type()); 234 | } 235 | 236 | std::ostream& operator<<(std::ostream& os, const Expr& e); 237 | std::ostream& operator<<(std::ostream& os, Expr* e); 238 | 239 | } // namespace query 240 | } // namespace jitmap 241 | -------------------------------------------------------------------------------- /include/jitmap/query/matcher.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 RStudio, Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #pragma once 16 | 17 | #include 18 | #include 19 | #include 20 | 21 | #include 22 | 23 | namespace jitmap { 24 | namespace query { 25 | 26 | // The Matcher interface is used to recognize patterns in expressions. 27 | class Matcher { 28 | public: 29 | virtual bool Match(const Expr& expr) const = 0; 30 | bool Match(Expr* expr) const { 31 | return (expr != nullptr) && Match(*expr); 32 | } 33 | 34 | bool operator()(const Expr& expr) const { return Match(expr); } 35 | bool operator()(Expr* expr) const { return Match(expr); } 36 | 37 | virtual ~Matcher() = default; 38 | }; 39 | 40 | // TypeMatcher matches Expression of given types. 41 | class TypeMatcher final : public Matcher { 42 | public: 43 | using Type = Expr::Type; 44 | 45 | // Construct a TypeMatcher matching a single type. 46 | explicit TypeMatcher(Type type); 47 | // Construct a TypeMatcher matching a set of types. 48 | explicit TypeMatcher(const std::vector& types); 49 | TypeMatcher(const std::initializer_list& types); 50 | 51 | bool Match(const Expr& expr) const override; 52 | 53 | private: 54 | std::bitset<8> mask_; 55 | }; 56 | 57 | // OperandMatcher applies a matcher to an operator's operand(s). 58 | class OperandMatcher final : public Matcher { 59 | public: 60 | // The Mode is relevant to operators with more than one operand. It 61 | // indicates if the match should apply to one (ANY) or all (ALL) 62 | // operands. 
63 | enum Mode { 64 | // Any (at least one), operand must match 65 | ANY, 66 | // All operand must match 67 | ALL, 68 | }; 69 | 70 | explicit OperandMatcher(Matcher* operand_matcher, Mode mode = ANY); 71 | 72 | bool Match(const Expr& expr) const override; 73 | 74 | private: 75 | Matcher* matcher_; 76 | Mode mode_; 77 | }; 78 | 79 | // ChainMatcher applies many matchers in an ordered fashion. The 80 | // short-circuiting is dictated by the mode. 81 | class ChainMatcher final : public Matcher { 82 | public: 83 | // Indicate if all or at-least one of the matchers must match. 84 | enum Mode { 85 | // Any (at least one), operand must match 86 | ANY, 87 | // All operand must match 88 | ALL, 89 | }; 90 | 91 | explicit ChainMatcher(const std::vector& matchers, Mode mode = ALL); 92 | ChainMatcher(const std::initializer_list& matchers, Mode mode = ALL); 93 | 94 | bool Match(const Expr& expr) const override; 95 | 96 | private: 97 | std::vector matchers_; 98 | Mode mode_; 99 | }; 100 | 101 | } // namespace query 102 | } // namespace jitmap 103 | -------------------------------------------------------------------------------- /include/jitmap/query/optimizer.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 RStudio, Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #pragma once 16 | 17 | #include 18 | 19 | #include 20 | 21 | namespace jitmap { 22 | namespace query { 23 | 24 | class Expr; 25 | class ExprBuilder; 26 | 27 | class OptimizationPass { 28 | public: 29 | // Indicate that no optimization was done, see `Rewrite`. 30 | static constexpr Expr* kNoOptimizationPerformed = nullptr; 31 | 32 | OptimizationPass(Matcher* matcher, ExprBuilder* builder) 33 | : matcher_(matcher), builder_(builder) {} 34 | 35 | bool Match(const Expr& expr) const { return matcher_->Match(expr); } 36 | 37 | // Rewrite an expression into an (hopefully) optimized expression. 38 | // 39 | // \param[in] expr Expression to simplify. 40 | // 41 | // \return The optimized expression or the `kNoOptimizationPerformed` token 42 | // in case of failure. 43 | virtual Expr* Rewrite(const Expr& expr) = 0; 44 | 45 | // Evaluate the pass on an expression. 46 | // 47 | // This is syntactic sugar for `(opt->Match(e)) ? opt->Rewrite(e) : e` 48 | // 49 | // \param[in] expr Expression to simplify 50 | // 51 | // \return The optimized expression if possible or the original 52 | // expression in the case where the expression doesn't pass the 53 | // matcher or the rewrite step failed. 54 | Expr* operator()(Expr* expr) { 55 | if (!Match(*expr)) return expr; 56 | auto simplified = Rewrite(*expr); 57 | return (simplified != kNoOptimizationPerformed) ? 
simplified : expr; 58 | } 59 | 60 | virtual ~OptimizationPass() = default; 61 | 62 | protected: 63 | Matcher* matcher_; 64 | ExprBuilder* builder_; 65 | }; 66 | 67 | // Not(0) -> 1 68 | // Not(1) -> 0 69 | // And(0, e) -> 0 70 | // And(1, e) -> e 71 | // Or(0, e) -> e 72 | // Or(1, e) -> 1 73 | // Xor(0, e) -> e 74 | // Xor(1, e) -> Not(e) 75 | class ConstantFolding final : public OptimizationPass { 76 | public: 77 | explicit ConstantFolding(ExprBuilder* builder); 78 | 79 | Expr* Rewrite(const Expr& expr) override; 80 | }; 81 | 82 | // And(e, e) -> e 83 | // Or(e, e) -> e 84 | // Xor(e, e) -> 0 85 | class SameOperandFolding final : public OptimizationPass { 86 | public: 87 | explicit SameOperandFolding(ExprBuilder* builder); 88 | 89 | Expr* Rewrite(const Expr& expr) override; 90 | }; 91 | 92 | // Not(Not(Not...(e)...))) -> e or Not(e) depending on occurence 93 | class NotChainFolding final : public OptimizationPass { 94 | public: 95 | explicit NotChainFolding(ExprBuilder* builder); 96 | 97 | Expr* Rewrite(const Expr& expr) override; 98 | }; 99 | 100 | struct OptimizerOptions { 101 | enum EnabledOptimizations : uint64_t { 102 | CONSTANT_FOLDING = 1U << 1, 103 | SAME_OPERAND_FOLDING = 1U << 2, 104 | NOT_CHAIN_FOLDING = 1U << 3, 105 | }; 106 | 107 | bool HasOptimization(enum EnabledOptimizations optimization) { 108 | return (enabled_optimizations & optimization) != 0; 109 | } 110 | 111 | static constexpr uint64_t kDefaultOptimizations = 112 | CONSTANT_FOLDING | SAME_OPERAND_FOLDING | NOT_CHAIN_FOLDING; 113 | 114 | uint64_t enabled_optimizations = kDefaultOptimizations; 115 | }; 116 | 117 | class Optimizer { 118 | public: 119 | explicit Optimizer(ExprBuilder* builder, OptimizerOptions options = {}); 120 | 121 | const OptimizerOptions& options() const { return options_; } 122 | 123 | Expr* Optimize(const Expr& input); 124 | 125 | private: 126 | ExprBuilder* builder_; 127 | OptimizerOptions options_; 128 | 129 | std::optional constant_folding_; 130 | std::optional 
same_operand_folding_; 131 | std::optional not_chain_folding_; 132 | }; 133 | 134 | } // namespace query 135 | } // namespace jitmap 136 | -------------------------------------------------------------------------------- /include/jitmap/query/parser.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | namespace jitmap { 9 | namespace query { 10 | 11 | class ParserException : public Exception { 12 | public: 13 | using Exception::Exception; 14 | }; 15 | 16 | class Expr; 17 | class ExprBuilder; 18 | 19 | // \brief Parse the query as an expression. 20 | // 21 | // \param[in] query, the query string to parse. 22 | // \param[in] builder, the expression builder used to create new expressions. 23 | // 24 | // \return The parsed expression, owned by the builder. 25 | // 26 | // \throws ParserException with a reason why the parsing failed. 27 | Expr* Parse(std::string_view query, ExprBuilder* builder); 28 | 29 | } // namespace query 30 | } // namespace jitmap 31 | -------------------------------------------------------------------------------- /include/jitmap/query/query.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 RStudio, Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #pragma once 16 | 17 | #include 18 | #include 19 | #include 20 | 21 | #include 22 | 23 | namespace jitmap { 24 | namespace query { 25 | 26 | class Expr; 27 | class ExecutionContext; 28 | class EvaluationContext; 29 | 30 | class QueryImpl; 31 | 32 | constexpr int64_t kUnknownPopCount = -1; 33 | 34 | class Query : util::Pimpl { 35 | public: 36 | // Create a new query object based on an expression. 37 | // 38 | // \param[in] name, the name of the query, see further note for restriction. 39 | // \param[in] expr, the expression of the query. 40 | // \param[in] context, the context where queries are compiled. 41 | // 42 | // \return a new query object 43 | // 44 | // \throws ParserException if the expression is not valid, CompilerException 45 | // if any failure was encountered while compiling the expression or if the 46 | // query name is not valid. 47 | // 48 | // A query expression is compiled into a native function. The function is 49 | // loaded in the process executable memory under a unique symbol name. This 50 | // symbol is constructed partly from the query name. Thus, the query name must 51 | // start with an alpha-numeric character and the remaining characters must be 52 | // alpha-numeric or an underscore. 53 | static std::shared_ptr Make(const std::string& name, const std::string& query, 54 | ExecutionContext* context); 55 | 56 | // Evaluate the expression on dense bitmaps. 57 | // 58 | // \param[in] ctx, evaluation context, see `EvaluationContext`. 59 | // \param[in] ins, pointers to input bitmaps, see further note on ordering. 60 | // \param[out] out, pointer where the resulting bitmap will be written to, 61 | // must not be nullptr. 62 | // \return kUnknownPopCount if popcount is not computed, else the popcount of 63 | // the resulting bitmap. 64 | // 65 | // \throws Exception if any of the inputs/output pointers are nullptr. 66 | // 67 | // All the bitmaps must have allocated of the proper size, i.e. 68 | // `kBytesPerContainer`. 
The 69 | // 70 | // The query expression references bitmaps by name, e.g. the expression 71 | // `a & (b ^ !c) & (!a ^ c)` depends on the `a`, `b`, and `c` bitmaps. Since 72 | // the expression is a tree structure, there is multiple ways to order the 73 | // bitmaps. For this reason, the `Query::variables` method indicates the 74 | // order in which bitmaps are expected by the `Query::Eval` method. The 75 | // following pseudo-code shows how to do this. 76 | // 77 | // ``` 78 | // Bitmap a, b, c, output; 79 | // auto order = query->variables(); 80 | // auto ordered_bitmaps = ReorderInputs({"a": a, "b": b, "c": c}, order); 81 | // query->Eval(ordered_bitmaps, output); 82 | // ``` 83 | int32_t Eval(const EvaluationContext& ctx, std::vector ins, char* out); 84 | int32_t Eval(std::vector ins, char* out); 85 | 86 | int32_t EvalUnsafe(const EvaluationContext& ctx, std::vector& ins, 87 | char* out); 88 | 89 | // Return the referenced variables and the expected order.o 90 | // 91 | // The ordering is fix once a Query object is constructed, i.e. the ordering 92 | // is fixed for a given instance of a Query. If the query is deleted and then 93 | // reconstructed with the same expression, there is not guarantee. 94 | const std::vector& variables() const; 95 | 96 | // Return the name of the query. 97 | const std::string& name() const; 98 | 99 | // Return the expression of the query. 100 | const Expr& expr() const; 101 | 102 | private: 103 | // Private constructor, see Query::Make. 
104 | Query(std::string name, std::string query, ExecutionContext* context); 105 | }; 106 | 107 | class JitEngine; 108 | 109 | class ExecutionContext { 110 | public: 111 | explicit ExecutionContext(std::shared_ptr jit) : jit_(std::move(jit)) {} 112 | 113 | JitEngine* jit() { return jit_.get(); } 114 | 115 | private: 116 | std::shared_ptr jit_; 117 | }; 118 | 119 | class EvaluationContext { 120 | public: 121 | // The MissingPolicy indicates how `Eval` behave when one or more of the 122 | // input pointers are nullptr. This is syntactic sugar to allow passing a 123 | // sparse array of bitmaps, e.g. in the case of roaring bitmap when some of 124 | // the partitions don't have containers. 125 | enum MissingPolicy { 126 | // Abort the computation and throw an exception. 127 | ERROR = 0, 128 | // Replace all missing bitmaps pointer with a pointer pointing to an empty 129 | // bitmap. 130 | REPLACE_WITH_EMPTY, 131 | // Replace all missing bitmaps pointer with a pointer pointing to a full 132 | // bitmap. 133 | REPLACE_WITH_FULL, 134 | }; 135 | 136 | MissingPolicy missing_policy() const { return missing_policy_; } 137 | void set_missing_policy(MissingPolicy policy) { missing_policy_ = policy; } 138 | 139 | bool popcount() const { return popcount_; } 140 | void set_popcount(bool popcount) { popcount_ = popcount; } 141 | 142 | private: 143 | MissingPolicy missing_policy_ = ERROR; 144 | bool popcount_ = false; 145 | }; 146 | 147 | } // namespace query 148 | } // namespace jitmap 149 | -------------------------------------------------------------------------------- /include/jitmap/query/type_fwd.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 RStudio, Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

// Forward declarations of the expression classes.

namespace jitmap {
namespace query {

// Base class and factory
class Expr;
class ExprBuilder;

// Literals
class LiteralExpr;
class EmptyBitmapExpr;
class FullBitmapExpr;

// Variables
class VariableExpr;

// Operators
class OpExpr;

// Unary operators
class UnaryOpExpr;
class NotOpExpr;

// Binary operators
class BinaryOpExpr;
class AndOpExpr;
class OrOpExpr;
class XorOpExpr;

}  // namespace query
}  // namespace jitmap

// --------------------------------------------------------------------------------
// /include/jitmap/query/type_traits.h:
// --------------------------------------------------------------------------------
// Copyright 2020 RStudio, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14 | 15 | #pragma once 16 | 17 | #include 18 | 19 | #include 20 | 21 | namespace jitmap { 22 | namespace query { 23 | 24 | template 25 | using is_literal = std::is_base_of; 26 | 27 | template 28 | using enable_if_literal = std::enable_if_t::value, R>; 29 | 30 | template 31 | using is_variable = std::is_base_of; 32 | 33 | template 34 | using enable_if_variable = std::enable_if_t::value, R>; 35 | 36 | template 37 | using is_op = std::is_base_of; 38 | 39 | template 40 | using enable_if_op = std::enable_if_t::value, R>; 41 | 42 | template 43 | using is_unary_op = std::is_base_of; 44 | 45 | template 46 | using enable_if_unary_op = std::enable_if_t::value, R>; 47 | 48 | template 49 | using is_not_op = std::is_same; 50 | 51 | template 52 | using enable_if_not_op = std::enable_if_t::value, R>; 53 | 54 | template 55 | using is_binary_op = std::is_base_of; 56 | 57 | template 58 | using enable_if_binary_op = std::enable_if_t::value, R>; 59 | 60 | template 61 | using is_and_op = std::is_same; 62 | 63 | template 64 | using enable_if_and_op = std::enable_if_t::value, R>; 65 | 66 | template 67 | using is_or_op = std::is_same; 68 | 69 | template 70 | using enable_if_or_op = std::enable_if_t::value, R>; 71 | 72 | template 73 | using is_xor_op = std::is_same; 74 | 75 | template 76 | using enable_if_xor_op = std::enable_if_t::value, R>; 77 | 78 | } // namespace query 79 | } // namespace jitmap 80 | -------------------------------------------------------------------------------- /include/jitmap/size.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 RStudio, Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #pragma once 16 | 17 | #include 18 | #include 19 | #include 20 | 21 | namespace jitmap { 22 | 23 | // The size of a cacheline. 24 | constexpr size_t kCacheLineSize = 64ULL; 25 | // The log of the number of bits per container. 26 | constexpr size_t kLogBitsPerContainer = 16ULL; 27 | // The number of bits per container. 28 | constexpr size_t kBitsPerContainer = 1ULL << kLogBitsPerContainer; 29 | // The number of bytes per container. 30 | constexpr size_t kBytesPerContainer = kBitsPerContainer / CHAR_BIT; 31 | 32 | } // namespace jitmap 33 | -------------------------------------------------------------------------------- /include/jitmap/tiny.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 RStudio, Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #pragma once 16 | 17 | #include 18 | #include 19 | 20 | namespace jitmap { 21 | 22 | constexpr size_t kBitsInTiny = 64U; 23 | using TinyBitmap = std::bitset; 24 | 25 | } // namespace jitmap 26 | -------------------------------------------------------------------------------- /include/jitmap/util/aligned.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 RStudio, Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #pragma once 16 | 17 | #include 18 | 19 | #include 20 | 21 | namespace jitmap { 22 | 23 | template 24 | struct alignas(Alignment) aligned_array : public std::array { 25 | // Change the default constructor to zero initialize the data storage. 26 | aligned_array() { 27 | T zero{}; 28 | std::fill(this->begin(), this->end(), zero); 29 | } 30 | 31 | explicit aligned_array(T val) { 32 | std::fill(this->begin(), this->end(), val); 33 | } 34 | }; 35 | 36 | } // namespace jitmap 37 | -------------------------------------------------------------------------------- /include/jitmap/util/compiler.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 RStudio, Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | #pragma once 15 | 16 | #define JITMAP_UNUSED(e) (void)(e); 17 | -------------------------------------------------------------------------------- /include/jitmap/util/exception.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 RStudio, Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #pragma once 16 | 17 | #include 18 | 19 | #include 20 | 21 | namespace jitmap { 22 | 23 | class Exception { 24 | public: 25 | explicit Exception(std::string message) : message_(std::move(message)) {} 26 | 27 | template 28 | Exception(Args&&... args) : Exception(util::StaticFmt(std::forward(args)...)) {} 29 | 30 | const std::string& message() const { return message_; } 31 | 32 | protected: 33 | std::string message_; 34 | }; 35 | 36 | #define JITMAP_PRE_IMPL_(expr, ...) 
\ 37 | do { \ 38 | if (!(expr)) { \ 39 | throw Exception(__VA_ARGS__); \ 40 | } \ 41 | } while (false) 42 | 43 | #define JITMAP_PRE(expr) JITMAP_PRE_IMPL_(expr, "Precondition ", #expr, " not satisfied") 44 | // Arguments are parenthesized so operators inside them keep their own precedence. 45 | #define JITMAP_PRE_EQ(left, right) JITMAP_PRE((left) == (right)) 46 | #define JITMAP_PRE_NE(left, right) JITMAP_PRE((left) != (right)) 47 | 48 | } // namespace jitmap 49 | -------------------------------------------------------------------------------- /include/jitmap/util/fmt.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 RStudio, Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #pragma once 16 | 17 | #include 18 | 19 | namespace jitmap { 20 | namespace util { 21 | 22 | using stream_type = std::stringstream; 23 | 24 | template 25 | void StaticFmt(stream_type& stream, H&& head) { 26 | stream << head; 27 | } 28 | 29 | template 30 | void StaticFmt(stream_type& stream, H&& head, T&&... tail) { 31 | StaticFmt(stream, std::forward(head)); 32 | StaticFmt(stream, std::forward(tail)...); 33 | } 34 | 35 | template 36 | std::string StaticFmt(Args&&... 
args) { 37 | stream_type stream; 38 | StaticFmt(stream, std::forward(args)...); 39 | return stream.str(); 40 | } 41 | 42 | } // namespace util 43 | } // namespace jitmap 44 | -------------------------------------------------------------------------------- /include/jitmap/util/pimpl.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 RStudio, Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #pragma once 16 | 17 | #include 18 | 19 | namespace jitmap { 20 | namespace util { 21 | 22 | template 23 | class Pimpl { 24 | public: 25 | explicit Pimpl(std::unique_ptr impl) : impl_(std::move(impl)) {} 26 | ~Pimpl() {} 27 | 28 | protected: 29 | ImplType& impl() { return *impl_; } 30 | const ImplType& impl() const { return *impl_; } 31 | 32 | Pimpl(Pimpl&&) = default; 33 | Pimpl& operator=(Pimpl&&) = default; 34 | 35 | // Copying is disabled: the implementation is held through a unique_ptr. 36 | Pimpl(const Pimpl&) = delete; 37 | Pimpl& operator=(const Pimpl&) = delete; 38 | 39 | private: 40 | std::unique_ptr impl_; 41 | }; 42 | 43 | } // namespace util 44 | } // namespace jitmap 45 | -------------------------------------------------------------------------------- /include/jitmap/util/platform.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 RStudio, Inc. All rights reserved. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #pragma once 16 | 17 | #include 18 | 19 | namespace jitmap { 20 | 21 | #ifndef CACHELINE 22 | #define CACHELINE 64 23 | #endif 24 | 25 | constexpr size_t kCacheLineSize = CACHELINE; 26 | 27 | enum PlatformIntrinsic { 28 | X86_SSE, 29 | X86_AVX, 30 | X86_AVX2, 31 | X86_AVX512, 32 | }; 33 | 34 | // Returns the alignment associated with the given platform intrinsic set. 35 | constexpr size_t RequiredAlignment(PlatformIntrinsic platform) { 36 | switch (platform) { 37 | case X86_SSE: 38 | return 16; 39 | case X86_AVX: 40 | return 16; 41 | case X86_AVX2: 42 | return 32; 43 | case X86_AVX512: 44 | return 64; 45 | } 46 | // Only reached for a value outside the enumerators above: fall back to the 47 | // strictest alignment listed so every path returns a value. 48 | return 64; 49 | } 50 | 51 | } // namespace jitmap 52 | -------------------------------------------------------------------------------- /src/jitmap/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright 2020 RStudio, Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | set(SOURCES 16 | query/compiler.cc 17 | query/expr.cc 18 | query/matcher.cc 19 | query/optimizer.cc 20 | query/parser.cc 21 | query/query.cc 22 | ) 23 | 24 | add_library(jitmap ${SOURCES}) 25 | target_include_directories(jitmap PUBLIC 26 | $ 27 | $ 28 | ) 29 | target_compile_options(jitmap PRIVATE ${CXX_WARNING_FLAGS}) 30 | target_include_directories(jitmap PUBLIC ${LLVM_INCLUDE_DIRS}) 31 | 32 | # LLVM components required by the JIT engine; linked PUBLIC so that targets linking jitmap also link them. 33 | set(LLVM_CORE_COMPONENTS support core irreader orcjit vectorize) 34 | set(LLVM_NATIVE_JIT_COMPONENTS x86codegen x86asmparser x86disassembler x86desc x86info x86utils perfjitevents) 35 | 36 | llvm_map_components_to_libnames(LLVM_LIBRARIES ${LLVM_CORE_COMPONENTS} ${LLVM_NATIVE_JIT_COMPONENTS}) 37 | target_link_libraries(jitmap PUBLIC ${LLVM_LIBRARIES}) 38 | -------------------------------------------------------------------------------- /src/jitmap/jitmap.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2020 RStudio, Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #include 16 | 17 | namespace jitmap {} 18 | -------------------------------------------------------------------------------- /src/jitmap/query/codegen.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 RStudio, Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #pragma once 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | #include "jitmap/query/compiler.h" 31 | #include "jitmap/query/expr.h" 32 | #include "jitmap/query/query.h" 33 | #include "jitmap/query/type_traits.h" 34 | 35 | namespace jitmap { 36 | namespace query { 37 | 38 | // Generate the hot section of the loop. Takes an expression and reduce it to a 39 | // single (scalar or vector) value. 
40 | struct ExprCodeGenVisitor { 41 | public: 42 | std::unordered_map& bitmaps; 43 | llvm::IRBuilder<>& builder; 44 | llvm::Type* vector_type; 45 | 46 | llvm::Value* operator()(const VariableExpr* e) { return FindBitmapByName(e->value()); } 47 | 48 | llvm::Value* operator()(const EmptyBitmapExpr*) { 49 | return llvm::ConstantInt::get(vector_type, 0UL); 50 | } 51 | 52 | llvm::Value* operator()(const FullBitmapExpr*) { 53 | return llvm::Constant::getAllOnesValue(vector_type); // every bit set, independent of the element width 54 | } 55 | 56 | llvm::Value* operator()(const NotOpExpr* e) { 57 | auto operand = e->operand()->Visit(*this); 58 | return builder.CreateNot(operand); 59 | } 60 | 61 | llvm::Value* operator()(const AndOpExpr* e) { 62 | auto [lhs, rhs] = VisitBinary(e); 63 | return builder.CreateAnd(lhs, rhs); 64 | } 65 | 66 | llvm::Value* operator()(const OrOpExpr* e) { 67 | auto [lhs, rhs] = VisitBinary(e); 68 | return builder.CreateOr(lhs, rhs); 69 | } 70 | 71 | llvm::Value* operator()(const XorOpExpr* e) { 72 | auto [lhs, rhs] = VisitBinary(e); 73 | return builder.CreateXor(lhs, rhs); 74 | } 75 | 76 | private: 77 | llvm::Value* FindBitmapByName(const std::string& name) { 78 | auto result = bitmaps.find(name); 79 | if (result == bitmaps.end()) 80 | throw CompilerException("Referenced bitmap '", name, "' not found."); 81 | return result->second; 82 | } 83 | 84 | std::pair VisitBinary(const BinaryOpExpr* e) { 85 | return {e->left_operand()->Visit(*this), e->right_operand()->Visit(*this)}; 86 | } 87 | }; 88 | 89 | class ExpressionCodeGen { 90 | public: 91 | explicit ExpressionCodeGen(const std::string& module_name) 92 | : ctx_(std::make_unique()), 93 | module_(std::make_unique(module_name, *ctx_)), 94 | builder_(*ctx_) {} 95 | 96 | ExpressionCodeGen& Compile(const std::string& name, const Expr& expression, 97 | bool with_popcount = true) { 98 | auto fn = FunctionDeclForQuery(name, expression, with_popcount); 99 | FunctionCodeGen(expression, with_popcount, fn); 100 | return *this; 101 | } 102 | 103 | using 
ContextAndModule = 104 | std::pair, std::unique_ptr>; 105 | 106 | ContextAndModule Finish() { return {std::move(ctx_), std::move(module_)}; } 107 | 108 | private: 109 | void FunctionCodeGen(const Expr& expression, bool with_popcount, llvm::Function* fn) { 110 | auto entry_block = llvm::BasicBlock::Create(*ctx_, "entry", fn); 111 | builder_.SetInsertPoint(entry_block); 112 | 113 | auto variables = expression.Variables(); 114 | // Load bitmaps addresses 115 | auto [inputs, output] = UnrollInputsOutput(variables.size(), fn); 116 | 117 | // Constants 118 | auto i64 = llvm::Type::getInt64Ty(*ctx_); 119 | auto zero = llvm::ConstantInt::get(i64, 0); 120 | auto step = llvm::ConstantInt::get(i64, 1); 121 | auto n_words = llvm::ConstantInt::get(i64, words()); 122 | 123 | auto loop_block = llvm::BasicBlock::Create(*ctx_, "loop", fn); 124 | auto after_block = llvm::BasicBlock::Create(*ctx_, "after_loop", fn); 125 | 126 | llvm::PHINode* acc = nullptr; 127 | llvm::Value* next_acc = nullptr; 128 | 129 | builder_.CreateBr(loop_block); 130 | builder_.SetInsertPoint(loop_block); 131 | 132 | // The following block is equivalent to 133 | // for (int i = 0; i < n_words ; i += step) { 134 | // LoopBodyCodeGen(fn, i) 135 | // } 136 | { 137 | // Define the `i` induction variable and initialize it to zero. 138 | auto i = builder_.CreatePHI(i64, 2, "i"); 139 | i->addIncoming(zero, entry_block); 140 | 141 | if (with_popcount) { 142 | // Initialize an accumulator for popcount. 
143 | auto zero_elem = llvm::ConstantInt::get(ElementType(), 0); 144 | auto zero_vec = llvm::ConstantVector::getSplat(vector_width(), zero_elem); 145 | acc = builder_.CreatePHI(VectorType(), 2, "acc"); 146 | acc->addIncoming(zero_vec, entry_block); 147 | } 148 | 149 | auto result = LoopBodyCodeGen(expression, variables, inputs, output, i); 150 | 151 | if (with_popcount) { 152 | auto popcnt = PopCount(result); 153 | next_acc = builder_.CreateAdd(acc, popcnt, "next_acc"); 154 | acc->addIncoming(next_acc, loop_block); 155 | } 156 | 157 | // i += step 158 | auto next_i = builder_.CreateAdd(i, step, "next_i"); 159 | 160 | // if (`i` == n_words) break; 161 | auto exit_cond = builder_.CreateICmpEQ(next_i, n_words, "exit_cond"); 162 | builder_.CreateCondBr(exit_cond, after_block, loop_block); 163 | i->addIncoming(next_i, loop_block); 164 | } 165 | 166 | builder_.SetInsertPoint(after_block); 167 | // Return the horizontal sum of the vector accumulator. 168 | if (with_popcount) { 169 | builder_.CreateRet(ReduceAdd(next_acc)); 170 | } else { 171 | builder_.CreateRetVoid(); 172 | } 173 | } 174 | 175 | llvm::Value* PopCount(llvm::Value* val) { 176 | // See https://reviews.llvm.org/D10084 177 | constexpr auto ctpop = llvm::Intrinsic::ctpop; 178 | return builder_.CreateUnaryIntrinsic(ctpop, val, nullptr, "popcnt"); 179 | } 180 | 181 | llvm::Value* ReduceAdd(llvm::Value* val) { 182 | constexpr auto horizontal_add = llvm::Intrinsic::experimental_vector_reduce_add; 183 | return builder_.CreateUnaryIntrinsic(horizontal_add, val, nullptr, "hsum"); 184 | } 185 | 186 | std::pair, llvm::Value*> UnrollInputsOutput( 187 | size_t n_bitmaps, llvm::Function* fn) { 188 | auto args_it = fn->args().begin(); 189 | auto inputs_ptr = args_it++; 190 | auto output_ptr = args_it++; 191 | 192 | // Load scalar at index for given bitmap 193 | auto load_bitmap = [&](size_t i) { 194 | auto namify = [&i](std::string key) { return key + "_" + std::to_string(i); }; 195 | auto bitmap_i = 
llvm::ConstantInt::get(llvm::Type::getInt64Ty(*ctx_), i); 196 | auto gep = builder_.CreateInBoundsGEP(inputs_ptr, bitmap_i, namify("bitmap_gep")); 197 | auto addr = builder_.CreateLoad(gep, namify("bitmap")); 198 | return builder_.CreatePointerCast(addr, VectorPtrType(), namify("bitmap_vec")); 199 | }; 200 | 201 | std::vector inputs; 202 | for (size_t i = 0; i < n_bitmaps; i++) { 203 | inputs.push_back(load_bitmap(i)); 204 | } 205 | 206 | auto output_vec_ptr = 207 | builder_.CreatePointerCast(output_ptr, VectorPtrType(), "output_vec"); 208 | return {inputs, output_vec_ptr}; 209 | } 210 | 211 | llvm::Value* LoopBodyCodeGen(const Expr& expression, 212 | const std::vector& variables, 213 | std::vector inputs, llvm::Value* output, 214 | llvm::Value* loop_idx) { 215 | // Load scalar at index for given bitmap 216 | auto load_vector_inst = [&](auto bitmap_addr, size_t i) { 217 | auto namify = [&i](std::string key) { return key + "_" + std::to_string(i); }; 218 | // Compute the address to load 219 | auto gep = builder_.CreateInBoundsGEP(bitmap_addr, {loop_idx}, namify("gep")); 220 | // Load in a register 221 | return builder_.CreateLoad(gep, namify("load")); 222 | }; 223 | 224 | // Bind the variable bitmaps by name to inputs of the function 225 | const auto& parameters = variables; 226 | std::unordered_map keyed_bitmaps; 227 | for (size_t i = 0; i < inputs.size(); i++) { 228 | keyed_bitmaps.emplace(parameters[i], load_vector_inst(inputs[i], i)); 229 | } 230 | 231 | // Execute the expression tree on the input 232 | ExprCodeGenVisitor visitor{keyed_bitmaps, builder_, VectorType()}; 233 | auto result = expression.Visit(visitor); 234 | 235 | // Store the result in the output bitmap. 
236 | auto gep = builder_.CreateInBoundsGEP(output, {loop_idx}, "gep_output"); 237 | auto bitcast = builder_.CreateBitCast(gep, VectorPtrType(), "bitcast_output"); 238 | builder_.CreateStore(result, bitcast); 239 | 240 | return result; 241 | } 242 | 243 | llvm::FunctionType* FunctionTypeForArguments(bool with_popcount) { 244 | // void 245 | auto return_type = with_popcount ? ElementType() : llvm::Type::getVoidTy(*ctx_); 246 | auto i8 = llvm::Type::getInt8Ty(*ctx_); 247 | auto i8_ptr = i8->getPointerTo(); 248 | // dense_fn( 249 | // const int8_t** inputs, 250 | auto inputs_type = i8_ptr->getPointerTo(); 251 | // int8_t* output, 252 | auto output_type = i8_ptr; 253 | // ) 254 | 255 | constexpr bool is_var_args = false; 256 | return llvm::FunctionType::get(return_type, {inputs_type, output_type}, is_var_args); 257 | } 258 | 259 | llvm::Function* FunctionDeclForQuery(const std::string& name, const Expr& expression, 260 | bool with_popcount) { 261 | auto fn_type = FunctionTypeForArguments(with_popcount); 262 | // The generated function will be exposed as an external symbol, i.e the 263 | // symbol will be globally visible. This would be equivalent to defining a 264 | // symbol with the `extern` storage classifier. 265 | auto linkage = llvm::Function::ExternalLinkage; 266 | auto fn = llvm::Function::Create(fn_type, linkage, name, *module_); 267 | 268 | // The generated objects are accessed by taking the symbol address and 269 | // casting to a function type. Thus, we must use the C calling convention. 270 | fn->setCallingConv(llvm::CallingConv::C); 271 | // The function will only access memory pointed by the parameter pointers. 272 | fn->addFnAttr(llvm::Attribute::ArgMemOnly); 273 | 274 | auto args_it = fn->args().begin(); 275 | auto inputs_ptr = args_it++; 276 | auto output = args_it++; 277 | 278 | inputs_ptr->setName("inputs"); 279 | // The NoCapture attribute indicates that the bitmap pointer 280 | // will not be captured (leak outside the function). 
281 | inputs_ptr->addAttr(llvm::Attribute::NoCapture); 282 | inputs_ptr->addAttr(llvm::Attribute::ReadOnly); 283 | 284 | output->setName("output"); 285 | output->addAttr(llvm::Attribute::NoCapture); 286 | 287 | return fn; 288 | } 289 | 290 | llvm::Type* ElementType() { return llvm::Type::getIntNTy(*ctx_, scalar_width()); } 291 | 292 | llvm::VectorType* VectorType() { 293 | return llvm::VectorType::get(ElementType(), vector_width()); 294 | } 295 | llvm::Type* VectorPtrType() { return VectorType()->getPointerTo(); } 296 | 297 | uint32_t scalar_width() const { return 32; } 298 | uint32_t vector_width() const { return 16; } 299 | uint32_t unroll() const { return 4; } 300 | 301 | uint32_t words() const { return kBitsPerContainer / (scalar_width() * vector_width()); } 302 | 303 | std::unique_ptr ctx_; 304 | std::unique_ptr module_; 305 | llvm::IRBuilder<> builder_; 306 | }; 307 | 308 | } // namespace query 309 | } // namespace jitmap 310 | -------------------------------------------------------------------------------- /src/jitmap/query/compiler.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2020 RStudio, Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #include 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include "llvm/Analysis/BasicAliasAnalysis.h" 38 | #include "llvm/ExecutionEngine/Orc/ObjectTransformLayer.h" 39 | #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" 40 | #include "llvm/ExecutionEngine/SectionMemoryManager.h" 41 | #include "llvm/Transforms/InstCombine/InstCombine.h" 42 | #include "llvm/Transforms/Scalar.h" 43 | #include "llvm/Transforms/Scalar/GVN.h" 44 | 45 | #include "codegen.h" 46 | #include "jitmap/query/compiler.h" 47 | #include "jitmap/util/compiler.h" 48 | 49 | namespace orc = llvm::orc; 50 | 51 | namespace jitmap { 52 | namespace query { 53 | 54 | void RaiseOnFailure(llvm::Error error) { 55 | if (error) { 56 | auto error_msg = llvm::toString(std::move(error)); 57 | throw CompilerException("LLVM error: ", error_msg); 58 | } 59 | } 60 | 61 | template 62 | T ExpectOrRaise(llvm::Expected&& expected) { 63 | if (!expected) { 64 | RaiseOnFailure(expected.takeError()); 65 | } 66 | 67 | return std::move(*expected); 68 | } 69 | 70 | llvm::CodeGenOpt::Level CodeGetOptFromNumber(uint8_t level) { 71 | switch (level) { 72 | case 0: 73 | return llvm::CodeGenOpt::None; 74 | case 1: 75 | return llvm::CodeGenOpt::Less; 76 | case 2: 77 | return llvm::CodeGenOpt::Default; 78 | case 3: 79 | default: 80 | return llvm::CodeGenOpt::Aggressive; 81 | } 82 | } 83 | 84 | std::string DetectCPU(const CompilerOptions& opts) { 85 | if (opts.cpu.empty()) { 86 | return llvm::sys::getHostCPUName(); 87 | } 88 | 89 | return opts.cpu; 90 | } 91 | 92 | auto InitHostTargetMachineBuilder(const CompilerOptions& opts) { 93 | // Ensure LLVM registries are populated. 
94 | llvm::InitializeNativeTarget(); 95 | llvm::InitializeNativeTargetAsmPrinter(); 96 | 97 | auto machine_builder = ExpectOrRaise(orc::JITTargetMachineBuilder::detectHost()); 98 | machine_builder.setCodeGenOptLevel(CodeGetOptFromNumber(opts.optimization_level)); 99 | machine_builder.setCPU(DetectCPU(opts)); 100 | 101 | return machine_builder; 102 | } 103 | 104 | // Register a custom ObjectLinkerLayer to support (query) symbols with gdb and perf. 105 | // LLJIT (via ORC) doesn't support explicitly the llvm::JITEventListener 106 | // interface. This is the missing glue. 107 | std::unique_ptr ObjectLinkingLayerFactory( 108 | orc::ExecutionSession& execution_session) { 109 | auto memory_manager_factory = []() { 110 | return std::make_unique(); 111 | }; 112 | 113 | auto linking_layer = std::make_unique( 114 | execution_session, std::move(memory_manager_factory)); 115 | 116 | std::vector listeners{ 117 | llvm::JITEventListener::createGDBRegistrationListener(), 118 | llvm::JITEventListener::createPerfJITEventListener()}; 119 | 120 | // Lambda invoked whenever a new symbol is added. 
121 | auto notify_loaded = [listeners](orc::VModuleKey key, 122 | const llvm::object::ObjectFile& object, 123 | const llvm::RuntimeDyld::LoadedObjectInfo& info) { 124 | for (auto listener : listeners) { 125 | if (listener != nullptr) { 126 | listener->notifyObjectLoaded(key, object, info); 127 | } 128 | } 129 | }; 130 | 131 | linking_layer->setNotifyLoaded(notify_loaded); 132 | 133 | return linking_layer; 134 | } 135 | 136 | std::unique_ptr InitLLJIT(orc::JITTargetMachineBuilder machine_builder, 137 | llvm::DataLayout layout, 138 | const CompilerOptions& options) { 139 | return ExpectOrRaise(orc::LLJITBuilder() 140 | .setJITTargetMachineBuilder(machine_builder) 141 | .setObjectLinkingLayerCreator(ObjectLinkingLayerFactory) 142 | .create()); 143 | } 144 | 145 | auto AsThreadSafeModule(ExpressionCodeGen::ContextAndModule ctx_module) { 146 | auto [context, module] = std::move(ctx_module); 147 | return orc::ThreadSafeModule(std::move(module), std::move(context)); 148 | } 149 | 150 | class JitEngineImpl { 151 | public: 152 | JitEngineImpl(orc::JITTargetMachineBuilder machine_builder, const CompilerOptions& opts) 153 | : host_(ExpectOrRaise(machine_builder.createTargetMachine())), 154 | jit_(InitLLJIT(machine_builder, host_->createDataLayout(), opts)), 155 | user_queries_(jit_->createJITDylib("jitmap.user")), 156 | options_(opts) {} 157 | 158 | void Compile(const std::string& name, const Expr& expr) { 159 | auto thread_safe_module = AsThreadSafeModule(CompileInternal(name, expr)); 160 | Optimize(thread_safe_module.getModule()); 161 | RaiseOnFailure(jit_->addIRModule(user_queries_, std::move(thread_safe_module))); 162 | } 163 | 164 | std::string CompileIR(const std::string& n, const Expr& e) { 165 | auto ctx_module = CompileInternal(n, e); 166 | auto module = ctx_module.second.get(); 167 | // By default, the TargetTriple is not part of the module. This ensures that 168 | // callers of `jitmap-ir` don't need to specify the triple explicitly in the command 169 | // line chain, e.g. 
via `opt` or `llc` utility. 170 | module->setTargetTriple(GetTargetTriple()); 171 | Optimize(module); 172 | 173 | std::string ir; 174 | llvm::raw_string_ostream ss{ir}; 175 | module->print(ss, nullptr); 176 | ss.flush(); // the stream may buffer: push everything into `ir` before it is returned 177 | return ir; 178 | } 179 | 180 | DenseEvalFn LookupUserQuery(const std::string& name) { 181 | auto symbol = ExpectOrRaise(jit_->lookup(user_queries_, name)); 182 | return llvm::jitTargetAddressToPointer(symbol.getAddress()); 183 | } 184 | 185 | DenseEvalPopCountFn LookupUserPopCountQuery(const std::string& name) { 186 | auto symbol = ExpectOrRaise(jit_->lookup(user_queries_, query_popcount(name))); 187 | return llvm::jitTargetAddressToPointer(symbol.getAddress()); 188 | } 189 | 190 | // Introspection 191 | std::string GetTargetCPU() const { return host_->getTargetCPU(); } 192 | std::string GetTargetTriple() const { return host_->getTargetTriple().normalize(); } 193 | llvm::DataLayout layout() const { return host_->createDataLayout(); } 194 | 195 | private: 196 | std::string query_popcount(const std::string& query_name) { 197 | return query_name + "_popcount"; // symbol name of the popcount variant of a query 198 | } 199 | 200 | ExpressionCodeGen::ContextAndModule CompileInternal(const std::string& name, 201 | const Expr& e) { 202 | // Generate 2 variants for the expression, one function that returns the 203 | // popcount, and the other that doesn't tally the popcount and returns void. 204 | return ExpressionCodeGen("module_a") 205 | .Compile(name, e, false /* with_popcount */) 206 | .Compile(query_popcount(name), e, true /* with_popcount */) 207 | .Finish(); 208 | } 209 | 210 | llvm::Module* Optimize(llvm::Module* module) { 211 | unsigned opt_level = options_.optimization_level; 212 | // Don't optimize for size. 
213 | unsigned size_level = 0; 214 | 215 | llvm::PassManagerBuilder builder; 216 | builder.OptLevel = opt_level; 217 | builder.SizeLevel = size_level; 218 | 219 | builder.Inliner = llvm::createFunctionInliningPass(opt_level, size_level, false); 220 | builder.LoopVectorize = true; 221 | builder.SLPVectorize = true; 222 | builder.DisableUnrollLoops = false; 223 | host_->adjustPassManager(builder); 224 | 225 | auto cpu = host_->getTargetCPU(); 226 | for (auto& function : *module) { 227 | setFunctionAttributes("target-cpu", cpu, function); 228 | } 229 | 230 | llvm::legacy::FunctionPassManager fn_manager{module}; 231 | auto host_analysis = host_->getTargetIRAnalysis(); 232 | fn_manager.add(llvm::createTargetTransformInfoWrapperPass(host_analysis)); 233 | builder.populateFunctionPassManager(fn_manager); 234 | 235 | fn_manager.add( 236 | llvm::createLoopUnrollPass(2, /*OnlyWhenForced*/ false, false, -1, 4, 1)); 237 | 238 | fn_manager.doInitialization(); 239 | for (auto& function : *module) { 240 | fn_manager.run(function); 241 | } 242 | fn_manager.doFinalization(); 243 | 244 | llvm::legacy::PassManager mod_manager; 245 | auto& llvm_host = dynamic_cast(*host_); 246 | mod_manager.add(llvm_host.createPassConfig(mod_manager)); 247 | builder.populateModulePassManager(mod_manager); 248 | 249 | mod_manager.run(*module); 250 | 251 | return module; 252 | } 253 | 254 | void setFunctionAttributes(const std::string& attr, const std::string& val, 255 | llvm::Function& fn) { 256 | llvm::AttrBuilder new_attrs; 257 | if (!val.empty() && !fn.hasFnAttribute(attr)) { 258 | new_attrs.addAttribute(attr, val); 259 | } 260 | 261 | constexpr auto fn_index = llvm::AttributeList::FunctionIndex; 262 | auto attrs = fn.getAttributes(); 263 | fn.setAttributes(attrs.addAttributes(fn.getContext(), fn_index, new_attrs)); 264 | } 265 | 266 | private: 267 | std::unique_ptr host_; 268 | std::unique_ptr jit_; 269 | orc::JITDylib& user_queries_; 270 | CompilerOptions options_; 271 | }; 272 | 273 | 
JitEngine::JitEngine(CompilerOptions opts) 274 | : Pimpl(std::make_unique(InitHostTargetMachineBuilder(opts), opts)) {} 275 | 276 | std::shared_ptr JitEngine::Make(CompilerOptions opts) { 277 | return std::shared_ptr{new JitEngine(opts)}; 278 | } 279 | 280 | std::string JitEngine::GetTargetCPU() const { return impl().GetTargetCPU(); } 281 | std::string JitEngine::GetTargetTriple() const { return impl().GetTargetTriple(); } 282 | void JitEngine::Compile(const std::string& name, const Expr& expression) { 283 | impl().Compile(name, expression); 284 | } 285 | 286 | std::string JitEngine::CompileIR(const std::string& name, const Expr& expression) { 287 | return impl().CompileIR(name, expression); 288 | } 289 | 290 | DenseEvalFn JitEngine::LookupUserQuery(const std::string& query_name) { 291 | return impl().LookupUserQuery(query_name); 292 | } 293 | 294 | DenseEvalPopCountFn JitEngine::LookupUserPopCountQuery(const std::string& query_name) { 295 | return impl().LookupUserPopCountQuery(query_name); 296 | } 297 | 298 | } // namespace query 299 | } // namespace jitmap 300 | -------------------------------------------------------------------------------- /src/jitmap/query/expr.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2020 RStudio, Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #include "jitmap/query/matcher.h" 21 | #include "jitmap/query/type_traits.h" 22 | #include "jitmap/util/compiler.h" 23 | 24 | namespace jitmap { 25 | namespace query { 26 | 27 | bool Expr::IsLiteral() const { 28 | switch (type_) { 29 | case EMPTY_LITERAL: 30 | case FULL_LITERAL: 31 | return true; 32 | case VARIABLE: 33 | case NOT_OPERATOR: 34 | case AND_OPERATOR: 35 | case OR_OPERATOR: 36 | case XOR_OPERATOR: 37 | return false; 38 | } 39 | 40 | return false; 41 | } 42 | 43 | bool Expr::IsVariable() const { return type_ == VARIABLE; } 44 | 45 | bool Expr::IsOperator() const { 46 | switch (type_) { 47 | case EMPTY_LITERAL: 48 | case FULL_LITERAL: 49 | case VARIABLE: 50 | return false; 51 | case NOT_OPERATOR: 52 | case AND_OPERATOR: 53 | case OR_OPERATOR: 54 | case XOR_OPERATOR: 55 | return true; 56 | } 57 | 58 | return false; 59 | } 60 | 61 | bool Expr::IsUnaryOperator() const { return type_ == NOT_OPERATOR; } 62 | 63 | bool Expr::IsBinaryOperator() const { 64 | return type_ == AND_OPERATOR || type_ == OR_OPERATOR || type_ == XOR_OPERATOR; 65 | } 66 | 67 | bool Expr::operator==(const Expr& rhs) const { 68 | // Pointer shortcut: an expression always equals itself. 69 | if (this == &rhs) return true; 70 | return this->Visit([&](const auto* left) { 71 | if (type() != rhs.type()) return false; 72 | 73 | using E = std::decay_t>; 74 | const E* right = static_cast(&rhs); 75 | 76 | // GCC warns about unused `right`. This is due to the conditional constexpr 77 | // in the literal case. 
78 | JITMAP_UNUSED(right); 79 | 80 | if constexpr (is_literal::value) return true; 81 | if constexpr (is_variable::value) return left->value() == right->value(); 82 | if constexpr (is_unary_op::value) return *left->operand() == *right->operand(); 83 | 84 | if constexpr (is_binary_op::value) { 85 | return (*left->left_operand() == *right->left_operand()) && 86 | (*left->right_operand() == *right->right_operand()); 87 | } 88 | 89 | return false; 90 | }); 91 | } 92 | // Maps an expression type to its textual symbol (the empty string for variables). 93 | static const char* OpToChar(Expr::Type op) { 94 | switch (op) { 95 | case Expr::EMPTY_LITERAL: 96 | return "$0"; 97 | case Expr::FULL_LITERAL: 98 | return "$1"; 99 | case Expr::VARIABLE: 100 | return ""; 101 | case Expr::NOT_OPERATOR: 102 | return "!"; 103 | case Expr::AND_OPERATOR: 104 | return "&"; 105 | case Expr::OR_OPERATOR: 106 | return "|"; 107 | case Expr::XOR_OPERATOR: 108 | return "^"; 109 | } 110 | 111 | throw Exception("Unknown operator type: ", op); 112 | } 113 | 114 | std::string Expr::ToString() const { 115 | return Visit([&](const auto* e) -> std::string { 116 | using E = std::decay_t>; 117 | auto type = e->type(); 118 | auto symbol = OpToChar(type); 119 | 120 | std::stringstream ss; 121 | 122 | if constexpr (is_literal::value) { 123 | ss << symbol; 124 | } 125 | 126 | if constexpr (is_variable::value) { 127 | ss << symbol << e->value(); 128 | } 129 | 130 | if constexpr (is_unary_op::value) { 131 | ss << symbol << e->operand()->ToString(); 132 | } 133 | 134 | if constexpr (is_binary_op::value) { 135 | auto left = e->left_operand()->ToString(); 136 | auto right = e->right_operand()->ToString(); 137 | ss << "(" << left << " " << symbol << " " << right << ")"; 138 | } 139 | 140 | return ss.str(); 141 | }); 142 | } 143 | 144 | static void CollectVariables(const Expr* expr, 145 | std::unordered_set& unique_variables, 146 | std::vector& variables) { 147 | expr->Visit([&unique_variables, &variables](const auto* 
e) { 148 | using E = std::decay_t>; 149 | 150 | if constexpr (is_variable::value) { 151 | auto var = e->value(); 152 | if (unique_variables.insert(var).second) variables.emplace_back(var); 153 | } else if constexpr (is_unary_op::value) { 154 | CollectVariables(e->operand(), unique_variables, variables); 155 | } else if constexpr (is_binary_op::value) { 156 | CollectVariables(e->left_operand(), unique_variables, variables); 157 | CollectVariables(e->right_operand(), unique_variables, variables); 158 | } 159 | }); 160 | } 161 | 162 | std::vector Expr::Variables() const { 163 | std::unordered_set unique_variables; 164 | std::vector variables; 165 | CollectVariables(this, unique_variables, variables); 166 | return variables; 167 | } 168 | 169 | Expr* Expr::Copy(ExprBuilder* b) const { 170 | return Visit([b](const auto* e) -> Expr* { 171 | using E = std::decay_t>; 172 | if constexpr (is_variable::value) { 173 | return b->Var(e->value()); 174 | } else if constexpr (is_literal::value) { 175 | return (e->type() == FULL_LITERAL) ? 
b->FullBitmap() : b->EmptyBitmap(); 176 | } else if constexpr (is_not_op::value) { 177 | return b->Not(e->operand()->Copy(b)); 178 | } else if constexpr (is_and_op::value) { 179 | return b->And(e->left_operand()->Copy(b), e->right_operand()->Copy(b)); 180 | } else if constexpr (is_or_op::value) { 181 | return b->Or(e->left_operand()->Copy(b), e->right_operand()->Copy(b)); 182 | } else if constexpr (is_xor_op::value) { 183 | return b->Xor(e->left_operand()->Copy(b), e->right_operand()->Copy(b)); 184 | } 185 | 186 | return nullptr; 187 | }); 188 | } 189 | 190 | std::ostream& operator<<(std::ostream& os, const Expr& e) { return os << e.ToString(); } 191 | std::ostream& operator<<(std::ostream& os, Expr* e) { return os << *e; } 192 | 193 | } // namespace query 194 | } // namespace jitmap 195 | -------------------------------------------------------------------------------- /src/jitmap/query/matcher.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2020 RStudio, Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #include 16 | 17 | #include "jitmap/query/matcher.h" 18 | #include "jitmap/query/type_traits.h" 19 | 20 | namespace jitmap { 21 | namespace query { 22 | 23 | TypeMatcher::TypeMatcher(Expr::Type type) { mask_[type] = true; } 24 | 25 | TypeMatcher::TypeMatcher(const std::vector& types) { 26 | for (auto t : types) mask_[t] = true; 27 | } 28 | 29 | TypeMatcher::TypeMatcher(const std::initializer_list& types) { 30 | for (auto t : types) mask_[t] = true; 31 | } 32 | 33 | bool TypeMatcher::Match(const Expr& expr) const { return mask_[expr.type()]; } 34 | 35 | OperandMatcher::OperandMatcher(Matcher* matcher, Mode mode) 36 | : matcher_(matcher), mode_(mode) {} 37 | 38 | bool OperandMatcher::Match(const Expr& expr) const { 39 | return expr.Visit([&](const auto* e) { 40 | using E = std::decay_t>; 41 | 42 | auto mode = this->mode_; 43 | auto& matcher = *this->matcher_; 44 | 45 | if constexpr (is_not_op::value) { 46 | return matcher(e->operand()); 47 | } 48 | 49 | if constexpr (is_binary_op::value) { 50 | bool left = matcher(e->left_operand()); 51 | 52 | // Short-circuit 53 | if (left && mode == Mode::ANY) return true; 54 | if (!left && mode == Mode::ALL) return false; 55 | 56 | bool right = matcher(e->right_operand()); 57 | return (mode == Mode::ANY) ? 
left || right : left && right; 58 | } 59 | 60 | return false; 61 | }); 62 | } 63 | 64 | ChainMatcher::ChainMatcher(const std::vector& matchers, Mode mode) 65 | : matchers_(matchers), mode_(mode) {} 66 | 67 | ChainMatcher::ChainMatcher(const std::initializer_list& matchers, Mode mode) 68 | : matchers_(matchers), mode_(mode) {} 69 | 70 | bool ChainMatcher::Match(const Expr& expr) const { 71 | auto match = [&](const Matcher* m) { return m->Match(expr); }; 72 | 73 | switch (mode_) { 74 | case ALL: 75 | return std::all_of(matchers_.cbegin(), matchers_.cend(), match); 76 | case ANY: 77 | return std::any_of(matchers_.cbegin(), matchers_.cend(), match); 78 | } 79 | 80 | throw Exception("Unknown match type: ", mode_); 81 | } 82 | 83 | } // namespace query 84 | } // namespace jitmap 85 | -------------------------------------------------------------------------------- /src/jitmap/query/optimizer.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2020 RStudio, Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #include "jitmap/query/optimizer.h" 16 | 17 | #include "jitmap/query/expr.h" 18 | #include "jitmap/query/type_traits.h" 19 | 20 | namespace jitmap { 21 | namespace query { 22 | 23 | auto kConstantMatcher = TypeMatcher{Expr::EMPTY_LITERAL, Expr::FULL_LITERAL}; 24 | 25 | auto kConstantOperandMatcher = OperandMatcher{&kConstantMatcher}; 26 | 27 | ConstantFolding::ConstantFolding(ExprBuilder* builder) 28 | : OptimizationPass(&kConstantOperandMatcher, builder) {} 29 | 30 | Expr* ConstantFolding::Rewrite(const Expr& expr) { 31 | return expr.Visit([&](const auto* e) -> Expr* { 32 | using E = std::decay_t>; 33 | 34 | auto builder = this->builder_; 35 | 36 | // Not(0) -> 1 37 | // Not(1) -> 0 38 | if constexpr (is_not_op::value) { 39 | auto constant = e->operand(); 40 | auto type = constant->type(); 41 | 42 | if (type == Expr::EMPTY_LITERAL) return builder->FullBitmap(); 43 | if (type == Expr::FULL_LITERAL) return builder->EmptyBitmap(); 44 | } 45 | 46 | if constexpr (is_binary_op::value) { 47 | // Returns a pair where the first element is the constant operand. 
48 | auto unpack_const_expr = [](const BinaryOpExpr* expr) -> std::pair { 49 | auto left = expr->left_operand(); 50 | auto right = expr->right_operand(); 51 | if (left->IsLiteral()) return {left, right}; 52 | return {right, left}; 53 | }; 54 | 55 | auto [constant, e_] = unpack_const_expr(e); 56 | auto type = constant->type(); 57 | 58 | // And(0, e) -> 0 59 | // And(1, e) -> e 60 | if constexpr (is_and_op::value) { 61 | if (type == Expr::EMPTY_LITERAL) return builder->EmptyBitmap(); 62 | if (type == Expr::FULL_LITERAL) return e_; 63 | } 64 | 65 | // Or(0, e) -> e 66 | // Or(1, e) -> 1 67 | if constexpr (is_or_op::value) { 68 | if (type == Expr::EMPTY_LITERAL) return e_; 69 | if (type == Expr::FULL_LITERAL) return builder->FullBitmap(); 70 | } 71 | 72 | // Xor(0, e) -> e 73 | // Xor(1, e) -> Not(e) 74 | if constexpr (is_xor_op::value) { 75 | if (type == Expr::EMPTY_LITERAL) return e_; 76 | if (type == Expr::FULL_LITERAL) return builder->Not(e_); 77 | } 78 | } 79 | 80 | return kNoOptimizationPerformed; 81 | }); 82 | } 83 | 84 | class SameOperandMatcher final : public Matcher { 85 | public: 86 | bool Match(const Expr& expr) const { 87 | return expr.Visit([](const auto* e) { 88 | using E = std::decay_t>; 89 | if constexpr (is_binary_op::value) { 90 | return *e->left_operand() == *e->right_operand(); 91 | } 92 | return false; 93 | }); 94 | } 95 | } kSameOperandMatcher; 96 | 97 | SameOperandFolding::SameOperandFolding(ExprBuilder* builder) 98 | : OptimizationPass(&kSameOperandMatcher, builder) {} 99 | 100 | Expr* SameOperandFolding::Rewrite(const Expr& expr) { 101 | return expr.Visit([&](const auto* e) -> Expr* { 102 | using E = std::decay_t>; 103 | 104 | // And(e, e) -> e 105 | if constexpr (is_and_op::value) { 106 | return e->left_operand(); 107 | } 108 | 109 | // Or(e, e) -> e 110 | if constexpr (is_or_op::value) { 111 | return e->left_operand(); 112 | } 113 | 114 | // Xor(e, e) -> 0 115 | if constexpr (is_xor_op::value) { 116 | return this->builder_->EmptyBitmap(); 
117 | } 118 | 119 | return kNoOptimizationPerformed; 120 | }); 121 | } 122 | 123 | TypeMatcher kNotMatcher{Expr::NOT_OPERATOR}; 124 | OperandMatcher kNotOperandMatcher{&kNotMatcher}; 125 | ChainMatcher kNotNotOperandMatcher{&kNotMatcher, &kNotOperandMatcher}; 126 | 127 | NotChainFolding::NotChainFolding(ExprBuilder* builder) 128 | : OptimizationPass(&kNotNotOperandMatcher, builder) {} 129 | 130 | Expr* NotChainFolding::Rewrite(const Expr& expr) { 131 | return expr.Visit([&](const auto* e) -> Expr* { 132 | using E = std::decay_t>; 133 | 134 | if constexpr (is_not_op::value) { 135 | size_t count = 1; 136 | auto operand = e->operand(); 137 | 138 | while (operand->type() == Expr::NOT_OPERATOR) { 139 | count++; 140 | operand = (static_cast(operand)->operand()); 141 | } 142 | 143 | return (count % 2) ? this->builder_->Not(operand) : operand; 144 | } 145 | 146 | return kNoOptimizationPerformed; 147 | }); 148 | } 149 | 150 | Optimizer::Optimizer(ExprBuilder* builder, OptimizerOptions options) 151 | : builder_(builder), options_(options) { 152 | if (options.HasOptimization(OptimizerOptions::CONSTANT_FOLDING)) { 153 | constant_folding_ = ConstantFolding{builder_}; 154 | } 155 | 156 | if (options.HasOptimization(OptimizerOptions::SAME_OPERAND_FOLDING)) { 157 | same_operand_folding_ = SameOperandFolding{builder_}; 158 | } 159 | 160 | if (options.HasOptimization(OptimizerOptions::NOT_CHAIN_FOLDING)) { 161 | not_chain_folding_ = NotChainFolding{builder_}; 162 | } 163 | } 164 | 165 | // Apply optimizations in a bottom-up fashion, i.e. visit children before parents. 
166 | template 167 | struct BottonUpVisitor { 168 | Expr* operator()(VariableExpr* e) { return visitor(e); } 169 | 170 | template 171 | enable_if_literal operator()(E* e) { 172 | return visitor(e); 173 | } 174 | 175 | template 176 | enable_if_unary_op operator()(E* op) { 177 | op->set_operand((op->operand()->Visit(*this))); 178 | return visitor(op); 179 | } 180 | 181 | template 182 | enable_if_binary_op operator()(E* op) { 183 | op->set_left_operand(op->left_operand()->Visit(*this)); 184 | op->set_right_operand(op->right_operand()->Visit(*this)); 185 | return visitor(op); 186 | } 187 | 188 | Visitor visitor; 189 | }; 190 | 191 | Expr* Optimizer::Optimize(const Expr& input) { 192 | auto expr = input.Copy(builder_); 193 | 194 | // Apply folding optimizations 195 | auto folder = [this](Expr* e) { 196 | if (this->constant_folding_) e = this->constant_folding_.value()(e); 197 | if (this->same_operand_folding_) e = this->same_operand_folding_.value()(e); 198 | if (this->not_chain_folding_) e = this->not_chain_folding_.value()(e); 199 | return e; 200 | }; 201 | BottonUpVisitor folders{std::move(folder)}; 202 | 203 | return expr->Visit(folders); 204 | } 205 | 206 | } // namespace query 207 | } // namespace jitmap 208 | -------------------------------------------------------------------------------- /src/jitmap/query/parser.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2020 RStudio, Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | 16 | #include 17 | 18 | #include "jitmap/query/parser.h" 19 | #include "jitmap/query/expr.h" 20 | 21 | #include "parser_internal.h" 22 | 23 | namespace jitmap { 24 | namespace query { 25 | 26 | const char* TokenTypeToString(Token::Type type) { 27 | switch (type) { 28 | case Token::Type::EMPTY_LITERAL: 29 | return "$0"; 30 | case Token::Type::FULL_LITERAL: 31 | return "$1"; 32 | case Token::Type::VARIABLE: 33 | return "Variable"; 34 | case Token::Type::LEFT_PARENTHESIS: 35 | return "LeftParenthesis"; 36 | case Token::Type::RIGHT_PARENTHESIS: 37 | return "RightParenthesis"; 38 | case Token::Type::NOT_OPERATOR: 39 | return "NotOp"; 40 | case Token::Type::AND_OPERATOR: 41 | return "AndOp"; 42 | case Token::Type::OR_OPERATOR: 43 | return "OrOp"; 44 | case Token::Type::XOR_OPERATOR: 45 | return "XorOp"; 46 | case Token::Type::END_OF_STREAM: 47 | return "EOS"; 48 | } 49 | /* Report the raw enum value: operator<<(Token::Type) calls back into this function, so streaming `type` itself could recurse without end. */ 50 | throw ParserException("Unknown token type: ", static_cast<int>(type)); 51 | } 52 | 53 | std::ostream& operator<<(std::ostream& os, Token::Type type) { 54 | return os << TokenTypeToString(type); 55 | } 56 | 57 | Token Token::Empty(std::string_view t) { return Token(Token::EMPTY_LITERAL, t); } 58 | Token Token::Full(std::string_view t) { return Token(Token::FULL_LITERAL, t); } 59 | Token Token::Var(std::string_view t) { return Token(Token::VARIABLE, t); } 60 | Token Token::LeftParen(std::string_view t) { return Token(Token::LEFT_PARENTHESIS, t); } 61 | Token Token::RightParen(std::string_view t) { return Token(Token::RIGHT_PARENTHESIS, t); } 62 | Token Token::NotOp(std::string_view t) { return Token(Token::NOT_OPERATOR, t); } 63 | Token Token::AndOp(std::string_view t) { return Token(Token::AND_OPERATOR, t); } 64 | Token Token::OrOp(std::string_view t) { return Token(Token::OR_OPERATOR, t); } 65 | Token Token::XorOp(std::string_view t) { return Token(Token::XOR_OPERATOR, t); } 66 | Token 
Token::EoS(std::string_view t) { return Token(Token::END_OF_STREAM, t); } 67 | 68 | constexpr char kEoFCharacter = '\0'; 69 | constexpr char kLiteralPrefixCharacter = '$'; 70 | /* The <cctype> classifiers are only defined for values representable as unsigned char (or EOF), hence the casts below. */ 71 | static bool IsSpace(char c) { return std::isspace(static_cast<unsigned char>(c)); } 72 | static bool IsVariable(char c) { return std::isalnum(static_cast<unsigned char>(c)) || c == '_'; } 73 | static bool IsLeftParenthesis(char c) { return c == '('; } 74 | static bool IsRightParenthesis(char c) { return c == ')'; } 75 | static bool IsParenthesis(char c) { 76 | return IsLeftParenthesis(c) || IsRightParenthesis(c); 77 | } 78 | 79 | static bool IsOperator(char c) { 80 | switch (c) { 81 | case '!': 82 | case '&': 83 | case '|': 84 | case '^': 85 | return true; 86 | default: 87 | return false; 88 | } 89 | } 90 | 91 | bool Lexer::Done() const { return position_ >= query_.size(); } 92 | 93 | char Lexer::Peek() const { 94 | if (Done()) return kEoFCharacter; 95 | return query_[position_]; 96 | } 97 | 98 | char Lexer::Consume() { 99 | if (Done()) return kEoFCharacter; 100 | return query_[position_++]; 101 | } 102 | 103 | char Lexer::Consume(char expected) { 104 | if (Done()) return kEoFCharacter; 105 | char ret = query_[position_++]; 106 | 107 | if (ret != expected) 108 | throw ParserException("Consumed character '", ret, "' but expected '", expected, "'"); 109 | 110 | return ret; 111 | } 112 | 113 | Token Lexer::ConsumeLiteral() { 114 | // Pop '$'. 
115 | Consume(kLiteralPrefixCharacter); 116 | 117 | size_t start = position_; 118 | if (Peek() == '0') { 119 | Consume(); 120 | return Token::Empty(query_.substr(start, 0)); 121 | } else if (Peek() == '1') { 122 | Consume(); 123 | return Token::Full(query_.substr(start, 0)); 124 | } 125 | 126 | throw ParserException("Invalid literal character ", Peek()); 127 | } 128 | 129 | Token Lexer::ConsumeVariable() { 130 | size_t start = position_; 131 | while (IsVariable(Peek())) { 132 | Consume(); 133 | }; 134 | 135 | // Expected at least one character 136 | if (start == position_) 137 | throw ParserException("Named reference expects at least one character"); 138 | 139 | return Token::Var(query_.substr(start, position_ - start)); 140 | } 141 | 142 | Token Lexer::ConsumeOperator() { 143 | char c = Consume(); 144 | switch (c) { 145 | case '(': 146 | return Token::LeftParen(); 147 | case ')': 148 | return Token::RightParen(); 149 | case '!': 150 | return Token::NotOp(); 151 | case '&': 152 | return Token::AndOp(); 153 | case '|': 154 | return Token::OrOp(); 155 | case '^': 156 | return Token::XorOp(); 157 | default: 158 | throw ParserException("Unexpected character '", c, "' while consuming operator."); 159 | } 160 | } 161 | 162 | Token Lexer::Next() { 163 | while (IsSpace(Peek())) { 164 | Consume(); 165 | } 166 | 167 | char next = Peek(); 168 | if (next == kEoFCharacter) 169 | return Token::EoS(); 170 | else if (next == kLiteralPrefixCharacter) 171 | return ConsumeLiteral(); 172 | else if (IsVariable(next)) 173 | return ConsumeVariable(); 174 | else if (IsOperator(next) || IsParenthesis(next)) 175 | return ConsumeOperator(); 176 | 177 | throw ParserException("Unexpected character '", next, "'."); 178 | } 179 | 180 | int OperatorPrecedence(Token::Type type) { 181 | switch (type) { 182 | case Token::NOT_OPERATOR: 183 | return 4; 184 | case Token::AND_OPERATOR: 185 | return 3; 186 | case Token::XOR_OPERATOR: 187 | return 2; 188 | case Token::OR_OPERATOR: 189 | return 1; 190 | 
default: 191 | return 0; 192 | } 193 | } 194 | 195 | // Pratt parser adapted from 196 | // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ 197 | class Parser { 198 | public: 199 | Parser(std::string_view query, ExprBuilder* builder) 200 | : lexer_(query), builder_(builder) {} 201 | 202 | Expr* Parse() { return ParseAndConsume(Token::END_OF_STREAM); } 203 | 204 | protected: 205 | Token Peek() { 206 | if (!next_) next_ = lexer_.Next(); 207 | return *next_; 208 | } 209 | 210 | Token Consume() { 211 | Token token = Peek(); 212 | next_.reset(); 213 | return token; 214 | } 215 | /* Consumes the next token and throws ParserException when its type differs from `expected`. */ 216 | Token Consume(Token::Type expected) { 217 | Token token = Consume(); 218 | 219 | if (token.type() != expected) 220 | throw ParserException("Unexpected token got '", token, "' but expected '", 221 | TokenTypeToString(expected), "'"); 222 | return token; 223 | } 224 | 225 | Expr* ParseAndConsume(Token::Type expected) { 226 | Expr* expr = Parse(0); 227 | Consume(expected); 228 | return expr; 229 | } 230 | 231 | Expr* Parse(int precedence) { 232 | Token token = Consume(); 233 | Expr* left = ParsePrefix(token); 234 | 235 | while (precedence < OperatorPrecedence(Peek().type())) { 236 | token = Consume(); 237 | left = ParseInfix(token, left); 238 | } 239 | 240 | return left; 241 | } 242 | 243 | Expr* ParsePrefix(Token token) { 244 | switch (token.type()) { 245 | case Token::EMPTY_LITERAL: 246 | return builder_->EmptyBitmap(); 247 | case Token::FULL_LITERAL: 248 | return builder_->FullBitmap(); 249 | case Token::VARIABLE: 250 | return builder_->Var(token.string()); 251 | case Token::NOT_OPERATOR: 252 | return builder_->Not(Parse(OperatorPrecedence(Token::NOT_OPERATOR))); 253 | case Token::LEFT_PARENTHESIS: 254 | return ParseAndConsume(Token::RIGHT_PARENTHESIS); 255 | default: 256 | throw ParserException("Unexpected token '", token, "'"); 257 | } 258 | } 259 | 260 | Expr* ParseInfix(Token token, Expr* left) { 261 | switch (token.type()) { 262 | case 
Token::AND_OPERATOR: 263 | return builder_->And(left, Parse(OperatorPrecedence(Token::AND_OPERATOR))); 264 | case Token::OR_OPERATOR: 265 | return builder_->Or(left, Parse(OperatorPrecedence(Token::OR_OPERATOR))); 266 | case Token::XOR_OPERATOR: 267 | return builder_->Xor(left, Parse(OperatorPrecedence(Token::XOR_OPERATOR))); 268 | default: 269 | throw ParserException("Unexpected token '", token, "'"); 270 | } 271 | } 272 | 273 | private: 274 | Lexer lexer_; 275 | ExprBuilder* builder_; 276 | // A tiny buffer to support Peek(). 277 | std::optional next_; 278 | }; 279 | 280 | Expr* Parse(std::string_view query, ExprBuilder* builder) { 281 | return Parser(query, builder).Parse(); 282 | } 283 | 284 | } // namespace query 285 | } // namespace jitmap 286 | -------------------------------------------------------------------------------- /src/jitmap/query/parser_internal.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 RStudio, Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #pragma once 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | namespace jitmap { 24 | namespace query { 25 | 26 | class Token { 27 | public: 28 | enum Type { 29 | EMPTY_LITERAL, 30 | FULL_LITERAL, 31 | VARIABLE, 32 | LEFT_PARENTHESIS, 33 | RIGHT_PARENTHESIS, 34 | NOT_OPERATOR, 35 | AND_OPERATOR, 36 | OR_OPERATOR, 37 | XOR_OPERATOR, 38 | END_OF_STREAM, 39 | LAST_TOKEN = END_OF_STREAM, 40 | }; 41 | 42 | Token(Type type, std::string_view token) : type_(type), string_(std::move(token)) {} 43 | Token(Type type) : Token(type, "") {} 44 | 45 | Type type() const { return type_; } 46 | std::string_view string() const { return string_; } 47 | 48 | bool operator==(const Token& rhs) const { 49 | return this->type_ == rhs.type() && this->string_ == rhs.string(); 50 | } 51 | 52 | friend std::ostream& operator<<(std::ostream& os, const Token& token) { 53 | return os << token.type() << "(" << token.string() << ")"; 54 | } 55 | 56 | // Friendly builder methods 57 | static Token Empty(std::string_view = ""); 58 | static Token Full(std::string_view = ""); 59 | static Token Var(std::string_view); 60 | static Token LeftParen(std::string_view = ""); 61 | static Token RightParen(std::string_view = ""); 62 | static Token NotOp(std::string_view = ""); 63 | static Token AndOp(std::string_view = ""); 64 | static Token OrOp(std::string_view = ""); 65 | static Token XorOp(std::string_view = ""); 66 | static Token EoS(std::string_view = ""); 67 | 68 | private: 69 | Type type_; 70 | std::string_view string_; 71 | }; 72 | 73 | std::ostream& operator<<(std::ostream& os, Token::Type type); 74 | 75 | class Lexer { 76 | public: 77 | Lexer(std::string_view query) : position_(0), query_(std::move(query)) {} 78 | 79 | Token Next(); 80 | 81 | private: 82 | bool Done() const; 83 | char Peek() const; 84 | char Consume(); 85 | char Consume(char expected); 86 | 87 | Token ConsumeLiteral(); 88 | Token ConsumeVariable(); 89 | Token ConsumeOperator(); 90 | 91 | 
size_t position_; 92 | std::string_view query_; 93 | }; 94 | 95 | } // namespace query 96 | } // namespace jitmap 97 | -------------------------------------------------------------------------------- /src/jitmap/query/query.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2020 RStudio, Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "jitmap/query/query.h" 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #include "jitmap/jitmap.h" 23 | #include "jitmap/query/compiler.h" 24 | #include "jitmap/query/optimizer.h" 25 | #include "jitmap/query/parser.h" 26 | 27 | namespace jitmap { 28 | namespace query { 29 | 30 | class QueryImpl { 31 | public: 32 | QueryImpl(std::string name, std::string query) 33 | : name_(std::move(name)), 34 | query_(std::move(query)), 35 | expr_(Parse(query_, &builder_)), 36 | optimized_expr_(Optimizer(&builder_).Optimize(*expr_)), 37 | variables_(expr_->Variables()) {} 38 | 39 | // Accessors 40 | const std::string& name() const { return name_; } 41 | const std::string& query() const { return query_; } 42 | const Expr& expr() const { return *expr_; } 43 | const Expr& optimized_expr() const { return *optimized_expr_; } 44 | const std::vector& variables() const { return variables_; } 45 | 46 | DenseEvalFn dense_eval_fn() const { return dense_eval_fn_; } 47 | DenseEvalPopCountFn 
dense_eval_popct_fn() const { return dense_eval_popct_fn_; } 48 | 49 | private: 50 | std::string name_; 51 | std::string query_; 52 | ExprBuilder builder_; 53 | Expr* expr_; 54 | Expr* optimized_expr_; 55 | 56 | friend class Query; 57 | 58 | std::vector variables_; 59 | DenseEvalFn dense_eval_fn_ = nullptr; 60 | DenseEvalPopCountFn dense_eval_popct_fn_ = nullptr; 61 | }; 62 | 63 | static inline void ValidateQueryName(const std::string& name) { 64 | if (name.empty()) { 65 | throw CompilerException("Query name must have at least one character"); 66 | } 67 | 68 | auto first = name[0]; 69 | if (!std::isalnum(first)) { 70 | throw CompilerException( 71 | "The first character of the Query name must be an alpha numeric character but " 72 | "got", 73 | first); 74 | } 75 | 76 | auto is_valid_char = [](auto c) { return std::isalnum(c) || c == '_'; }; 77 | if (!std::all_of(name.cbegin(), name.cend(), is_valid_char)) { 78 | throw CompilerException( 79 | "The characters of a query name must either be an alpha numeric character or an " 80 | "underscore."); 81 | } 82 | } 83 | 84 | Query::Query(std::string name, std::string query, ExecutionContext* context) 85 | : Pimpl(std::make_unique(std::move(name), std::move(query))) {} 86 | 87 | std::shared_ptr Query::Make(const std::string& name, const std::string& expr, 88 | ExecutionContext* context) { 89 | JITMAP_PRE_NE(context, nullptr); 90 | 91 | // Ensure that the query names follows the restriction 92 | ValidateQueryName(name); 93 | 94 | auto query = std::shared_ptr(new Query(name, expr, context)); 95 | context->jit()->Compile(query->name(), query->expr()); 96 | 97 | // Cache functions 98 | query->impl().dense_eval_fn_ = context->jit()->LookupUserQuery(name); 99 | query->impl().dense_eval_popct_fn_ = context->jit()->LookupUserPopCountQuery(name); 100 | 101 | return query; 102 | } 103 | 104 | const std::string& Query::name() const { return impl().name(); } 105 | const Expr& Query::expr() const { return impl().expr(); } 106 | const 
std::vector& Query::variables() const { return impl().variables(); } 107 | 108 | template 109 | class StaticArray : public std::array { 110 | public: 111 | StaticArray() noexcept : array() { fill(FillByte); } 112 | }; 113 | 114 | // Private read-only full and empty bitmap. Used for EvaluationContext::MissingPolicy; 115 | static const StaticArray(0x00)> kEmptyBitmap; 116 | static const StaticArray(0xFF)> kFullBitmap; 117 | 118 | using MissingPolicy = EvaluationContext::MissingPolicy; 119 | 120 | static inline const char* CoalesceInputPointer(const char* input, 121 | const std::string& variable, 122 | MissingPolicy policy) { 123 | if (input != nullptr) { 124 | return input; 125 | } 126 | 127 | switch (policy) { 128 | case MissingPolicy::ERROR: 129 | throw Exception("Missing pointer for bitmap '", variable, ","); 130 | case MissingPolicy::REPLACE_WITH_EMPTY: 131 | return kEmptyBitmap.data(); 132 | case MissingPolicy::REPLACE_WITH_FULL: 133 | return kFullBitmap.data(); 134 | } 135 | 136 | throw Exception("Unreachable in ", __FUNCTION__); 137 | } 138 | 139 | int32_t Query::Eval(const EvaluationContext& eval_ctx, std::vector inputs, 140 | char* output) { 141 | auto vars = variables(); 142 | 143 | JITMAP_PRE_EQ(vars.size(), inputs.size()); 144 | JITMAP_PRE_NE(output, nullptr); 145 | 146 | auto policy = eval_ctx.missing_policy(); 147 | for (size_t i = 0; i < inputs.size(); i++) { 148 | if (inputs[i] == nullptr) { 149 | inputs[i] = CoalesceInputPointer(inputs[i], vars[i], policy); 150 | } 151 | } 152 | 153 | if (eval_ctx.popcount()) { 154 | auto eval_fn = impl().dense_eval_popct_fn(); 155 | return eval_fn(inputs.data(), output); 156 | } 157 | 158 | auto eval_fn = impl().dense_eval_fn(); 159 | eval_fn(inputs.data(), output); 160 | return kUnknownPopCount; 161 | } 162 | 163 | int32_t Query::Eval(std::vector inputs, char* output) { 164 | EvaluationContext ctx; 165 | return Eval(ctx, std::move(inputs), output); 166 | } 167 | 168 | int32_t Query::EvalUnsafe(const 
EvaluationContext& eval_ctx, 169 | std::vector& inputs, char* output) { 170 | if (eval_ctx.popcount()) { 171 | auto eval_fn = impl().dense_eval_popct_fn(); 172 | return eval_fn(inputs.data(), output); 173 | } 174 | 175 | auto eval_fn = impl().dense_eval_fn(); 176 | eval_fn(inputs.data(), output); 177 | return kUnknownPopCount; 178 | } 179 | 180 | } // namespace query 181 | } // namespace jitmap 182 | -------------------------------------------------------------------------------- /tests/BenchmarkCMakeLists.txt.in: -------------------------------------------------------------------------------- 1 | # Copyright 2020 RStudio, Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | cmake_minimum_required(VERSION 2.8.2) 16 | 17 | project(googlebenchmark-download NONE) 18 | 19 | include(ExternalProject) 20 | ExternalProject_Add(googlebenchmark 21 | SOURCE_DIR "${CMAKE_SOURCE_DIR}/vendor/benchmark" 22 | BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/benchmark-build" 23 | CONFIGURE_COMMAND "" 24 | BUILD_COMMAND "" 25 | INSTALL_COMMAND "" 26 | TEST_COMMAND "" 27 | ) 28 | -------------------------------------------------------------------------------- /tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright 2020 RStudio, Inc. All rights reserved. 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# jitmap_add_test_executable(<name> [SOURCES <src>...] [LIBRARIES <lib>...])
#
# Shared implementation of unit_test() and benchmark(): builds the executable
# <name>, links it privately against jitmap plus LIBRARIES, applies the project
# warning flags and registers it with CTest under the same name.
#
#   SOURCES    source files of the executable; defaults to <name>.cc
#   LIBRARIES  extra libraries/targets to link in addition to jitmap
function(jitmap_add_test_executable target_name)
  set(options)
  set(one_value_args)
  set(multi_value_args SOURCES LIBRARIES)
  cmake_parse_arguments(ARG "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN})

  # A misspelled keyword would otherwise be dropped silently and the default
  # source file would be used instead of the intended one.
  if(ARG_UNPARSED_ARGUMENTS)
    message(FATAL_ERROR
      "${target_name}: unexpected arguments: ${ARG_UNPARSED_ARGUMENTS}")
  endif()

  if(NOT ARG_SOURCES)
    set(ARG_SOURCES ${target_name}.cc)
  endif()

  add_executable(${target_name} ${ARG_SOURCES})
  target_link_libraries(${target_name} PRIVATE jitmap ${ARG_LIBRARIES})
  target_compile_options(${target_name} PRIVATE ${CXX_WARNING_FLAGS})

  # The NAME/COMMAND signature resolves the target to the path of the built
  # executable. The legacy `add_test(<name> <command>)` form does not support
  # target names and only works when the binary happens to sit in the test's
  # working directory.
  add_test(NAME ${target_name} COMMAND ${target_name})
endfunction()

# unit_test(<name> [SOURCES <src>...])
#
# Adds a GoogleTest/GoogleMock based test executable and registers it with
# CTest. Without SOURCES the single source file <name>.cc is used.
#
#   unit_test(bitset_test)
#   unit_test(query_parser_test SOURCES parser_test.cc)
function(unit_test test_name)
  jitmap_add_test_executable(${test_name} ${ARGN}
    LIBRARIES gtest gtest_main gmock)
endfunction()

# benchmark(<name> [SOURCES <src>...])
#
# Adds a Google Benchmark based executable and registers it with CTest.
# Without SOURCES the single source file <name>.cc is used.
function(benchmark benchmark_name)
  jitmap_add_test_executable(${benchmark_name} ${ARGN}
    LIBRARIES benchmark benchmark_main)
endfunction()

unit_test(bitset_test)
unit_test(jitmap_test)
benchmark(jitmap_benchmark)

add_subdirectory(query)

--------------------------------------------------------------------------------
/tests/GTestCMakeLists.txt.in:
--------------------------------------------------------------------------------
# Copyright 2020 RStudio, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# The `...3.16` policy ceiling keeps this helper project configurable with
# CMake >= 4.0, which rejects policy versions older than 3.5. CMake older than
# 3.12 ignores the ceiling and behaves as before.
cmake_minimum_required(VERSION 2.8.2...3.16)

project(googletest-download NONE)

# Registers the vendored GoogleTest checkout as an external project with every
# step command left empty.
include(ExternalProject)
ExternalProject_Add(googletest
  SOURCE_DIR "${CMAKE_SOURCE_DIR}/vendor/googletest"
  BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/googletest-build"
  CONFIGURE_COMMAND ""
  BUILD_COMMAND ""
  INSTALL_COMMAND ""
  TEST_COMMAND ""
)

--------------------------------------------------------------------------------
/tests/bitset_test.cc:
--------------------------------------------------------------------------------
// Copyright 2020 RStudio, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include
#include

#include
#include
#include

#include

using testing::ContainerEq;

namespace jitmap {

// Number of bits of the single-word bitsets used throughout these tests.
static constexpr size_t kBitsetSize = 64;

using BitsetUInt64 = Bitset;
using BitsetConstUInt64 = Bitset;
using BitsetUPtrUInt64 = Bitset>;

// make_bitset accepts a pointer to a variable, a pointer to a const variable,
// a stack array, a heap array and a unique_ptr; count() reports the number of
// set bits of the backing words in each case.
TEST(BitsetTest, make_bitset) {
  // Create a bitset from a variable.
  uint64_t stack_ptr = 0x000000000000000F;
  auto stack = make_bitset<64>(&stack_ptr);
  EXPECT_EQ(stack.count(), 4);

  // Create a bitset from a const variable.
  const uint64_t const_stack_ptr = 0x00000000000000FF;
  auto const_stack = make_bitset<64>(&const_stack_ptr);
  EXPECT_EQ(const_stack.count(), 8);

  constexpr size_t kArraySize = 4;

  // Create a bitset from a stack array.
  uint64_t stack_array_ptr[kArraySize] = {UINT64_MAX, 0ULL, UINT64_MAX, 0ULL};
  auto stack_array = make_bitset<64 * kArraySize>(stack_array_ptr);
  EXPECT_EQ(stack_array.count(), 64 * 2);

  // Create a bitset from a heap array.
  uint64_t* heap_array_ptr = new uint64_t[kArraySize];
  memset(heap_array_ptr, 0, sizeof(uint64_t) * kArraySize);
  heap_array_ptr[1] = 0xFF00000000000000;
  auto heap_array = make_bitset<64 * kArraySize>(heap_array_ptr);
  EXPECT_EQ(heap_array.count(), 8);
  // The raw-pointer bitset does not own the array; the test frees it itself.
  delete[] heap_array_ptr;

  // Create a bitset from a unique_ptr array.
  auto uniq = std::make_unique(kArraySize);
  uniq[2] = 0xFF00FF00FF00FF00;
  // The bitset takes ownership
  auto uniq_bitset = make_bitset<64 * kArraySize>(std::move(uniq));
  EXPECT_EQ(uniq_bitset.count(), 32);
}

// size/count/all/any/none and (in)equality on 64-bit bitsets backed by const
// stack storage: no bits, all bits, and two different half-set patterns.
TEST(BitsetTest, StackUInt64) {
  const uint64_t no_bits = 0ULL;
  auto empty = make_bitset<64>(&no_bits);
  EXPECT_EQ(empty, empty);
  EXPECT_EQ(empty.size(), kBitsetSize);
  EXPECT_EQ(empty.count(), 0);
  EXPECT_FALSE(empty.all());
  EXPECT_FALSE(empty.any());
  EXPECT_TRUE(empty.none());

  const uint64_t all_bits = UINT64_MAX;
  auto full = make_bitset<64>(&all_bits);
  EXPECT_EQ(full, full);
  EXPECT_EQ(full.size(), kBitsetSize);
  EXPECT_EQ(full.count(), kBitsetSize);
  EXPECT_TRUE(full.all());
  EXPECT_TRUE(full.any());
  EXPECT_FALSE(full.none());

  const uint64_t some_bits = 0xF0F0F0F0F0F0F0F0;
  auto some = make_bitset<64>(&some_bits);
  EXPECT_EQ(some, some);
  EXPECT_EQ(some.size(), kBitsetSize);
  EXPECT_EQ(some.count(), 32);
  EXPECT_FALSE(some.all());
  EXPECT_TRUE(some.any());
  EXPECT_FALSE(some.none());

  // Same number of bits, but backed by two 32-bit words.
  const uint32_t other_bits[2] = {0x0F0F0F0F, 0x0F0F0F0F};
  auto other = make_bitset<64>(&other_bits);
  EXPECT_EQ(other, other);
  EXPECT_EQ(other.size(), kBitsetSize);
  EXPECT_EQ(other.count(), 32);
  EXPECT_FALSE(other.all());
  EXPECT_TRUE(other.any());
  EXPECT_FALSE(other.none());

  EXPECT_NE(empty, full);
  EXPECT_NE(empty, some);
  EXPECT_NE(empty, other);
  EXPECT_NE(full, some);
  EXPECT_NE(full, other);
  EXPECT_NE(some, other);
}

// In-place operators (&=, |=) on a mutable bitset with const operands, and the
// value-returning operators (~, |, &, ^).
TEST(BitsetTest, Operations) {
  const uint64_t no_bits = 0ULL;
  auto empty = make_bitset<64>(&no_bits);

  const uint64_t all_bits = UINT64_MAX;
  auto full = make_bitset<64>(&all_bits);

  uint64_t result_bits = 0ULL;
  auto result = make_bitset<64>(&result_bits);

  // Note that the storage types are different, the inputs are const, e.g. the
  // following will not compile.
  //
  // full &= empty;
  // empty |= full;
  //
  // Thus we validate that we can combine a Bitset with Bitsets of other
  // pointer types, even read-only ones, as long as the sizes match.

  // The assertions read `result_bits`, the word the bitset was created from.
  result &= empty;
  ASSERT_EQ(result_bits, 0);

  result &= full;
  ASSERT_EQ(result_bits, 0);

  result |= full;
  ASSERT_EQ(result_bits, UINT64_MAX);

  auto not_empty = ~empty;
  EXPECT_TRUE(not_empty.all());
  EXPECT_FALSE(not_empty.none());
  EXPECT_EQ(not_empty, full);

  auto empty_or_full = empty | full;
  EXPECT_TRUE(empty_or_full.all());
  EXPECT_FALSE(empty_or_full.none());
  EXPECT_EQ(empty_or_full, full);

  auto empty_and_full = empty & full;
  EXPECT_FALSE(empty_and_full.all());
  EXPECT_TRUE(empty_and_full.none());
  EXPECT_EQ(empty_and_full, empty);

  auto empty_xor_full = empty ^ full;
  EXPECT_TRUE(empty_xor_full.all());
  EXPECT_FALSE(empty_xor_full.none());
  EXPECT_EQ(empty_xor_full, full);

  auto full_xor_full = full ^ full;
  EXPECT_FALSE(full_xor_full.all());
  EXPECT_TRUE(full_xor_full.none());
  EXPECT_EQ(full_xor_full, empty);
}

// Constructing a Bitset from nullptr is expected to throw Exception for all
// three storage flavours.
TEST(BitsetTest, ErrorOnNullPtrConstructor) {
  EXPECT_THROW(BitsetUInt64 null_u64_bitset{nullptr}, Exception);
  EXPECT_THROW(BitsetConstUInt64 null_const_u64_bitset{nullptr}, Exception);
  EXPECT_THROW(BitsetUPtrUInt64 null_uptr_u64_bitset{nullptr}, Exception);
}

// Cache-line aligned backing storage; the constructor sets the first
// N / CHAR_BIT bytes to 0xFF (value == true) or 0x00 (value == false).
template
class alignas(kCacheLineSize) BitsetStorage
    : public std::array {
 public:
  BitsetStorage(bool value = false) {
    memset(this->data(), value ? 0xFF : 0x00, N / CHAR_BIT);
  }
};

// Compile-time check that the expected and actual alignment and size agree.
template
void EnsureAligmentAndSize() {
  static_assert(ExpectedAlignment == ActualAlignment, "alignment don't match!");
  static_assert(ExpectedSize == ActualSize, "size don't match!");
}

TEST(BitsetTest, AlignmentAndSize) {
  EnsureAligmentAndSize, kCacheLineSize, kBytesPerContainer>();
}

// One all-zero and one all-one storage instance shared by the test below.
const BitsetStorage empty_store;
const BitsetStorage full_store{true};

// Same observers as StackUInt64, on bitsets created over BitsetStorage.
TEST(BitsetTest, BitStorage) {
  auto empty = make_bitset(empty_store.data());
  EXPECT_EQ(empty, empty);
  EXPECT_EQ(empty.size(), kBitsPerContainer);
  EXPECT_EQ(empty.count(), 0);
  EXPECT_FALSE(empty.all());
  EXPECT_FALSE(empty.any());
  EXPECT_TRUE(empty.none());

  auto full = make_bitset(full_store.data());
  EXPECT_EQ(full, full);
  EXPECT_EQ(full.size(), kBitsPerContainer);
  EXPECT_EQ(full.count(), kBitsPerContainer);
  EXPECT_TRUE(full.all());
  EXPECT_TRUE(full.any());
  EXPECT_FALSE(full.none());
}

}  // namespace jitmap

--------------------------------------------------------------------------------
/tests/jitmap_benchmark.cc:
--------------------------------------------------------------------------------
// Copyright 2020 RStudio, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include

#include
#include
#include
#include

#include
#include
#include
#include
#include

namespace jitmap {

// Compile-time switch: whether the benchmarked functor also returns the
// population count of the result.
enum PopCountOption {
  WithoutPopCount = 0,
  WithPopCount,
};

// Baseline: ANDs `n_inputs` std::bitset values together one after the other.
template
class TreeVisitorFunctor {
 public:
  explicit TreeVisitorFunctor(size_t n_inputs) { inputs.resize(n_inputs); }

  int32_t operator()() {
    // Reads inputs[0] unconditionally, so at least one input is required.
    output = inputs[0];

    auto n_inputs = inputs.size();
    for (size_t i = 1; i < n_inputs; i++) {
      output &= inputs[i];
    }

    return Opt == WithPopCount ? output.count() : output.all();
  }

 private:
  std::vector> inputs;
  std::bitset output;
};

// Same computation through a compiled query of the form `i_0 & i_1 & ...`.
template
class JitFunctor {
 public:
  explicit JitFunctor(size_t n_inputs) {
    auto query_name = util::StaticFmt("and_", n_inputs);
    query = query::Query::Make(query_name, QueryForInputs(n_inputs), &engine);
    ctx.set_popcount(Opt == WithPopCount);

    // `inputs` stores pointers into `bitmaps`; `bitmaps` is not resized after
    // this point.
    bitmaps.resize(n_inputs);
    for (const auto& input : bitmaps) inputs.push_back(input.data());
  }

  // Uses the unchecked entry point.
  int32_t operator()() { return query->EvalUnsafe(ctx, inputs, output.data()); }

  // Builds "i_0 & i_1 & ... & i_<n-1>".
  std::string QueryForInputs(size_t n) {
    std::stringstream ss;

    ss << "i_0";
    for (size_t i = 1; i < n; i++) {
      ss << " & "
         << "i_" << i;
    }

    return ss.str();
  }

 private:
  std::shared_ptr query;
  query::EvaluationContext ctx;
  query::ExecutionContext engine{query::JitEngine::Make()};

  std::vector> bitmaps;
  std::vector inputs;
  aligned_array output;
};

// Times one call of `ComputeFunctor` per iteration; the number of inputs is
// taken from the benchmark range argument.
template
static void BasicBenchmark(benchmark::State& state) {
  auto n_bitmaps = static_cast(state.range(0));
  // NOTE(review): `bitmaps`, `output` and `inputs` below are set up here but
  // never passed to `compute`; the functor allocates its own storage.
  std::vector> bitmaps{n_bitmaps};
  aligned_array output;

  std::vector inputs;
  for (const auto& input : bitmaps) inputs.push_back(input.data());

  ComputeFunctor compute{n_bitmaps};
  for (auto _ : state) {
    benchmark::DoNotOptimize(compute());
  }

  state.SetBytesProcessed(kBytesPerContainer * n_bitmaps * state.iterations());
}

// Every registration below runs with 2, 4 and 8 inputs.
BENCHMARK_TEMPLATE(BasicBenchmark, TreeVisitorFunctor)
    ->RangeMultiplier(2)
    ->Range(2, 8);
BENCHMARK_TEMPLATE(BasicBenchmark, TreeVisitorFunctor)
    ->RangeMultiplier(2)
    ->Range(2, 8);
BENCHMARK_TEMPLATE(BasicBenchmark, JitFunctor)
    ->RangeMultiplier(2)
    ->Range(2, 8);
BENCHMARK_TEMPLATE(BasicBenchmark, JitFunctor)
    ->RangeMultiplier(2)
    ->Range(2, 8);

}  // namespace jitmap

--------------------------------------------------------------------------------
/tests/jitmap_test.cc:
--------------------------------------------------------------------------------
// Copyright 2020 RStudio, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include
#include

#include

using testing::ContainerEq;

namespace jitmap {

static_assert(sizeof(ProxyBitmap) == sizeof(uint64_t), "ProxyBitmap must fit in 64bits");

// Smoke test: a DenseContainer can be default-constructed.
TEST(JitmapTest, Basic) { DenseContainer dense; }
}  // namespace jitmap

--------------------------------------------------------------------------------
/tests/query/CMakeLists.txt:
--------------------------------------------------------------------------------
# Copyright 2020 RStudio, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# One test executable per query component: <component>_test.cc is built as
# query_<component>_test.
foreach(component compiler expr matcher optimizer parser query)
  unit_test(query_${component}_test SOURCES ${component}_test.cc)
endforeach()

--------------------------------------------------------------------------------
/tests/query/compiler_test.cc:
--------------------------------------------------------------------------------
// Copyright 2020 RStudio, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "../query_test.h"

#include

#include
#include
#include
#include

namespace jitmap {
namespace query {

// Fixture that compiles a query, evaluates it on bitmaps filled with a single
// repeated byte and checks that every output element equals the expected byte.
class JitTest : public QueryTest {
 public:
  // `input_words[i]` is the byte used to fill the i-th input bitmap;
  // `output_word` is the byte expected in every position of the result.
  void AssertQueryResult(const std::string& query_expr, std::vector input_words,
                         char output_word) {
    auto query = Query::Make(query_name(), query_expr, &ctx);

    // The first tuple element owns the bitmap storage that `inputs` points
    // into; it is bound here so that it stays alive until the end of the call.
    auto [_, inputs] = InitInputs(input_words);
    JITMAP_UNUSED(_);

    auto variables = query->variables();
    EXPECT_EQ(inputs.size(), variables.size());

    aligned_array output;
    EXPECT_THAT(output, testing::Each(0UL));

    query->Eval(std::move(inputs), output.data());
    EXPECT_THAT(output, testing::Each(output_word));
  }

 private:
  // Produces a fresh name ("query_0", "query_1", ...) on every call.
  std::string query_name() { return "query_" + std::to_string(id++); }

  // Builds one bitmap per input word, filled with that word, and returns the
  // bitmaps together with the pointers to their data.
  std::tuple>,
             std::vector>
  InitInputs(std::vector input_words) {
    size_t n_bitmaps = input_words.size();

    std::vector> bitmaps(n_bitmaps);
    std::vector inputs(n_bitmaps);
    for (size_t i = 0; i < n_bitmaps; i++) {
      auto repeated_word = input_words[i];
      auto& bitmap = bitmaps[i];
      std::fill(bitmap.begin(), bitmap.end(), repeated_word);
      inputs[i] = bitmap.data();
      EXPECT_THAT(bitmaps[i], testing::Each(repeated_word));
    }

    return {std::move(bitmaps), std::move(inputs)};
  }

 protected:
  std::atomic id = 0;
  ExecutionContext ctx{JitEngine::Make()};
};

// The JIT reports a non-empty target CPU and target triple.
TEST_F(JitTest, CpuDetection) {
  EXPECT_NE(ctx.jit()->GetTargetCPU(), "");
  EXPECT_NE(ctx.jit()->GetTargetTriple(), "");
}

// Each query is checked against the same expression computed on single bytes
// with the C++ bitwise operators.
TEST_F(JitTest, CompileAndExecuteTest) {
  char full = 0xFF;
  char empty = 0x0;

  char a = 0b00010010;
  char b = 0b11001000;
  char c = 0b00000001;
  char d = 0b11111111;
  char e = 0b11111110;

  AssertQueryResult("!a", {a}, ~a);
  AssertQueryResult("a & b", {a, b}, a & b);
  AssertQueryResult("a | b", {a, b}, a | b);
  AssertQueryResult("a ^ b", {a, b}, a ^ b);

  AssertQueryResult("full ^ b", {full, b}, full ^ b);
  AssertQueryResult("empty | !empty", {empty}, full);

  AssertQueryResult("a & b & c & d & e", {a, b, c, d, e}, a & b & c & d & e);
  AssertQueryResult("a | b | c | d | e", {a, b, c, d, e}, a | b | c | d | e);

  // Complex re-use of inputs
  AssertQueryResult("(a | b) & (((!a & c) | (d & b)) ^ (!e & b))", {a, b, c, d, e},
                    (a | b) & (((~a & c) | (d & b)) ^ (~e & b)));
}

}  // namespace query
}  // namespace jitmap

--------------------------------------------------------------------------------
/tests/query/expr_test.cc:
--------------------------------------------------------------------------------
// Copyright 2020 RStudio, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "../query_test.h"

#include
#include

namespace jitmap {
namespace query {

class ExprTest : public QueryTest {};

// Expression equality is structural: locally constructed literals compare
// equal to the ones handed out by the builder helpers.
TEST_F(ExprTest, Equals) {
  // Ensure pointer is different than the builder's ptr.
  FullBitmapExpr f;
  EmptyBitmapExpr e;

  ExprEq(&f, &f);
  ExprEq(Full(), &f);
  ExprEq(&e, Empty());

  ExprEq(Var("a"), Var("a"));
  ExprNe(Var("b"), Var("c"));

  ExprEq(Not(&f), Not(Full()));
  ExprNe(Not(&e), Not(&f));

  ExprEq(And(Var("0"), Or(Var("a"), &f)), And(Var("0"), Or(Var("a"), &f)));
}

// Operand order matters for equality: `full & empty` != `empty & full`.
TEST_F(ExprTest, EqualsNotCommutative) {
  ExprNe(And(Full(), Empty()), And(Empty(), Full()));
}

using testing::ElementsAre;

// Expects `expr->Variables()` to be exactly `names`, in order.
template
void ReferencesAre(Expr* expr, T... names) {
  EXPECT_THAT(expr->Variables(), ElementsAre(names...));
}

// Same check after parsing `query` with a local builder.
template
void ReferencesAre(const std::string& query, T... names) {
  ExprBuilder builder;
  auto expr = Parse(query, &builder);
  EXPECT_THAT(expr->Variables(), ElementsAre(names...));
}

TEST_F(ExprTest, Variables) {
  ReferencesAre("a", "a");
  ReferencesAre("a ^ b", "a", "b");
  // `$0` is not expected among the reported variables.
  ReferencesAre("a ^ (b | $0)", "a", "b");
}

}  // namespace query
}  // namespace jitmap

--------------------------------------------------------------------------------
/tests/query/matcher_test.cc:
--------------------------------------------------------------------------------
// Copyright 2020 RStudio, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "../query_test.h" 16 | 17 | #include 18 | 19 | namespace jitmap { 20 | namespace query { 21 | 22 | class MatcherTest : public QueryTest { 23 | public: 24 | template 25 | void ExpectMatch(const Matcher& m, E e) { 26 | EXPECT_TRUE(m(e)); 27 | } 28 | 29 | template 30 | void ExpectNoMatch(const Matcher& m, E e) { 31 | EXPECT_FALSE(m(e)); 32 | } 33 | }; 34 | 35 | TEST_F(MatcherTest, TypeMatcher) { 36 | TypeMatcher or_matcher{Expr::OR_OPERATOR}; 37 | ExpectMatch(or_matcher, Or(Full(), Empty())); 38 | ExpectNoMatch(or_matcher, And(Full(), Full())); 39 | ExpectNoMatch(or_matcher, Full()); 40 | 41 | TypeMatcher full_and{Expr::FULL_LITERAL, Expr::AND_OPERATOR}; 42 | ExpectMatch(full_and, Full()); 43 | ExpectMatch(full_and, And(Empty(), Var("a"))); 44 | ExpectNoMatch(full_and, Empty()); 45 | ExpectNoMatch(full_and, Or(Full(), Full())); 46 | 47 | TypeMatcher constant{Expr::FULL_LITERAL, Expr::EMPTY_LITERAL}; 48 | ExpectMatch(constant, Full()); 49 | ExpectMatch(constant, Empty()); 50 | ExpectNoMatch(constant, Var("a")); 51 | ExpectNoMatch(constant, Or(Full(), Var("a"))); 52 | } 53 | 54 | TEST_F(MatcherTest, OperandMatcher) { 55 | TypeMatcher constant{Expr::FULL_LITERAL, Expr::EMPTY_LITERAL}; 56 | OperandMatcher any_constant{&constant}; 57 | 58 | // Should only match operators 59 | ExpectNoMatch(any_constant, Full()); 60 | ExpectNoMatch(any_constant, Empty()); 61 | ExpectNoMatch(any_constant, Var("a")); 62 | 63 | ExpectMatch(any_constant, Not(Full())); 64 | ExpectMatch(any_constant, Or(Empty(), Var("a"))); 65 | 
ExpectMatch(any_constant, And(Not(Var("b")), Full())); 66 | 67 | ExpectNoMatch(any_constant, Xor(Var("a"), Not(Full()))); 68 | } 69 | 70 | TEST_F(MatcherTest, ChainMatcher) { 71 | // Checks that the expression is a NotExpr, and then checks if the operand 72 | // is also a NotExpr 73 | auto not_matcher = TypeMatcher{Expr::NOT_OPERATOR}; 74 | auto operand_not_matcher = OperandMatcher{¬_matcher}; 75 | auto not_not_matcher = ChainMatcher{¬_matcher, &operand_not_matcher}; 76 | ExpectMatch(not_not_matcher, Not(Not(Full()))); 77 | ExpectMatch(not_not_matcher, Not(Not(Not(Full())))); 78 | ExpectNoMatch(not_not_matcher, Not(Full())); 79 | ExpectNoMatch(not_not_matcher, Or(Not(Not(Full())), Empty())); 80 | 81 | // Equivalent to TypeMatcher of constants 82 | auto full = TypeMatcher{Expr::FULL_LITERAL}; 83 | auto empty = TypeMatcher{Expr::EMPTY_LITERAL}; 84 | auto const_matcher = ChainMatcher{{&full, &empty}, ChainMatcher::Mode::ANY}; 85 | ExpectMatch(const_matcher, Full()); 86 | ExpectMatch(const_matcher, Empty()); 87 | ExpectNoMatch(const_matcher, Not(Empty())); 88 | ExpectNoMatch(const_matcher, Or(Full(), Empty())); 89 | ExpectNoMatch(const_matcher, Var("a")); 90 | } 91 | 92 | } // namespace query 93 | } // namespace jitmap 94 | -------------------------------------------------------------------------------- /tests/query/optimizer_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2020 RStudio, Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "../query_test.h" 16 | 17 | #include 18 | 19 | namespace jitmap { 20 | namespace query { 21 | 22 | class OptimizationTest : public QueryTest { 23 | public: 24 | Expr* e = Var("e"); 25 | Expr* f = Or(And(Var("a"), Not(Var("b"))), Var("c")); 26 | 27 | protected: 28 | void ExpectOpt(OptimizationPass& p, Expr* input, Expr* expected) { 29 | ExprEq(p(input), expected); 30 | } 31 | 32 | void ExpectOpt(Optimizer& o, Expr* input, Expr* expected) { 33 | ExprEq(o.Optimize(*input), expected); 34 | } 35 | }; 36 | 37 | TEST_F(OptimizationTest, ConstantFolding) { 38 | ConstantFolding cf(&expr_builder_); 39 | 40 | ExpectOpt(cf, Not(Full()), Empty()); 41 | ExpectOpt(cf, Not(Empty()), Full()); 42 | ExpectOpt(cf, Not(e), Not(e)); 43 | 44 | ExpectOpt(cf, And(Full(), Empty()), Empty()); 45 | ExpectOpt(cf, And(e, Empty()), Empty()); 46 | ExpectOpt(cf, And(Full(), e), e); 47 | ExpectOpt(cf, And(e, f), And(e, f)); 48 | 49 | ExpectOpt(cf, Or(Full(), Empty()), Full()); 50 | ExpectOpt(cf, Or(e, Empty()), e); 51 | ExpectOpt(cf, Or(Full(), e), Full()); 52 | ExpectOpt(cf, Or(e, f), Or(e, f)); 53 | 54 | ExpectOpt(cf, Xor(e, Empty()), e); 55 | ExpectOpt(cf, Xor(Full(), e), Not(e)); 56 | ExpectOpt(cf, Xor(e, f), Xor(e, f)); 57 | } 58 | 59 | TEST_F(OptimizationTest, SameOperandFolding) { 60 | SameOperandFolding so(&expr_builder_); 61 | 62 | ExpectOpt(so, And(e, e), e); 63 | ExpectOpt(so, And(e, f), And(e, f)); 64 | 65 | ExpectOpt(so, Or(e, e), e); 66 | ExpectOpt(so, Or(e, f), Or(e, f)); 67 | 68 | ExpectOpt(so, Xor(e, e), Empty()); 69 | ExpectOpt(so, Xor(e, f), Xor(e, f)); 70 | } 71 | 72 | TEST_F(OptimizationTest, NotChainFolding) { 73 | NotChainFolding nc(&expr_builder_); 74 | 75 | ExpectOpt(nc, e, e); 76 | ExpectOpt(nc, Not(e), Not(e)); 77 | ExpectOpt(nc, Not(Not(e)), e); 78 | ExpectOpt(nc, Not(Not(Not(e))), Not(e)); 79 | ExpectOpt(nc, Not(Not(Not(Not(e)))), e); 80 
| ExpectOpt(nc, Not(Not(Not(Not(Not(e))))), Not(e)); 81 | ExpectOpt(nc, Not(Not(Not(Not(Not(Not(e)))))), e); 82 | } 83 | 84 | TEST_F(OptimizationTest, Optimizer) { 85 | Optimizer opt(&expr_builder_); 86 | 87 | ExpectOpt(opt, e, e); 88 | 89 | // ConstantFolding 90 | ExpectOpt(opt, Not(Full()), Empty()); 91 | // SameOperandFolding 92 | ExpectOpt(opt, And(e, e), e); 93 | // NotChainFolding 94 | ExpectOpt(opt, Not(Not(e)), e); 95 | 96 | // A mixed bag 97 | ExpectOpt(opt, And(e, Or(e, Not(Not(Not(Full()))))), e); 98 | } 99 | 100 | } // namespace query 101 | } // namespace jitmap 102 | -------------------------------------------------------------------------------- /tests/query/parser_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2020 RStudio, Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #include 16 | #include 17 | 18 | #include 19 | 20 | #include "../query_test.h" 21 | #include "../../src/jitmap/query/parser_internal.h" 22 | 23 | using testing::ElementsAre; 24 | 25 | namespace jitmap { 26 | namespace query { 27 | 28 | class LexerTest : public QueryTest { 29 | protected: 30 | // Shortcuts 31 | auto Empty() { return Token::Empty(); } 32 | auto Full() { return Token::Full(); } 33 | auto Var(std::string_view s) { return Token::Var(s); } 34 | auto Left() { return Token::LeftParen(); } 35 | auto Right() { return Token::RightParen(); } 36 | auto Not() { return Token::NotOp(); } 37 | auto And() { return Token::AndOp(); } 38 | auto Or() { return Token::OrOp(); } 39 | auto Xor() { return Token::XorOp(); } 40 | 41 | template 42 | void ExpectTokenize(std::string_view query, T... tokens) { 43 | EXPECT_THAT(Lex(query), ElementsAre(tokens...)); 44 | } 45 | 46 | void ExpectThrow(std::string_view query) { EXPECT_ANY_THROW(Lex(query)); } 47 | 48 | std::vector Lex(std::string_view query) { 49 | std::vector tokens; 50 | 51 | Lexer lexer{query}; 52 | for (Token token = lexer.Next(); token.type() != Token::END_OF_STREAM; 53 | token = lexer.Next()) { 54 | tokens.push_back(token); 55 | } 56 | 57 | return tokens; 58 | } 59 | }; 60 | 61 | class ParserTest : public QueryTest { 62 | protected: 63 | void ExpectParse(std::string_view query, Expr* expected) { 64 | ExprEq(Parse(query), expected); 65 | } 66 | 67 | void ExpectThrow(std::string_view query) { EXPECT_ANY_THROW(Parse(query)); } 68 | }; 69 | 70 | TEST_F(LexerTest, Basic) { 71 | ExpectTokenize("(", Left()); 72 | ExpectTokenize(")", Right()); 73 | ExpectTokenize("()", Left(), Right()); 74 | ExpectTokenize(")(", Right(), Left()); 75 | 76 | ExpectTokenize("$0", Empty()); 77 | ExpectTokenize("0", Var("0")); 78 | ExpectTokenize(" $0 ", Empty()); 79 | ExpectTokenize("($0)", Left(), Empty(), Right()); 80 | ExpectTokenize("( $0 )", Left(), Empty(), Right()); 81 | 82 | ExpectTokenize("a", Var("a")); 83 | 
ExpectTokenize("a ", Var("a")); 84 | ExpectTokenize("(a)", Left(), Var("a"), Right()); 85 | ExpectTokenize(" (a) ", Left(), Var("a"), Right()); 86 | 87 | ExpectTokenize("($0 | a) ", Left(), Empty(), Or(), Var("a"), Right()); 88 | 89 | ExpectTokenize("($0 | !a) ", Left(), Empty(), Or(), Not(), Var("a"), Right()); 90 | 91 | ExpectTokenize(" (a &b & 1)\t", Left(), Var("a"), And(), Var("b"), And(), Var("1"), 92 | Right()); 93 | ExpectTokenize("(a&b&1) ", Left(), Var("a"), And(), Var("b"), And(), Var("1"), Right()); 94 | 95 | ExpectTokenize("((a | b) ^ !b) ", Left(), Left(), Var("a"), Or(), Var("b"), Right(), 96 | Xor(), Not(), Var("b"), Right()); 97 | } 98 | 99 | TEST_F(LexerTest, Errors) {} 100 | 101 | TEST_F(ParserTest, Basic) { 102 | ExpectParse("$0", Empty()); 103 | ExpectParse("0", V("0")); 104 | ExpectParse("a", V("a")); 105 | ExpectParse("!a", Not(V("a"))); 106 | ExpectParse("!!a", Not(Not(V("a")))); 107 | 108 | ExpectParse("a & b", And(V("a"), V("b"))); 109 | ExpectParse("$0 ^ !b", Xor(Empty(), Not(V("b")))); 110 | 111 | ExpectParse("(a & b & c) | ($0 & $1 & a)", 112 | Or(And(And(V("a"), V("b")), V("c")), And(And(Empty(), Full()), Var("a")))); 113 | } 114 | 115 | TEST_F(ParserTest, Parenthesis) { 116 | ExpectParse("($1)", Full()); 117 | ExpectParse("(((a)))", Var("a")); 118 | ExpectParse("(!(b))", Not(Var("b"))); 119 | 120 | ExpectParse("a & (b | c)", And(Var("a"), Or(Var("b"), Var("c")))); 121 | ExpectParse("(a & (b & c))", And(Var("a"), And(Var("b"), Var("c")))); 122 | ExpectParse("(a & b) & (c & d)", And(And(Var("a"), Var("b")), And(Var("c"), Var("d")))); 123 | } 124 | 125 | TEST_F(ParserTest, OperatorPrecedence) { 126 | // Default precedence 127 | ExpectParse("a | !b | c", Or(Or(Var("a"), Not(Var("b"))), Var("c"))); 128 | 129 | // Not precede over And precede over Xor precede over Or. 
130 | ExpectParse("!a ^ b & c | d", 131 | Or(Xor(Not(Var("a")), And(Var("b"), Var("c"))), Var("d"))); 132 | ExpectParse("a | !b ^ c", Or(Var("a"), Xor(Not(Var("b")), Var("c")))); 133 | ExpectParse("a ^ b & !c", Xor(Var("a"), And(Var("b"), Not(Var("c"))))); 134 | 135 | // Enforce with parenthesis 136 | ExpectParse("a ^ b & (c | d)", Xor(Var("a"), And(Var("b"), Or(Var("c"), Var("d"))))); 137 | } 138 | 139 | TEST_F(ParserTest, Errors) { 140 | // Invalid reference 141 | ExpectThrow("0$"); 142 | ExpectThrow("$a"); 143 | ExpectThrow("(a b)"); 144 | ExpectThrow("(a ! b)"); 145 | 146 | // No expressions 147 | ExpectThrow("()"); 148 | ExpectThrow("(())"); 149 | ExpectThrow("((()))"); 150 | 151 | // Invalid parenthesis 152 | ExpectThrow(")a)"); 153 | ExpectThrow("(a("); 154 | ExpectThrow(")a("); 155 | ExpectThrow("(a"); 156 | ExpectThrow(")a"); 157 | ExpectThrow("a("); 158 | ExpectThrow("a)"); 159 | ExpectThrow("()(a)"); 160 | ExpectThrow("(a)()"); 161 | } 162 | 163 | } // namespace query 164 | } // namespace jitmap 165 | -------------------------------------------------------------------------------- /tests/query/query_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2020 RStudio, Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #include "../query_test.h" 16 | 17 | #include 18 | #include 19 | #include 20 | 21 | namespace jitmap { 22 | namespace query { 23 | 24 | class QueryExecTest : public QueryTest {}; 25 | 26 | ExecutionContext ctx{JitEngine::Make()}; 27 | 28 | using testing::ContainerEq; 29 | using testing::ElementsAre; 30 | 31 | TEST_F(QueryExecTest, MakeInvalidNames) { 32 | EXPECT_THROW(Query::Make("", "!a", &ctx), CompilerException); 33 | EXPECT_THROW(Query::Make("_a", "!a", &ctx), CompilerException); 34 | EXPECT_THROW(Query::Make("^-?", "!a", &ctx), CompilerException); 35 | EXPECT_THROW(Query::Make("herpidy^", "!a", &ctx), CompilerException); 36 | } 37 | 38 | TEST_F(QueryExecTest, MakeInvalidExpressions) { 39 | EXPECT_THROW(Query::Make("valid", "", &ctx), ParserException); 40 | EXPECT_THROW(Query::Make("valid", "a !^ b", &ctx), ParserException); 41 | EXPECT_THROW(Query::Make("valid", "a b", &ctx), ParserException); 42 | } 43 | 44 | TEST_F(QueryExecTest, Make) { 45 | auto q1 = Query::Make("q1", "a & b", &ctx); 46 | EXPECT_EQ(q1->name(), "q1"); 47 | EXPECT_EQ(q1->expr(), And(Var("a"), Var("b"))); 48 | 49 | auto q2 = Query::Make("q2", "a ^ a", &ctx); 50 | EXPECT_EQ(q2->name(), "q2"); 51 | EXPECT_EQ(q2->expr(), Xor(Var("a"), Var("a"))); 52 | } 53 | 54 | TEST_F(QueryExecTest, variables) { 55 | auto not_a = Query::Make("not_a", "!a", &ctx); 56 | EXPECT_THAT(not_a->variables(), ElementsAre("a")); 57 | 58 | auto a_and_b = Query::Make("a_and_b", "a & b", &ctx); 59 | EXPECT_THAT(a_and_b->variables(), ElementsAre("a", "b")); 60 | 61 | auto a_xor_a = Query::Make("a_xor_a", "a ^ a", &ctx); 62 | EXPECT_THAT(a_xor_a->variables(), ElementsAre("a")); 63 | 64 | auto nested = Query::Make("nested", "!a | ((a ^ b) & (c | d) ^ b)", &ctx); 65 | EXPECT_THAT(nested->variables(), ElementsAre("a", "b", "c", "d")); 66 | } 67 | 68 | TEST_F(QueryExecTest, EvalInvalidParameters) { 69 | aligned_array a(0x00); 70 | aligned_array result(0x00); 71 | std::vector inputs{}; 72 | 73 | auto not_a = 
Query::Make("invalid_param", "!a", &ctx); 74 | std::vector empty; 75 | EXPECT_THROW(not_a->Eval(empty, nullptr), Exception); 76 | EXPECT_THROW(not_a->Eval(empty, result.data()), Exception); 77 | inputs = {nullptr}; 78 | EXPECT_THROW(not_a->Eval(inputs, result.data()), Exception); 79 | } 80 | 81 | TEST_F(QueryExecTest, Eval) { 82 | aligned_array a(0x00); 83 | aligned_array b(0xFF); 84 | aligned_array result(0x00); 85 | std::vector inputs{}; 86 | 87 | auto not_a = Query::Make("single_param", "!a", &ctx); 88 | 89 | inputs = {a.data()}; 90 | not_a->Eval(inputs, result.data()); 91 | EXPECT_THAT(result, testing::Each(0xFF)); 92 | 93 | auto a_and_b = Query::Make("double_param", "a & b", &ctx); 94 | 95 | inputs = {a.data(), b.data()}; 96 | a_and_b->Eval(inputs, result.data()); 97 | EXPECT_THAT(result, testing::Each(0x00)); 98 | 99 | // It can runs with the same input twice. 100 | a_and_b->Eval({b.data(), b.data()}, result.data()); 101 | EXPECT_THAT(result, testing::Each(0xFF)); 102 | } 103 | 104 | using MissingPolicy = EvaluationContext::MissingPolicy; 105 | 106 | TEST_F(QueryExecTest, EvalWithMissingPolicy) { 107 | aligned_array result(0x00); 108 | std::vector inputs{nullptr}; 109 | 110 | auto q = Query::Make("another_not_a", "!a", &ctx); 111 | 112 | EvaluationContext eval_ctx; 113 | EXPECT_THROW(q->Eval(eval_ctx, inputs, result.data()), Exception); 114 | 115 | eval_ctx.set_missing_policy(MissingPolicy::REPLACE_WITH_EMPTY); 116 | q->Eval(eval_ctx, inputs, result.data()); 117 | EXPECT_THAT(result, testing::Each(0xFF)); 118 | 119 | eval_ctx.set_missing_policy(MissingPolicy::REPLACE_WITH_FULL); 120 | q->Eval(eval_ctx, inputs, result.data()); 121 | EXPECT_THAT(result, testing::Each(0x00)); 122 | } 123 | 124 | TEST_F(QueryExecTest, EvalWithPopCount) { 125 | aligned_array a(0x00); 126 | aligned_array result(0x00); 127 | std::vector inputs{a.data()}; 128 | 129 | auto q = Query::Make("yet_not_a", "!a", &ctx); 130 | 131 | EvaluationContext eval_ctx; 132 | 
EXPECT_EQ(q->Eval(eval_ctx, inputs, result.data()), kUnknownPopCount); 133 | 134 | eval_ctx.set_popcount(true); 135 | EXPECT_EQ(q->Eval(eval_ctx, inputs, result.data()), kBitsPerContainer); 136 | 137 | a.fill(0b00001111); 138 | EXPECT_EQ(q->Eval(eval_ctx, inputs, result.data()), kBitsPerContainer / 2); 139 | 140 | a.fill(~0b00000001); 141 | EXPECT_EQ(q->Eval(eval_ctx, inputs, result.data()), kBitsPerContainer / 8); 142 | } 143 | 144 | } // namespace query 145 | } // namespace jitmap 146 | -------------------------------------------------------------------------------- /tests/query_test.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 RStudio, Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #pragma once 16 | 17 | #include 18 | #include 19 | 20 | #include 21 | #include 22 | 23 | #include 24 | #include 25 | #include 26 | 27 | namespace jitmap { 28 | namespace query { 29 | 30 | void ExprEq(Expr* actual, Expr* expected) { 31 | ASSERT_NE(actual, nullptr); 32 | ASSERT_NE(expected, nullptr); 33 | EXPECT_EQ(*actual, *expected); 34 | } 35 | 36 | void ExprNe(Expr* lhs, Expr* rhs) { 37 | ASSERT_NE(lhs, nullptr); 38 | ASSERT_NE(rhs, nullptr); 39 | EXPECT_NE(*lhs, *rhs); 40 | } 41 | 42 | class QueryTest : public testing::Test { 43 | public: 44 | Expr* Empty() { return expr_builder_.EmptyBitmap(); } 45 | Expr* Full() { return expr_builder_.FullBitmap(); } 46 | 47 | Expr* V(std::string name) { return expr_builder_.Var(name); } 48 | Expr* Var(std::string name) { return expr_builder_.Var(name); } 49 | 50 | Expr* Not(Expr* operand) { return expr_builder_.Not(operand); } 51 | 52 | Expr* And(Expr* lhs, Expr* rhs) { return expr_builder_.And(lhs, rhs); } 53 | Expr* Or(Expr* lhs, Expr* rhs) { return expr_builder_.Or(lhs, rhs); } 54 | Expr* Xor(Expr* lhs, Expr* rhs) { return expr_builder_.Xor(lhs, rhs); } 55 | 56 | Expr* Parse(std::string_view query) { 57 | return jitmap::query::Parse(query, &expr_builder_); 58 | } 59 | 60 | protected: 61 | ExprBuilder expr_builder_; 62 | }; 63 | 64 | ExprBuilder _g_builder_; 65 | 66 | Expr* operator"" _v(const char* name) { return _g_builder_.Var(std::string(name)); } 67 | Expr* operator"" _q(const char* name) { return Parse(name, &_g_builder_); } 68 | 69 | } // namespace query 70 | } // namespace jitmap 71 | -------------------------------------------------------------------------------- /tools/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright 2020 RStudio, Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# tool(<name> [SOURCES <file>...])
#
# Adds the executable <name>, linked against the `jitmap` target and compiled
# with ${CXX_WARNING_FLAGS}. When SOURCES is omitted, the single source file
# <name>.cc is used.
#
# Example:
#   tool(jitmap-ir SOURCES jitmap_ir.cc)
function(tool tool_name)
  set(options)
  set(one_value_args)
  set(multi_value_args SOURCES)
  cmake_parse_arguments(ARG "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN})

  # Fail loudly on a misspelled keyword instead of silently ignoring it.
  if(ARG_UNPARSED_ARGUMENTS)
    message(FATAL_ERROR
      "tool(${tool_name}): unexpected arguments: ${ARG_UNPARSED_ARGUMENTS}")
  endif()

  if(ARG_SOURCES)
    set(tool_sources ${ARG_SOURCES})
  else()
    set(tool_sources ${tool_name}.cc)
  endif()

  add_executable(${tool_name} ${tool_sources})
  # Explicit PRIVATE: nothing links against a tool executable.
  target_link_libraries(${tool_name} PRIVATE jitmap)
  target_compile_options(${tool_name} PRIVATE ${CXX_WARNING_FLAGS})
  # NOTE(review): the generator expression that stood here was reduced to a
  # bare `$` when the file was extracted. The path below assumes it pointed at
  # the repository's top-level include/ directory -- confirm against the
  # original.
  target_include_directories(${tool_name} PRIVATE
    $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
  )
endfunction()

tool(jitmap-ir SOURCES jitmap_ir.cc)

--------------------------------------------------------------------------------
/tools/jitmap_ir.cc:
--------------------------------------------------------------------------------
// Copyright 2020 RStudio, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14 | 15 | #include 16 | #include 17 | #include 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | namespace query = jitmap::query; 26 | 27 | int main(int argc, char** argv) { 28 | if (argc != 2) { 29 | return 1; 30 | } 31 | 32 | auto query_str = argv[1]; 33 | 34 | try { 35 | query::ExecutionContext context{query::JitEngine::Make()}; 36 | auto query = query::Query::Make("query", query_str, &context); 37 | std::cout << context.jit()->CompileIR(query->name(), query->expr()) << "\n"; 38 | } catch (jitmap::Exception& e) { 39 | std::cerr << "Problem '" << query_str << "' :\n"; 40 | std::cerr << "\t" << e.message() << "\n"; 41 | return 1; 42 | } 43 | 44 | return 0; 45 | } 46 | --------------------------------------------------------------------------------