├── .gitignore ├── LICENSE.txt ├── README.md ├── calculate_average_baseline.sh ├── calculate_average_thomaswue.sh ├── charts ├── incremental_improvement.png ├── instructions_branch_misses.png └── step_by_step_progress.png ├── create_measurements.sh ├── data └── weather_stations.csv ├── measure_all_thomaswue.sh ├── perf_all_thomaswue.sh ├── pom.xml ├── prepare_all_thomaswue.sh ├── prepare_baseline.sh ├── prepare_thomaswue.sh ├── sloc_all.thomaswue.sh └── src ├── main ├── java │ ├── dev │ │ └── morling │ │ │ └── onebrc │ │ │ ├── CalculateAverage_baseline.java │ │ │ ├── CalculateAverage_thomaswue.java │ │ │ ├── CalculateAverage_thomaswue_v1.java │ │ │ ├── CalculateAverage_thomaswue_v10.java │ │ │ ├── CalculateAverage_thomaswue_v2.java │ │ │ ├── CalculateAverage_thomaswue_v3.java │ │ │ ├── CalculateAverage_thomaswue_v4.java │ │ │ ├── CalculateAverage_thomaswue_v5.java │ │ │ ├── CalculateAverage_thomaswue_v6.java │ │ │ ├── CalculateAverage_thomaswue_v7.java │ │ │ ├── CalculateAverage_thomaswue_v8.java │ │ │ ├── CalculateAverage_thomaswue_v9.java │ │ │ └── CreateMeasurements.java │ └── org │ │ └── rschwietzke │ │ ├── CheaperCharBuffer.java │ │ └── FastRandom.java └── resources │ └── .dontdelete └── test └── resources ├── .dontdelete └── samples ├── measurements-1.out ├── measurements-1.txt ├── measurements-10.out ├── measurements-10.txt ├── measurements-10000-unique-keys.out ├── measurements-10000-unique-keys.txt ├── measurements-2.out ├── measurements-2.txt ├── measurements-20.out ├── measurements-20.txt ├── measurements-3.out ├── measurements-3.txt ├── measurements-boundaries.out ├── measurements-boundaries.txt ├── measurements-complex-utf8.out ├── measurements-complex-utf8.txt ├── measurements-dot.out ├── measurements-dot.txt ├── measurements-rounding.out ├── measurements-rounding.txt ├── measurements-short.out ├── measurements-short.txt ├── measurements-shortest.out └── measurements-shortest.txt /.gitignore: 
-------------------------------------------------------------------------------- 1 | #Maven 2 | target/ 3 | pom.xml.tag 4 | pom.xml.releaseBackup 5 | pom.xml.versionsBackup 6 | release.properties 7 | 8 | # Eclipse 9 | .project 10 | .classpath 11 | .settings/ 12 | bin/ 13 | 14 | # IntelliJ 15 | .idea 16 | *.ipr 17 | *.iml 18 | *.iws 19 | 20 | # NetBeans 21 | nb-configuration.xml 22 | nbactions.xml 23 | 24 | # Visual Studio Code 25 | .vscode 26 | .factorypath 27 | 28 | # OSX 29 | .DS_Store 30 | 31 | # Vim 32 | *.swp 33 | *.swo 34 | 35 | # patch 36 | *.orig 37 | *.rej 38 | 39 | # Local environment 40 | .env 41 | 42 | #JReleaser 43 | out/ 44 | 45 | # 1BRC 46 | /measurements*.txt 47 | /*.out 48 | out_expected.txt 49 | /*-timing.json 50 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 
23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. 
You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. 
(Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Path to the Fastest #1BRC Solution 2 | 3 | This repository contains 10 snapshots of the fastest [One Billion Row Challenge](https://github.com/gunnarmorling/1brc) solution to describe the incremental path to the final version. It outlines the different optimization techniques and provides measurements of their impact. For general information on the contest and all submitted solutions, please visit the main [#1brc repository](https://github.com/gunnarmorling/1brc). 
4 | 5 | Also, there is a lot of other great content about the challenge if you are looking for more detailed information, for example a [step by step guide from Marko Topolnik](https://questdb.io/blog/billion-row-challenge-step-by-step/) or a [talk by Roy van Rijn](https://www.youtube.com/watch?v=EFXxXFHpS0M) or a [podcast with Gunnar Morling](https://www.youtube.com/watch?v=RYjB4sGXNZI). 6 | 7 | In the chart and table below, the 10 steps of the fastest solution are outlined. Each intermediate step is checked into the repository as a separate file. The summary chart shows how the optimizations applied in the first few versions made the biggest difference compared to the reference implementation. As the solution got more advanced, the incremental progress got smaller over time, while the complexity of the solution continued to rise: 8 | 9 |

10 | Step-by-Step Progress of the Winning #1BRC Solution 11 |

12 | 13 | 14 | The performance measurements were taken on an Intel 13th Gen Core i9-13900K while restricting the program to the first 8 cores using `taskset -c 0-7`. It is a comparable setup to the contest grading system, with only a few percent difference. 15 | 16 | 17 | | Code | SLOC | Time (s) | Delta | Description | 18 | |------------|------|----------|--------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| 19 | | [ref](src/main/java/dev/morling/onebrc/CalculateAverage_baseline.java) | 55 | 125.3 | - | Naive single-threaded reference implementation. | 20 | | [ref](src/main/java/dev/morling/onebrc/CalculateAverage_baseline.java) | 55 | 115.1 | -8.2% | Exact same code as above, but using the [GraalVM](https://github.com/oracle/graal) JIT compiler, easy win by just switching the JDK distribution! | 21 | | [v1](src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v1.java) | 143 | 5.67 | -95.1% | Split input into 8 [segments](src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v1.java#L167) and process them in [parallel](src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v1.java#L81). Map the file into memory and use [byte buffer](src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v1.java#L74) for parsing. Multiply input values by 10 and use [integer](src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v1.java#L41) instead of double for intermediate calculations. Use a simple custom [hash table](src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v1.java#L72) implementation. | 22 | | [v2](src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v2.java) | 165 | 4.24 | -25.2% | [Use sun.misc.Unsafe](src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v2.java#L103) instead of byte buffer to avoid bounds checks and indirections when parsing.
| 23 | | [v3](src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v3.java) | 197 | 3.10 | -26.8% | Perform [scanning](src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v3.java#L132) and [collision checking](src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v3.java#L193) 4 or 8 bytes at a time. | 24 | | [v4](src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v4.java) | 186 | 2.32 | -25.1% | Add [branchless temperature parsing](src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v4.java#L207) and [SWAR scanning](src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v4.java#L229) for delimiter. | 25 | | [v5](src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v5.java) | 257 | 2.15 | -7.3% | Minor code shape improvements to [better specialize](src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v5.java#L126) on the cases where the city name is <= 8 bytes or <= 16 bytes. | 26 | | [v6](src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v6.java) | 271 | 1.99 | -7.5% | [Reducing hash table size](src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v6.java#L117) while still avoiding collisions in the example data set by adjusting the [hash function](src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v6.java#L241). | 27 | | [v7](src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v7.java) | 307 | 1.84 | -7.5% | Adding trick to [spawn a subprocess](src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v7.java#L115) to avoid the long wait until the Linux kernel unmapping of the file takes place. | 28 | | [v8](src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v8.java) | 362 | 1.77 | -3.8% | Better work distribution between threads by [processing in 2MB chunks](src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v8.java#L278). 
Improved instruction level parallelism by always processing [three entries in parallel](src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v8.java#L304) in the same thread. | 29 | | [v9](src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v9.java) | 343 | 1.71 | -3.4% | Save an instruction per row by [folding an addition into the x86 address calculation](https://github.com/thomaswue/1brc-steps/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v9.java#L281) and another one by performing the check on the name length on [the mask value](src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v9.java#L182). | 30 | | [v10](src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v10.java) | 349 | 1.45 | -15.4% | Remove the branch mispredictions by always processing [16 bytes at a time](src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v10.java#L201) and using [masking instead of branches](src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v10.java#L204) to process the city names. | 31 | 32 | GraalVM 33 | ==== 34 | 35 | As shown in the table above, just switching to the [GraalVM](https://github.com/oracle/graal) JDK distribution and using the Graal JIT compiler gives an 8% speedup for the reference implementation. Many participants therefore used GraalVM for their submissions. As the run times of submissions dropped below 10 seconds, native image with its instant startup characteristics also became important. For best performance, the following native image flags were used: 36 | 37 | ``` 38 | -O3 # Running with the highest optimization level enabled. 39 | -H:TuneInlinerExploration=1 # Maximum inlining exploration. 40 | -march=native # Produce machine code best for the target machine. 41 | --gc=epsilon # Disable garbage collection as it is not necessary.
42 | ``` 43 | 44 | Incremental Progress 45 | ==== 46 | 47 | As outlined in the following chart, the improvement of the first version in comparison to the reference implementation was very large, at more than 95%. This was primarily due to using parallelism (factor 8) and optimizing data structures (int instead of double values). Those two aspects are often the most important ways to speed up a task. 48 | 49 |

50 | Improvement Over Previous Version 51 |

52 | 53 | 54 | Branch Misses 55 | ==== 56 | 57 | While the incremental improvements between versions became smaller and smaller, the last version makes a big jump while not making any algorithmic or data structure changes. The improvement comes from a big change in instructions per cycle by eliminating branch misses. When looking at the number of instructions executed, the processor actually has to do *more* work, but because of the dramatically improved instruction level parallelism, it can do so in fewer cycles and therefore less time. The following chart illustrates this: 58 | 59 |

60 | Instructions/Cycle and Branch Misses 61 |

62 | 63 | [This GitHub comment on the PR](https://github.com/gunnarmorling/1brc/pull/702#issuecomment-1920922860) shows the detailed `perf stat` output that underlines this. The important parts are captured in the following simplified output from `perf stat`: 64 | 65 | ``` 66 | BEFORE: 67 | Performance counter stats for './target/CalculateAverage_thomaswue_image': 68 | 12,061.39 msec task-clock 69 | 25,250,300,213 cpu_core/cycles/ 70 | 42,740,848,935 cpu_core/instructions/ # 1.69 insn per cycle 71 | 3,938,912,502 cpu_core/branches/ # 326.572 M/sec 72 | 246,089,883 cpu_core/branch-misses/ # 6.25% of all branches 73 | TopdownL1 (cpu_core) # 24.9 % tma_bad_speculation 74 | 75 | AFTER: 76 | Performance counter stats for './target/CalculateAverage_thomaswue_image': 77 | 9,778.04 msec task-clock 78 | 21,654,496,541 cpu_core/cycles/ 79 | 49,328,285,361 cpu_core/instructions/ # 2.28 insn per cycle 80 | 3,690,652,954 cpu_core/branches/ # 377.443 M/sec 81 | 14,111,445 cpu_core/branch-misses/ # 0.38% of all branches 82 | TopdownL1 (cpu_core) # 2.2 % tma_bad_speculation 83 | ``` 84 | 85 | Instead of missing 6.25% of all branches, the new version misses only 0.38% of all branches. Therefore, a major bottleneck of instruction retirement is removed and `tma_bad_speculation` goes from 24.9% down to 2.2%. 86 | 87 | The algorithmic change here was to not distinguish the case of a city name <= 8 characters and a city name <= 16 characters via branching. Instead, the program always reads ahead 16 characters and then uses masking to make sure only the actual city name is used for the calculation of the hash code and comparison with the stored hash key. It does mean that for the case of the city name being <= 8 characters, the processor is now doing more work. However, on average one branch miss every two rows (assuming a random distribution of city names in the input set) is avoided.
88 | 89 | Using `perf stat` and looking at the `cpu_core/branch-misses` as well as the `tma_bad_speculation` data is a good way to understand whether the performance of your program is limited by branches. -------------------------------------------------------------------------------- /calculate_average_baseline.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # Copyright 2023 The original authors 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | JAVA_OPTS="" 19 | java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_baseline 20 | -------------------------------------------------------------------------------- /calculate_average_thomaswue.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # Copyright 2023 The original authors 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | if [ -f target/CalculateAverage_thomaswue_image ]; then 19 | echo "Picking up existing native image 'target/CalculateAverage_thomaswue_image', delete the file to select JVM mode." 1>&2 20 | target/CalculateAverage_thomaswue_image 21 | else 22 | JAVA_OPTS="--enable-preview" 23 | echo "Chosing to run the app in JVM mode as no native image was found, use prepare_thomaswue.sh to generate." 1>&2 24 | java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_thomaswue 25 | fi 26 | 27 | -------------------------------------------------------------------------------- /charts/incremental_improvement.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomaswue/1brc-steps/886def4efcd7c4bd76fef441aa16f5b1a260054f/charts/incremental_improvement.png -------------------------------------------------------------------------------- /charts/instructions_branch_misses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomaswue/1brc-steps/886def4efcd7c4bd76fef441aa16f5b1a260054f/charts/instructions_branch_misses.png -------------------------------------------------------------------------------- /charts/step_by_step_progress.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomaswue/1brc-steps/886def4efcd7c4bd76fef441aa16f5b1a260054f/charts/step_by_step_progress.png -------------------------------------------------------------------------------- /create_measurements.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # Copyright 2023 The original authors 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this 
file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | 19 | java --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CreateMeasurements $1 20 | -------------------------------------------------------------------------------- /measure_all_thomaswue.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2023 The original authors 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | for ((i=1;i<=10;i++)); 19 | do 20 | hyperfine "taskset -c 0-7 ./target/CalculateAverage_thomaswue_image_v$i" 21 | done 22 | -------------------------------------------------------------------------------- /perf_all_thomaswue.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2023 The original authors 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | for ((i=1;i<=10;i++)); 19 | do 20 | perf stat --cpu 0-7 taskset -c 0-7 ./target/CalculateAverage_thomaswue_image_v$i 21 | done 22 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 19 | 20 | 4.0.0 21 | 22 | dev.morling.demos 23 | average 24 | 1.0.0-SNAPSHOT 25 | 26 | 27 | true 28 | 21 29 | UTF-8 30 | UTF-8 31 | 32 | 33 | My OSS Project 34 | My Latest OSS Project 35 | 2021 36 | tbd. 37 | 38 | 39 | 40 | Apache-2.0 41 | http://www.apache.org/licenses/LICENSE-2.0.txt 42 | repo 43 | 44 | 45 | 46 | 47 | 48 | 49 | org.junit 50 | junit-bom 51 | 5.8.1 52 | pom 53 | import 54 | 55 | 56 | 57 | 58 | 59 | 60 | org.junit.jupiter 61 | junit-jupiter 62 | test 63 | 64 | 65 | org.assertj 66 | assertj-core 67 | 3.21.0 68 | test 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | com.mycila 78 | license-maven-plugin 79 | 4.1 80 | 81 | 82 | net.revelc.code 83 | impsort-maven-plugin 84 | 1.9.0 85 | 86 | java.,javax.,org.,com. 
87 | true 88 | true 89 | 90 | 91 | 92 | org.apache.maven.plugins 93 | maven-compiler-plugin 94 | 3.12.1 95 | 96 | true 97 | 98 | 103 | --enable-preview 104 | --add-modules 105 | java.base,jdk.incubator.vector 106 | --add-opens=java.base/java.lang=ALL-UNNAMED 107 | --add-opens=java.base/jdk.internal.util=ALL-UNNAMED 108 | 109 | 110 | 111 | 112 | org.apache.maven.plugins 113 | maven-clean-plugin 114 | 3.1.0 115 | 116 | 117 | org.apache.maven.plugins 118 | maven-deploy-plugin 119 | 3.1.1 120 | 121 | 122 | org.apache.maven.plugins 123 | maven-enforcer-plugin 124 | 3.3.0 125 | 126 | 127 | org.apache.maven.plugins 128 | maven-install-plugin 129 | 3.1.1 130 | 131 | 132 | org.apache.maven.plugins 133 | maven-jar-plugin 134 | 3.2.0 135 | 136 | 137 | org.apache.maven.plugins 138 | maven-resources-plugin 139 | 3.2.0 140 | 141 | 142 | org.apache.maven.plugins 143 | maven-site-plugin 144 | 3.12.1 145 | 146 | 147 | org.apache.maven.plugins 148 | maven-surefire-plugin 149 | 3.2.3 150 | 151 | 152 | org.apache.maven.plugins 153 | maven-wrapper-plugin 154 | 3.2.0 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | qa 163 | 164 | 165 | !quick 166 | 167 | 168 | 169 | 170 | 171 | com.mycila 172 | license-maven-plugin 173 | 174 |
etc/license.txt
175 | true 176 | true 177 | 178 | LICENSE.txt 179 | **/.dontdelete 180 | **/measurements*.txt 181 | **/measurements*.out 182 | out_expected.txt 183 | github_users.txt 184 | 185 | src/main/java/dev/morling/onebrc/CalculateAverage_cliffclick.java 186 | .sdkmanrc 187 | 188 |
189 | 190 | 191 | 192 | check 193 | 194 | 195 | 196 |
197 | 198 | org.apache.maven.plugins 199 | maven-enforcer-plugin 200 | 201 | 202 | enforce-plugin-versions 203 | 204 | enforce 205 | 206 | 207 | 208 | 209 | ${maven.compiler.release} 210 | 211 | 212 | true 213 | true 214 | true 215 | clean,deploy,site 216 | 217 | 218 | 219 | 220 | 221 | 222 |
223 |
224 |
225 | 226 | quick 227 | 228 | 229 | quick 230 | 231 | 232 | 233 | true 234 | 235 | 236 | 237 | jdk22 238 | 239 | 22 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | maven-compiler-plugin 248 | 249 | 22 250 | 251 | ${project.basedir}/src/main/java-22 252 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 |
265 |
# --------------------------------------------------------------------------------
# /prepare_all_thomaswue.sh:
# --------------------------------------------------------------------------------
#!/bin/bash
#
#  Copyright 2023 The original authors
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

# Builds one native image per implementation step (v1 .. v10) from the already packaged jar.

source "$HOME/.sdkman/bin/sdkman-init.sh"
sdk use java 21.0.2-graal 1>&2

# Performance tuning flags: optimization level 3 and maximum inlining exploration.
# NOTE(review): the comment here used to also promise compilation for the architecture of the
# build machine, but no such flag is passed - confirm whether it was dropped on purpose.
NATIVE_IMAGE_OPTS="-O3 -H:TuneInlinerExploration=1"

# Need to enable preview for accessing the raw address of the foreign memory access API.
# Initializing the Scanner to make sure the unsafe access object is known as a non-null compile time constant.
# NOTE(review): this names the Scanner of the un-versioned class, not of CalculateAverage_thomaswue_v$i - confirm that is intended.
NATIVE_IMAGE_OPTS="$NATIVE_IMAGE_OPTS --enable-preview --initialize-at-build-time=dev.morling.onebrc.CalculateAverage_thomaswue\$Scanner"

# There is no need for garbage collection and therefore also no safepoints required.
NATIVE_IMAGE_OPTS="$NATIVE_IMAGE_OPTS --gc=epsilon -H:-GenLoopSafepoints"

# Uncomment the following line for outputting the compiler graph to the IdealGraphVisualizer
# NATIVE_IMAGE_OPTS="$NATIVE_IMAGE_OPTS -H:MethodFilter=CalculateAverage_thomaswue.* -H:Dump=:2 -H:PrintGraph=Network"

for ((i=1;i<=10;i++));
do
    IMAGE="target/CalculateAverage_thomaswue_image_v$i"

    # ./mvnw clean verify removes target/ and will re-trigger native image creation.
    # The check is done per versioned image: the previous guard tested the un-versioned
    # image name, which this script never produces.
    if [ ! -f "$IMAGE" ]; then
        # $NATIVE_IMAGE_OPTS is intentionally unquoted so that it splits into separate arguments.
        native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o "$IMAGE" dev.morling.onebrc.CalculateAverage_thomaswue_v$i
    fi
done

# --------------------------------------------------------------------------------
# /prepare_baseline.sh:
# --------------------------------------------------------------------------------
#!/bin/bash
#
#  Copyright 2023 The original authors
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

# Uncomment below to use sdk
# source "$HOME/.sdkman/bin/sdkman-init.sh"
# sdk use java 21.0.1-graal 1>&2

# --------------------------------------------------------------------------------
# /prepare_thomaswue.sh:
# --------------------------------------------------------------------------------
#!/bin/bash
#
#  Copyright 2023 The original authors
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

# Builds the native image of the final implementation from the already packaged jar.

source "$HOME/.sdkman/bin/sdkman-init.sh"
sdk use java 21.0.2-graal 1>&2

# ./mvnw clean verify removes target/ and will re-trigger native image creation.
if [ ! -f target/CalculateAverage_thomaswue_image ]; then

    # Performance tuning flags: optimization level 3 and maximum inlining exploration.
    # NOTE(review): the comment here used to also promise compilation for the architecture of the
    # build machine, but no such flag is passed - confirm whether it was dropped on purpose.
    NATIVE_IMAGE_OPTS="-O3 -H:TuneInlinerExploration=1"

    # Need to enable preview for accessing the raw address of the foreign memory access API.
    # Initializing the Scanner to make sure the unsafe access object is known as a non-null compile time constant.
    NATIVE_IMAGE_OPTS="$NATIVE_IMAGE_OPTS --enable-preview --initialize-at-build-time=dev.morling.onebrc.CalculateAverage_thomaswue\$Scanner"

    # There is no need for garbage collection and therefore also no safepoints required.
    NATIVE_IMAGE_OPTS="$NATIVE_IMAGE_OPTS --gc=epsilon -H:-GenLoopSafepoints"

    # Uncomment the following line for outputting the compiler graph to the IdealGraphVisualizer
    # NATIVE_IMAGE_OPTS="$NATIVE_IMAGE_OPTS -H:MethodFilter=CalculateAverage_thomaswue.* -H:Dump=:2 -H:PrintGraph=Network"

    # $NATIVE_IMAGE_OPTS is intentionally unquoted so that it splits into separate arguments.
    native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_thomaswue_image dev.morling.onebrc.CalculateAverage_thomaswue
fi

# --------------------------------------------------------------------------------
# /sloc_all.thomaswue.sh:
# --------------------------------------------------------------------------------
#!/bin/bash
#
#  Copyright 2023 The original authors
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

# Runs cloc on the source file of every implementation step (v1 .. v10).
for ((i=1;i<=10;i++));
do
    cloc ./src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v$i.java
done
/**
 * Reference implementation: reads {@code ./measurements.txt} line by line, groups the
 * {@code station;value} records by station and prints min/mean/max per station, sorted by station name.
 */
public class CalculateAverage_baseline {

    private static final String FILE = "./measurements.txt";

    /** One parsed input line. */
    private static record Measurement(String station, double value) {
        /** Builds a measurement from the two parts of a line split at {@code ';'}. */
        private Measurement(String[] parts) {
            this(parts[0], Double.parseDouble(parts[1]));
        }
    }

    /** Final per-station values; {@link #toString()} renders them as {@code min/mean/max}. */
    private static record ResultRow(double min, double mean, double max) {

        public String toString() {
            return round(min) + "/" + round(mean) + "/" + round(max);
        }

        /** Rounds to one fractional digit. */
        private double round(double value) {
            return Math.round(value * 10.0) / 10.0;
        }
    };

    /** Mutable accumulator used by the collector while grouping. */
    private static class MeasurementAggregator {
        private double min = Double.POSITIVE_INFINITY;
        private double max = Double.NEGATIVE_INFINITY;
        private double sum;
        private long count;
    }

    /**
     * Aggregates the measurement file and prints the resulting map to standard output.
     *
     * @param args ignored
     * @throws IOException if the measurement file cannot be opened
     */
    public static void main(String[] args) throws IOException {
        // Map<String, Double> measurements1 = Files.lines(Paths.get(FILE))
        // .map(l -> l.split(";"))
        // .collect(groupingBy(m -> m[0], averagingDouble(m -> Double.parseDouble(m[1]))));
        //
        // measurements1 = new TreeMap<>(measurements1.entrySet()
        // .stream()
        // .collect(toMap(e -> e.getKey(), e -> Math.round(e.getValue() * 10.0) / 10.0)));
        // System.out.println(measurements1);

        // The explicit type arguments are required: with a raw Collector the lambda
        // parameters below would be typed as Object and the field accesses would not compile.
        Collector<Measurement, MeasurementAggregator, ResultRow> collector = Collector.of(
                MeasurementAggregator::new,
                (a, m) -> {
                    a.min = Math.min(a.min, m.value);
                    a.max = Math.max(a.max, m.value);
                    a.sum += m.value;
                    a.count++;
                },
                (agg1, agg2) -> {
                    var res = new MeasurementAggregator();
                    res.min = Math.min(agg1.min, agg2.min);
                    res.max = Math.max(agg1.max, agg2.max);
                    res.sum = agg1.sum + agg2.sum;
                    res.count = agg1.count + agg2.count;

                    return res;
                },
                agg -> {
                    return new ResultRow(agg.min, (Math.round(agg.sum * 10.0) / 10.0) / agg.count, agg.max);
                });

        Map<String, ResultRow> measurements;
        // Files.lines keeps the file open until the stream is closed, so close it explicitly.
        try (var lines = Files.lines(Paths.get(FILE))) {
            measurements = new TreeMap<>(lines
                    .map(l -> new Measurement(l.split(";")))
                    .collect(groupingBy(m -> m.station(), collector)));
        }

        System.out.println(measurements);
    }
}
15 | */ 16 | package dev.morling.onebrc; 17 | 18 | import java.io.IOException; 19 | import java.nio.channels.FileChannel; 20 | import java.util.*; 21 | import java.util.concurrent.atomic.AtomicLong; 22 | 23 | /** 24 | * The solution starts a child worker process for the actual work such that clean up of the memory mapping can occur 25 | * while the main process already returns with the result. The worker then memory maps the input file, creates a worker 26 | * thread per available core, and then processes segments of size {@link #SEGMENT_SIZE} at a time. The segments are 27 | * split into 3 parts and cursors for each of those parts are processing the segment simultaneously in the same thread. 28 | * Results are accumulated into {@link Result} objects and a tree map is used to sequentially accumulate the results in 29 | * the end. 30 | * Runs in 0.31 on an Intel i9-13900K while the reference implementation takes 120.37s. 31 | * Credit: 32 | * Quan Anh Mai for branchless number parsing code 33 | * Alfonso² Peterssen for suggesting memory mapping with unsafe and the subprocess idea 34 | * Artsiom Korzun for showing the benefits of work stealing at 2MB segments instead of equal split between workers 35 | * Jaromir Hamala for showing that avoiding the branch misprediction between <8 and 8-16 cases is a big win even if 36 | * more work is performed 37 | * Van Phu DO for demonstrating the lookup tables based on masks instead of bit shifting 38 | */ 39 | public class CalculateAverage_thomaswue { 40 | private static final String FILE = "./measurements.txt"; 41 | private static final int MIN_TEMP = -999; 42 | private static final int MAX_TEMP = 999; 43 | private static final int MAX_NAME_LENGTH = 100; 44 | private static final int MAX_CITIES = 10000; 45 | private static final int SEGMENT_SIZE = 1 << 21; 46 | private static final int HASH_TABLE_SIZE = 1 << 17; 47 | 48 | public static void main(String[] args) throws IOException, InterruptedException { 49 | // Start worker 
subprocess if this process is not the worker. 50 | if (args.length == 0 || !("--worker".equals(args[0]))) { 51 | spawnWorker(); 52 | return; 53 | } 54 | 55 | int numberOfWorkers = Runtime.getRuntime().availableProcessors(); 56 | try (var fileChannel = FileChannel.open(java.nio.file.Path.of(FILE), java.nio.file.StandardOpenOption.READ)) { 57 | long fileSize = fileChannel.size(); 58 | final long fileStart = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileSize, java.lang.foreign.Arena.global()).address(); 59 | final long fileEnd = fileStart + fileSize; 60 | final AtomicLong cursor = new AtomicLong(fileStart); 61 | 62 | // Parallel processing of segments. 63 | Thread[] threads = new Thread[numberOfWorkers]; 64 | List[] allResults = new List[numberOfWorkers]; 65 | for (int i = 0; i < threads.length; ++i) { 66 | final int index = i; 67 | threads[i] = new Thread(() -> { 68 | List results = new ArrayList<>(MAX_CITIES); 69 | parseLoop(cursor, fileEnd, fileStart, results); 70 | allResults[index] = results; 71 | }); 72 | threads[i].start(); 73 | } 74 | for (Thread thread : threads) { 75 | thread.join(); 76 | } 77 | 78 | // Final output. 
79 | System.out.println(accumulateResults(allResults)); 80 | System.out.close(); 81 | } 82 | } 83 | 84 | private static void spawnWorker() throws IOException { 85 | ProcessHandle.Info info = ProcessHandle.current().info(); 86 | ArrayList workerCommand = new ArrayList<>(); 87 | info.command().ifPresent(workerCommand::add); 88 | info.arguments().ifPresent(args -> workerCommand.addAll(Arrays.asList(args))); 89 | workerCommand.add("--worker"); 90 | new ProcessBuilder().command(workerCommand).inheritIO().redirectOutput(ProcessBuilder.Redirect.PIPE) 91 | .start().getInputStream().transferTo(System.out); 92 | } 93 | 94 | private static TreeMap accumulateResults(List[] allResults) { 95 | TreeMap result = new TreeMap<>(); 96 | for (List resultArr : allResults) { 97 | for (Result r : resultArr) { 98 | Result current = result.putIfAbsent(r.calcName(), r); 99 | if (current != null) { 100 | current.accumulate(r); 101 | } 102 | } 103 | } 104 | return result; 105 | } 106 | 107 | private static void parseLoop(AtomicLong counter, long fileEnd, long fileStart, List collectedResults) { 108 | Result[] results = new Result[HASH_TABLE_SIZE]; 109 | while (true) { 110 | long current = counter.addAndGet(SEGMENT_SIZE) - SEGMENT_SIZE; 111 | if (current >= fileEnd) { 112 | return; 113 | } 114 | 115 | long segmentEnd = nextNewLine(Math.min(fileEnd - 1, current + SEGMENT_SIZE)); 116 | long segmentStart; 117 | if (current == fileStart) { 118 | segmentStart = current; 119 | } 120 | else { 121 | segmentStart = nextNewLine(current) + 1; 122 | } 123 | 124 | long dist = (segmentEnd - segmentStart) / 3; 125 | long midPoint1 = nextNewLine(segmentStart + dist); 126 | long midPoint2 = nextNewLine(segmentStart + dist + dist); 127 | 128 | Scanner scanner1 = new Scanner(segmentStart, midPoint1); 129 | Scanner scanner2 = new Scanner(midPoint1 + 1, midPoint2); 130 | Scanner scanner3 = new Scanner(midPoint2 + 1, segmentEnd); 131 | while (true) { 132 | if (!scanner1.hasNext()) { 133 | break; 134 | } 135 | if 
(!scanner2.hasNext()) { 136 | break; 137 | } 138 | if (!scanner3.hasNext()) { 139 | break; 140 | } 141 | long word1 = scanner1.getLong(); 142 | long word2 = scanner2.getLong(); 143 | long word3 = scanner3.getLong(); 144 | long delimiterMask1 = findDelimiter(word1); 145 | long delimiterMask2 = findDelimiter(word2); 146 | long delimiterMask3 = findDelimiter(word3); 147 | long word1b = scanner1.getLongAt(scanner1.pos() + 8); 148 | long word2b = scanner2.getLongAt(scanner2.pos() + 8); 149 | long word3b = scanner3.getLongAt(scanner3.pos() + 8); 150 | long delimiterMask1b = findDelimiter(word1b); 151 | long delimiterMask2b = findDelimiter(word2b); 152 | long delimiterMask3b = findDelimiter(word3b); 153 | Result existingResult1 = findResult(word1, delimiterMask1, word1b, delimiterMask1b, scanner1, results, collectedResults); 154 | Result existingResult2 = findResult(word2, delimiterMask2, word2b, delimiterMask2b, scanner2, results, collectedResults); 155 | Result existingResult3 = findResult(word3, delimiterMask3, word3b, delimiterMask3b, scanner3, results, collectedResults); 156 | long number1 = scanNumber(scanner1); 157 | long number2 = scanNumber(scanner2); 158 | long number3 = scanNumber(scanner3); 159 | record(existingResult1, number1); 160 | record(existingResult2, number2); 161 | record(existingResult3, number3); 162 | } 163 | 164 | while (scanner1.hasNext()) { 165 | long word = scanner1.getLong(); 166 | long pos = findDelimiter(word); 167 | long wordB = scanner1.getLongAt(scanner1.pos() + 8); 168 | long posB = findDelimiter(wordB); 169 | record(findResult(word, pos, wordB, posB, scanner1, results, collectedResults), scanNumber(scanner1)); 170 | } 171 | while (scanner2.hasNext()) { 172 | long word = scanner2.getLong(); 173 | long pos = findDelimiter(word); 174 | long wordB = scanner2.getLongAt(scanner2.pos() + 8); 175 | long posB = findDelimiter(wordB); 176 | record(findResult(word, pos, wordB, posB, scanner2, results, collectedResults), scanNumber(scanner2)); 177 
| } 178 | while (scanner3.hasNext()) { 179 | long word = scanner3.getLong(); 180 | long pos = findDelimiter(word); 181 | long wordB = scanner3.getLongAt(scanner3.pos() + 8); 182 | long posB = findDelimiter(wordB); 183 | record(findResult(word, pos, wordB, posB, scanner3, results, collectedResults), scanNumber(scanner3)); 184 | } 185 | } 186 | } 187 | 188 | private static final long[] MASK1 = new long[]{ 0xFFL, 0xFFFFL, 0xFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFFFL, 0xFFFFFFFFFFFFL, 0xFFFFFFFFFFFFFFL, 0xFFFFFFFFFFFFFFFFL, 189 | 0xFFFFFFFFFFFFFFFFL }; 190 | private static final long[] MASK2 = new long[]{ 0x00L, 0x00L, 0x00L, 0x00L, 0x00L, 0x00L, 0x00L, 0x00L, 0xFFFFFFFFFFFFFFFFL }; 191 | 192 | private static Result findResult(long initialWord, long initialDelimiterMask, long wordB, long delimiterMaskB, Scanner scanner, Result[] results, 193 | List collectedResults) { 194 | Result existingResult; 195 | long word = initialWord; 196 | long delimiterMask = initialDelimiterMask; 197 | long hash; 198 | long nameAddress = scanner.pos(); 199 | long word2 = wordB; 200 | long delimiterMask2 = delimiterMaskB; 201 | if ((delimiterMask | delimiterMask2) != 0) { 202 | int letterCount1 = Long.numberOfTrailingZeros(delimiterMask) >>> 3; // value between 1 and 8 203 | int letterCount2 = Long.numberOfTrailingZeros(delimiterMask2) >>> 3; // value between 0 and 8 204 | long mask = MASK2[letterCount1]; 205 | word = word & MASK1[letterCount1]; 206 | word2 = mask & word2 & MASK1[letterCount2]; 207 | hash = word ^ word2; 208 | existingResult = results[hashToIndex(hash, results)]; 209 | scanner.add(letterCount1 + (letterCount2 & mask)); 210 | if (existingResult != null && existingResult.firstNameWord == word && existingResult.secondNameWord == word2) { 211 | return existingResult; 212 | } 213 | } 214 | else { 215 | // Slow-path for when the ';' could not be found in the first 16 bytes. 
216 | hash = word ^ word2; 217 | scanner.add(16); 218 | while (true) { 219 | word = scanner.getLong(); 220 | delimiterMask = findDelimiter(word); 221 | if (delimiterMask != 0) { 222 | int trailingZeros = Long.numberOfTrailingZeros(delimiterMask); 223 | word = (word << (63 - trailingZeros)); 224 | scanner.add(trailingZeros >>> 3); 225 | hash ^= word; 226 | break; 227 | } 228 | else { 229 | scanner.add(8); 230 | hash ^= word; 231 | } 232 | } 233 | } 234 | 235 | // Save length of name for later. 236 | int nameLength = (int) (scanner.pos() - nameAddress); 237 | 238 | // Final calculation for index into hash table. 239 | int tableIndex = hashToIndex(hash, results); 240 | outer: while (true) { 241 | existingResult = results[tableIndex]; 242 | if (existingResult == null) { 243 | existingResult = newEntry(results, nameAddress, tableIndex, nameLength, scanner, collectedResults); 244 | } 245 | // Check for collision. 246 | int i = 0; 247 | for (; i < nameLength + 1 - 8; i += 8) { 248 | if (scanner.getLongAt(existingResult.nameAddress + i) != scanner.getLongAt(nameAddress + i)) { 249 | // Collision error, try next. 250 | tableIndex = (tableIndex + 31) & (results.length - 1); 251 | continue outer; 252 | } 253 | } 254 | 255 | int remainingShift = (64 - ((nameLength + 1 - i) << 3)); 256 | if (((scanner.getLongAt(existingResult.nameAddress + i) ^ (scanner.getLongAt(nameAddress + i))) << remainingShift) == 0) { 257 | break; 258 | } 259 | else { 260 | // Collision error, try next. 
261 | tableIndex = (tableIndex + 31) & (results.length - 1); 262 | } 263 | } 264 | return existingResult; 265 | } 266 | 267 | private static long nextNewLine(long prev) { 268 | while (true) { 269 | long currentWord = Scanner.UNSAFE.getLong(prev); 270 | long input = currentWord ^ 0x0A0A0A0A0A0A0A0AL; 271 | long pos = (input - 0x0101010101010101L) & ~input & 0x8080808080808080L; 272 | if (pos != 0) { 273 | prev += Long.numberOfTrailingZeros(pos) >>> 3; 274 | break; 275 | } 276 | else { 277 | prev += 8; 278 | } 279 | } 280 | return prev; 281 | } 282 | 283 | private static long scanNumber(Scanner scanPtr) { 284 | long numberWord = scanPtr.getLongAt(scanPtr.pos() + 1); 285 | int decimalSepPos = Long.numberOfTrailingZeros(~numberWord & 0x10101000L); 286 | long number = convertIntoNumber(decimalSepPos, numberWord); 287 | scanPtr.add((decimalSepPos >>> 3) + 4); 288 | return number; 289 | } 290 | 291 | private static void record(Result existingResult, long number) { 292 | if (number < existingResult.min) { 293 | existingResult.min = (short) number; 294 | } 295 | if (number > existingResult.max) { 296 | existingResult.max = (short) number; 297 | } 298 | existingResult.sum += number; 299 | existingResult.count++; 300 | } 301 | 302 | private static int hashToIndex(long hash, Result[] results) { 303 | long hashAsInt = hash ^ (hash >>> 33) ^ (hash >>> 15); 304 | return (int) (hashAsInt & (results.length - 1)); 305 | } 306 | 307 | // Special method to convert a number in the ascii number into an int without branches created by Quan Anh Mai. 
308 | private static long convertIntoNumber(int decimalSepPos, long numberWord) { 309 | int shift = 28 - decimalSepPos; 310 | // signed is -1 if negative, 0 otherwise 311 | long signed = (~numberWord << 59) >> 63; 312 | long designMask = ~(signed & 0xFF); 313 | // Align the number to a specific position and transform the ascii to digit value 314 | long digits = ((numberWord & designMask) << shift) & 0x0F000F0F00L; 315 | // Now digits is in the form 0xUU00TTHH00 (UU: units digit, TT: tens digit, HH: hundreds digit) 316 | // 0xUU00TTHH00 * (100 * 0x1000000 + 10 * 0x10000 + 1) = 317 | // 0x000000UU00TTHH00 + 0x00UU00TTHH000000 * 10 + 0xUU00TTHH00000000 * 100 318 | long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF; 319 | return (absValue ^ signed) - signed; 320 | } 321 | 322 | private static long findDelimiter(long word) { 323 | long input = word ^ 0x3B3B3B3B3B3B3B3BL; 324 | return (input - 0x0101010101010101L) & ~input & 0x8080808080808080L; 325 | } 326 | 327 | private static Result newEntry(Result[] results, long nameAddress, int hash, int nameLength, Scanner scanner, List collectedResults) { 328 | Result r = new Result(); 329 | results[hash] = r; 330 | int totalLength = nameLength + 1; 331 | r.firstNameWord = scanner.getLongAt(nameAddress); 332 | r.secondNameWord = scanner.getLongAt(nameAddress + 8); 333 | if (totalLength <= 8) { 334 | r.firstNameWord = r.firstNameWord & MASK1[totalLength - 1]; 335 | r.secondNameWord = 0; 336 | } 337 | else if (totalLength < 16) { 338 | r.secondNameWord = r.secondNameWord & MASK1[totalLength - 9]; 339 | } 340 | r.nameAddress = nameAddress; 341 | collectedResults.add(r); 342 | return r; 343 | } 344 | 345 | private static final class Result { 346 | long firstNameWord, secondNameWord; 347 | short min, max; 348 | int count; 349 | long sum; 350 | long nameAddress; 351 | 352 | private Result() { 353 | this.min = MAX_TEMP; 354 | this.max = MIN_TEMP; 355 | } 356 | 357 | public String toString() { 358 | return round(((double) min) / 
10.0) + "/" + round((((double) sum) / 10.0) / count) + "/" + round(((double) max) / 10.0); 359 | } 360 | 361 | private static double round(double value) { 362 | return Math.round(value * 10.0) / 10.0; 363 | } 364 | 365 | private void accumulate(Result other) { 366 | if (other.min < min) { 367 | min = other.min; 368 | } 369 | if (other.max > max) { 370 | max = other.max; 371 | } 372 | sum += other.sum; 373 | count += other.count; 374 | } 375 | 376 | public String calcName() { 377 | Scanner scanner = new Scanner(nameAddress, nameAddress + MAX_NAME_LENGTH + 1); 378 | int nameLength = 0; 379 | while (scanner.getByteAt(nameAddress + nameLength) != ';') { 380 | nameLength++; 381 | } 382 | byte[] array = new byte[nameLength]; 383 | for (int i = 0; i < nameLength; ++i) { 384 | array[i] = scanner.getByteAt(nameAddress + i); 385 | } 386 | return new String(array, java.nio.charset.StandardCharsets.UTF_8); 387 | } 388 | } 389 | 390 | private static final class Scanner { 391 | private static final sun.misc.Unsafe UNSAFE = initUnsafe(); 392 | private long pos; 393 | private final long end; 394 | 395 | private static sun.misc.Unsafe initUnsafe() { 396 | try { 397 | java.lang.reflect.Field theUnsafe = sun.misc.Unsafe.class.getDeclaredField("theUnsafe"); 398 | theUnsafe.setAccessible(true); 399 | return (sun.misc.Unsafe) theUnsafe.get(sun.misc.Unsafe.class); 400 | } 401 | catch (NoSuchFieldException | IllegalAccessException e) { 402 | throw new RuntimeException(e); 403 | } 404 | } 405 | 406 | public Scanner(long start, long end) { 407 | this.pos = start; 408 | this.end = end; 409 | } 410 | 411 | boolean hasNext() { 412 | return pos < end; 413 | } 414 | 415 | long pos() { 416 | return pos; 417 | } 418 | 419 | void add(long delta) { 420 | pos += delta; 421 | } 422 | 423 | long getLong() { 424 | return UNSAFE.getLong(pos); 425 | } 426 | 427 | long getLongAt(long pos) { 428 | return UNSAFE.getLong(pos); 429 | } 430 | 431 | byte getByteAt(long pos) { 432 | return UNSAFE.getByte(pos); 
/**
 * First step of the implementation series: splits the file into one segment per core, memory maps each
 * segment as a {@link ByteBuffer}, parses it byte by byte into a per-segment open-addressing hash table and
 * merges the per-segment maps sequentially at the end.
 */
public class CalculateAverage_thomaswue_v1 {
    private static final String FILE = "./measurements.txt";
    private static final int MAX_CITY_NAME_LENGTH = 100;

    // Upper bound for the nominal segment size, so that the actual size (nominal size plus the rest of the
    // line that crosses the boundary) always fits into the int used by Segment and ByteBuffer.
    private static final long MAX_SEGMENT_SIZE = 1L << 30;

    // Segment in the file that will be processed in parallel.
    private record Segment(long start, int size) {
    };

    // Holding the current result for a single city.
    private static class Result {
        int max;
        int min;
        long sum;
        int count;
        byte[] name;

        public String toString() {
            return round(((double) min) / 10.0) + "/" + round((((double) sum) / 10.0) / count) + "/" + round(((double) max) / 10.0);
        }

        private double round(double value) {
            return Math.round(value * 10.0) / 10.0;
        }

        // Accumulate another result into this one.
        private void add(Result other) {
            min = Math.min(min, other.min);
            max = Math.max(max, other.max);
            sum += other.sum;
            count += other.count;
        }
    }

    /**
     * Processes {@code ./measurements.txt} and prints the per-city min/mean/max map, sorted by city name.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Calculate input segments.
        List<Segment> segments = getSegments();

        // Parallel processing of segments.
        List<HashMap<String, Result>> allResults = segments.stream().map(s -> {
            HashMap<String, Result> cities = new HashMap<>();
            byte[] name = new byte[MAX_CITY_NAME_LENGTH];
            Result[] results = new Result[1 << 18];
            try (FileChannel ch = (FileChannel) java.nio.file.Files.newByteChannel(Paths.get(FILE), StandardOpenOption.READ)) {
                ByteBuffer bf = ch.map(FileChannel.MapMode.READ_ONLY, s.start(), s.size());
                parseLoop(bf, name, results, cities);
            }
            catch (IOException e) {
                throw new RuntimeException(e);
            }
            return cities;
        }).parallel().toList();

        // Accumulate results sequentially.
        HashMap<String, Result> result = allResults.getFirst();
        for (int i = 1; i < allResults.size(); ++i) {
            for (Map.Entry<String, Result> r : allResults.get(i).entrySet()) {
                Result current = result.get(r.getKey());
                if (current != null) {
                    current.add(r.getValue());
                }
                else {
                    result.put(r.getKey(), r.getValue());
                }
            }
        }

        // Final output.
        System.out.println(new TreeMap<>(result));
    }

    /**
     * Parses all {@code name;value\n} records in {@code bf}.
     *
     * @param bf      mapped segment; expected to start at the beginning of a line
     * @param name    scratch buffer that receives the bytes of the current city name
     * @param results hash table of this segment; its length must be a power of two
     * @param cities  receives every newly created result, keyed by the decoded city name
     */
    private static void parseLoop(ByteBuffer bf, byte[] name, Result[] results, HashMap<String, Result> cities) {
        int pos = 0;
        byte b;
        while (pos < bf.limit()) {
            int hash = 0;
            int nameIndex = 0;
            while ((b = bf.get(pos++)) != ';') {
                hash += b;
                hash += hash << 10;
                hash ^= hash >> 6;
                name[nameIndex++] = b;
            }
            hash = hash & (results.length - 1);

            // The value is accumulated as an integer in tenths (one fractional digit is read).
            int number;
            byte sign = bf.get(pos++);
            boolean isMinus = false;
            if (sign == '-') {
                isMinus = true;
                number = bf.get(pos++) - '0';
            }
            else {
                number = sign - '0';
            }
            while ((b = bf.get(pos++)) != '.') {
                number = number * 10 + b - '0';
            }
            number = number * 10 + bf.get(pos++) - '0';
            if (isMinus) {
                number = -number;
            }

            while (true) {
                Result existingResult = results[hash];
                if (existingResult == null) {
                    Result r = new Result();
                    r.name = new byte[nameIndex];
                    r.max = number;
                    r.min = number;
                    r.count = 1;
                    r.sum = number;
                    System.arraycopy(name, 0, r.name, 0, nameIndex);
                    // Decode with an explicit charset instead of the platform default.
                    cities.put(new String(r.name, java.nio.charset.StandardCharsets.UTF_8), r);
                    results[hash] = r;
                    break;
                }
                else {
                    // The lengths have to match as well: comparing only the first nameIndex bytes would treat a
                    // name as equal to any longer stored name it is a prefix of, and would read past the end of
                    // a shorter stored name.
                    if (existingResult.name.length == nameIndex && Arrays.equals(existingResult.name, 0, nameIndex, name, 0, nameIndex)) {
                        existingResult.count++;
                        existingResult.max = Math.max(existingResult.max, number);
                        existingResult.min = Math.min(existingResult.min, number);
                        existingResult.sum += number;
                        break;
                    }
                    else {
                        // Collision error, try next.
                        hash = (hash + 1) & (results.length - 1);
                    }
                }
            }

            // Skip new line.
            pos++;
        }
    }

    /**
     * Splits the file into segments of roughly {@code fileSize / cores} bytes, each extended to the end of the
     * line that crosses its nominal boundary. The last segment may be empty.
     */
    private static List<Segment> getSegments() {
        try (RandomAccessFile file = new RandomAccessFile(FILE, "r")) {
            long totalSize = file.length();
            int cores = Runtime.getRuntime().availableProcessors();
            // Kept as long and capped: casting totalSize / cores to int overflows for large files on few cores.
            long segmentSize = Math.min(totalSize / cores, MAX_SEGMENT_SIZE);
            List<Segment> segments = new ArrayList<>();
            long filePos = 0;
            while (filePos < totalSize - segmentSize) {
                file.seek(filePos + segmentSize);
                // Advance to just behind the next newline; also stop at end of file so that input without a
                // trailing newline cannot spin forever on read() returning -1.
                int c;
                do {
                    c = file.read();
                } while (c != '\n' && c != -1);
                segments.add(new Segment(filePos, (int) (file.getFilePointer() - filePos)));
                filePos = file.getFilePointer();
            }
            segments.add(new Segment(filePos, (int) (totalSize - filePos)));
            return segments;
        }
        catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
15 | */ 16 | package dev.morling.onebrc; 17 | 18 | import java.io.IOException; 19 | import java.nio.channels.FileChannel; 20 | import java.util.*; 21 | import java.util.concurrent.atomic.AtomicLong; 22 | 23 | /** 24 | * The solution starts a child worker process for the actual work such that clean up of the memory mapping can occur 25 | * while the main process already returns with the result. The worker then memory maps the input file, creates a worker 26 | * thread per available core, and then processes segments of size {@link #SEGMENT_SIZE} at a time. The segments are 27 | * split into 3 parts and cursors for each of those parts are processing the segment simultaneously in the same thread. 28 | * Results are accumulated into {@link Result} objects and a tree map is used to sequentially accumulate the results in 29 | * the end. 30 | * Runs in 0.31 on an Intel i9-13900K while the reference implementation takes 120.37s. 31 | * Credit: 32 | * Quan Anh Mai for branchless number parsing code 33 | * Alfonso² Peterssen for suggesting memory mapping with unsafe and the subprocess idea 34 | * Artsiom Korzun for showing the benefits of work stealing at 2MB segments instead of equal split between workers 35 | * Jaromir Hamala for showing that avoiding the branch misprediction between <8 and 8-16 cases is a big win even if 36 | * more work is performed 37 | * Van Phu DO for demonstrating the lookup tables based on masks instead of bit shifting 38 | */ 39 | public class CalculateAverage_thomaswue_v10 { 40 | private static final String FILE = "./measurements.txt"; 41 | private static final int MIN_TEMP = -999; 42 | private static final int MAX_TEMP = 999; 43 | private static final int MAX_NAME_LENGTH = 100; 44 | private static final int MAX_CITIES = 10000; 45 | private static final int SEGMENT_SIZE = 1 << 21; 46 | private static final int HASH_TABLE_SIZE = 1 << 17; 47 | 48 | public static void main(String[] args) throws IOException, InterruptedException { 49 | // Start worker 
subprocess if this process is not the worker. 50 | if (args.length == 0 || !("--worker".equals(args[0]))) { 51 | spawnWorker(); 52 | return; 53 | } 54 | 55 | int numberOfWorkers = Runtime.getRuntime().availableProcessors(); 56 | try (var fileChannel = FileChannel.open(java.nio.file.Path.of(FILE), java.nio.file.StandardOpenOption.READ)) { 57 | long fileSize = fileChannel.size(); 58 | final long fileStart = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileSize, java.lang.foreign.Arena.global()).address(); 59 | final long fileEnd = fileStart + fileSize; 60 | final AtomicLong cursor = new AtomicLong(fileStart); 61 | 62 | // Parallel processing of segments. 63 | Thread[] threads = new Thread[numberOfWorkers]; 64 | List[] allResults = new List[numberOfWorkers]; 65 | for (int i = 0; i < threads.length; ++i) { 66 | final int index = i; 67 | threads[i] = new Thread(() -> { 68 | List results = new ArrayList<>(MAX_CITIES); 69 | parseLoop(cursor, fileEnd, fileStart, results); 70 | allResults[index] = results; 71 | }); 72 | threads[i].start(); 73 | } 74 | for (Thread thread : threads) { 75 | thread.join(); 76 | } 77 | 78 | // Final output. 
79 | System.out.println(accumulateResults(allResults)); 80 | System.out.close(); 81 | } 82 | } 83 | 84 | private static void spawnWorker() throws IOException { 85 | ProcessHandle.Info info = ProcessHandle.current().info(); 86 | ArrayList workerCommand = new ArrayList<>(); 87 | info.command().ifPresent(workerCommand::add); 88 | info.arguments().ifPresent(args -> workerCommand.addAll(Arrays.asList(args))); 89 | workerCommand.add("--worker"); 90 | new ProcessBuilder().command(workerCommand).inheritIO().redirectOutput(ProcessBuilder.Redirect.PIPE) 91 | .start().getInputStream().transferTo(System.out); 92 | } 93 | 94 | private static TreeMap accumulateResults(List[] allResults) { 95 | TreeMap result = new TreeMap<>(); 96 | for (List resultArr : allResults) { 97 | for (Result r : resultArr) { 98 | Result current = result.putIfAbsent(r.calcName(), r); 99 | if (current != null) { 100 | current.accumulate(r); 101 | } 102 | } 103 | } 104 | return result; 105 | } 106 | 107 | private static void parseLoop(AtomicLong counter, long fileEnd, long fileStart, List collectedResults) { 108 | Result[] results = new Result[HASH_TABLE_SIZE]; 109 | while (true) { 110 | long current = counter.addAndGet(SEGMENT_SIZE) - SEGMENT_SIZE; 111 | if (current >= fileEnd) { 112 | return; 113 | } 114 | 115 | long segmentEnd = nextNewLine(Math.min(fileEnd - 1, current + SEGMENT_SIZE)); 116 | long segmentStart; 117 | if (current == fileStart) { 118 | segmentStart = current; 119 | } 120 | else { 121 | segmentStart = nextNewLine(current) + 1; 122 | } 123 | 124 | long dist = (segmentEnd - segmentStart) / 3; 125 | long midPoint1 = nextNewLine(segmentStart + dist); 126 | long midPoint2 = nextNewLine(segmentStart + dist + dist); 127 | 128 | Scanner scanner1 = new Scanner(segmentStart, midPoint1); 129 | Scanner scanner2 = new Scanner(midPoint1 + 1, midPoint2); 130 | Scanner scanner3 = new Scanner(midPoint2 + 1, segmentEnd); 131 | while (true) { 132 | if (!scanner1.hasNext()) { 133 | break; 134 | } 135 | if 
(!scanner2.hasNext()) { 136 | break; 137 | } 138 | if (!scanner3.hasNext()) { 139 | break; 140 | } 141 | long word1 = scanner1.getLong(); 142 | long word2 = scanner2.getLong(); 143 | long word3 = scanner3.getLong(); 144 | long delimiterMask1 = findDelimiter(word1); 145 | long delimiterMask2 = findDelimiter(word2); 146 | long delimiterMask3 = findDelimiter(word3); 147 | long word1b = scanner1.getLongAt(scanner1.pos() + 8); 148 | long word2b = scanner2.getLongAt(scanner2.pos() + 8); 149 | long word3b = scanner3.getLongAt(scanner3.pos() + 8); 150 | long delimiterMask1b = findDelimiter(word1b); 151 | long delimiterMask2b = findDelimiter(word2b); 152 | long delimiterMask3b = findDelimiter(word3b); 153 | Result existingResult1 = findResult(word1, delimiterMask1, word1b, delimiterMask1b, scanner1, results, collectedResults); 154 | Result existingResult2 = findResult(word2, delimiterMask2, word2b, delimiterMask2b, scanner2, results, collectedResults); 155 | Result existingResult3 = findResult(word3, delimiterMask3, word3b, delimiterMask3b, scanner3, results, collectedResults); 156 | long number1 = scanNumber(scanner1); 157 | long number2 = scanNumber(scanner2); 158 | long number3 = scanNumber(scanner3); 159 | record(existingResult1, number1); 160 | record(existingResult2, number2); 161 | record(existingResult3, number3); 162 | } 163 | 164 | while (scanner1.hasNext()) { 165 | long word = scanner1.getLong(); 166 | long pos = findDelimiter(word); 167 | long wordB = scanner1.getLongAt(scanner1.pos() + 8); 168 | long posB = findDelimiter(wordB); 169 | record(findResult(word, pos, wordB, posB, scanner1, results, collectedResults), scanNumber(scanner1)); 170 | } 171 | while (scanner2.hasNext()) { 172 | long word = scanner2.getLong(); 173 | long pos = findDelimiter(word); 174 | long wordB = scanner2.getLongAt(scanner2.pos() + 8); 175 | long posB = findDelimiter(wordB); 176 | record(findResult(word, pos, wordB, posB, scanner2, results, collectedResults), scanNumber(scanner2)); 177 
| } 178 | while (scanner3.hasNext()) { 179 | long word = scanner3.getLong(); 180 | long pos = findDelimiter(word); 181 | long wordB = scanner3.getLongAt(scanner3.pos() + 8); 182 | long posB = findDelimiter(wordB); 183 | record(findResult(word, pos, wordB, posB, scanner3, results, collectedResults), scanNumber(scanner3)); 184 | } 185 | } 186 | } 187 | 188 | private static final long[] MASK1 = new long[]{ 0xFFL, 0xFFFFL, 0xFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFFFL, 0xFFFFFFFFFFFFL, 0xFFFFFFFFFFFFFFL, 0xFFFFFFFFFFFFFFFFL, 189 | 0xFFFFFFFFFFFFFFFFL }; 190 | private static final long[] MASK2 = new long[]{ 0x00L, 0x00L, 0x00L, 0x00L, 0x00L, 0x00L, 0x00L, 0x00L, 0xFFFFFFFFFFFFFFFFL }; 191 | 192 | private static Result findResult(long initialWord, long initialDelimiterMask, long wordB, long delimiterMaskB, Scanner scanner, Result[] results, 193 | List collectedResults) { 194 | Result existingResult; 195 | long word = initialWord; 196 | long delimiterMask = initialDelimiterMask; 197 | long hash; 198 | long nameAddress = scanner.pos(); 199 | long word2 = wordB; 200 | long delimiterMask2 = delimiterMaskB; 201 | if ((delimiterMask | delimiterMask2) != 0) { 202 | int letterCount1 = Long.numberOfTrailingZeros(delimiterMask) >>> 3; // value between 1 and 8 203 | int letterCount2 = Long.numberOfTrailingZeros(delimiterMask2) >>> 3; // value between 0 and 8 204 | long mask = MASK2[letterCount1]; 205 | word = word & MASK1[letterCount1]; 206 | word2 = mask & word2 & MASK1[letterCount2]; 207 | hash = word ^ word2; 208 | existingResult = results[hashToIndex(hash, results)]; 209 | scanner.add(letterCount1 + (letterCount2 & mask)); 210 | if (existingResult != null && existingResult.firstNameWord == word && existingResult.secondNameWord == word2) { 211 | return existingResult; 212 | } 213 | } 214 | else { 215 | // Slow-path for when the ';' could not be found in the first 16 bytes. 
216 | hash = word ^ word2; 217 | scanner.add(16); 218 | while (true) { 219 | word = scanner.getLong(); 220 | delimiterMask = findDelimiter(word); 221 | if (delimiterMask != 0) { 222 | int trailingZeros = Long.numberOfTrailingZeros(delimiterMask); 223 | word = (word << (63 - trailingZeros)); 224 | scanner.add(trailingZeros >>> 3); 225 | hash ^= word; 226 | break; 227 | } 228 | else { 229 | scanner.add(8); 230 | hash ^= word; 231 | } 232 | } 233 | } 234 | 235 | // Save length of name for later. 236 | int nameLength = (int) (scanner.pos() - nameAddress); 237 | 238 | // Final calculation for index into hash table. 239 | int tableIndex = hashToIndex(hash, results); 240 | outer: while (true) { 241 | existingResult = results[tableIndex]; 242 | if (existingResult == null) { 243 | existingResult = newEntry(results, nameAddress, tableIndex, nameLength, scanner, collectedResults); 244 | } 245 | // Check for collision. 246 | int i = 0; 247 | for (; i < nameLength + 1 - 8; i += 8) { 248 | if (scanner.getLongAt(existingResult.nameAddress + i) != scanner.getLongAt(nameAddress + i)) { 249 | // Collision error, try next. 250 | tableIndex = (tableIndex + 31) & (results.length - 1); 251 | continue outer; 252 | } 253 | } 254 | 255 | int remainingShift = (64 - ((nameLength + 1 - i) << 3)); 256 | if (((scanner.getLongAt(existingResult.nameAddress + i) ^ (scanner.getLongAt(nameAddress + i))) << remainingShift) == 0) { 257 | break; 258 | } 259 | else { 260 | // Collision error, try next. 
261 | tableIndex = (tableIndex + 31) & (results.length - 1); 262 | } 263 | } 264 | return existingResult; 265 | } 266 | 267 | private static long nextNewLine(long prev) { 268 | while (true) { 269 | long currentWord = Scanner.UNSAFE.getLong(prev); 270 | long input = currentWord ^ 0x0A0A0A0A0A0A0A0AL; 271 | long pos = (input - 0x0101010101010101L) & ~input & 0x8080808080808080L; 272 | if (pos != 0) { 273 | prev += Long.numberOfTrailingZeros(pos) >>> 3; 274 | break; 275 | } 276 | else { 277 | prev += 8; 278 | } 279 | } 280 | return prev; 281 | } 282 | 283 | private static long scanNumber(Scanner scanPtr) { 284 | long numberWord = scanPtr.getLongAt(scanPtr.pos() + 1); 285 | int decimalSepPos = Long.numberOfTrailingZeros(~numberWord & 0x10101000L); 286 | long number = convertIntoNumber(decimalSepPos, numberWord); 287 | scanPtr.add((decimalSepPos >>> 3) + 4); 288 | return number; 289 | } 290 | 291 | private static void record(Result existingResult, long number) { 292 | if (number < existingResult.min) { 293 | existingResult.min = (short) number; 294 | } 295 | if (number > existingResult.max) { 296 | existingResult.max = (short) number; 297 | } 298 | existingResult.sum += number; 299 | existingResult.count++; 300 | } 301 | 302 | private static int hashToIndex(long hash, Result[] results) { 303 | long hashAsInt = hash ^ (hash >>> 33) ^ (hash >>> 15); 304 | return (int) (hashAsInt & (results.length - 1)); 305 | } 306 | 307 | // Special method to convert a number in the ascii number into an int without branches created by Quan Anh Mai. 
308 | private static long convertIntoNumber(int decimalSepPos, long numberWord) { 309 | int shift = 28 - decimalSepPos; 310 | // signed is -1 if negative, 0 otherwise 311 | long signed = (~numberWord << 59) >> 63; 312 | long designMask = ~(signed & 0xFF); 313 | // Align the number to a specific position and transform the ascii to digit value 314 | long digits = ((numberWord & designMask) << shift) & 0x0F000F0F00L; 315 | // Now digits is in the form 0xUU00TTHH00 (UU: units digit, TT: tens digit, HH: hundreds digit) 316 | // 0xUU00TTHH00 * (100 * 0x1000000 + 10 * 0x10000 + 1) = 317 | // 0x000000UU00TTHH00 + 0x00UU00TTHH000000 * 10 + 0xUU00TTHH00000000 * 100 318 | long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF; 319 | return (absValue ^ signed) - signed; 320 | } 321 | 322 | private static long findDelimiter(long word) { 323 | long input = word ^ 0x3B3B3B3B3B3B3B3BL; 324 | return (input - 0x0101010101010101L) & ~input & 0x8080808080808080L; 325 | } 326 | 327 | private static Result newEntry(Result[] results, long nameAddress, int hash, int nameLength, Scanner scanner, List collectedResults) { 328 | Result r = new Result(); 329 | results[hash] = r; 330 | int totalLength = nameLength + 1; 331 | r.firstNameWord = scanner.getLongAt(nameAddress); 332 | r.secondNameWord = scanner.getLongAt(nameAddress + 8); 333 | if (totalLength <= 8) { 334 | r.firstNameWord = r.firstNameWord & MASK1[totalLength - 1]; 335 | r.secondNameWord = 0; 336 | } 337 | else if (totalLength < 16) { 338 | r.secondNameWord = r.secondNameWord & MASK1[totalLength - 9]; 339 | } 340 | r.nameAddress = nameAddress; 341 | collectedResults.add(r); 342 | return r; 343 | } 344 | 345 | private static final class Result { 346 | long firstNameWord, secondNameWord; 347 | short min, max; 348 | int count; 349 | long sum; 350 | long nameAddress; 351 | 352 | private Result() { 353 | this.min = MAX_TEMP; 354 | this.max = MIN_TEMP; 355 | } 356 | 357 | public String toString() { 358 | return round(((double) min) / 
10.0) + "/" + round((((double) sum) / 10.0) / count) + "/" + round(((double) max) / 10.0); 359 | } 360 | 361 | private static double round(double value) { 362 | return Math.round(value * 10.0) / 10.0; 363 | } 364 | 365 | private void accumulate(Result other) { 366 | if (other.min < min) { 367 | min = other.min; 368 | } 369 | if (other.max > max) { 370 | max = other.max; 371 | } 372 | sum += other.sum; 373 | count += other.count; 374 | } 375 | 376 | public String calcName() { 377 | Scanner scanner = new Scanner(nameAddress, nameAddress + MAX_NAME_LENGTH + 1); 378 | int nameLength = 0; 379 | while (scanner.getByteAt(nameAddress + nameLength) != ';') { 380 | nameLength++; 381 | } 382 | byte[] array = new byte[nameLength]; 383 | for (int i = 0; i < nameLength; ++i) { 384 | array[i] = scanner.getByteAt(nameAddress + i); 385 | } 386 | return new String(array, java.nio.charset.StandardCharsets.UTF_8); 387 | } 388 | } 389 | 390 | private static final class Scanner { 391 | private static final sun.misc.Unsafe UNSAFE = initUnsafe(); 392 | private long pos; 393 | private final long end; 394 | 395 | private static sun.misc.Unsafe initUnsafe() { 396 | try { 397 | java.lang.reflect.Field theUnsafe = sun.misc.Unsafe.class.getDeclaredField("theUnsafe"); 398 | theUnsafe.setAccessible(true); 399 | return (sun.misc.Unsafe) theUnsafe.get(sun.misc.Unsafe.class); 400 | } 401 | catch (NoSuchFieldException | IllegalAccessException e) { 402 | throw new RuntimeException(e); 403 | } 404 | } 405 | 406 | public Scanner(long start, long end) { 407 | this.pos = start; 408 | this.end = end; 409 | } 410 | 411 | boolean hasNext() { 412 | return pos < end; 413 | } 414 | 415 | long pos() { 416 | return pos; 417 | } 418 | 419 | void add(long delta) { 420 | pos += delta; 421 | } 422 | 423 | long getLong() { 424 | return UNSAFE.getLong(pos); 425 | } 426 | 427 | long getLongAt(long pos) { 428 | return UNSAFE.getLong(pos); 429 | } 430 | 431 | byte getByteAt(long pos) { 432 | return UNSAFE.getByte(pos); 
433 | } 434 | } 435 | } 436 | -------------------------------------------------------------------------------- /src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v2.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 The original authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package dev.morling.onebrc; 17 | 18 | import java.io.IOException; 19 | import java.lang.foreign.Arena; 20 | import java.lang.reflect.Field; 21 | import java.nio.channels.FileChannel; 22 | import java.nio.channels.FileChannel.MapMode; 23 | import java.nio.charset.StandardCharsets; 24 | import java.nio.file.Path; 25 | import java.nio.file.StandardOpenOption; 26 | import java.util.HashMap; 27 | import java.util.List; 28 | import java.util.Map; 29 | import java.util.TreeMap; 30 | import java.util.stream.IntStream; 31 | 32 | import sun.misc.Unsafe; 33 | 34 | public class CalculateAverage_thomaswue_v2 { 35 | private static final String FILE = "./measurements.txt"; 36 | 37 | // Holding the current result for a single city. 
38 | private static class Result { 39 | int min; 40 | int max; 41 | long sum; 42 | int count; 43 | final long nameAddress; 44 | final int nameLength; 45 | 46 | private Result(long nameAddress, int nameLength, int value) { 47 | this.nameAddress = nameAddress; 48 | this.nameLength = nameLength; 49 | this.min = value; 50 | this.max = value; 51 | this.sum = value; 52 | this.count = 1; 53 | } 54 | 55 | public String toString() { 56 | return round(((double) min) / 10.0) + "/" + round((((double) sum) / 10.0) / count) + "/" + round(((double) max) / 10.0); 57 | } 58 | 59 | private static double round(double value) { 60 | return Math.round(value * 10.0) / 10.0; 61 | } 62 | 63 | // Accumulate another result into this one. 64 | private void add(Result other) { 65 | min = Math.min(min, other.min); 66 | max = Math.max(max, other.max); 67 | sum += other.sum; 68 | count += other.count; 69 | } 70 | } 71 | 72 | public static void main(String[] args) throws IOException { 73 | // Calculate input segments. 74 | int numberOfChunks = Runtime.getRuntime().availableProcessors(); 75 | long[] chunks = getSegments(numberOfChunks); 76 | 77 | // Parallel processing of segments. 78 | List> allResults = IntStream.range(0, chunks.length - 1).mapToObj(chunkIndex -> { 79 | HashMap cities = HashMap.newHashMap(1 << 10); 80 | Result[] results = new Result[1 << 14]; 81 | parseLoop(chunks[chunkIndex], chunks[chunkIndex + 1], results, cities); 82 | return cities; 83 | }).parallel().toList(); 84 | 85 | // Accumulate results sequentially. 86 | HashMap result = allResults.getFirst(); 87 | for (int i = 1; i < allResults.size(); ++i) { 88 | for (Map.Entry entry : allResults.get(i).entrySet()) { 89 | Result current = result.get(entry.getKey()); 90 | if (current != null) { 91 | current.add(entry.getValue()); 92 | } 93 | else { 94 | result.put(entry.getKey(), entry.getValue()); 95 | } 96 | } 97 | } 98 | 99 | // Final output. 
100 | System.out.println(new TreeMap<>(result)); 101 | } 102 | 103 | private static final Unsafe UNSAFE = initUnsafe(); 104 | 105 | private static Unsafe initUnsafe() { 106 | try { 107 | Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe"); 108 | theUnsafe.setAccessible(true); 109 | return (Unsafe) theUnsafe.get(Unsafe.class); 110 | } 111 | catch (NoSuchFieldException | IllegalAccessException e) { 112 | throw new RuntimeException(e); 113 | } 114 | } 115 | 116 | static boolean unsafeEquals(long aStart, long aLength, long bStart, long bLength) { 117 | if (aLength != bLength) { 118 | return false; 119 | } 120 | for (int i = 0; i < aLength; ++i) { 121 | if (UNSAFE.getByte(aStart + i) != UNSAFE.getByte(bStart + i)) { 122 | return false; 123 | } 124 | } 125 | return true; 126 | } 127 | 128 | private static void parseLoop(long chunkStart, long chunkEnd, Result[] results, HashMap cities) { 129 | long scanPtr = chunkStart; 130 | byte b; 131 | while (scanPtr < chunkEnd) { 132 | long nameAddress = scanPtr; 133 | 134 | int hash = UNSAFE.getByte(scanPtr++); 135 | while ((b = UNSAFE.getByte(scanPtr++)) != ';') { 136 | hash += b; 137 | hash += hash << 10; 138 | hash ^= hash >> 6; 139 | } 140 | 141 | int nameLength = (int) (scanPtr - 1 - nameAddress); 142 | hash = hash & (results.length - 1); 143 | 144 | int number; 145 | byte sign = UNSAFE.getByte(scanPtr++); 146 | if (sign == '-') { 147 | number = UNSAFE.getByte(scanPtr++) - '0'; 148 | if ((b = UNSAFE.getByte(scanPtr++)) != '.') { 149 | number = number * 10 + (b - '0'); 150 | scanPtr++; 151 | } 152 | number = number * 10 + (UNSAFE.getByte(scanPtr++) - '0'); 153 | number = -number; 154 | } 155 | else { 156 | number = sign - '0'; 157 | if ((b = UNSAFE.getByte(scanPtr++)) != '.') { 158 | number = number * 10 + (b - '0'); 159 | scanPtr++; 160 | } 161 | number = number * 10 + (UNSAFE.getByte(scanPtr++) - '0'); 162 | } 163 | 164 | while (true) { 165 | Result existingResult = results[hash]; 166 | if (existingResult == null) { 
167 | Result r = new Result(nameAddress, nameLength, number); 168 | results[hash] = r; 169 | byte[] bytes = new byte[nameLength]; 170 | UNSAFE.copyMemory(null, nameAddress, bytes, Unsafe.ARRAY_BYTE_BASE_OFFSET, nameLength); 171 | cities.put(new String(bytes, StandardCharsets.UTF_8), r); 172 | break; 173 | } 174 | else if (unsafeEquals(existingResult.nameAddress, existingResult.nameLength, nameAddress, nameLength)) { 175 | existingResult.min = Math.min(existingResult.min, number); 176 | existingResult.max = Math.max(existingResult.max, number); 177 | existingResult.sum += number; 178 | existingResult.count++; 179 | break; 180 | } 181 | else { 182 | // Collision error, try next. 183 | hash = (hash + 1) & (results.length - 1); 184 | } 185 | } 186 | 187 | // Skip new line. 188 | scanPtr++; 189 | } 190 | } 191 | 192 | private static long[] getSegments(int numberOfChunks) throws IOException { 193 | try (var fileChannel = FileChannel.open(Path.of(FILE), StandardOpenOption.READ)) { 194 | long fileSize = fileChannel.size(); 195 | long segmentSize = (fileSize + numberOfChunks - 1) / numberOfChunks; 196 | long[] chunks = new long[numberOfChunks + 1]; 197 | long mappedAddress = fileChannel.map(MapMode.READ_ONLY, 0, fileSize, Arena.global()).address(); 198 | chunks[0] = mappedAddress; 199 | long endAddress = mappedAddress + fileSize; 200 | for (int i = 1; i < numberOfChunks; ++i) { 201 | long chunkAddress = mappedAddress + i * segmentSize; 202 | // Align to first row start. 
203 | while (chunkAddress < endAddress && UNSAFE.getByte(chunkAddress++) != '\n') { 204 | // nop 205 | } 206 | chunks[i] = Math.min(chunkAddress, endAddress); 207 | } 208 | chunks[numberOfChunks] = endAddress; 209 | return chunks; 210 | } 211 | } 212 | } 213 | -------------------------------------------------------------------------------- /src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v3.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 The original authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package dev.morling.onebrc; 17 | 18 | import java.io.IOException; 19 | import java.lang.foreign.Arena; 20 | import java.lang.reflect.Field; 21 | import java.nio.channels.FileChannel; 22 | import java.nio.channels.FileChannel.MapMode; 23 | import java.nio.charset.StandardCharsets; 24 | import java.nio.file.Path; 25 | import java.nio.file.StandardOpenOption; 26 | import java.util.HashMap; 27 | import java.util.List; 28 | import java.util.Map; 29 | import java.util.TreeMap; 30 | import java.util.stream.IntStream; 31 | 32 | import sun.misc.Unsafe; 33 | 34 | /** 35 | * Simple solution that memory maps the input file, then splits it into one segment per available core and uses 36 | * sun.misc.Unsafe to directly access the mapped memory. 
37 | * 38 | * Runs in 0.92s on my Intel i9-13900K 39 | * Perf stats: 40 | * 65,004,666,383 cpu_core/cycles/ 41 | * 71,141,249,972 cpu_atom/cycles/ 42 | */ 43 | public class CalculateAverage_thomaswue_v3 { 44 | private static final String FILE = "./measurements.txt"; 45 | 46 | // Holding the current result for a single city. 47 | private static class Result { 48 | short min; 49 | short max; 50 | long sum; 51 | int count; 52 | final long nameAddress; 53 | 54 | private Result(long nameAddress, int value) { 55 | this.nameAddress = nameAddress; 56 | this.min = (short) value; 57 | this.max = (short) value; 58 | this.sum = value; 59 | this.count = 1; 60 | } 61 | 62 | public String toString() { 63 | return round(((double) min) / 10.0) + "/" + round((((double) sum) / 10.0) / count) + "/" + round(((double) max) / 10.0); 64 | } 65 | 66 | private static double round(double value) { 67 | return Math.round(value * 10.0) / 10.0; 68 | } 69 | 70 | // Accumulate another result into this one. 71 | private void add(Result other) { 72 | min = (short) Math.min(min, other.min); 73 | max = (short) Math.max(max, other.max); 74 | sum += other.sum; 75 | count += other.count; 76 | } 77 | } 78 | 79 | public static void main(String[] args) throws IOException { 80 | // Calculate input segments. 81 | int numberOfChunks = Runtime.getRuntime().availableProcessors(); 82 | long[] chunks = getSegments(numberOfChunks); 83 | 84 | // Parallel processing of segments. 85 | List> allResults = IntStream.range(0, chunks.length - 1).mapToObj(chunkIndex -> { 86 | HashMap cities = HashMap.newHashMap(1 << 10); 87 | Result[] results = new Result[1 << 18]; 88 | parseLoop(chunks[chunkIndex], chunks[chunkIndex + 1], results, cities); 89 | return cities; 90 | }).parallel().toList(); 91 | 92 | // Accumulate results sequentially. 
93 | HashMap result = allResults.getFirst(); 94 | for (int i = 1; i < allResults.size(); ++i) { 95 | for (Map.Entry entry : allResults.get(i).entrySet()) { 96 | Result current = result.putIfAbsent(entry.getKey(), entry.getValue()); 97 | if (current != null) { 98 | current.add(entry.getValue()); 99 | } 100 | } 101 | } 102 | 103 | // Final output. 104 | System.out.println(new TreeMap<>(result)); 105 | } 106 | 107 | private static final Unsafe UNSAFE = initUnsafe(); 108 | 109 | private static Unsafe initUnsafe() { 110 | try { 111 | Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe"); 112 | theUnsafe.setAccessible(true); 113 | return (Unsafe) theUnsafe.get(Unsafe.class); 114 | } 115 | catch (NoSuchFieldException | IllegalAccessException e) { 116 | throw new RuntimeException(e); 117 | } 118 | } 119 | 120 | private static void parseLoop(long chunkStart, long chunkEnd, Result[] results, HashMap cities) { 121 | long scanPtr = chunkStart; 122 | byte b; 123 | while (scanPtr < chunkEnd) { 124 | long nameAddress = scanPtr; 125 | int hash = 0; 126 | 127 | // Skip first letter. 128 | scanPtr++; 129 | 130 | // Scan for ';' delimiter, always 4 bytes at a time. 131 | while (true) { 132 | int nextVal = UNSAFE.getInt(scanPtr); 133 | if ((nextVal & 0x3B) == 0x3B) { 134 | scanPtr++; 135 | break; 136 | } 137 | else if ((nextVal & 0x3B00) == 0x3B00) { 138 | scanPtr += 2; 139 | hash = hash ^ (nextVal & 0xFF); 140 | break; 141 | } 142 | else if ((nextVal & 0x3B0000) == 0x3B0000) { 143 | scanPtr += 3; 144 | hash = hash ^ (nextVal & 0xFFFF); 145 | break; 146 | } 147 | else if (((nextVal & 0x3B000000) == 0x3B000000)) { 148 | scanPtr += 4; 149 | hash = hash ^ (nextVal & 0xFFFFFF); 150 | break; 151 | } 152 | scanPtr += 4; 153 | hash = hash ^ nextVal; 154 | } 155 | 156 | // Save length of name for later. 157 | int nameLength = (int) (scanPtr - nameAddress - 1); 158 | 159 | // Parse number. 
160 | int number; 161 | byte sign = UNSAFE.getByte(scanPtr++); 162 | if (sign == '-') { 163 | number = UNSAFE.getByte(scanPtr++) - '0'; 164 | if ((b = UNSAFE.getByte(scanPtr++)) != '.') { 165 | number = number * 10 + (b - '0'); 166 | scanPtr++; 167 | } 168 | number = number * 10 + (UNSAFE.getByte(scanPtr++) - '0'); 169 | number = -number; 170 | } 171 | else { 172 | number = sign - '0'; 173 | if ((b = UNSAFE.getByte(scanPtr++)) != '.') { 174 | number = number * 10 + (b - '0'); 175 | scanPtr++; 176 | } 177 | number = number * 10 + (UNSAFE.getByte(scanPtr++) - '0'); 178 | } 179 | 180 | // Final calculation for index into hash table. 181 | int tableIndex = (((hash ^ (hash >>> 18)) & (results.length - 1))); 182 | while (true) { 183 | Result existingResult = results[tableIndex]; 184 | if (existingResult == null) { 185 | newEntry(results, cities, nameAddress, number, tableIndex, nameLength); 186 | break; 187 | } 188 | else { 189 | // Check for collision. 190 | boolean result = true; 191 | int i = 0; 192 | if ((long) nameLength >= 8) { 193 | if (UNSAFE.getLong(existingResult.nameAddress) != UNSAFE.getLong(nameAddress)) { 194 | result = false; 195 | } 196 | else { 197 | i += 8; 198 | } 199 | } 200 | else if ((long) nameLength >= 4) { 201 | if (UNSAFE.getInt(existingResult.nameAddress) != UNSAFE.getInt(nameAddress)) { 202 | result = false; 203 | } 204 | else { 205 | i += 4; 206 | } 207 | } 208 | if (result) { 209 | for (; i < (long) nameLength; ++i) { 210 | if (UNSAFE.getByte(existingResult.nameAddress + i) != UNSAFE.getByte(nameAddress + i)) { 211 | result = false; 212 | break; 213 | } 214 | } 215 | } 216 | if (result) { 217 | existingResult.min = (short) Math.min(existingResult.min, number); 218 | existingResult.max = (short) Math.max(existingResult.max, number); 219 | existingResult.sum += number; 220 | existingResult.count++; 221 | break; 222 | } 223 | else { 224 | // Collision error, try next. 
225 | tableIndex = (tableIndex + 1) & (results.length - 1); 226 | } 227 | } 228 | } 229 | 230 | // Skip new line. 231 | scanPtr++; 232 | } 233 | } 234 | 235 | private static void newEntry(Result[] results, HashMap cities, long nameAddress, int number, int hash, int nameLength) { 236 | Result r = new Result(nameAddress, number); 237 | results[hash] = r; 238 | byte[] bytes = new byte[nameLength]; 239 | UNSAFE.copyMemory(null, nameAddress, bytes, Unsafe.ARRAY_BYTE_BASE_OFFSET, nameLength); 240 | cities.put(new String(bytes, StandardCharsets.UTF_8), r); 241 | } 242 | 243 | private static long[] getSegments(int numberOfChunks) throws IOException { 244 | try (var fileChannel = FileChannel.open(Path.of(FILE), StandardOpenOption.READ)) { 245 | long fileSize = fileChannel.size(); 246 | long segmentSize = (fileSize + numberOfChunks - 1) / numberOfChunks; 247 | long[] chunks = new long[numberOfChunks + 1]; 248 | long mappedAddress = fileChannel.map(MapMode.READ_ONLY, 0, fileSize, Arena.global()).address(); 249 | chunks[0] = mappedAddress; 250 | long endAddress = mappedAddress + fileSize; 251 | for (int i = 1; i < numberOfChunks; ++i) { 252 | long chunkAddress = mappedAddress + i * segmentSize; 253 | // Align to first row start. 254 | while (chunkAddress < endAddress && UNSAFE.getByte(chunkAddress++) != '\n') { 255 | // nop 256 | } 257 | chunks[i] = Math.min(chunkAddress, endAddress); 258 | } 259 | chunks[numberOfChunks] = endAddress; 260 | return chunks; 261 | } 262 | } 263 | } 264 | -------------------------------------------------------------------------------- /src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v4.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 The original authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package dev.morling.onebrc; 17 | 18 | import java.io.IOException; 19 | import java.lang.foreign.Arena; 20 | import java.lang.reflect.Field; 21 | import java.nio.channels.FileChannel; 22 | import java.nio.channels.FileChannel.MapMode; 23 | import java.nio.charset.StandardCharsets; 24 | import java.nio.file.Path; 25 | import java.nio.file.StandardOpenOption; 26 | import java.util.HashMap; 27 | import java.util.List; 28 | import java.util.Map; 29 | import java.util.TreeMap; 30 | import java.util.stream.IntStream; 31 | 32 | import sun.misc.Unsafe; 33 | 34 | /** 35 | * Simple solution that memory maps the input file, then splits it into one segment per available core and uses 36 | * sun.misc.Unsafe to directly access the mapped memory. Uses a long at a time when checking for collision. 37 | *

38 | * Runs in 0.70s on my Intel i9-13900K 39 | * Perf stats: 40 | * 40,622,862,783 cpu_core/cycles/ 41 | * 48,241,929,925 cpu_atom/cycles/ 42 | */ 43 | public class CalculateAverage_thomaswue_v4 { 44 | private static final String FILE = "./measurements.txt"; 45 | 46 | // Holding the current result for a single city. 47 | private static class Result { 48 | final long nameAddress; 49 | long lastNameLong; 50 | int remainingShift; 51 | int min; 52 | int max; 53 | long sum; 54 | int count; 55 | 56 | private Result(long nameAddress, int value) { 57 | this.nameAddress = nameAddress; 58 | this.min = value; 59 | this.max = value; 60 | this.sum = value; 61 | this.count = 1; 62 | } 63 | 64 | public String toString() { 65 | return round(((double) min) / 10.0) + "/" + round((((double) sum) / 10.0) / count) + "/" + round(((double) max) / 10.0); 66 | } 67 | 68 | private static double round(double value) { 69 | return Math.round(value * 10.0) / 10.0; 70 | } 71 | 72 | // Accumulate another result into this one. 73 | private void add(Result other) { 74 | min = Math.min(min, other.min); 75 | max = Math.max(max, other.max); 76 | sum += other.sum; 77 | count += other.count; 78 | } 79 | } 80 | 81 | public static void main(String[] args) throws IOException { 82 | // Calculate input segments. 83 | int numberOfChunks = Runtime.getRuntime().availableProcessors(); 84 | long[] chunks = getSegments(numberOfChunks); 85 | 86 | // Parallel processing of segments. 87 | List> allResults = IntStream.range(0, chunks.length - 1).mapToObj(chunkIndex -> { 88 | HashMap cities = HashMap.newHashMap(1 << 10); 89 | parseLoop(chunks[chunkIndex], chunks[chunkIndex + 1], cities); 90 | return cities; 91 | }).parallel().toList(); 92 | 93 | // Accumulate results sequentially. 
94 | HashMap result = allResults.getFirst(); 95 | for (int i = 1; i < allResults.size(); ++i) { 96 | for (Map.Entry entry : allResults.get(i).entrySet()) { 97 | Result current = result.putIfAbsent(entry.getKey(), entry.getValue()); 98 | if (current != null) { 99 | current.add(entry.getValue()); 100 | } 101 | } 102 | } 103 | 104 | // Final output. 105 | System.out.println(new TreeMap<>(result)); 106 | } 107 | 108 | private static final Unsafe UNSAFE = initUnsafe(); 109 | 110 | private static Unsafe initUnsafe() { 111 | try { 112 | Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe"); 113 | theUnsafe.setAccessible(true); 114 | return (Unsafe) theUnsafe.get(Unsafe.class); 115 | } 116 | catch (NoSuchFieldException | IllegalAccessException e) { 117 | throw new RuntimeException(e); 118 | } 119 | } 120 | 121 | private static void parseLoop(long chunkStart, long chunkEnd, HashMap cities) { 122 | Result[] results = new Result[1 << 18]; 123 | long scanPtr = chunkStart; 124 | while (scanPtr < chunkEnd) { 125 | long nameAddress = scanPtr; 126 | long hash = 0; 127 | 128 | // Search for ';', one long at a time. 129 | long word = UNSAFE.getLong(scanPtr); 130 | int pos = findDelimiter(word); 131 | if (pos != 8) { 132 | scanPtr += pos; 133 | word = word & (-1L >>> ((8 - pos - 1) << 3)); 134 | hash ^= word; 135 | } 136 | else { 137 | scanPtr += 8; 138 | hash ^= word; 139 | while (true) { 140 | word = UNSAFE.getLong(scanPtr); 141 | pos = findDelimiter(word); 142 | if (pos != 8) { 143 | scanPtr += pos; 144 | word = word & (-1L >>> ((8 - pos - 1) << 3)); 145 | hash ^= word; 146 | break; 147 | } 148 | else { 149 | scanPtr += 8; 150 | hash ^= word; 151 | } 152 | } 153 | } 154 | 155 | // Save length of name for later. 156 | int nameLength = (int) (scanPtr - nameAddress); 157 | scanPtr++; 158 | 159 | long numberWord = UNSAFE.getLong(scanPtr); 160 | // The 4th binary digit of the ascii of a digit is 1 while 161 | // that of the '.' is 0. 
This finds the decimal separator 162 | // The value can be 12, 20, 28 163 | int decimalSepPos = Long.numberOfTrailingZeros(~numberWord & 0x10101000); 164 | int number = convertIntoNumber(decimalSepPos, numberWord); 165 | 166 | // Skip past new line. 167 | // scanPtr++; 168 | scanPtr += (decimalSepPos >>> 3) + 3; 169 | 170 | // Final calculation for index into hash table. 171 | int hashAsInt = (int) (hash ^ (hash >>> 32)); 172 | int finalHash = (hashAsInt ^ (hashAsInt >>> 18)); 173 | int tableIndex = (finalHash & (results.length - 1)); 174 | outer: while (true) { 175 | Result existingResult = results[tableIndex]; 176 | if (existingResult == null) { 177 | newEntry(results, cities, nameAddress, number, tableIndex, nameLength); 178 | break; 179 | } 180 | else { 181 | // Check for collision. 182 | int i = 0; 183 | for (; i < nameLength + 1 - 8; i += 8) { 184 | if (UNSAFE.getLong(existingResult.nameAddress + i) != UNSAFE.getLong(nameAddress + i)) { 185 | tableIndex = (tableIndex + 1) & (results.length - 1); 186 | continue outer; 187 | } 188 | } 189 | if (((existingResult.lastNameLong ^ UNSAFE.getLong(nameAddress + i)) << existingResult.remainingShift) == 0) { 190 | existingResult.min = Math.min(existingResult.min, number); 191 | existingResult.max = Math.max(existingResult.max, number); 192 | existingResult.sum += number; 193 | existingResult.count++; 194 | break; 195 | } 196 | else { 197 | // Collision error, try next. 198 | tableIndex = (tableIndex + 1) & (results.length - 1); 199 | } 200 | } 201 | } 202 | } 203 | } 204 | 205 | // Special method to convert a number in the specific format into an int value without branches created by 206 | // Quan Anh Mai. 
207 | private static int convertIntoNumber(int decimalSepPos, long numberWord) { 208 | int shift = 28 - decimalSepPos; 209 | // signed is -1 if negative, 0 otherwise 210 | long signed = (~numberWord << 59) >> 63; 211 | long designMask = ~(signed & 0xFF); 212 | // Align the number to a specific position and transform the ascii code 213 | // to actual digit value in each byte 214 | long digits = ((numberWord & designMask) << shift) & 0x0F000F0F00L; 215 | 216 | // Now digits is in the form 0xUU00TTHH00 (UU: units digit, TT: tens digit, HH: hundreds digit) 217 | // 0xUU00TTHH00 * (100 * 0x1000000 + 10 * 0x10000 + 1) = 218 | // 0x000000UU00TTHH00 + 219 | // 0x00UU00TTHH000000 * 10 + 220 | // 0xUU00TTHH00000000 * 100 221 | // Now TT * 100 has 2 trailing zeroes and HH * 100 + TT * 10 + UU < 0x400 222 | // This results in our value lies in the bit 32 to 41 of this product 223 | // That was close :) 224 | long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF; 225 | long value = (absValue ^ signed) - signed; 226 | return (int) value; 227 | } 228 | 229 | private static int findDelimiter(long word) { 230 | long input = word ^ 0x3B3B3B3B3B3B3B3BL; 231 | long tmp = (input - 0x0101010101010101L) & ~input & 0x8080808080808080L; 232 | return Long.numberOfTrailingZeros(tmp) >>> 3; 233 | } 234 | 235 | private static void newEntry(Result[] results, HashMap cities, long nameAddress, int number, int hash, int nameLength) { 236 | Result r = new Result(nameAddress, number); 237 | results[hash] = r; 238 | byte[] bytes = new byte[nameLength]; 239 | 240 | int i = 0; 241 | for (; i < nameLength + 1 - 8; i += 8) { 242 | } 243 | r.lastNameLong = UNSAFE.getLong(nameAddress + i); 244 | r.remainingShift = (64 - (nameLength + 1 - i) << 3); 245 | UNSAFE.copyMemory(null, nameAddress, bytes, Unsafe.ARRAY_BYTE_BASE_OFFSET, nameLength); 246 | String nameAsString = new String(bytes, StandardCharsets.UTF_8); 247 | cities.put(nameAsString, r); 248 | } 249 | 250 | private static long[] getSegments(int 
numberOfChunks) throws IOException { 251 | try (var fileChannel = FileChannel.open(Path.of(FILE), StandardOpenOption.READ)) { 252 | long fileSize = fileChannel.size(); 253 | long segmentSize = (fileSize + numberOfChunks - 1) / numberOfChunks; 254 | long[] chunks = new long[numberOfChunks + 1]; 255 | long mappedAddress = fileChannel.map(MapMode.READ_ONLY, 0, fileSize, Arena.global()).address(); 256 | chunks[0] = mappedAddress; 257 | long endAddress = mappedAddress + fileSize; 258 | for (int i = 1; i < numberOfChunks; ++i) { 259 | long chunkAddress = mappedAddress + i * segmentSize; 260 | // Align to first row start. 261 | while (chunkAddress < endAddress && UNSAFE.getByte(chunkAddress++) != '\n') { 262 | // nop 263 | } 264 | chunks[i] = Math.min(chunkAddress, endAddress); 265 | } 266 | chunks[numberOfChunks] = endAddress; 267 | return chunks; 268 | } 269 | } 270 | } 271 | -------------------------------------------------------------------------------- /src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v5.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 The original authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package dev.morling.onebrc; 17 | 18 | import java.io.IOException; 19 | import java.lang.foreign.Arena; 20 | import java.lang.reflect.Field; 21 | import java.nio.channels.FileChannel; 22 | import java.nio.channels.FileChannel.MapMode; 23 | import java.nio.charset.StandardCharsets; 24 | import java.nio.file.Path; 25 | import java.nio.file.StandardOpenOption; 26 | import java.util.ArrayList; 27 | import java.util.List; 28 | import java.util.TreeMap; 29 | import java.util.stream.IntStream; 30 | 31 | import sun.misc.Unsafe; 32 | 33 | /** 34 | * Simple solution that memory maps the input file, then splits it into one segment per available core and uses 35 | * sun.misc.Unsafe to directly access the mapped memory. Uses a long at a time when checking for collision. 36 | *

37 | * Runs in 0.66s on my Intel i9-13900K 38 | * Perf stats: 39 | * 35,935,262,091 cpu_core/cycles/ 40 | * 47,305,591,173 cpu_atom/cycles/ 41 | */ 42 | public class CalculateAverage_thomaswue_v5 { 43 | private static final String FILE = "./measurements.txt"; 44 | 45 | // Holding the current result for a single city. 46 | private static class Result { 47 | long lastNameLong, secondLastNameLong, nameAddress; 48 | int nameLength, remainingShift; 49 | int min, max, count; 50 | long sum; 51 | 52 | private Result(long nameAddress) { 53 | this.nameAddress = nameAddress; 54 | this.min = Integer.MAX_VALUE; 55 | this.max = Integer.MIN_VALUE; 56 | } 57 | 58 | public String toString() { 59 | return round(((double) min) / 10.0) + "/" + round((((double) sum) / 10.0) / count) + "/" + round(((double) max) / 10.0); 60 | } 61 | 62 | private static double round(double value) { 63 | return Math.round(value * 10.0) / 10.0; 64 | } 65 | 66 | // Accumulate another result into this one. 67 | private void add(Result other) { 68 | min = Math.min(min, other.min); 69 | max = Math.max(max, other.max); 70 | sum += other.sum; 71 | count += other.count; 72 | } 73 | 74 | public String calcName() { 75 | return new Scanner(nameAddress, nameAddress + nameLength).getString(nameLength); 76 | } 77 | } 78 | 79 | public static void main(String[] args) throws IOException { 80 | // Calculate input segments. 81 | int numberOfChunks = Runtime.getRuntime().availableProcessors(); 82 | long[] chunks = getSegments(numberOfChunks); 83 | 84 | // Parallel processing of segments. 85 | List> allResults = IntStream.range(0, chunks.length - 1).mapToObj(chunkIndex -> parseLoop(chunks[chunkIndex], chunks[chunkIndex + 1])) 86 | .map(resultArray -> { 87 | List results = new ArrayList<>(); 88 | for (Result r : resultArray) { 89 | if (r != null) { 90 | results.add(r); 91 | } 92 | } 93 | return results; 94 | }).parallel().toList(); 95 | 96 | // Final output. 
97 | System.out.println(accumulateResults(allResults)); 98 | } 99 | 100 | // Accumulate results sequentially for simplicity. 101 | private static TreeMap accumulateResults(List> allResults) { 102 | TreeMap result = new TreeMap<>(); 103 | for (List resultArr : allResults) { 104 | for (Result r : resultArr) { 105 | String name = r.calcName(); 106 | Result current = result.putIfAbsent(name, r); 107 | if (current != null) { 108 | current.add(r); 109 | } 110 | } 111 | } 112 | return result; 113 | } 114 | 115 | // Main parse loop. 116 | private static Result[] parseLoop(long chunkStart, long chunkEnd) { 117 | Result[] results = new Result[1 << 18]; 118 | Scanner scanner = new Scanner(chunkStart, chunkEnd); 119 | while (scanner.hasNext()) { 120 | long nameAddress = scanner.pos(); 121 | long hash = 0; 122 | 123 | // Search for ';', one long at a time. 124 | long word = scanner.getLong(); 125 | int pos = findDelimiter(word); 126 | if (pos != 8) { 127 | scanner.add(pos); 128 | word = mask(word, pos); 129 | hash ^= word; 130 | 131 | Result existingResult = results[hashToIndex(hash, results)]; 132 | if (existingResult != null && existingResult.lastNameLong == word) { 133 | scanAndRecord(scanner, existingResult); 134 | continue; 135 | } 136 | } 137 | else { 138 | scanner.add(8); 139 | hash ^= word; 140 | long prevWord = word; 141 | word = scanner.getLong(); 142 | pos = findDelimiter(word); 143 | if (pos != 8) { 144 | scanner.add(pos); 145 | word = mask(word, pos); 146 | hash ^= word; 147 | Result existingResult = results[hashToIndex(hash, results)]; 148 | if (existingResult != null && existingResult.lastNameLong == word && existingResult.secondLastNameLong == prevWord) { 149 | scanAndRecord(scanner, existingResult); 150 | continue; 151 | } 152 | } 153 | else { 154 | scanner.add(8); 155 | hash ^= word; 156 | while (true) { 157 | word = scanner.getLong(); 158 | pos = findDelimiter(word); 159 | if (pos != 8) { 160 | scanner.add(pos); 161 | word = mask(word, pos); 162 | hash ^= 
word; 163 | break; 164 | } 165 | else { 166 | scanner.add(8); 167 | hash ^= word; 168 | } 169 | } 170 | } 171 | } 172 | 173 | // Save length of name for later. 174 | int nameLength = (int) (scanner.pos() - nameAddress); 175 | scanner.add(1); 176 | 177 | long numberWord = scanner.getLong(); 178 | int decimalSepPos = Long.numberOfTrailingZeros(~numberWord & 0x10101000); 179 | int number = convertIntoNumber(decimalSepPos, numberWord); 180 | scanner.add((decimalSepPos >>> 3) + 3); 181 | 182 | // Final calculation for index into hash table. 183 | int tableIndex = hashToIndex(hash, results); 184 | outer: while (true) { 185 | Result existingResult = results[tableIndex]; 186 | if (existingResult == null) { 187 | existingResult = newEntry(results, nameAddress, tableIndex, nameLength, scanner); 188 | } 189 | // Check for collision. 190 | int i = 0; 191 | for (; i < nameLength + 1 - 8; i += 8) { 192 | if (scanner.getLongAt(existingResult.nameAddress + i) != scanner.getLongAt(nameAddress + i)) { 193 | tableIndex = (tableIndex + 1) & (results.length - 1); 194 | continue outer; 195 | } 196 | } 197 | if (((existingResult.lastNameLong ^ scanner.getLongAt(nameAddress + i)) << existingResult.remainingShift) == 0) { 198 | record(existingResult, number); 199 | break; 200 | } 201 | else { 202 | // Collision error, try next. 
203 | tableIndex = (tableIndex + 1) & (results.length - 1); 204 | } 205 | } 206 | } 207 | return results; 208 | } 209 | 210 | private static void scanAndRecord(Scanner scanPtr, Result existingResult) { 211 | scanPtr.add(1); 212 | long numberWord = scanPtr.getLong(); 213 | int decimalSepPos = Long.numberOfTrailingZeros(~numberWord & 0x10101000); 214 | int number = convertIntoNumber(decimalSepPos, numberWord); 215 | scanPtr.add((decimalSepPos >>> 3) + 3); 216 | record(existingResult, number); 217 | } 218 | 219 | private static void record(Result existingResult, int number) { 220 | existingResult.min = Math.min(existingResult.min, number); 221 | existingResult.max = Math.max(existingResult.max, number); 222 | existingResult.sum += number; 223 | existingResult.count++; 224 | } 225 | 226 | private static int hashToIndex(long hash, Result[] results) { 227 | int hashAsInt = (int) (hash ^ (hash >>> 32)); 228 | int finalHash = (hashAsInt ^ (hashAsInt >>> 18)); 229 | return (finalHash & (results.length - 1)); 230 | } 231 | 232 | private static long mask(long word, int pos) { 233 | return word & (-1L >>> ((8 - pos - 1) << 3)); 234 | } 235 | 236 | // Special method to convert a number in the specific format into an int value without branches created by 237 | // Quan Anh Mai. 
238 | private static int convertIntoNumber(int decimalSepPos, long numberWord) { 239 | int shift = 28 - decimalSepPos; 240 | // signed is -1 if negative, 0 otherwise 241 | long signed = (~numberWord << 59) >> 63; 242 | long designMask = ~(signed & 0xFF); 243 | // Align the number to a specific position and transform the ascii code 244 | // to actual digit value in each byte 245 | long digits = ((numberWord & designMask) << shift) & 0x0F000F0F00L; 246 | 247 | // Now digits is in the form 0xUU00TTHH00 (UU: units digit, TT: tens digit, HH: hundreds digit) 248 | // 0xUU00TTHH00 * (100 * 0x1000000 + 10 * 0x10000 + 1) = 249 | // 0x000000UU00TTHH00 + 250 | // 0x00UU00TTHH000000 * 10 + 251 | // 0xUU00TTHH00000000 * 100 252 | // Now TT * 100 has 2 trailing zeroes and HH * 100 + TT * 10 + UU < 0x400 253 | // This results in our value lies in the bit 32 to 41 of this product 254 | // That was close :) 255 | long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF; 256 | long value = (absValue ^ signed) - signed; 257 | return (int) value; 258 | } 259 | 260 | private static int findDelimiter(long word) { 261 | long input = word ^ 0x3B3B3B3B3B3B3B3BL; 262 | long tmp = (input - 0x0101010101010101L) & ~input & 0x8080808080808080L; 263 | return Long.numberOfTrailingZeros(tmp) >>> 3; 264 | } 265 | 266 | private static Result newEntry(Result[] results, long nameAddress, int hash, int nameLength, Scanner scanner) { 267 | Result r = new Result(nameAddress); 268 | results[hash] = r; 269 | 270 | int i = 0; 271 | for (; i < nameLength + 1 - 8; i += 8) { 272 | r.secondLastNameLong = (scanner.getLongAt(nameAddress + i)); 273 | } 274 | r.remainingShift = (64 - (nameLength + 1 - i) << 3); 275 | r.lastNameLong = (scanner.getLongAt(nameAddress + i) & (-1L >>> r.remainingShift)); 276 | r.nameLength = nameLength; 277 | return r; 278 | } 279 | 280 | private static long[] getSegments(int numberOfChunks) throws IOException { 281 | try (var fileChannel = FileChannel.open(Path.of(FILE), 
StandardOpenOption.READ)) { 282 | long fileSize = fileChannel.size(); 283 | long segmentSize = (fileSize + numberOfChunks - 1) / numberOfChunks; 284 | long[] chunks = new long[numberOfChunks + 1]; 285 | long mappedAddress = fileChannel.map(MapMode.READ_ONLY, 0, fileSize, Arena.global()).address(); 286 | chunks[0] = mappedAddress; 287 | long endAddress = mappedAddress + fileSize; 288 | Scanner s = new Scanner(mappedAddress, mappedAddress + fileSize); 289 | for (int i = 1; i < numberOfChunks; ++i) { 290 | long chunkAddress = mappedAddress + i * segmentSize; 291 | // Align to first row start. 292 | while (chunkAddress < endAddress && (s.getLongAt(chunkAddress++) & 0xFF) != '\n') { 293 | // nop 294 | } 295 | chunks[i] = Math.min(chunkAddress, endAddress); 296 | } 297 | chunks[numberOfChunks] = endAddress; 298 | return chunks; 299 | } 300 | } 301 | 302 | private static class Scanner { 303 | 304 | private static final Unsafe UNSAFE = initUnsafe(); 305 | 306 | private static Unsafe initUnsafe() { 307 | try { 308 | Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe"); 309 | theUnsafe.setAccessible(true); 310 | return (Unsafe) theUnsafe.get(Unsafe.class); 311 | } 312 | catch (NoSuchFieldException | IllegalAccessException e) { 313 | throw new RuntimeException(e); 314 | } 315 | } 316 | 317 | long pos, end; 318 | 319 | public Scanner(long start, long end) { 320 | this.pos = start; 321 | this.end = end; 322 | } 323 | 324 | boolean hasNext() { 325 | return pos < end; 326 | } 327 | 328 | long pos() { 329 | return pos; 330 | } 331 | 332 | void add(int delta) { 333 | pos += delta; 334 | } 335 | 336 | long getLong() { 337 | return UNSAFE.getLong(pos); 338 | } 339 | 340 | long getLongAt(long pos) { 341 | return UNSAFE.getLong(pos); 342 | } 343 | 344 | public String getString(int nameLength) { 345 | byte[] bytes = new byte[nameLength]; 346 | UNSAFE.copyMemory(null, pos, bytes, Unsafe.ARRAY_BYTE_BASE_OFFSET, nameLength); 347 | return new String(bytes, StandardCharsets.UTF_8); 
348 | } 349 | } 350 | } 351 | -------------------------------------------------------------------------------- /src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v6.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 The original authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package dev.morling.onebrc; 17 | 18 | import java.io.IOException; 19 | import java.lang.foreign.Arena; 20 | import java.lang.reflect.Field; 21 | import java.nio.channels.FileChannel; 22 | import java.nio.channels.FileChannel.MapMode; 23 | import java.nio.charset.StandardCharsets; 24 | import java.nio.file.Path; 25 | import java.nio.file.StandardOpenOption; 26 | import java.util.ArrayList; 27 | import java.util.List; 28 | import java.util.TreeMap; 29 | import java.util.stream.IntStream; 30 | 31 | import sun.misc.Unsafe; 32 | 33 | /** 34 | * Simple solution that memory maps the input file, then splits it into one segment per available core and uses 35 | * sun.misc.Unsafe to directly access the mapped memory. Uses a long at a time when checking for collision. 36 | *

37 | * Runs in 0.60s on my Intel i9-13900K 38 | * Perf stats: 39 | * 34,716,719,245 cpu_core/cycles/ 40 | * 40,776,530,892 cpu_atom/cycles/ 41 | */ 42 | public class CalculateAverage_thomaswue_v6 { 43 | private static final String FILE = "./measurements.txt"; 44 | 45 | // Holding the current result for a single city. 46 | private static class Result { 47 | long lastNameLong, secondLastNameLong, nameAddress; 48 | int nameLength, remainingShift; 49 | int min, max, count; 50 | long sum; 51 | 52 | private Result(long nameAddress) { 53 | this.nameAddress = nameAddress; 54 | this.min = Integer.MAX_VALUE; 55 | this.max = Integer.MIN_VALUE; 56 | } 57 | 58 | public String toString() { 59 | return round(((double) min) / 10.0) + "/" + round((((double) sum) / 10.0) / count) + "/" + round(((double) max) / 10.0); 60 | } 61 | 62 | private static double round(double value) { 63 | return Math.round(value * 10.0) / 10.0; 64 | } 65 | 66 | // Accumulate another result into this one. 67 | private void add(Result other) { 68 | min = Math.min(min, other.min); 69 | max = Math.max(max, other.max); 70 | sum += other.sum; 71 | count += other.count; 72 | } 73 | 74 | public String calcName() { 75 | return new Scanner(nameAddress, nameAddress + nameLength).getString(nameLength); 76 | } 77 | } 78 | 79 | public static void main(String[] args) throws IOException { 80 | // Calculate input segments. 81 | int numberOfChunks = Runtime.getRuntime().availableProcessors(); 82 | long[] chunks = getSegments(numberOfChunks); 83 | 84 | // Parallel processing of segments. 85 | List> allResults = IntStream.range(0, chunks.length - 1).mapToObj(chunkIndex -> parseLoop(chunks[chunkIndex], chunks[chunkIndex + 1])) 86 | .map(resultArray -> { 87 | List results = new ArrayList<>(); 88 | for (Result r : resultArray) { 89 | if (r != null) { 90 | results.add(r); 91 | } 92 | } 93 | return results; 94 | }).parallel().toList(); 95 | 96 | // Final output. 
97 | System.out.println(accumulateResults(allResults)); 98 | } 99 | 100 | // Accumulate results sequentially for simplicity. 101 | private static TreeMap accumulateResults(List> allResults) { 102 | TreeMap result = new TreeMap<>(); 103 | for (List resultArr : allResults) { 104 | for (Result r : resultArr) { 105 | String name = r.calcName(); 106 | Result current = result.putIfAbsent(name, r); 107 | if (current != null) { 108 | current.add(r); 109 | } 110 | } 111 | } 112 | return result; 113 | } 114 | 115 | // Main parse loop. 116 | private static Result[] parseLoop(long chunkStart, long chunkEnd) { 117 | Result[] results = new Result[1 << 17]; 118 | Scanner scanner = new Scanner(chunkStart, chunkEnd); 119 | long word = scanner.getLong(); 120 | int pos = findDelimiter(word); 121 | while (scanner.hasNext()) { 122 | long nameAddress = scanner.pos(); 123 | long hash = 0; 124 | 125 | // Search for ';', one long at a time. 126 | if (pos != 8) { 127 | scanner.add(pos); 128 | word = mask(word, pos); 129 | hash = word; 130 | 131 | int number = scanNumber(scanner); 132 | long nextWord = scanner.getLong(); 133 | int nextPos = findDelimiter(nextWord); 134 | 135 | Result existingResult = results[hashToIndex(hash, results)]; 136 | if (existingResult != null && existingResult.lastNameLong == word) { 137 | word = nextWord; 138 | pos = nextPos; 139 | record(existingResult, number); 140 | continue; 141 | } 142 | 143 | scanner.setPos(nameAddress + pos); 144 | } 145 | else { 146 | scanner.add(8); 147 | hash ^= word; 148 | long prevWord = word; 149 | word = scanner.getLong(); 150 | pos = findDelimiter(word); 151 | if (pos != 8) { 152 | scanner.add(pos); 153 | word = mask(word, pos); 154 | hash ^= word; 155 | 156 | Result existingResult = results[hashToIndex(hash, results)]; 157 | if (existingResult != null && existingResult.lastNameLong == word && existingResult.secondLastNameLong == prevWord) { 158 | int number = scanNumber(scanner); 159 | word = scanner.getLong(); 160 | pos = 
findDelimiter(word); 161 | record(existingResult, number); 162 | continue; 163 | } 164 | } 165 | else { 166 | scanner.add(8); 167 | hash ^= word; 168 | while (true) { 169 | word = scanner.getLong(); 170 | pos = findDelimiter(word); 171 | if (pos != 8) { 172 | scanner.add(pos); 173 | word = mask(word, pos); 174 | hash ^= word; 175 | break; 176 | } 177 | else { 178 | scanner.add(8); 179 | hash ^= word; 180 | } 181 | } 182 | } 183 | } 184 | 185 | // Save length of name for later. 186 | int nameLength = (int) (scanner.pos() - nameAddress); 187 | scanner.add(1); 188 | 189 | long numberWord = scanner.getLong(); 190 | int decimalSepPos = Long.numberOfTrailingZeros(~numberWord & 0x10101000); 191 | int number = convertIntoNumber(decimalSepPos, numberWord); 192 | scanner.add((decimalSepPos >>> 3) + 3); 193 | 194 | // Final calculation for index into hash table. 195 | int tableIndex = hashToIndex(hash, results); 196 | outer: while (true) { 197 | Result existingResult = results[tableIndex]; 198 | if (existingResult == null) { 199 | existingResult = newEntry(results, nameAddress, tableIndex, nameLength, scanner); 200 | } 201 | // Check for collision. 202 | int i = 0; 203 | for (; i < nameLength + 1 - 8; i += 8) { 204 | if (scanner.getLongAt(existingResult.nameAddress + i) != scanner.getLongAt(nameAddress + i)) { 205 | tableIndex = (tableIndex + 31) & (results.length - 1); 206 | continue outer; 207 | } 208 | } 209 | if (((existingResult.lastNameLong ^ scanner.getLongAt(nameAddress + i)) << existingResult.remainingShift) == 0) { 210 | record(existingResult, number); 211 | break; 212 | } 213 | else { 214 | // Collision error, try next. 
215 | tableIndex = (tableIndex + 31) & (results.length - 1); 216 | } 217 | } 218 | 219 | word = scanner.getLong(); 220 | pos = findDelimiter(word); 221 | } 222 | return results; 223 | } 224 | 225 | private static int scanNumber(Scanner scanPtr) { 226 | scanPtr.add(1); 227 | long numberWord = scanPtr.getLong(); 228 | int decimalSepPos = Long.numberOfTrailingZeros(~numberWord & 0x10101000); 229 | int number = convertIntoNumber(decimalSepPos, numberWord); 230 | scanPtr.add((decimalSepPos >>> 3) + 3); 231 | return number; 232 | } 233 | 234 | private static void record(Result existingResult, int number) { 235 | existingResult.min = Math.min(existingResult.min, number); 236 | existingResult.max = Math.max(existingResult.max, number); 237 | existingResult.sum += number; 238 | existingResult.count++; 239 | } 240 | 241 | private static int hashToIndex(long hash, Result[] results) { 242 | int hashAsInt = (int) (hash ^ (hash >>> 28)); 243 | int finalHash = (hashAsInt ^ (hashAsInt >>> 15)); 244 | return (finalHash & (results.length - 1)); 245 | } 246 | 247 | private static long mask(long word, int pos) { 248 | return word & (-1L >>> ((8 - pos - 1) << 3)); 249 | } 250 | 251 | // Special method to convert a number in the specific format into an int value without branches created by 252 | // Quan Anh Mai. 
253 | private static int convertIntoNumber(int decimalSepPos, long numberWord) { 254 | int shift = 28 - decimalSepPos; 255 | // signed is -1 if negative, 0 otherwise 256 | long signed = (~numberWord << 59) >> 63; 257 | long designMask = ~(signed & 0xFF); 258 | // Align the number to a specific position and transform the ascii code 259 | // to actual digit value in each byte 260 | long digits = ((numberWord & designMask) << shift) & 0x0F000F0F00L; 261 | 262 | // Now digits is in the form 0xUU00TTHH00 (UU: units digit, TT: tens digit, HH: hundreds digit) 263 | // 0xUU00TTHH00 * (100 * 0x1000000 + 10 * 0x10000 + 1) = 264 | // 0x000000UU00TTHH00 + 265 | // 0x00UU00TTHH000000 * 10 + 266 | // 0xUU00TTHH00000000 * 100 267 | // Now TT * 100 has 2 trailing zeroes and HH * 100 + TT * 10 + UU < 0x400 268 | // This results in our value lies in the bit 32 to 41 of this product 269 | // That was close :) 270 | long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF; 271 | long value = (absValue ^ signed) - signed; 272 | return (int) value; 273 | } 274 | 275 | private static int findDelimiter(long word) { 276 | long input = word ^ 0x3B3B3B3B3B3B3B3BL; 277 | long tmp = (input - 0x0101010101010101L) & ~input & 0x8080808080808080L; 278 | return Long.numberOfTrailingZeros(tmp) >>> 3; 279 | } 280 | 281 | private static Result newEntry(Result[] results, long nameAddress, int hash, int nameLength, Scanner scanner) { 282 | Result r = new Result(nameAddress); 283 | results[hash] = r; 284 | 285 | int i = 0; 286 | for (; i < nameLength + 1 - 8; i += 8) { 287 | r.secondLastNameLong = (scanner.getLongAt(nameAddress + i)); 288 | } 289 | r.remainingShift = (64 - (nameLength + 1 - i) << 3); 290 | r.lastNameLong = (scanner.getLongAt(nameAddress + i) & (-1L >>> r.remainingShift)); 291 | r.nameLength = nameLength; 292 | return r; 293 | } 294 | 295 | private static long[] getSegments(int numberOfChunks) throws IOException { 296 | try (var fileChannel = FileChannel.open(Path.of(FILE), 
StandardOpenOption.READ)) { 297 | long fileSize = fileChannel.size(); 298 | long segmentSize = (fileSize + numberOfChunks - 1) / numberOfChunks; 299 | long[] chunks = new long[numberOfChunks + 1]; 300 | long mappedAddress = fileChannel.map(MapMode.READ_ONLY, 0, fileSize, Arena.global()).address(); 301 | chunks[0] = mappedAddress; 302 | long endAddress = mappedAddress + fileSize; 303 | Scanner s = new Scanner(mappedAddress, mappedAddress + fileSize); 304 | for (int i = 1; i < numberOfChunks; ++i) { 305 | long chunkAddress = mappedAddress + i * segmentSize; 306 | // Align to first row start. 307 | while (chunkAddress < endAddress && (s.getLongAt(chunkAddress++) & 0xFF) != '\n') { 308 | // nop 309 | } 310 | chunks[i] = Math.min(chunkAddress, endAddress); 311 | } 312 | chunks[numberOfChunks] = endAddress; 313 | return chunks; 314 | } 315 | } 316 | 317 | private static class Scanner { 318 | 319 | private static final Unsafe UNSAFE = initUnsafe(); 320 | 321 | private static Unsafe initUnsafe() { 322 | try { 323 | Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe"); 324 | theUnsafe.setAccessible(true); 325 | return (Unsafe) theUnsafe.get(Unsafe.class); 326 | } 327 | catch (NoSuchFieldException | IllegalAccessException e) { 328 | throw new RuntimeException(e); 329 | } 330 | } 331 | 332 | long pos, end; 333 | 334 | public Scanner(long start, long end) { 335 | this.pos = start; 336 | this.end = end; 337 | } 338 | 339 | boolean hasNext() { 340 | return pos < end; 341 | } 342 | 343 | long pos() { 344 | return pos; 345 | } 346 | 347 | void add(int delta) { 348 | pos += delta; 349 | } 350 | 351 | long getLong() { 352 | return UNSAFE.getLong(pos); 353 | } 354 | 355 | long getLongAt(long pos) { 356 | return UNSAFE.getLong(pos); 357 | } 358 | 359 | public String getString(int nameLength) { 360 | byte[] bytes = new byte[nameLength]; 361 | UNSAFE.copyMemory(null, pos, bytes, Unsafe.ARRAY_BYTE_BASE_OFFSET, nameLength); 362 | return new String(bytes, StandardCharsets.UTF_8); 
363 | } 364 | 365 | public void setPos(long l) { 366 | this.pos = l; 367 | } 368 | } 369 | } 370 | -------------------------------------------------------------------------------- /src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v7.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 The original authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package dev.morling.onebrc; 17 | 18 | import java.io.IOException; 19 | import java.nio.ByteBuffer; 20 | import java.nio.ByteOrder; 21 | import java.nio.channels.FileChannel; 22 | import java.nio.charset.StandardCharsets; 23 | import java.nio.file.Path; 24 | import java.nio.file.StandardOpenOption; 25 | import java.util.ArrayList; 26 | import java.util.Arrays; 27 | import java.util.List; 28 | import java.util.TreeMap; 29 | import java.util.stream.IntStream; 30 | 31 | /** 32 | * Simple solution that memory maps the input file, then splits it into one segment per available core and uses 33 | * sun.misc.Unsafe to directly access the mapped memory. Uses a long at a time when checking for collision. 34 | *

35 | * Runs in 0.60s on my Intel i9-13900K 36 | * Perf stats: 37 | * 34,716,719,245 cpu_core/cycles/ 38 | * 40,776,530,892 cpu_atom/cycles/ 39 | */ 40 | public class CalculateAverage_thomaswue_v7 { 41 | private static final String FILE = "./measurements.txt"; 42 | private static final int MIN_TEMP = -999; 43 | private static final int MAX_TEMP = 999; 44 | 45 | // Holding the current result for a single city. 46 | private static class Result { 47 | long lastNameLong, secondLastNameLong; 48 | long[] name; 49 | int count; 50 | short min, max; 51 | long sum; 52 | 53 | private Result() { 54 | this.min = MAX_TEMP; 55 | this.max = MIN_TEMP; 56 | } 57 | 58 | public String toString() { 59 | return round(((double) min) / 10.0) + "/" + round((((double) sum) / 10.0) / count) + "/" + round(((double) max) / 10.0); 60 | } 61 | 62 | private static double round(double value) { 63 | return Math.round(value * 10.0) / 10.0; 64 | } 65 | 66 | // Accumulate another result into this one. 67 | private void add(Result other) { 68 | if (other.min < min) { 69 | min = other.min; 70 | } 71 | if (other.max > max) { 72 | max = other.max; 73 | } 74 | sum += other.sum; 75 | count += other.count; 76 | } 77 | 78 | public String calcName() { 79 | ByteBuffer bb = ByteBuffer.allocate(name.length * Long.BYTES).order(ByteOrder.nativeOrder()); 80 | bb.asLongBuffer().put(name); 81 | byte[] array = bb.array(); 82 | int i = 0; 83 | while (array[i++] != ';') 84 | ; 85 | return new String(array, 0, i - 1, StandardCharsets.UTF_8); 86 | } 87 | } 88 | 89 | public static void main(String[] args) throws IOException { 90 | if (args.length == 0 || !("--worker".equals(args[0]))) { 91 | spawnWorker(); 92 | return; 93 | } 94 | // Calculate input segments. 95 | int numberOfChunks = Runtime.getRuntime().availableProcessors(); 96 | long[] chunks = getSegments(numberOfChunks); 97 | 98 | // Parallel processing of segments. 
99 | List> allResults = IntStream.range(0, chunks.length - 1).mapToObj(chunkIndex -> parseLoop(chunks[chunkIndex], chunks[chunkIndex + 1])) 100 | .map(resultArray -> { 101 | List results = new ArrayList<>(); 102 | for (Result r : resultArray) { 103 | if (r != null) { 104 | results.add(r); 105 | } 106 | } 107 | return results; 108 | }).parallel().toList(); 109 | 110 | // Final output. 111 | System.out.println(accumulateResults(allResults)); 112 | System.out.close(); 113 | } 114 | 115 | private static void spawnWorker() throws IOException { 116 | ProcessHandle.Info info = ProcessHandle.current().info(); 117 | ArrayList workerCommand = new ArrayList<>(); 118 | info.command().ifPresent(workerCommand::add); 119 | info.arguments().ifPresent(args -> workerCommand.addAll(Arrays.asList(args))); 120 | workerCommand.add("--worker"); 121 | new ProcessBuilder() 122 | .command(workerCommand) 123 | .inheritIO() 124 | .redirectOutput(ProcessBuilder.Redirect.PIPE) 125 | .start() 126 | .getInputStream() 127 | .transferTo(System.out); 128 | } 129 | 130 | // Accumulate results sequentially for simplicity. 131 | private static TreeMap accumulateResults(List> allResults) { 132 | TreeMap result = new TreeMap<>(); 133 | for (List resultArr : allResults) { 134 | for (Result r : resultArr) { 135 | String name = r.calcName(); 136 | Result current = result.putIfAbsent(name, r); 137 | if (current != null) { 138 | current.add(r); 139 | } 140 | } 141 | } 142 | return result; 143 | } 144 | 145 | // Main parse loop. 146 | private static Result[] parseLoop(long chunkStart, long chunkEnd) { 147 | Result[] results = new Result[1 << 17]; 148 | Scanner scanner = new Scanner(chunkStart, chunkEnd); 149 | long word = scanner.getLong(); 150 | long pos = findDelimiter(word); 151 | while (scanner.hasNext()) { 152 | long nameAddress = scanner.pos(); 153 | long hash = 0; 154 | 155 | // Search for ';', one long at a time. 
156 | if (pos != 0) { 157 | pos = Long.numberOfTrailingZeros(pos) >>> 3; 158 | scanner.add(pos); 159 | word = mask(word, pos); 160 | hash = word; 161 | 162 | int number = scanNumber(scanner); 163 | long nextWord = scanner.getLong(); 164 | long nextPos = findDelimiter(nextWord); 165 | 166 | Result existingResult = results[hashToIndex(hash, results)]; 167 | if (existingResult != null && existingResult.lastNameLong == word) { 168 | word = nextWord; 169 | pos = nextPos; 170 | record(existingResult, number); 171 | continue; 172 | } 173 | 174 | scanner.setPos(nameAddress + pos); 175 | } 176 | else { 177 | scanner.add(8); 178 | hash = word; 179 | long prevWord = word; 180 | word = scanner.getLong(); 181 | pos = findDelimiter(word); 182 | if (pos != 0) { 183 | pos = Long.numberOfTrailingZeros(pos) >>> 3; 184 | scanner.add(pos); 185 | word = mask(word, pos); 186 | hash ^= word; 187 | 188 | Result existingResult = results[hashToIndex(hash, results)]; 189 | if (existingResult != null && existingResult.lastNameLong == word && existingResult.secondLastNameLong == prevWord) { 190 | int number = scanNumber(scanner); 191 | word = scanner.getLong(); 192 | pos = findDelimiter(word); 193 | record(existingResult, number); 194 | continue; 195 | } 196 | } 197 | else { 198 | scanner.add(8); 199 | hash ^= word; 200 | while (true) { 201 | word = scanner.getLong(); 202 | pos = findDelimiter(word); 203 | if (pos != 0) { 204 | pos = Long.numberOfTrailingZeros(pos) >>> 3; 205 | scanner.add(pos); 206 | word = mask(word, pos); 207 | hash ^= word; 208 | break; 209 | } 210 | else { 211 | scanner.add(8); 212 | hash ^= word; 213 | } 214 | } 215 | } 216 | } 217 | 218 | // Save length of name for later. 219 | int nameLength = (int) (scanner.pos() - nameAddress); 220 | int number = scanNumber(scanner); 221 | 222 | // Final calculation for index into hash table. 
223 | int tableIndex = hashToIndex(hash, results); 224 | outer: while (true) { 225 | Result existingResult = results[tableIndex]; 226 | if (existingResult == null) { 227 | existingResult = newEntry(results, nameAddress, tableIndex, nameLength, scanner); 228 | } 229 | // Check for collision. 230 | int i = 0; 231 | int namePos = 0; 232 | for (; i < nameLength + 1 - 8; i += 8) { 233 | if (namePos >= existingResult.name.length || existingResult.name[namePos++] != scanner.getLongAt(nameAddress + i)) { 234 | tableIndex = (tableIndex + 31) & (results.length - 1); 235 | continue outer; 236 | } 237 | } 238 | 239 | int remainingShift = (64 - (nameLength + 1 - i) << 3); 240 | if (((existingResult.lastNameLong ^ (scanner.getLongAt(nameAddress + i) << remainingShift)) == 0)) { 241 | record(existingResult, number); 242 | break; 243 | } 244 | else { 245 | // Collision error, try next. 246 | tableIndex = (tableIndex + 31) & (results.length - 1); 247 | } 248 | } 249 | 250 | word = scanner.getLong(); 251 | pos = findDelimiter(word); 252 | } 253 | return results; 254 | } 255 | 256 | private static int scanNumber(Scanner scanPtr) { 257 | scanPtr.add(1); 258 | long numberWord = scanPtr.getLong(); 259 | int decimalSepPos = Long.numberOfTrailingZeros(~numberWord & 0x10101000); 260 | int number = convertIntoNumber(decimalSepPos, numberWord); 261 | scanPtr.add((decimalSepPos >>> 3) + 3); 262 | return number; 263 | } 264 | 265 | private static void record(Result existingResult, int number) { 266 | if (number < existingResult.min) { 267 | existingResult.min = (short) number; 268 | } 269 | if (number > existingResult.max) { 270 | existingResult.max = (short) number; 271 | } 272 | existingResult.sum += number; 273 | existingResult.count++; 274 | } 275 | 276 | private static int hashToIndex(long hash, Result[] results) { 277 | int hashAsInt = (int) (hash ^ (hash >>> 28)); 278 | int finalHash = (hashAsInt ^ (hashAsInt >>> 17)); 279 | return (finalHash & (results.length - 1)); 280 | } 281 | 282 | 
private static long mask(long word, long pos) { 283 | return (word << ((7 - pos) << 3)); 284 | } 285 | 286 | // Special method to convert a number in the ascii number into an int without branches created by Quan Anh Mai. 287 | private static int convertIntoNumber(int decimalSepPos, long numberWord) { 288 | int shift = 28 - decimalSepPos; 289 | // signed is -1 if negative, 0 otherwise 290 | long signed = (~numberWord << 59) >> 63; 291 | long designMask = ~(signed & 0xFF); 292 | // Align the number to a specific position and transform the ascii to digit value 293 | long digits = ((numberWord & designMask) << shift) & 0x0F000F0F00L; 294 | // Now digits is in the form 0xUU00TTHH00 (UU: units digit, TT: tens digit, HH: hundreds digit) 295 | // 0xUU00TTHH00 * (100 * 0x1000000 + 10 * 0x10000 + 1) = 296 | // 0x000000UU00TTHH00 + 0x00UU00TTHH000000 * 10 + 0xUU00TTHH00000000 * 100 297 | long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF; 298 | long value = (absValue ^ signed) - signed; 299 | return (int) value; 300 | } 301 | 302 | private static long findDelimiter(long word) { 303 | long input = word ^ 0x3B3B3B3B3B3B3B3BL; 304 | long tmp = (input - 0x0101010101010101L) & ~input & 0x8080808080808080L; 305 | return tmp; 306 | } 307 | 308 | private static Result newEntry(Result[] results, long nameAddress, int hash, int nameLength, Scanner scanner) { 309 | Result r = new Result(); 310 | results[hash] = r; 311 | long[] name = new long[(nameLength / Long.BYTES) + 1]; 312 | int pos = 0; 313 | int i = 0; 314 | for (; i < nameLength + 1 - Long.BYTES; i += Long.BYTES) { 315 | name[pos++] = scanner.getLongAt(nameAddress + i); 316 | } 317 | 318 | if (pos > 0) { 319 | r.secondLastNameLong = name[pos - 1]; 320 | } 321 | 322 | int remainingShift = (64 - (nameLength + 1 - i) << 3); 323 | long lastWord = (scanner.getLongAt(nameAddress + i) << remainingShift); 324 | r.lastNameLong = lastWord; 325 | name[pos] = lastWord >> remainingShift; 326 | r.name = name; 327 | return r; 328 | } 329 | 
330 | private static long[] getSegments(int numberOfChunks) throws IOException { 331 | try (var fileChannel = FileChannel.open(Path.of(FILE), StandardOpenOption.READ)) { 332 | long fileSize = fileChannel.size(); 333 | long segmentSize = (fileSize + numberOfChunks - 1) / numberOfChunks; 334 | long[] chunks = new long[numberOfChunks + 1]; 335 | long mappedAddress = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileSize, java.lang.foreign.Arena.global()).address(); 336 | chunks[0] = mappedAddress; 337 | long endAddress = mappedAddress + fileSize; 338 | Scanner s = new Scanner(mappedAddress, mappedAddress + fileSize); 339 | for (int i = 1; i < numberOfChunks; ++i) { 340 | long chunkAddress = mappedAddress + i * segmentSize; 341 | // Align to first row start. 342 | while (chunkAddress < endAddress && (s.getLongAt(chunkAddress++) & 0xFF) != '\n') 343 | ; 344 | chunks[i] = Math.min(chunkAddress, endAddress); 345 | } 346 | chunks[numberOfChunks] = endAddress; 347 | return chunks; 348 | } 349 | } 350 | 351 | private static class Scanner { 352 | 353 | private static final sun.misc.Unsafe UNSAFE = initUnsafe(); 354 | 355 | private static sun.misc.Unsafe initUnsafe() { 356 | try { 357 | java.lang.reflect.Field theUnsafe = sun.misc.Unsafe.class.getDeclaredField("theUnsafe"); 358 | theUnsafe.setAccessible(true); 359 | return (sun.misc.Unsafe) theUnsafe.get(sun.misc.Unsafe.class); 360 | } 361 | catch (NoSuchFieldException | IllegalAccessException e) { 362 | throw new RuntimeException(e); 363 | } 364 | } 365 | 366 | long pos, end; 367 | 368 | public Scanner(long start, long end) { 369 | this.pos = start; 370 | this.end = end; 371 | } 372 | 373 | boolean hasNext() { 374 | return pos < end; 375 | } 376 | 377 | long pos() { 378 | return pos; 379 | } 380 | 381 | void add(long delta) { 382 | pos += delta; 383 | } 384 | 385 | long getLong() { 386 | return UNSAFE.getLong(pos); 387 | } 388 | 389 | long getLongAt(long pos) { 390 | return UNSAFE.getLong(pos); 391 | } 392 | 393 | void 
setPos(long l) { 394 | this.pos = l; 395 | } 396 | } 397 | } 398 | -------------------------------------------------------------------------------- /src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v8.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 The original authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package dev.morling.onebrc; 17 | 18 | import java.io.IOException; 19 | import java.nio.ByteBuffer; 20 | import java.nio.ByteOrder; 21 | import java.nio.channels.FileChannel; 22 | import java.nio.charset.StandardCharsets; 23 | import java.nio.file.Path; 24 | import java.nio.file.StandardOpenOption; 25 | import java.util.ArrayList; 26 | import java.util.Arrays; 27 | import java.util.List; 28 | import java.util.TreeMap; 29 | import java.util.concurrent.atomic.AtomicLong; 30 | 31 | /** 32 | * Simple solution that memory maps the input file, then splits it into one segment per available core and uses 33 | * sun.misc.Unsafe to directly access the mapped memory. Uses a long at a time when checking for collision. 34 | *

35 | * Runs in 0.41s on my Intel i9-13900K 36 | * Perf stats: 37 | * 25,286,227,376 cpu_core/cycles/ 38 | * 26,833,723,225 cpu_atom/cycles/ 39 | */ 40 | public class CalculateAverage_thomaswue_v8 { 41 | private static final String FILE = "./measurements.txt"; 42 | private static final int MIN_TEMP = -999; 43 | private static final int MAX_TEMP = 999; 44 | 45 | // Holding the current result for a single city. 46 | private static class Result { 47 | long lastNameLong, secondLastNameLong; 48 | long min, max; 49 | long sum; 50 | int count; 51 | long[] name; 52 | String nameAsString; 53 | 54 | private Result() { 55 | this.min = MAX_TEMP; 56 | this.max = MIN_TEMP; 57 | } 58 | 59 | public String toString() { 60 | return round(((double) min) / 10.0) + "/" + round((((double) sum) / 10.0) / count) + "/" + round(((double) max) / 10.0); 61 | } 62 | 63 | private static double round(double value) { 64 | return Math.round(value * 10.0) / 10.0; 65 | } 66 | 67 | // Accumulate another result into this one. 68 | private void add(Result other) { 69 | if (other.min < min) { 70 | min = other.min; 71 | } 72 | if (other.max > max) { 73 | max = other.max; 74 | } 75 | sum += other.sum; 76 | count += other.count; 77 | } 78 | 79 | public String calcName() { 80 | if (nameAsString == null) { 81 | ByteBuffer bb = ByteBuffer.allocate(name.length * Long.BYTES).order(ByteOrder.nativeOrder()); 82 | bb.asLongBuffer().put(name); 83 | byte[] array = bb.array(); 84 | int i = 0; 85 | while (array[i++] != ';') 86 | ; 87 | nameAsString = new String(array, 0, i - 1, StandardCharsets.UTF_8); 88 | } 89 | return nameAsString; 90 | } 91 | } 92 | 93 | public static void main(String[] args) throws IOException, InterruptedException { 94 | if (args.length == 0 || !("--worker".equals(args[0]))) { 95 | spawnWorker(); 96 | return; 97 | } 98 | // Calculate input segments. 
99 | int numberOfWorkers = Runtime.getRuntime().availableProcessors(); 100 | final AtomicLong cursor = new AtomicLong(); 101 | final long fileEnd; 102 | final long fileStart; 103 | 104 | try (var fileChannel = FileChannel.open(Path.of(FILE), StandardOpenOption.READ)) { 105 | long fileSize = fileChannel.size(); 106 | fileStart = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileSize, java.lang.foreign.Arena.global()).address(); 107 | cursor.set(fileStart); 108 | fileEnd = fileStart + fileSize; 109 | } 110 | 111 | // Parallel processing of segments. 112 | Thread[] threads = new Thread[numberOfWorkers]; 113 | List[] allResults = new List[numberOfWorkers]; 114 | for (int i = 0; i < threads.length; ++i) { 115 | final int index = i; 116 | threads[i] = new Thread(() -> { 117 | Result[] resultArray = parseLoop(cursor, fileEnd, fileStart); 118 | List results = new ArrayList<>(500); 119 | for (Result r : resultArray) { 120 | if (r != null) { 121 | r.calcName(); 122 | results.add(r); 123 | } 124 | } 125 | allResults[index] = results; 126 | }); 127 | threads[i].start(); 128 | } 129 | 130 | for (Thread thread : threads) { 131 | thread.join(); 132 | } 133 | 134 | // Final output. 135 | System.out.println(accumulateResults(allResults)); 136 | System.out.close(); 137 | } 138 | 139 | private static void spawnWorker() throws IOException { 140 | ProcessHandle.Info info = ProcessHandle.current().info(); 141 | ArrayList workerCommand = new ArrayList<>(); 142 | info.command().ifPresent(workerCommand::add); 143 | info.arguments().ifPresent(args -> workerCommand.addAll(Arrays.asList(args))); 144 | workerCommand.add("--worker"); 145 | new ProcessBuilder().command(workerCommand).inheritIO().redirectOutput(ProcessBuilder.Redirect.PIPE) 146 | .start().getInputStream().transferTo(System.out); 147 | } 148 | 149 | // Accumulate results sequentially for simplicity. 
150 | private static TreeMap accumulateResults(List[] allResults) { 151 | TreeMap result = new TreeMap<>(); 152 | for (List resultArr : allResults) { 153 | for (Result r : resultArr) { 154 | String name = r.calcName(); 155 | Result current = result.putIfAbsent(name, r); 156 | if (current != null) { 157 | current.add(r); 158 | } 159 | } 160 | } 161 | return result; 162 | } 163 | 164 | private static Result findResult(long initialWord, long initialPos, Scanner scanner, Result[] results) { 165 | 166 | Result existingResult; 167 | long word = initialWord; 168 | long pos = initialPos; 169 | long hash; 170 | long nameAddress = scanner.pos(); 171 | 172 | // Search for ';', one long at a time. 173 | if (pos != 0) { 174 | pos = Long.numberOfTrailingZeros(pos) >>> 3; 175 | scanner.add(pos); 176 | word = mask(word, pos); 177 | hash = word; 178 | 179 | int index = hashToIndex(hash, results); 180 | existingResult = results[index]; 181 | 182 | if (existingResult != null && existingResult.lastNameLong == word) { 183 | return existingResult; 184 | } 185 | else { 186 | scanner.setPos(nameAddress + pos); 187 | } 188 | } 189 | else { 190 | scanner.add(8); 191 | hash = word; 192 | long prevWord = word; 193 | word = scanner.getLong(); 194 | pos = findDelimiter(word); 195 | if (pos != 0) { 196 | pos = Long.numberOfTrailingZeros(pos) >>> 3; 197 | scanner.add(pos); 198 | word = mask(word, pos); 199 | hash ^= word; 200 | int index = hashToIndex(hash, results); 201 | existingResult = results[index]; 202 | 203 | if (existingResult != null && existingResult.lastNameLong == word && existingResult.secondLastNameLong == prevWord) { 204 | return existingResult; 205 | } 206 | else { 207 | scanner.setPos(nameAddress + pos + 8); 208 | } 209 | } 210 | else { 211 | scanner.add(8); 212 | hash ^= word; 213 | while (true) { 214 | word = scanner.getLong(); 215 | pos = findDelimiter(word); 216 | if (pos != 0) { 217 | pos = Long.numberOfTrailingZeros(pos) >>> 3; 218 | scanner.add(pos); 219 | word = 
mask(word, pos); 220 | hash ^= word; 221 | break; 222 | } 223 | else { 224 | scanner.add(8); 225 | hash ^= word; 226 | } 227 | } 228 | } 229 | } 230 | 231 | // Save length of name for later. 232 | int nameLength = (int) (scanner.pos() - nameAddress); 233 | 234 | // Final calculation for index into hash table. 235 | int tableIndex = hashToIndex(hash, results); 236 | outer: while (true) { 237 | existingResult = results[tableIndex]; 238 | if (existingResult == null) { 239 | existingResult = newEntry(results, nameAddress, tableIndex, nameLength, scanner); 240 | } 241 | // Check for collision. 242 | int i = 0; 243 | long[] name = existingResult.name; 244 | for (; i < nameLength + 1 - 8; i += 8) { 245 | if (scanner.getLongAt(i, name) != scanner.getLongAt(nameAddress + i)) { 246 | tableIndex = (tableIndex + 31) & (results.length - 1); 247 | continue outer; 248 | } 249 | } 250 | 251 | int remainingShift = (64 - (nameLength + 1 - i) << 3); 252 | if (((existingResult.lastNameLong ^ (scanner.getLongAt(nameAddress + i) << remainingShift)) == 0)) { 253 | break; 254 | } 255 | else { 256 | // Collision error, try next. 257 | tableIndex = (tableIndex + 31) & (results.length - 1); 258 | } 259 | } 260 | return existingResult; 261 | } 262 | 263 | private static long nextNL(long prev) { 264 | while (true) { 265 | long currentWord = Scanner.UNSAFE.getLong(prev); 266 | long pos = findNewLine(currentWord); 267 | if (pos != 0) { 268 | prev += Long.numberOfTrailingZeros(pos) >>> 3; 269 | break; 270 | } 271 | else { 272 | prev += 8; 273 | } 274 | } 275 | return prev; 276 | } 277 | 278 | private static final int SEGMENT_SIZE = 1024 * 1024 * 2; 279 | 280 | // Main parse loop. 
281 | private static Result[] parseLoop(AtomicLong counter, long fileEnd, long fileStart) { 282 | Result[] results = new Result[1 << 17]; 283 | 284 | while (true) { 285 | long current = counter.addAndGet(SEGMENT_SIZE) - SEGMENT_SIZE; 286 | 287 | if (current >= fileEnd) { 288 | return results; 289 | } 290 | 291 | long segmentEnd = nextNL(Math.min(fileEnd - 1, current + SEGMENT_SIZE)); 292 | long segmentStart; 293 | if (current == fileStart) { 294 | segmentStart = current; 295 | } 296 | else { 297 | segmentStart = nextNL(current) + 1; 298 | } 299 | 300 | long dist = (segmentEnd - segmentStart) / 3; 301 | long midPoint1 = nextNL(segmentStart + dist); 302 | long midPoint2 = nextNL(segmentStart + dist + dist); 303 | 304 | Scanner scanner1 = new Scanner(segmentStart, midPoint1); 305 | Scanner scanner2 = new Scanner(midPoint1 + 1, midPoint2); 306 | Scanner scanner3 = new Scanner(midPoint2 + 1, segmentEnd); 307 | while (true) { 308 | if (!scanner1.hasNext()) { 309 | break; 310 | } 311 | if (!scanner2.hasNext()) { 312 | break; 313 | } 314 | if (!scanner3.hasNext()) { 315 | break; 316 | } 317 | 318 | long word1 = scanner1.getLong(); 319 | long word2 = scanner2.getLong(); 320 | long word3 = scanner3.getLong(); 321 | long pos1 = findDelimiter(word1); 322 | long pos2 = findDelimiter(word2); 323 | long pos3 = findDelimiter(word3); 324 | Result existingResult1 = findResult(word1, pos1, scanner1, results); 325 | Result existingResult2 = findResult(word2, pos2, scanner2, results); 326 | Result existingResult3 = findResult(word3, pos3, scanner3, results); 327 | long number1 = scanNumber(scanner1); 328 | long number2 = scanNumber(scanner2); 329 | long number3 = scanNumber(scanner3); 330 | record(existingResult1, number1); 331 | record(existingResult2, number2); 332 | record(existingResult3, number3); 333 | } 334 | 335 | while (scanner1.hasNext()) { 336 | long word = scanner1.getLong(); 337 | long pos = findDelimiter(word); 338 | record(findResult(word, pos, scanner1, results), 
scanNumber(scanner1)); 339 | } 340 | 341 | while (scanner2.hasNext()) { 342 | long word = scanner2.getLong(); 343 | long pos = findDelimiter(word); 344 | record(findResult(word, pos, scanner2, results), scanNumber(scanner2)); 345 | } 346 | 347 | while (scanner3.hasNext()) { 348 | long word = scanner3.getLong(); 349 | long pos = findDelimiter(word); 350 | record(findResult(word, pos, scanner3, results), scanNumber(scanner3)); 351 | } 352 | } 353 | } 354 | 355 | private static long scanNumber(Scanner scanPtr) { 356 | scanPtr.add(1); 357 | long numberWord = scanPtr.getLong(); 358 | int decimalSepPos = Long.numberOfTrailingZeros(~numberWord & 0x10101000); 359 | long number = convertIntoNumber(decimalSepPos, numberWord); 360 | scanPtr.add((decimalSepPos >>> 3) + 3); 361 | return number; 362 | } 363 | 364 | private static void record(Result existingResult, long number) { 365 | if (number < existingResult.min) { 366 | existingResult.min = number; 367 | } 368 | if (number > existingResult.max) { 369 | existingResult.max = number; 370 | } 371 | existingResult.sum += number; 372 | existingResult.count++; 373 | } 374 | 375 | private static int hashToIndex(long hash, Result[] results) { 376 | long hashAsInt = hash ^ (hash >>> 37) ^ (hash >>> 17); 377 | return (int) (hashAsInt & (results.length - 1)); 378 | } 379 | 380 | private static long mask(long word, long pos) { 381 | return (word << ((7 - pos) << 3)); 382 | } 383 | 384 | // Special method to convert a number in the ascii number into an int without branches created by Quan Anh Mai. 
385 | private static long convertIntoNumber(int decimalSepPos, long numberWord) { 386 | int shift = 28 - decimalSepPos; 387 | // signed is -1 if negative, 0 otherwise 388 | long signed = (~numberWord << 59) >> 63; 389 | long designMask = ~(signed & 0xFF); 390 | // Align the number to a specific position and transform the ascii to digit value 391 | long digits = ((numberWord & designMask) << shift) & 0x0F000F0F00L; 392 | // Now digits is in the form 0xUU00TTHH00 (UU: units digit, TT: tens digit, HH: hundreds digit) 393 | // 0xUU00TTHH00 * (100 * 0x1000000 + 10 * 0x10000 + 1) = 394 | // 0x000000UU00TTHH00 + 0x00UU00TTHH000000 * 10 + 0xUU00TTHH00000000 * 100 395 | long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF; 396 | return (absValue ^ signed) - signed; 397 | } 398 | 399 | private static long findDelimiter(long word) { 400 | long input = word ^ 0x3B3B3B3B3B3B3B3BL; 401 | long tmp = (input - 0x0101010101010101L) & ~input & 0x8080808080808080L; 402 | return tmp; 403 | } 404 | 405 | private static long findNewLine(long word) { 406 | long input = word ^ 0x0A0A0A0A0A0A0A0AL; 407 | long tmp = (input - 0x0101010101010101L) & ~input & 0x8080808080808080L; 408 | return tmp; 409 | } 410 | 411 | private static Result newEntry(Result[] results, long nameAddress, int hash, int nameLength, Scanner scanner) { 412 | Result r = new Result(); 413 | results[hash] = r; 414 | long[] name = new long[(nameLength / Long.BYTES) + 1]; 415 | int pos = 0; 416 | int i = 0; 417 | for (; i < nameLength + 1 - Long.BYTES; i += Long.BYTES) { 418 | name[pos++] = scanner.getLongAt(nameAddress + i); 419 | } 420 | 421 | if (pos > 0) { 422 | r.secondLastNameLong = name[pos - 1]; 423 | } 424 | 425 | int remainingShift = (64 - (nameLength + 1 - i) << 3); 426 | long lastWord = (scanner.getLongAt(nameAddress + i) << remainingShift); 427 | r.lastNameLong = lastWord; 428 | name[pos] = lastWord >> remainingShift; 429 | r.name = name; 430 | return r; 431 | } 432 | 433 | private static class Scanner { 434 | 
435 | private static final sun.misc.Unsafe UNSAFE = initUnsafe(); 436 | 437 | private static sun.misc.Unsafe initUnsafe() { 438 | try { 439 | java.lang.reflect.Field theUnsafe = sun.misc.Unsafe.class.getDeclaredField("theUnsafe"); 440 | theUnsafe.setAccessible(true); 441 | return (sun.misc.Unsafe) theUnsafe.get(sun.misc.Unsafe.class); 442 | } 443 | catch (NoSuchFieldException | IllegalAccessException e) { 444 | throw new RuntimeException(e); 445 | } 446 | } 447 | 448 | long pos, end; 449 | 450 | public Scanner(long start, long end) { 451 | this.pos = start; 452 | this.end = end; 453 | } 454 | 455 | boolean hasNext() { 456 | return pos < end; 457 | } 458 | 459 | long pos() { 460 | return pos; 461 | } 462 | 463 | void add(long delta) { 464 | pos += delta; 465 | } 466 | 467 | long getLong() { 468 | return UNSAFE.getLong(pos); 469 | } 470 | 471 | long getLongAt(long pos) { 472 | return UNSAFE.getLong(pos); 473 | } 474 | 475 | long getLongAt(long pos, long[] array) { 476 | return UNSAFE.getLong(array, pos + sun.misc.Unsafe.ARRAY_LONG_BASE_OFFSET); 477 | } 478 | 479 | void setPos(long l) { 480 | this.pos = l; 481 | } 482 | } 483 | } 484 | -------------------------------------------------------------------------------- /src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v9.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 The original authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */
package dev.morling.onebrc;

import java.io.IOException;
import java.nio.channels.FileChannel;
import java.util.*;
import java.util.concurrent.atomic.AtomicLong;

/**
 * The solution starts a child worker process for the actual work such that clean up of the memory mapping can occur
 * while the main process already returns with the result. The worker then memory maps the input file, creates a worker
 * thread per available core, and then processes segments of size {@link #SEGMENT_SIZE} at a time. The segments are
 * split into 3 parts and cursors for each of those parts are processing the segment simultaneously in the same thread.
 * Results are accumulated into {@link Result} objects and a tree map is used to sequentially accumulate the results in
 * the end.
 * Runs in 0.39s on an Intel i9-13900K.
 * Credit:
 * Quan Anh Mai for branchless number parsing code
 * Alfonso² Peterssen for suggesting memory mapping with unsafe and the subprocess idea
 * Artsiom Korzun for showing the benefits of work stealing at 2MB segments instead of equal split between workers
 */
public class CalculateAverage_thomaswue_v9 {
    private static final String FILE = "./measurements.txt";
    private static final int MIN_TEMP = -999;
    private static final int MAX_TEMP = 999;
    private static final int MAX_NAME_LENGTH = 100;
    private static final int MAX_CITIES = 10000;
    private static final int SEGMENT_SIZE = 1 << 21;
    private static final int HASH_TABLE_SIZE = 1 << 17;

    /**
     * Entry point. Without a leading {@code --worker} argument the process only spawns a worker copy of itself
     * and relays its output; with it, the file is mapped, parsed by one thread per available processor and the
     * merged result map is printed.
     *
     * @param args command line; {@code args[0] == "--worker"} selects worker mode
     * @throws IOException          if the input file cannot be opened or mapped, or the worker cannot be started
     * @throws InterruptedException if interrupted while joining the parser threads
     */
    public static void main(String[] args) throws IOException, InterruptedException {
        // Start worker subprocess if this process is not the worker.
        if (args.length == 0 || !("--worker".equals(args[0]))) {
            spawnWorker();
            return;
        }

        int numberOfWorkers = Runtime.getRuntime().availableProcessors();
        try (var fileChannel = FileChannel.open(java.nio.file.Path.of(FILE), java.nio.file.StandardOpenOption.READ)) {
            long fileSize = fileChannel.size();
            final long fileStart = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileSize, java.lang.foreign.Arena.global()).address();
            final long fileEnd = fileStart + fileSize;
            // Shared work cursor: every thread claims the next segment by advancing it atomically.
            final AtomicLong cursor = new AtomicLong(fileStart);

            // Parallel processing of segments.
            Thread[] threads = new Thread[numberOfWorkers];
            // Generic array creation is not possible in Java, hence the unchecked raw array.
            @SuppressWarnings("unchecked")
            List<Result>[] allResults = new List[numberOfWorkers];
            for (int i = 0; i < threads.length; ++i) {
                final int index = i;
                threads[i] = new Thread(() -> {
                    List<Result> results = new ArrayList<>(MAX_CITIES);
                    parseLoop(cursor, fileEnd, fileStart, results);
                    allResults[index] = results;
                });
                threads[i].start();
            }
            for (Thread thread : threads) {
                thread.join();
            }

            // Final output.
            System.out.println(accumulateResults(allResults));
            // Closing stdout signals the parent that the result is complete.
            System.out.close();
        }
    }

    /**
     * Re-launches the current process command line with {@code --worker} appended and copies the child's
     * standard output to this process' standard output.
     *
     * @throws IOException if the child cannot be started or its output cannot be copied
     */
    private static void spawnWorker() throws IOException {
        ProcessHandle.Info info = ProcessHandle.current().info();
        ArrayList<String> workerCommand = new ArrayList<>();
        info.command().ifPresent(workerCommand::add);
        info.arguments().ifPresent(args -> workerCommand.addAll(Arrays.asList(args)));
        workerCommand.add("--worker");
        new ProcessBuilder().command(workerCommand).inheritIO().redirectOutput(ProcessBuilder.Redirect.PIPE)
                .start().getInputStream().transferTo(System.out);
    }

    /**
     * Merges the per-thread result lists into one map sorted by station name.
     *
     * @param allResults one list of results per worker thread
     * @return map from station name to its combined result
     */
    private static TreeMap<String, Result> accumulateResults(List<Result>[] allResults) {
        TreeMap<String, Result> result = new TreeMap<>();
        for (List<Result> resultArr : allResults) {
            for (Result r : resultArr) {
                Result current = result.putIfAbsent(r.calcName(), r);
                if (current != null) {
                    current.accumulate(r);
                }
            }
        }
        return result;
    }

    /**
     * Worker loop: repeatedly claims the next {@link #SEGMENT_SIZE} bytes via {@code counter}, aligns the
     * segment to line boundaries, splits it into three parts and parses those parts interleaved.
     * Returns once the claimed segment start lies at or beyond {@code fileEnd}.
     *
     * @param counter          shared cursor holding the address of the next unclaimed segment
     * @param fileEnd          address one past the last byte of the mapped file
     * @param fileStart        address of the first byte of the mapped file
     * @param collectedResults receives every {@link Result} created by this thread
     */
    private static void parseLoop(AtomicLong counter, long fileEnd, long fileStart, List<Result> collectedResults) {
        Result[] results = new Result[HASH_TABLE_SIZE];
        while (true) {
            long current = counter.addAndGet(SEGMENT_SIZE) - SEGMENT_SIZE;
            if (current >= fileEnd) {
                return;
            }

            // A segment ends at the first newline at or after its nominal end ...
            long segmentEnd = nextNewLine(Math.min(fileEnd - 1, current + SEGMENT_SIZE));
            long segmentStart;
            if (current == fileStart) {
                segmentStart = current;
            }
            else {
                // ... and therefore starts right after the first newline at or after its nominal start.
                segmentStart = nextNewLine(current) + 1;
            }

            long dist = (segmentEnd - segmentStart) / 3;
            long midPoint1 = nextNewLine(segmentStart + dist);
            long midPoint2 = nextNewLine(segmentStart + dist + dist);

            Scanner scanner1 = new Scanner(segmentStart, midPoint1);
            Scanner scanner2 = new Scanner(midPoint1 + 1, midPoint2);
            Scanner scanner3 = new Scanner(midPoint2 + 1, segmentEnd);
            // Process one line of each part per iteration while all three parts still have input.
            while (true) {
                if (!scanner1.hasNext()) {
                    break;
                }
                if (!scanner2.hasNext()) {
                    break;
                }
                if (!scanner3.hasNext()) {
                    break;
                }
                long word1 = scanner1.getLong();
                long word2 = scanner2.getLong();
                long word3 = scanner3.getLong();
                long delimiterMask1 = findDelimiter(word1);
                long delimiterMask2 = findDelimiter(word2);
                long delimiterMask3 = findDelimiter(word3);
                Result existingResult1 = findResult(word1, delimiterMask1, scanner1, results, collectedResults);
                Result existingResult2 = findResult(word2, delimiterMask2, scanner2, results, collectedResults);
                Result existingResult3 = findResult(word3, delimiterMask3, scanner3, results, collectedResults);
                long number1 = scanNumber(scanner1);
                long number2 = scanNumber(scanner2);
                long number3 = scanNumber(scanner3);
                record(existingResult1, number1);
                record(existingResult2, number2);
                record(existingResult3, number3);
            }

            // Drain whatever is left in the individual parts.
            while (scanner1.hasNext()) {
                long word = scanner1.getLong();
                long pos = findDelimiter(word);
                record(findResult(word, pos, scanner1, results, collectedResults), scanNumber(scanner1));
            }
            while (scanner2.hasNext()) {
                long word = scanner2.getLong();
                long pos = findDelimiter(word);
                record(findResult(word, pos, scanner2, results, collectedResults), scanNumber(scanner2));
            }
            while (scanner3.hasNext()) {
                long word = scanner3.getLong();
                long pos = findDelimiter(word);
                record(findResult(word, pos, scanner3, results, collectedResults), scanNumber(scanner3));
            }
        }
    }

    /**
     * Scans the station name at the scanner's position up to its ';', advancing the scanner onto the ';',
     * and returns the hash table entry for that name, creating it if necessary.
     *
     * @param initialWord          the eight bytes at the scanner's current position
     * @param initialDelimiterMask {@link #findDelimiter(long)} applied to {@code initialWord}
     * @param scanner              cursor positioned at the first byte of the name
     * @param results              open-addressing hash table of this thread
     * @param collectedResults     receives newly created entries
     * @return the entry belonging to the scanned name
     */
    private static Result findResult(long initialWord, long initialDelimiterMask, Scanner scanner, Result[] results, List<Result> collectedResults) {
        Result existingResult;
        long word = initialWord;
        long delimiterMask = initialDelimiterMask;
        long hash;
        long nameAddress = scanner.pos();

        // Search for ';', one long at a time. There are two common cases that are specially treated:
        // (a) the ';' is found in the first 8 bytes
        // (b) the ';' is found in the first 16 bytes
        if (delimiterMask != 0) {
            // Special case for when the ';' is found in the first 8 bytes.
            int trailingZeros = Long.numberOfTrailingZeros(delimiterMask);
            // Shift out the bytes behind the ';' so that only the name and the ';' remain.
            word = (word << (63 - trailingZeros));
            scanner.add(trailingZeros >>> 3);
            hash = word;
            existingResult = results[hashToIndex(hash, results)];
            // NOTE(review): this shortcut compares only the last word; it appears to rely on longer names
            // with the same final word never occupying this slot - confirm for adversarial inputs.
            if (existingResult != null && existingResult.lastNameLong == word) {
                return existingResult;
            }
        }
        else {
            // Special case for when the ';' is found in bytes 9-16.
            hash = word;
            long prevWord = word;
            scanner.add(8);
            word = scanner.getLong();
            delimiterMask = findDelimiter(word);
            if (delimiterMask != 0) {
                int trailingZeros = Long.numberOfTrailingZeros(delimiterMask);
                word = (word << (63 - trailingZeros));
                scanner.add(trailingZeros >>> 3);
                hash ^= word;
                existingResult = results[hashToIndex(hash, results)];
                if (existingResult != null && existingResult.lastNameLong == word && existingResult.secondLastNameLong == prevWord) {
                    return existingResult;
                }
            }
            else {
                // Slow-path for when the ';' could not be found in the first 16 bytes.
                scanner.add(8);
                hash ^= word;
                while (true) {
                    word = scanner.getLong();
                    delimiterMask = findDelimiter(word);
                    if (delimiterMask != 0) {
                        int trailingZeros = Long.numberOfTrailingZeros(delimiterMask);
                        word = (word << (63 - trailingZeros));
                        scanner.add(trailingZeros >>> 3);
                        hash ^= word;
                        break;
                    }
                    else {
                        scanner.add(8);
                        hash ^= word;
                    }
                }
            }
        }

        // Save length of name for later.
        int nameLength = (int) (scanner.pos() - nameAddress);

        // Final calculation for index into hash table.
        int tableIndex = hashToIndex(hash, results);
        outer: while (true) {
            existingResult = results[tableIndex];
            if (existingResult == null) {
                existingResult = newEntry(results, nameAddress, tableIndex, nameLength, scanner, collectedResults);
            }
            // Check for collision.
            int i = 0;
            for (; i < nameLength + 1 - 8; i += 8) {
                if (scanner.getLongAt(existingResult.nameAddress + i) != scanner.getLongAt(nameAddress + i)) {
                    // Collision error, try next.
                    tableIndex = (tableIndex + 31) & (results.length - 1);
                    continue outer;
                }
            }

            // Parsed as (64 - n) << 3; only the low 6 bits of a long shift distance are used,
            // so this acts as 64 - 8 * n for n in 1..8.
            int remainingShift = (64 - (nameLength + 1 - i) << 3);
            if (existingResult.lastNameLong == (scanner.getLongAt(nameAddress + i) << remainingShift)) {
                break;
            }
            else {
                // Collision error, try next.
                tableIndex = (tableIndex + 31) & (results.length - 1);
            }
        }
        return existingResult;
    }

    /**
     * Returns the address of the first '\n' at or after {@code prev}. Memory is read eight bytes at a time
     * starting at {@code prev}.
     */
    private static long nextNewLine(long prev) {
        while (true) {
            long currentWord = Scanner.UNSAFE.getLong(prev);
            long input = currentWord ^ 0x0A0A0A0A0A0A0A0AL;
            long pos = (input - 0x0101010101010101L) & ~input & 0x8080808080808080L;
            if (pos != 0) {
                prev += Long.numberOfTrailingZeros(pos) >>> 3;
                break;
            }
            else {
                prev += 8;
            }
        }
        return prev;
    }

    /**
     * Parses the number that starts one byte after the scanner's position and advances the scanner past it.
     *
     * @param scanPtr cursor positioned on the ';' that precedes the number
     * @return the value as returned by {@link #convertIntoNumber(int, long)}
     */
    private static long scanNumber(Scanner scanPtr) {
        long numberWord = scanPtr.getLongAt(scanPtr.pos() + 1);
        // Digits have bit 4 set, '.' does not: the first cleared probe bit (bytes 1..3) marks the '.'.
        int decimalSepPos = Long.numberOfTrailingZeros(~numberWord & 0x10101000L);
        long number = convertIntoNumber(decimalSepPos, numberWord);
        scanPtr.add((decimalSepPos >>> 3) + 4);
        return number;
    }

    /** Folds one measurement into {@code existingResult} (min, max, sum and count). */
    private static void record(Result existingResult, long number) {
        if (number < existingResult.min) {
            existingResult.min = (short) number;
        }
        if (number > existingResult.max) {
            existingResult.max = (short) number;
        }
        existingResult.sum += number;
        existingResult.count++;
    }

    /** Mixes the 64-bit hash and reduces it to an index into {@code results} (length must be a power of two). */
    private static int hashToIndex(long hash, Result[] results) {
        long hashAsInt = hash ^ (hash >>> 37) ^ (hash >>> 17);
        return (int) (hashAsInt & (results.length - 1));
    }

    // Special method to convert a number in the ascii number into an int without branches created by Quan Anh Mai.
    private static long convertIntoNumber(int decimalSepPos, long numberWord) {
        int shift = 28 - decimalSepPos;
        // signed is -1 if negative, 0 otherwise
        long signed = (~numberWord << 59) >> 63;
        long designMask = ~(signed & 0xFF);
        // Align the number to a specific position and transform the ascii to digit value
        long digits = ((numberWord & designMask) << shift) & 0x0F000F0F00L;
        // Now digits is in the form 0xUU00TTHH00 (UU: units digit, TT: tens digit, HH: hundreds digit)
        // 0xUU00TTHH00 * (100 * 0x1000000 + 10 * 0x10000 + 1) =
        // 0x000000UU00TTHH00 + 0x00UU00TTHH000000 * 10 + 0xUU00TTHH00000000 * 100
        long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF;
        return (absValue ^ signed) - signed;
    }

    /**
     * Returns a mask whose lowest set bit is the 0x80 bit of the first byte of {@code word} equal to ';',
     * or 0 if there is none.
     */
    private static long findDelimiter(long word) {
        long input = word ^ 0x3B3B3B3B3B3B3B3BL;
        return (input - 0x0101010101010101L) & ~input & 0x8080808080808080L;
    }

    /**
     * Creates the entry for the name at {@code nameAddress}, stores it in {@code results[hash]} and appends it
     * to {@code collectedResults}.
     *
     * @param hash       slot index inside {@code results}
     * @param nameLength number of bytes between the start of the name and its ';'
     */
    private static Result newEntry(Result[] results, long nameAddress, int hash, int nameLength, Scanner scanner, List<Result> collectedResults) {
        Result r = new Result();
        results[hash] = r;
        // Advance i to the offset of the last 8-byte word touched by the name and its ';'.
        int i = 0;
        for (; i < nameLength + 1 - Long.BYTES; i += Long.BYTES) {
        }
        if (nameLength + 1 > 8) {
            r.secondLastNameLong = scanner.getLongAt(nameAddress + i - 8);
        }
        int remainingShift = (64 - (nameLength + 1 - i) << 3);
        r.lastNameLong = (scanner.getLongAt(nameAddress + i) << remainingShift);
        r.nameAddress = nameAddress;
        collectedResults.add(r);
        return r;
    }

    /** Aggregate for one station; values are kept in tenths of a degree. */
    private static final class Result {
        long lastNameLong, secondLastNameLong;
        short min, max;
        int count;
        long sum;
        long nameAddress;

        private Result() {
            // Start with inverted extremes so that the first measurement replaces both.
            this.min = MAX_TEMP;
            this.max = MIN_TEMP;
        }

        /** Formats the aggregate as {@code min/mean/max} with one fractional digit. */
        public String toString() {
            return round(((double) min) / 10.0) + "/" + round((((double) sum) / 10.0) / count) + "/" + round(((double) max) / 10.0);
        }

        private static double round(double value) {
            return Math.round(value * 10.0) / 10.0;
        }

        /** Merges the aggregate of {@code other} into this one. */
        private void accumulate(Result other) {
            if (other.min < min) {
                min = other.min;
            }
            if (other.max > max) {
                max = other.max;
            }
            sum += other.sum;
            count += other.count;
        }

        /** Decodes the bytes from {@code nameAddress} up to (excluding) the next ';' as UTF-8. */
        public String calcName() {
            Scanner scanner = new Scanner(nameAddress, nameAddress + MAX_NAME_LENGTH + 1);
            int nameLength = 0;
            while (scanner.getByteAt(nameAddress + nameLength) != ';') {
                nameLength++;
            }
            byte[] array = new byte[nameLength];
            for (int i = 0; i < nameLength; ++i) {
                array[i] = scanner.getByteAt(nameAddress + i);
            }
            return new String(array, java.nio.charset.StandardCharsets.UTF_8);
        }
    }

    /** Cursor over a range of raw memory addresses, read through {@code sun.misc.Unsafe}. */
    private static final class Scanner {
        private static final sun.misc.Unsafe UNSAFE = initUnsafe();
        private long pos;
        private final long end;

        /** Obtains the {@code Unsafe} singleton reflectively via its private {@code theUnsafe} field. */
        private static sun.misc.Unsafe initUnsafe() {
            try {
                java.lang.reflect.Field theUnsafe = sun.misc.Unsafe.class.getDeclaredField("theUnsafe");
                theUnsafe.setAccessible(true);
                return (sun.misc.Unsafe) theUnsafe.get(sun.misc.Unsafe.class);
            }
            catch (NoSuchFieldException | IllegalAccessException e) {
                throw new RuntimeException(e);
            }
        }

        public Scanner(long start, long end) {
            this.pos = start;
            this.end = end;
        }

        boolean hasNext() {
            return pos < end;
        }

        long pos() {
            return pos;
        }

        void add(long delta) {
            pos += delta;
        }

        long getLong() {
            return UNSAFE.getLong(pos);
        }

        long getLongAt(long pos) {
            return UNSAFE.getLong(pos);
        }

        byte getByteAt(long pos) {
            return UNSAFE.getByte(pos);
        }
    }
}

-------------------------------------------------------------------------------- /src/main/java/org/rschwietzke/FastRandom.java: --------------------------------------------------------------------------------
/*
 *  Copyright 2023 The original authors
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.rschwietzke;

/**
 * Ultra-fast pseudo random generator that is not synchronized!
 * Don't use anything from Random by inheritance, this will inherit
 * a volatile! Not my idea, copied in parts from some demo random
 * generator lessons.
 *
 * @author rschwietzke
 *
 */
public class FastRandom {
    /**
     * Used in place of a zero seed: with state 0 every shift/xor step below yields 0 again,
     * so the generator would return 0 forever.
     */
    private static final long ZERO_SEED_REPLACEMENT = 0x9E3779B97F4A7C15L;

    private long seed;

    /** Creates a generator seeded with the current time in milliseconds. */
    public FastRandom() {
        this(System.currentTimeMillis());
    }

    /**
     * Creates a generator with the given seed.
     *
     * @param seed initial state; a value of 0 is replaced by a fixed non-zero constant
     */
    public FastRandom(long seed) {
        this.seed = seed == 0 ? ZERO_SEED_REPLACEMENT : seed;
    }

    /**
     * Advances the state by one shift/xor round (shifts 21, 35, 4) and returns its low {@code nbits} bits.
     *
     * @param nbits number of low-order bits to keep
     * @return the masked state, narrowed to {@code int}
     */
    protected int next(int nbits) {
        // N.B. Not thread-safe!
        long x = this.seed;
        x ^= (x << 21);
        x ^= (x >>> 35);
        x ^= (x << 4);
        this.seed = x;

        x &= ((1L << nbits) - 1);

        return (int) x;
    }

    /**
     * Borrowed from the JDK
     *
     * @param bound upper bound (exclusive); not validated here, callers are expected to pass a positive value
     * @return a value between 0 (inclusive) and {@code bound} (exclusive)
     */
    public int nextInt(int bound) {
        int r = next(31);
        int m = bound - 1;
        if ((bound & m) == 0) // i.e., bound is a power of 2
            r = (int) ((bound * (long) r) >> 31);
        else {
            // Rejection loop: redraw while the candidate falls into the incomplete last bucket.
            for (int u = r; u - (r = u % bound) + m < 0; u = next(31))
                ;
        }
        return r;
    }

    /**
     * Borrowed from the JDK
     * @return the low 32 bits of the next state
     */
    public int nextInt() {
        return next(32);
    }
}

-------------------------------------------------------------------------------- /src/main/resources/.dontdelete: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomaswue/1brc-steps/886def4efcd7c4bd76fef441aa16f5b1a260054f/src/main/resources/.dontdelete -------------------------------------------------------------------------------- /src/test/resources/.dontdelete: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomaswue/1brc-steps/886def4efcd7c4bd76fef441aa16f5b1a260054f/src/test/resources/.dontdelete -------------------------------------------------------------------------------- /src/test/resources/samples/measurements-1.out: -------------------------------------------------------------------------------- 1 | {Kunming=19.8/19.8/19.8} 2 | -------------------------------------------------------------------------------- /src/test/resources/samples/measurements-1.txt: -------------------------------------------------------------------------------- 1 | Kunming;19.8 2 | -------------------------------------------------------------------------------- /src/test/resources/samples/measurements-10.out: 
-------------------------------------------------------------------------------- 1 | {Adelaide=15.0/15.0/15.0, Cabo San Lucas=14.9/14.9/14.9, Dodoma=22.2/22.2/22.2, Halifax=12.9/12.9/12.9, Karachi=15.4/15.4/15.4, Pittsburgh=9.7/9.7/9.7, Ségou=25.7/25.7/25.7, Tauranga=38.2/38.2/38.2, Xi'an=24.2/24.2/24.2, Zagreb=12.2/12.2/12.2} 2 | -------------------------------------------------------------------------------- /src/test/resources/samples/measurements-10.txt: -------------------------------------------------------------------------------- 1 | Halifax;12.9 2 | Zagreb;12.2 3 | Cabo San Lucas;14.9 4 | Adelaide;15.0 5 | Ségou;25.7 6 | Pittsburgh;9.7 7 | Karachi;15.4 8 | Xi'an;24.2 9 | Dodoma;22.2 10 | Tauranga;38.2 11 | -------------------------------------------------------------------------------- /src/test/resources/samples/measurements-2.out: -------------------------------------------------------------------------------- 1 | {Bosaso=19.2/19.2/19.2, Petropavlovsk-Kamchatsky=9.5/9.5/9.5} 2 | -------------------------------------------------------------------------------- /src/test/resources/samples/measurements-2.txt: -------------------------------------------------------------------------------- 1 | Bosaso;19.2 2 | Petropavlovsk-Kamchatsky;9.5 3 | -------------------------------------------------------------------------------- /src/test/resources/samples/measurements-20.out: -------------------------------------------------------------------------------- 1 | {Abéché1️⃣🐝🏎️=27.3/27.3/27.3, Almaty1️⃣🐝🏎️=15.3/15.3/15.3, Baghdad1️⃣🐝🏎️=26.0/26.0/26.0, Bangkok1️⃣🐝🏎️=25.6/25.6/25.6, Berlin1️⃣🐝🏎️=-0.3/-0.3/-0.3, Birao1️⃣🐝🏎️=33.5/33.5/33.5, Canberra1️⃣🐝🏎️=5.2/5.2/5.2, Chittagong1️⃣🐝🏎️=12.6/12.6/12.6, Da Nang1️⃣🐝🏎️=33.7/33.7/33.7, Edinburgh1️⃣🐝🏎️=19.8/19.8/19.8, Irkutsk1️⃣🐝🏎️=9.9/9.9/9.9, Lhasa1️⃣🐝🏎️=13.4/13.4/13.4, Lyon1️⃣🐝🏎️=1.8/1.8/1.8, Mogadishu1️⃣🐝🏎️=11.5/11.5/11.5, Nashville1️⃣🐝🏎️=-4.9/-4.9/-4.9, Odesa1️⃣🐝🏎️=6.5/6.5/6.5, Parakou1️⃣🐝🏎️=36.3/36.3/36.3, 
Tamanrasset1️⃣🐝🏎️=17.9/17.9/17.9, Tirana1️⃣🐝🏎️=27.7/27.7/27.7, Xi'an1️⃣🐝🏎️=17.5/17.5/17.5} 2 | -------------------------------------------------------------------------------- /src/test/resources/samples/measurements-20.txt: -------------------------------------------------------------------------------- 1 | Odesa1️⃣🐝🏎️;6.5 2 | Canberra1️⃣🐝🏎️;5.2 3 | Lhasa1️⃣🐝🏎️;13.4 4 | Edinburgh1️⃣🐝🏎️;19.8 5 | Da Nang1️⃣🐝🏎️;33.7 6 | Xi'an1️⃣🐝🏎️;17.5 7 | Berlin1️⃣🐝🏎️;-0.3 8 | Tamanrasset1️⃣🐝🏎️;17.9 9 | Abéché1️⃣🐝🏎️;27.3 10 | Baghdad1️⃣🐝🏎️;26.0 11 | Lyon1️⃣🐝🏎️;1.8 12 | Mogadishu1️⃣🐝🏎️;11.5 13 | Bangkok1️⃣🐝🏎️;25.6 14 | Irkutsk1️⃣🐝🏎️;9.9 15 | Parakou1️⃣🐝🏎️;36.3 16 | Almaty1️⃣🐝🏎️;15.3 17 | Birao1️⃣🐝🏎️;33.5 18 | Chittagong1️⃣🐝🏎️;12.6 19 | Tirana1️⃣🐝🏎️;27.7 20 | Nashville1️⃣🐝🏎️;-4.9 21 | -------------------------------------------------------------------------------- /src/test/resources/samples/measurements-3.out: -------------------------------------------------------------------------------- 1 | {Bosaso=-15.0/1.3/20.0, Petropavlovsk-Kamchatsky=-9.5/0.0/9.5} 2 | -------------------------------------------------------------------------------- /src/test/resources/samples/measurements-3.txt: -------------------------------------------------------------------------------- 1 | Bosaso;5.0 2 | Bosaso;20.0 3 | Bosaso;-5.0 4 | Bosaso;-15.0 5 | Petropavlovsk-Kamchatsky;9.5 6 | Petropavlovsk-Kamchatsky;-9.5 7 | -------------------------------------------------------------------------------- /src/test/resources/samples/measurements-boundaries.out: -------------------------------------------------------------------------------- 1 | {Bosaso=-99.9/-99.9/-99.9, Petropavlovsk-Kamchatsky=99.9/99.9/99.9} 2 | -------------------------------------------------------------------------------- /src/test/resources/samples/measurements-boundaries.txt: -------------------------------------------------------------------------------- 1 | Bosaso;-99.9 2 | Petropavlovsk-Kamchatsky;99.9 3 | 
-------------------------------------------------------------------------------- /src/test/resources/samples/measurements-complex-utf8.out: -------------------------------------------------------------------------------- 1 | {B=8.9/8.9/8.9, C=38.9/38.9/38.9, CabindaKermānZunhuaRochesterValenzuelaOrūmīyehWugangShuangqiaoTshikapa=3.0/3.0/3.0, ChesterLobnyaSan LeandroHemeiSolweziGrand BourgKaliboS=23.4/23.4/23.4, MirnaPehčevoRopažiGus=16.7/16.7/16.7, PototanSahuayo de MorelosBambergMosigkauFrancisco BeltrãoJelenia GóraTelêmaco Borb=17.5/17.5/17.5, TanjungpinangKasselHaldiaLuxorLạng SơnAt TājīTaraka=10.6/10.6/10.6, aniCartagoEṭ ṬīraTemerinCormeilles-en-ParisisZawyat ech CheïkhS=25.4/25.4/25.4, burgazAl ḨawīyahSalamancaMbanza KongoNchelengeZhangaözenTurbatMatiMangghystaūMalak=21.5/21.5/21.5, cotánSan Ramón de la Nueva OránWausauGbaweTailaiRochester HillsVilla ElisaToba TekS=11.2/11.2/11.2, eLafayetteAsh Shaţ=14.2/14.2/14.2, en IslandKota BharuCiudad López MateosCelayaVinhDuyunLos Mochis‘AjmānNyalaLarkanaWichitaNishi=11.9/11.9/11.9, epé=28.2/28.2/28.2, hanVarkkallaiPort LokoD=10.9/10.9/10.9, iCoahuitlánRabatJahāngīrpur SālkhaniCamUniversity of California-Santa BarbaraSerravalleTelkathuM=13.4/13.4/13.4, igButeboJuršinciKoaniImdinaNova VasDestrnikVarvarinSkomunGornji PetrovciRibnicaKon TumŠavnikPoul=22.5/22.5/22.5, igButeboJuršinciKoaniImdinaNova VasDestrnikVarvarinSkopunGornji PetrovciRibnicaKon TumŠavnikPodl=11.5/11.5/11.5, igButeboJuršinciKoaniImdinaNova VasDestrnikVarvarinSkopunGornji PetrovciRibnicaKon TumŠavnikPoul=18.5/18.5/18.5, inhoSökeDordrechtPoáLaloG=13.1/13.1/13.1, iudad Melchor MúzquizQuinhámelDa=40.5/40.5/40.5, ixButeboJuršinciKoaniImdinaNova VasDestrnikVarvarinSkomunGornji PetrovciRibnicaKon TumŠavnikPoul=0.1/0.1/0.1, l ‘=14.6/14.6/14.6, lhuleuTacurongNavapolatskPiscoDera Ismail KhanLabéAltamiraCavite CityYevpatoriiaTait=22.8/22.8/22.8, liLoretoPlacentiaAliso ViejoChomaPen-y-Bont ar OgwrCojutepeque=12.4/12.4/12.4, lioúpoliBarahonaHoPhuketLe BardoBuena 
ParkKayesChampigny-sur-MarneHaskovoChathamBatleyEsteioRe=22.5/22.5/22.5, m el Bo=14.6/14.6/14.6, mazunchaleZrenjaninFouchanaSurtPanč=6.7/6.7/6.7, ngoDübendorfC=11.7/11.7/11.7, nt-A=9.2/9.2/9.2, ntington StationKampong SpeuKakataMoschátoBressoVentspilsSaint-CloudTamboSidi Smai’ilDandenon=14.6/14.6/14.6, oCanagatanHelsinkiJabalpurProvidenceRuchengNizhniy NovgorodAhvāzJeparaShaoyangComayagüe=17.3/17.3/17.3, oGumlāSamā’=14.9/14.9/14.9, os Reyes de SalgadoCinisello BalsamoKashibaH=20.0/20.0/20.0, picuíbaJhang CityTepicJayapuraRio BrancoToyamaFangtingSanandajDelhi CantonmentLinghaiShorāpurToy=13.0/13.0/13.0, raKielSibuYatoParanáSanta ClaraYamagataKatihārBeykozImperat=13.5/13.5/13.5, rhamDera Ghazi KhanMiyazakiBhātpār=21.3/21.3/21.3, rugarhVerāvalAlagoinhasEdremitBandırmaSalavatGandajikaLucapaLeesburgTamaRas Tan=10.9/10.9/10.9, skişeh=12.9/12.9/12.9, venGaopingDunhuaAz Zarqā’SylhetKaihuaCaerdyddJāmnagarFuyuanGayaFlorianópolisC=1.9/1.9/1.9, y-le-MoutierSant’ArpinoPljevljaRo=0.8/0.8/0.8, ça PaulistaDarmstadtZhengdingPindamonhangabaEnschedeGirónUttarpāraHeidelbergK=6.0/6.0/6.0, üSosnowiecTanauanMya=18.4/18.4/18.4, ālSongnimSanto TomasKoiduHoshangābādOpoleNovocheboksarskArarasKhannaPunoKoforiduaAhmadpur E=19.4/19.4/19.4, āng=15.7/15.7/15.7, ġFis=9.6/9.6/9.6, ‘AqabahPembaNowgongQu=12.9/12.9/12.9} 2 | -------------------------------------------------------------------------------- /src/test/resources/samples/measurements-complex-utf8.txt: -------------------------------------------------------------------------------- 1 | aniCartagoEṭ ṬīraTemerinCormeilles-en-ParisisZawyat ech CheïkhS;25.4 2 | picuíbaJhang CityTepicJayapuraRio BrancoToyamaFangtingSanandajDelhi CantonmentLinghaiShorāpurToy;13.0 3 | lhuleuTacurongNavapolatskPiscoDera Ismail KhanLabéAltamiraCavite CityYevpatoriiaTait;22.8 4 | āng;15.7 5 | hanVarkkallaiPort LokoD;10.9 6 | eLafayetteAsh Shaţ;14.2 7 | ‘AqabahPembaNowgongQu;12.9 8 | inhoSökeDordrechtPoáLaloG;13.1 9 | skişeh;12.9 10 | rhamDera Ghazi 
KhanMiyazakiBhātpār;21.3 11 | igButeboJuršinciKoaniImdinaNova VasDestrnikVarvarinSkopunGornji PetrovciRibnicaKon TumŠavnikPodl;11.5 12 | igButeboJuršinciKoaniImdinaNova VasDestrnikVarvarinSkopunGornji PetrovciRibnicaKon TumŠavnikPoul;18.5 13 | igButeboJuršinciKoaniImdinaNova VasDestrnikVarvarinSkomunGornji PetrovciRibnicaKon TumŠavnikPoul;22.5 14 | ixButeboJuršinciKoaniImdinaNova VasDestrnikVarvarinSkomunGornji PetrovciRibnicaKon TumŠavnikPoul;0.1 15 | B;8.9 16 | C;38.9 17 | nt-A;9.2 18 | y-le-MoutierSant’ArpinoPljevljaRo;0.8 19 | oGumlāSamā’;14.9 20 | os Reyes de SalgadoCinisello BalsamoKashibaH;20.0 21 | m el Bo;14.6 22 | mazunchaleZrenjaninFouchanaSurtPanč;6.7 23 | ġFis;9.6 24 | epé;28.2 25 | ālSongnimSanto TomasKoiduHoshangābādOpoleNovocheboksarskArarasKhannaPunoKoforiduaAhmadpur E;19.4 26 | iudad Melchor MúzquizQuinhámelDa;40.5 27 | ChesterLobnyaSan LeandroHemeiSolweziGrand BourgKaliboS;23.4 28 | cotánSan Ramón de la Nueva OránWausauGbaweTailaiRochester HillsVilla ElisaToba TekS;11.2 29 | raKielSibuYatoParanáSanta ClaraYamagataKatihārBeykozImperat;13.5 30 | l ‘;14.6 31 | TanjungpinangKasselHaldiaLuxorLạng SơnAt TājīTaraka;10.6 32 | MirnaPehčevoRopažiGus;16.7 33 | üSosnowiecTanauanMya;18.4 34 | ngoDübendorfC;11.7 35 | liLoretoPlacentiaAliso ViejoChomaPen-y-Bont ar OgwrCojutepeque;12.4 36 | burgazAl ḨawīyahSalamancaMbanza KongoNchelengeZhangaözenTurbatMatiMangghystaūMalak;21.5 37 | iCoahuitlánRabatJahāngīrpur SālkhaniCamUniversity of California-Santa BarbaraSerravalleTelkathuM;13.4 38 | lioúpoliBarahonaHoPhuketLe BardoBuena ParkKayesChampigny-sur-MarneHaskovoChathamBatleyEsteioRe;22.5 39 | PototanSahuayo de MorelosBambergMosigkauFrancisco BeltrãoJelenia GóraTelêmaco Borb;17.5 40 | CabindaKermānZunhuaRochesterValenzuelaOrūmīyehWugangShuangqiaoTshikapa;3.0 41 | venGaopingDunhuaAz Zarqā’SylhetKaihuaCaerdyddJāmnagarFuyuanGayaFlorianópolisC;1.9 42 | ntington StationKampong SpeuKakataMoschátoBressoVentspilsSaint-CloudTamboSidi Smai’ilDandenon;14.6 43 | 
rugarhVerāvalAlagoinhasEdremitBandırmaSalavatGandajikaLucapaLeesburgTamaRas Tan;10.9 44 | oCanagatanHelsinkiJabalpurProvidenceRuchengNizhniy NovgorodAhvāzJeparaShaoyangComayagüe;17.3 45 | ça PaulistaDarmstadtZhengdingPindamonhangabaEnschedeGirónUttarpāraHeidelbergK;6.0 46 | en IslandKota BharuCiudad López MateosCelayaVinhDuyunLos Mochis‘AjmānNyalaLarkanaWichitaNishi;11.9 47 | -------------------------------------------------------------------------------- /src/test/resources/samples/measurements-dot.out: -------------------------------------------------------------------------------- 1 | {-=1.0/1.5/2.0, .=1.0/1.0/1.0} 2 | -------------------------------------------------------------------------------- /src/test/resources/samples/measurements-dot.txt: -------------------------------------------------------------------------------- 1 | .;1.0 2 | -;1.0 3 | -;2.0 4 | -------------------------------------------------------------------------------- /src/test/resources/samples/measurements-rounding.out: -------------------------------------------------------------------------------- 1 | {ham=14.6/25.5/33.6, jel=-9.0/18.0/46.5} 2 | -------------------------------------------------------------------------------- /src/test/resources/samples/measurements-short.out: -------------------------------------------------------------------------------- 1 | {a=1.0/1.0/1.0, b=1.0/1.5/2.0} 2 | -------------------------------------------------------------------------------- /src/test/resources/samples/measurements-short.txt: -------------------------------------------------------------------------------- 1 | a;1.0 2 | b;1.0 3 | b;2.0 4 | -------------------------------------------------------------------------------- /src/test/resources/samples/measurements-shortest.out: -------------------------------------------------------------------------------- 1 | {a=1.0/1.0/1.0} 2 | -------------------------------------------------------------------------------- 
/src/test/resources/samples/measurements-shortest.txt: -------------------------------------------------------------------------------- 1 | a;1.0 2 | --------------------------------------------------------------------------------