├── .gitattributes ├── .github ├── pull_request_template.md └── workflows │ └── maven.yml ├── .gitignore ├── .mvn └── wrapper │ ├── maven-wrapper.jar │ └── maven-wrapper.properties ├── 1brc.png ├── LICENSE.txt ├── README.md ├── additional_build_step_thomaswue.sh ├── calculate_average_Edgar-P-Yan.sh ├── calculate_average_baseline.sh ├── cleanup.sh ├── create_measurements.sh ├── create_measurements2.sh ├── create_measurements3.sh ├── data └── weather_stations.csv ├── etc ├── eclipse-formatter-config.xml └── license.txt ├── eval.sh ├── evaluate.sh ├── mvnw ├── mvnw.cmd ├── pom.xml ├── prepare.sh ├── process.sh ├── process_output.java ├── src ├── main │ ├── java │ │ ├── dev │ │ │ └── morling │ │ │ │ └── onebrc │ │ │ │ ├── CalculateAverage.java │ │ │ │ ├── CreateMeasurements.java │ │ │ │ ├── CreateMeasurements2.java │ │ │ │ └── CreateMeasurements3.java │ │ └── org │ │ │ └── rschwietzke │ │ │ ├── CheaperCharBuffer.java │ │ │ └── FastRandom.java │ ├── nodejs │ │ ├── Edgar-P-yan │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ ├── index.js │ │ │ ├── package-lock.json │ │ │ ├── package.json │ │ │ ├── result.txt │ │ │ └── tsconfig.json │ │ └── baseline │ │ │ ├── .gitignore │ │ │ ├── index.js │ │ │ ├── package-lock.json │ │ │ ├── package.json │ │ │ └── tsconfig.json │ └── resources │ │ └── .dontdelete └── test │ └── resources │ ├── .dontdelete │ └── samples │ ├── measurements-1.out │ ├── measurements-1.txt │ ├── measurements-10.out │ ├── measurements-10.txt │ ├── measurements-10000-unique-keys.out │ ├── measurements-10000-unique-keys.txt │ ├── measurements-2.out │ ├── measurements-2.txt │ ├── measurements-20.out │ ├── measurements-20.txt │ ├── measurements-3.out │ ├── measurements-3.txt │ ├── measurements-boundaries.out │ ├── measurements-boundaries.txt │ ├── measurements-complex-utf8.out │ └── measurements-complex-utf8.txt ├── test.sh ├── test_all.sh └── tocsv.sh /.gitattributes: -------------------------------------------------------------------------------- 1 | *.java text 2 
| *.md text 3 | *.yml text 4 | *.xml text 5 | *.gradle text 6 | *.properties text 7 | mvnw text eol=lf 8 | *.sh text eol=lf 9 | *.bat text eol=crlf 10 | *.cmd text eol=crlf 11 | *.jar binary 12 | 13 | src/test/resources/samples/*.txt text eol=lf 14 | src/test/resources/samples/*.out text eol=lf 15 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | #### Check List: 2 | - [ ] Tests pass (`./test.sh ` shows no differences between expected and actual outputs) 3 | - [ ] All formatting changes by the build are committed 4 | - [ ] Your launch script is named `calculate_average_.sh` (make sure to match casing of your GH user name) and is executable 5 | - [ ] Output matches that of `calculate_average_baseline.sh` 6 | * Execution time: 7 | * Execution time of reference implementation: 8 | 9 | 17 | -------------------------------------------------------------------------------- /.github/workflows/maven.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2023 The original authors 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # 16 | 17 | name: Build 18 | 19 | on: 20 | push: 21 | branches: [ main ] 22 | pull_request: 23 | branches: [ main ] 24 | 25 | jobs: 26 | build: 27 | runs-on: ubuntu-latest 28 | 29 | steps: 30 | - name: 'Check out repository' 31 | uses: actions/checkout@v2 32 | with: 33 | submodules: 'true' 34 | 35 | - name: 'Set up Java' 36 | uses: actions/setup-java@v2 37 | with: 38 | java-version: 21 39 | distribution: 'temurin' 40 | 41 | - name: 'Cache Maven packages' 42 | uses: actions/cache@v3 43 | with: 44 | path: ~/.m2 45 | key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} 46 | restore-keys: ${{ runner.os }}-m2 47 | 48 | - name: 'Build project' 49 | run: mvn -B clean verify -Pci 50 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | #Maven 2 | target/ 3 | pom.xml.tag 4 | pom.xml.releaseBackup 5 | pom.xml.versionsBackup 6 | release.properties 7 | 8 | # Eclipse 9 | .project 10 | .classpath 11 | .settings/ 12 | bin/ 13 | 14 | # IntelliJ 15 | .idea 16 | *.ipr 17 | *.iml 18 | *.iws 19 | 20 | # NetBeans 21 | nb-configuration.xml 22 | 23 | # Visual Studio Code 24 | .vscode 25 | .factorypath 26 | 27 | # OSX 28 | .DS_Store 29 | 30 | # Vim 31 | *.swp 32 | *.swo 33 | 34 | # patch 35 | *.orig 36 | *.rej 37 | 38 | # Local environment 39 | .env 40 | 41 | #JReleaser 42 | out/ 43 | 44 | # 1BRC 45 | /measurements*.txt 46 | /*.out 47 | out_expected.txt 48 | node_modules 49 | -------------------------------------------------------------------------------- /.mvn/wrapper/maven-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/1brc/nodejs/9975dfbfa32143fb7502e63374a48f9fde1c381d/.mvn/wrapper/maven-wrapper.jar -------------------------------------------------------------------------------- /.mvn/wrapper/maven-wrapper.properties: 
-------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.8.4/apache-maven-3.8.4-bin.zip 18 | wrapperUrl=https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar 19 | -------------------------------------------------------------------------------- /1brc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/1brc/nodejs/9975dfbfa32143fb7502e63374a48f9fde1c381d/1brc.png -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 
12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 1️⃣🐝🏎️ The One Billion Row Challenge with Node.js 2 | 3 | ## About the Challenge 4 | 5 | The One Billion Row Challenge (1BRC) is a fun exploration of how far modern Java can be pushed for aggregating one billion rows from a text file. 6 | 7 | Later the community created a dedicated @1brc organization to pay more attention to the implementations in other languages. This repository contains and accepts Node.js based implementations. 8 | 9 | Grab all your (virtual) threads, reach out to SIMD, optimize your GC, or pull any other trick, and create the fastest implementation for solving this task! 10 | 11 | 1BRC 12 | 13 | The text file contains temperature values for a range of weather stations. 14 | Each row is one measurement in the format `<string: station name>;<double: measurement>`, with the measurement value having exactly one fractional digit. 15 | The following shows ten rows as an example: 16 | 17 | ``` 18 | Hamburg;12.0 19 | Bulawayo;8.9 20 | Palembang;38.8 21 | St.
John's;15.2 22 | Cracow;12.6 23 | Bridgetown;26.9 24 | Istanbul;6.2 25 | Roseau;34.4 26 | Conakry;31.2 27 | Istanbul;23.0 28 | ``` 29 | 30 | The task is to write a program which reads the file, calculates the min, mean, and max temperature value per weather station, and emits the results on stdout like this 31 | (i.e. sorted alphabetically by station name, and the result values per station in the format `<min>/<mean>/<max>`, rounded to one fractional digit): 32 | 33 | ``` 34 | {Abha=-23.0/18.0/59.2, Abidjan=-16.2/26.0/67.3, Abéché=-10.0/29.4/69.0, Accra=-10.1/26.4/66.4, Addis Ababa=-23.7/16.0/67.0, Adelaide=-27.8/17.3/58.5, ...} 35 | ``` 36 | 37 | Submit your implementation and become part of the leaderboard! 38 | 39 | ## Results 40 | 41 | | # | Result (m:s.ms) | Implementation | Submitter | Notes | 42 | | --- | --------------- | ------------------------------------------------------------------------------------- | ------------------------------------------------ | ---------------------------------------------------------------------------------- | 43 | | 1. | 00:23.000 | [link](https://github.com/1brc/nodejs/blob/main/src/main/nodejs/Edgar-P-yan/index.js) | [Edgar Pogosyan](https://github.com/Edgar-P-yan) | Multi-threaded, optimized parsing, input-specific `float` to `int` parser, no mmap | 44 | | | 06:16.000 | [link](https://github.com/1brc/nodejs/blob/main/src/main/nodejs/baseline/index.js) | [Edgar Pogosyan](https://github.com/Edgar-P-yan) | The baseline, single threaded, naive implementation | 45 | 46 | See [below](#entering-the-challenge) for instructions on how to enter the challenge with your own implementation. 47 | 48 | ## Prerequisites 49 | 50 | 1. [Java 21](https://openjdk.org/projects/jdk/21/) to generate the `measurements.txt` files and optionally run tests. 51 | 2. Node.js, preferably via nvm (node version manager), must be installed on your system.
52 | 53 | ## Running the Challenge 54 | 55 | This repository contains two programs: 56 | 57 | - `dev.morling.onebrc.CreateMeasurements` (invoked via _create_measurements.sh_): Creates the file _measurements.txt_ in the root directory of this project with a configurable number of random measurement values 58 | - `src/main/nodejs/baseline/index.js` (invoked via _calculate_average_baseline.sh_): Calculates the average values for the file _measurements.txt_ 59 | 60 | Execute the following steps to run the challenge: 61 | 62 | 1. Build the project using Apache Maven: 63 | 64 | ``` 65 | ./mvnw clean verify 66 | ``` 67 | 68 | 2. Create the measurements file with 1B rows (just once): 69 | 70 | ``` 71 | ./create_measurements.sh 1000000000 72 | ``` 73 | 74 | This will take a few minutes. 75 | **Attention:** the generated file has a size of approx. **12 GB**, so make sure to have enough disk space. 76 | 77 | 3. Calculate the average measurement values: 78 | 79 | ``` 80 | ./calculate_average_baseline.sh 81 | ``` 82 | 83 | The provided naive example implementation uses Node.js Streams for processing the file and completes the task in ~6m16s on the environment used for [result evaluation](#evaluating-results). 84 | It serves as the baseline for comparing your own implementation. 85 | 86 | 4. Optimize the heck out of it: 87 | 88 | Adjust the `src/main/nodejs/baseline/index.js` program to speed it up, in any way you see fit (just sticking to a few rules described below). 89 | Options include parallelizing the computation, memory-mapping different sections of the file concurrently, choosing and tuning the garbage collector, and much more. 90 | 91 | ## Flamegraph/Profiling 92 | 93 | > TODO: add instructions on how to profile Node.js programs 94 | 95 | ## Rules and limits 96 | 97 | - No external library dependencies may be used 98 | 99 | - The computation must happen at application _runtime_, i.e.
you cannot process the measurements file at _build time_ 100 | and just bake the result into the binary 101 | - Input value ranges are as follows: 102 | - Station name: non null UTF-8 string of min length 1 character and max length 100 bytes (i.e. this could be 100 one-byte characters, or 50 two-byte characters, etc.) 103 | - Temperature value: non null double between -99.9 (inclusive) and 99.9 (inclusive), always with one fractional digit 104 | - There is a maximum of 10,000 unique station names 105 | - Implementations must not rely on specifics of a given data set, e.g. any valid station name as per the constraints above and any data distribution (number of measurements per station) must be supported 106 | 107 | ## Entering the Challenge 108 | 109 | To submit your own implementation to 1BRC, follow these steps: 110 | 111 | - Create a fork of the [1brc/nodejs](https://github.com/1brc/nodejs/) GitHub repository. 112 | - Create a copy of the `src/main/nodejs/baseline` directory, rename it to `src/main/nodejs/<your_GH_user>`, e.g. `src/main/nodejs/JohnDoe`. 113 | - Make that implementation fast. Really fast. 114 | - Create a copy of _calculate_average_baseline.sh_, named _calculate_average\_<your_GH_user>.sh_, e.g. _calculate_average_JohnDoe.sh_. 115 | - Adjust that script so that it references your implementation file. If needed, provide any Node.js/V8 runtime arguments. 116 | Make sure that script does not write anything to standard output other than calculation results. 117 | - Run the test suite by executing _./test.sh <your_GH_user>_; if any differences are reported, fix them before submitting your implementation. 118 | - Create a pull request against the upstream repository, clearly stating 119 | - The execution time of the program on your system and specs of the same (CPU, number of cores, RAM). This is for informative purposes only, the official runtime will be determined as described below.
120 | - I will run the program and determine its performance as described in the next section, and enter the result into the scoreboard. 121 | 122 | **Note:** I reserve the right to not evaluate specific submissions if I feel doubtful about the implementation (i.e. I won't run your Bitcoin miner ;). 123 | 124 | 127 | 128 | ## Evaluating Results 129 | 130 | For now results are determined by running the program on an Apple MacBook M1 32GB (10 physical cores). 131 | The `time` program is used for measuring execution times, i.e. end-to-end times are measured. 132 | Each contender will be run five times in a row. 133 | The slowest and the fastest runs are discarded. 134 | The mean value of the remaining three runs is the result for that contender and will be added to the results table above. 135 | The exact same _measurements.txt_ file is used for evaluating all contenders. 136 | 137 | 141 | 142 | 146 | 147 | ## FAQ 148 | 149 | 160 | 161 | _Q: What is the encoding of the measurements.txt file?_\ 162 | A: The file is encoded with UTF-8. 163 | 164 | _Q: Can I make assumptions on the names of the weather stations showing up in the data set?_\ 165 | A: No, while only a fixed set of station names is used by the data set generator, any solution should work with arbitrary UTF-8 station names 166 | (for the sake of simplicity, names are guaranteed to contain no `;` character). 167 | 168 | _Q: Can I copy code from other submissions?_\ 169 | A: Yes, you can. The primary focus of the challenge is about learning something new, rather than "winning". When you do so, please give credit to the relevant source submissions. Please don't re-submit other entries with no or only trivial improvements. 170 | 171 | _Q: Which operating system is used for evaluation?_\ 172 | A: macOS Sonoma 14 (see [Evaluating Results](#evaluating-results)) 173 | 174 | _Q: My solution runs in 2 sec on my machine.
Am I the fastest 1BRC-er in the world?_\ 175 | A: Probably not :) 1BRC results are reported in wallclock time, thus results of different implementations are only comparable when obtained on the same machine. If for instance an implementation is faster on a 32 core workstation than on the 8 core evaluation instance, this doesn't allow for any conclusions. When sharing 1BRC results, you should also always share the result of running the baseline implementation on the same hardware. 176 | 177 | _Q: Why_ 1️⃣🐝🏎️ _?_\ 178 | A: It's the abbreviation of the project name: **One** **B**illion **R**ow **C**hallenge. 179 | 180 | ## License 181 | 182 | This code base is available under the Apache License, version 2. 183 | 184 | ## Code of Conduct 185 | 186 | Be excellent to each other! 187 | More than winning, the purpose of this challenge is to have fun and learn something new. 188 | -------------------------------------------------------------------------------- /additional_build_step_thomaswue.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # Copyright 2023 The original authors 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | source "$HOME/.sdkman/bin/sdkman-init.sh" 19 | sdk use java 21.0.1-graal 1>&2 20 | NATIVE_IMAGE_OPTS="--gc=epsilon -O3 -march=native --enable-preview" 21 | native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o image_calculateaverage_thomaswue dev.morling.onebrc.CalculateAverage_thomaswue 22 | -------------------------------------------------------------------------------- /calculate_average_Edgar-P-Yan.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # Copyright 2023 The original authors 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | time node src/main/nodejs/Edgar-P-yan/index.js measurements.txt 19 | -------------------------------------------------------------------------------- /calculate_average_baseline.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # Copyright 2023 The original authors 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | time node src/main/nodejs/baseline/index.js measurements.txt 19 | -------------------------------------------------------------------------------- /cleanup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # Copyright 2023 The original authors 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | if [ -z "$1" ] 19 | then 20 | echo "Usage: cleanup.sh " 21 | exit 1 22 | fi 23 | 24 | git checkout . 25 | git checkout main 26 | git branch -D $1 27 | git pull upstream main 28 | -------------------------------------------------------------------------------- /create_measurements.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # Copyright 2023 The original authors 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
#!/bin/sh
#
#  Copyright 2023 The original authors
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

# Launches dev.morling.onebrc.CreateMeasurements2 from the built jar,
# forwarding the first command-line argument to it.
#
# ${1:+"$1"} passes the argument as exactly one word (no word splitting or
# glob expansion) and passes nothing at all when the argument is absent or
# empty, which is what the unquoted $1 did in that case.
java --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CreateMeasurements2 ${1:+"$1"}
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | 19 | java --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CreateMeasurements3 $1 20 | -------------------------------------------------------------------------------- /etc/license.txt: -------------------------------------------------------------------------------- 1 | Copyright 2023 The original authors 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /eval.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # Copyright 2023 The original authors 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
#!/bin/sh
#
#  Copyright 2023 The original authors
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

# Builds the project and runs ./calculate_average_<fork name>.sh five times
# against the measurements_1B.txt data set.
#
# Usage: evaluate.sh <fork name>

if [ -z "$1" ]
then
  echo "Usage: evaluate.sh <fork name>"
  exit 1
fi

java --version

mvn clean verify

# Point measurements.txt at the full data set.
rm -f measurements.txt
ln -s measurements_1B.txt measurements.txt

# Five runs, spelled out explicitly: brace expansion ({1..5}) is not part of
# POSIX sh, so under shells such as dash the loop body would run only once
# with i set to the literal string "{1..5}".
for i in 1 2 3 4 5
do
  "./calculate_average_$1.sh"
done
to debug Maven itself, use 32 | # set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000 33 | # MAVEN_SKIP_RC - flag to disable loading of mavenrc files 34 | # ---------------------------------------------------------------------------- 35 | 36 | if [ -z "$MAVEN_SKIP_RC" ] ; then 37 | 38 | if [ -f /usr/local/etc/mavenrc ] ; then 39 | . /usr/local/etc/mavenrc 40 | fi 41 | 42 | if [ -f /etc/mavenrc ] ; then 43 | . /etc/mavenrc 44 | fi 45 | 46 | if [ -f "$HOME/.mavenrc" ] ; then 47 | . "$HOME/.mavenrc" 48 | fi 49 | 50 | fi 51 | 52 | # OS specific support. $var _must_ be set to either true or false. 53 | cygwin=false; 54 | darwin=false; 55 | mingw=false 56 | case "$(uname)" in 57 | CYGWIN*) cygwin=true ;; 58 | MINGW*) mingw=true;; 59 | Darwin*) darwin=true 60 | # Use /usr/libexec/java_home if available, otherwise fall back to /Library/Java/Home 61 | # See https://developer.apple.com/library/mac/qa/qa1170/_index.html 62 | if [ -z "$JAVA_HOME" ]; then 63 | if [ -x "/usr/libexec/java_home" ]; then 64 | JAVA_HOME="$(/usr/libexec/java_home)"; export JAVA_HOME 65 | else 66 | JAVA_HOME="/Library/Java/Home"; export JAVA_HOME 67 | fi 68 | fi 69 | ;; 70 | esac 71 | 72 | if [ -z "$JAVA_HOME" ] ; then 73 | if [ -r /etc/gentoo-release ] ; then 74 | JAVA_HOME=$(java-config --jre-home) 75 | fi 76 | fi 77 | 78 | # For Cygwin, ensure paths are in UNIX format before anything is touched 79 | if $cygwin ; then 80 | [ -n "$JAVA_HOME" ] && 81 | JAVA_HOME=$(cygpath --unix "$JAVA_HOME") 82 | [ -n "$CLASSPATH" ] && 83 | CLASSPATH=$(cygpath --path --unix "$CLASSPATH") 84 | fi 85 | 86 | # For Mingw, ensure paths are in UNIX format before anything is touched 87 | if $mingw ; then 88 | [ -n "$JAVA_HOME" ] && [ -d "$JAVA_HOME" ] && 89 | JAVA_HOME="$(cd "$JAVA_HOME" || (echo "cannot cd into $JAVA_HOME."; exit 1); pwd)" 90 | fi 91 | 92 | if [ -z "$JAVA_HOME" ]; then 93 | javaExecutable="$(which javac)" 94 | if [ -n "$javaExecutable" ] && ! 
[ "$(expr "\"$javaExecutable\"" : '\([^ ]*\)')" = "no" ]; then 95 | # readlink(1) is not available as standard on Solaris 10. 96 | readLink=$(which readlink) 97 | if [ ! "$(expr "$readLink" : '\([^ ]*\)')" = "no" ]; then 98 | if $darwin ; then 99 | javaHome="$(dirname "\"$javaExecutable\"")" 100 | javaExecutable="$(cd "\"$javaHome\"" && pwd -P)/javac" 101 | else 102 | javaExecutable="$(readlink -f "\"$javaExecutable\"")" 103 | fi 104 | javaHome="$(dirname "\"$javaExecutable\"")" 105 | javaHome=$(expr "$javaHome" : '\(.*\)/bin') 106 | JAVA_HOME="$javaHome" 107 | export JAVA_HOME 108 | fi 109 | fi 110 | fi 111 | 112 | if [ -z "$JAVACMD" ] ; then 113 | if [ -n "$JAVA_HOME" ] ; then 114 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 115 | # IBM's JDK on AIX uses strange locations for the executables 116 | JAVACMD="$JAVA_HOME/jre/sh/java" 117 | else 118 | JAVACMD="$JAVA_HOME/bin/java" 119 | fi 120 | else 121 | JAVACMD="$(\unset -f command 2>/dev/null; \command -v java)" 122 | fi 123 | fi 124 | 125 | if [ ! -x "$JAVACMD" ] ; then 126 | echo "Error: JAVA_HOME is not defined correctly." >&2 127 | echo " We cannot execute $JAVACMD" >&2 128 | exit 1 129 | fi 130 | 131 | if [ -z "$JAVA_HOME" ] ; then 132 | echo "Warning: JAVA_HOME environment variable is not set." 133 | fi 134 | 135 | # traverses directory structure from process work directory to filesystem root 136 | # first directory with .mvn subdirectory is considered project base directory 137 | find_maven_basedir() { 138 | if [ -z "$1" ] 139 | then 140 | echo "Path not specified to find_maven_basedir" 141 | return 1 142 | fi 143 | 144 | basedir="$1" 145 | wdir="$1" 146 | while [ "$wdir" != '/' ] ; do 147 | if [ -d "$wdir"/.mvn ] ; then 148 | basedir=$wdir 149 | break 150 | fi 151 | # workaround for JBEAP-8937 (on Solaris 10/Sparc) 152 | if [ -d "${wdir}" ]; then 153 | wdir=$(cd "$wdir/.." 
|| exit 1; pwd) 154 | fi 155 | # end of workaround 156 | done 157 | printf '%s' "$(cd "$basedir" || exit 1; pwd)" 158 | } 159 | 160 | # concatenates all lines of a file 161 | concat_lines() { 162 | if [ -f "$1" ]; then 163 | # Remove \r in case we run on Windows within Git Bash 164 | # and check out the repository with auto CRLF management 165 | # enabled. Otherwise, we may read lines that are delimited with 166 | # \r\n and produce $'-Xarg\r' rather than -Xarg due to word 167 | # splitting rules. 168 | tr -s '\r\n' ' ' < "$1" 169 | fi 170 | } 171 | 172 | log() { 173 | if [ "$MVNW_VERBOSE" = true ]; then 174 | printf '%s\n' "$1" 175 | fi 176 | } 177 | 178 | BASE_DIR=$(find_maven_basedir "$(dirname "$0")") 179 | if [ -z "$BASE_DIR" ]; then 180 | exit 1; 181 | fi 182 | 183 | MAVEN_PROJECTBASEDIR=${MAVEN_BASEDIR:-"$BASE_DIR"}; export MAVEN_PROJECTBASEDIR 184 | log "$MAVEN_PROJECTBASEDIR" 185 | 186 | ########################################################################################## 187 | # Extension to allow automatically downloading the maven-wrapper.jar from Maven-central 188 | # This allows using the maven wrapper in projects that prohibit checking in binary data. 189 | ########################################################################################## 190 | wrapperJarPath="$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.jar" 191 | if [ -r "$wrapperJarPath" ]; then 192 | log "Found $wrapperJarPath" 193 | else 194 | log "Couldn't find $wrapperJarPath, downloading it ..." 
195 | 196 | if [ -n "$MVNW_REPOURL" ]; then 197 | wrapperUrl="$MVNW_REPOURL/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar" 198 | else 199 | wrapperUrl="https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar" 200 | fi 201 | while IFS="=" read -r key value; do 202 | # Remove '\r' from value to allow usage on windows as IFS does not consider '\r' as a separator ( considers space, tab, new line ('\n'), and custom '=' ) 203 | safeValue=$(echo "$value" | tr -d '\r') 204 | case "$key" in (wrapperUrl) wrapperUrl="$safeValue"; break ;; 205 | esac 206 | done < "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.properties" 207 | log "Downloading from: $wrapperUrl" 208 | 209 | if $cygwin; then 210 | wrapperJarPath=$(cygpath --path --windows "$wrapperJarPath") 211 | fi 212 | 213 | if command -v wget > /dev/null; then 214 | log "Found wget ... using wget" 215 | [ "$MVNW_VERBOSE" = true ] && QUIET="" || QUIET="--quiet" 216 | if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then 217 | wget $QUIET "$wrapperUrl" -O "$wrapperJarPath" || rm -f "$wrapperJarPath" 218 | else 219 | wget $QUIET --http-user="$MVNW_USERNAME" --http-password="$MVNW_PASSWORD" "$wrapperUrl" -O "$wrapperJarPath" || rm -f "$wrapperJarPath" 220 | fi 221 | elif command -v curl > /dev/null; then 222 | log "Found curl ... 
using curl" 223 | [ "$MVNW_VERBOSE" = true ] && QUIET="" || QUIET="--silent" 224 | if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then 225 | curl $QUIET -o "$wrapperJarPath" "$wrapperUrl" -f -L || rm -f "$wrapperJarPath" 226 | else 227 | curl $QUIET --user "$MVNW_USERNAME:$MVNW_PASSWORD" -o "$wrapperJarPath" "$wrapperUrl" -f -L || rm -f "$wrapperJarPath" 228 | fi 229 | else 230 | log "Falling back to using Java to download" 231 | javaSource="$MAVEN_PROJECTBASEDIR/.mvn/wrapper/MavenWrapperDownloader.java" 232 | javaClass="$MAVEN_PROJECTBASEDIR/.mvn/wrapper/MavenWrapperDownloader.class" 233 | # For Cygwin, switch paths to Windows format before running javac 234 | if $cygwin; then 235 | javaSource=$(cygpath --path --windows "$javaSource") 236 | javaClass=$(cygpath --path --windows "$javaClass") 237 | fi 238 | if [ -e "$javaSource" ]; then 239 | if [ ! -e "$javaClass" ]; then 240 | log " - Compiling MavenWrapperDownloader.java ..." 241 | ("$JAVA_HOME/bin/javac" "$javaSource") 242 | fi 243 | if [ -e "$javaClass" ]; then 244 | log " - Running MavenWrapperDownloader.java ..." 
245 | ("$JAVA_HOME/bin/java" -cp .mvn/wrapper MavenWrapperDownloader "$wrapperUrl" "$wrapperJarPath") || rm -f "$wrapperJarPath" 246 | fi 247 | fi 248 | fi 249 | fi 250 | ########################################################################################## 251 | # End of extension 252 | ########################################################################################## 253 | 254 | # If specified, validate the SHA-256 sum of the Maven wrapper jar file 255 | wrapperSha256Sum="" 256 | while IFS="=" read -r key value; do 257 | case "$key" in (wrapperSha256Sum) wrapperSha256Sum=$value; break ;; 258 | esac 259 | done < "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.properties" 260 | if [ -n "$wrapperSha256Sum" ]; then 261 | wrapperSha256Result=false 262 | if command -v sha256sum > /dev/null; then 263 | if echo "$wrapperSha256Sum $wrapperJarPath" | sha256sum -c > /dev/null 2>&1; then 264 | wrapperSha256Result=true 265 | fi 266 | elif command -v shasum > /dev/null; then 267 | if echo "$wrapperSha256Sum $wrapperJarPath" | shasum -a 256 -c > /dev/null 2>&1; then 268 | wrapperSha256Result=true 269 | fi 270 | else 271 | echo "Checksum validation was requested but neither 'sha256sum' or 'shasum' are available." 272 | echo "Please install either command, or disable validation by removing 'wrapperSha256Sum' from your maven-wrapper.properties." 273 | exit 1 274 | fi 275 | if [ $wrapperSha256Result = false ]; then 276 | echo "Error: Failed to validate Maven wrapper SHA-256, your Maven wrapper might be compromised." >&2 277 | echo "Investigate or delete $wrapperJarPath to attempt a clean download." >&2 278 | echo "If you updated your Maven version, you need to update the specified wrapperSha256Sum property." 
>&2 279 | exit 1 280 | fi 281 | fi 282 | 283 | MAVEN_OPTS="$(concat_lines "$MAVEN_PROJECTBASEDIR/.mvn/jvm.config") $MAVEN_OPTS" 284 | 285 | # For Cygwin, switch paths to Windows format before running java 286 | if $cygwin; then 287 | [ -n "$JAVA_HOME" ] && 288 | JAVA_HOME=$(cygpath --path --windows "$JAVA_HOME") 289 | [ -n "$CLASSPATH" ] && 290 | CLASSPATH=$(cygpath --path --windows "$CLASSPATH") 291 | [ -n "$MAVEN_PROJECTBASEDIR" ] && 292 | MAVEN_PROJECTBASEDIR=$(cygpath --path --windows "$MAVEN_PROJECTBASEDIR") 293 | fi 294 | 295 | # Provide a "standardized" way to retrieve the CLI args that will 296 | # work with both Windows and non-Windows executions. 297 | MAVEN_CMD_LINE_ARGS="$MAVEN_CONFIG $*" 298 | export MAVEN_CMD_LINE_ARGS 299 | 300 | WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain 301 | 302 | # shellcheck disable=SC2086 # safe args 303 | exec "$JAVACMD" \ 304 | $MAVEN_OPTS \ 305 | $MAVEN_DEBUG_OPTS \ 306 | -classpath "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.jar" \ 307 | "-Dmaven.multiModuleProjectDirectory=${MAVEN_PROJECTBASEDIR}" \ 308 | ${WRAPPER_LAUNCHER} $MAVEN_CONFIG "$@" 309 | -------------------------------------------------------------------------------- /mvnw.cmd: -------------------------------------------------------------------------------- 1 | @REM ---------------------------------------------------------------------------- 2 | @REM Licensed to the Apache Software Foundation (ASF) under one 3 | @REM or more contributor license agreements. See the NOTICE file 4 | @REM distributed with this work for additional information 5 | @REM regarding copyright ownership. The ASF licenses this file 6 | @REM to you under the Apache License, Version 2.0 (the 7 | @REM "License"); you may not use this file except in compliance 8 | @REM with the License. 
You may obtain a copy of the License at 9 | @REM 10 | @REM http://www.apache.org/licenses/LICENSE-2.0 11 | @REM 12 | @REM Unless required by applicable law or agreed to in writing, 13 | @REM software distributed under the License is distributed on an 14 | @REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | @REM KIND, either express or implied. See the License for the 16 | @REM specific language governing permissions and limitations 17 | @REM under the License. 18 | @REM ---------------------------------------------------------------------------- 19 | 20 | @REM ---------------------------------------------------------------------------- 21 | @REM Apache Maven Wrapper startup batch script, version 3.2.0 22 | @REM 23 | @REM Required ENV vars: 24 | @REM JAVA_HOME - location of a JDK home dir 25 | @REM 26 | @REM Optional ENV vars 27 | @REM MAVEN_BATCH_ECHO - set to 'on' to enable the echoing of the batch commands 28 | @REM MAVEN_BATCH_PAUSE - set to 'on' to wait for a keystroke before ending 29 | @REM MAVEN_OPTS - parameters passed to the Java VM when running Maven 30 | @REM e.g. 
to debug Maven itself, use 31 | @REM set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000 32 | @REM MAVEN_SKIP_RC - flag to disable loading of mavenrc files 33 | @REM ---------------------------------------------------------------------------- 34 | 35 | @REM Begin all REM lines with '@' in case MAVEN_BATCH_ECHO is 'on' 36 | @echo off 37 | @REM set title of command window 38 | title %0 39 | @REM enable echoing by setting MAVEN_BATCH_ECHO to 'on' 40 | @if "%MAVEN_BATCH_ECHO%" == "on" echo %MAVEN_BATCH_ECHO% 41 | 42 | @REM set %HOME% to equivalent of $HOME 43 | if "%HOME%" == "" (set "HOME=%HOMEDRIVE%%HOMEPATH%") 44 | 45 | @REM Execute a user defined script before this one 46 | if not "%MAVEN_SKIP_RC%" == "" goto skipRcPre 47 | @REM check for pre script, once with legacy .bat ending and once with .cmd ending 48 | if exist "%USERPROFILE%\mavenrc_pre.bat" call "%USERPROFILE%\mavenrc_pre.bat" %* 49 | if exist "%USERPROFILE%\mavenrc_pre.cmd" call "%USERPROFILE%\mavenrc_pre.cmd" %* 50 | :skipRcPre 51 | 52 | @setlocal 53 | 54 | set ERROR_CODE=0 55 | 56 | @REM To isolate internal variables from possible post scripts, we use another setlocal 57 | @setlocal 58 | 59 | @REM ==== START VALIDATION ==== 60 | if not "%JAVA_HOME%" == "" goto OkJHome 61 | 62 | echo. 63 | echo Error: JAVA_HOME not found in your environment. >&2 64 | echo Please set the JAVA_HOME variable in your environment to match the >&2 65 | echo location of your Java installation. >&2 66 | echo. 67 | goto error 68 | 69 | :OkJHome 70 | if exist "%JAVA_HOME%\bin\java.exe" goto init 71 | 72 | echo. 73 | echo Error: JAVA_HOME is set to an invalid directory. >&2 74 | echo JAVA_HOME = "%JAVA_HOME%" >&2 75 | echo Please set the JAVA_HOME variable in your environment to match the >&2 76 | echo location of your Java installation. >&2 77 | echo. 78 | goto error 79 | 80 | @REM ==== END VALIDATION ==== 81 | 82 | :init 83 | 84 | @REM Find the project base dir, i.e. 
the directory that contains the folder ".mvn". 85 | @REM Fallback to current working directory if not found. 86 | 87 | set MAVEN_PROJECTBASEDIR=%MAVEN_BASEDIR% 88 | IF NOT "%MAVEN_PROJECTBASEDIR%"=="" goto endDetectBaseDir 89 | 90 | set EXEC_DIR=%CD% 91 | set WDIR=%EXEC_DIR% 92 | :findBaseDir 93 | IF EXIST "%WDIR%"\.mvn goto baseDirFound 94 | cd .. 95 | IF "%WDIR%"=="%CD%" goto baseDirNotFound 96 | set WDIR=%CD% 97 | goto findBaseDir 98 | 99 | :baseDirFound 100 | set MAVEN_PROJECTBASEDIR=%WDIR% 101 | cd "%EXEC_DIR%" 102 | goto endDetectBaseDir 103 | 104 | :baseDirNotFound 105 | set MAVEN_PROJECTBASEDIR=%EXEC_DIR% 106 | cd "%EXEC_DIR%" 107 | 108 | :endDetectBaseDir 109 | 110 | IF NOT EXIST "%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config" goto endReadAdditionalConfig 111 | 112 | @setlocal EnableExtensions EnableDelayedExpansion 113 | for /F "usebackq delims=" %%a in ("%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config") do set JVM_CONFIG_MAVEN_PROPS=!JVM_CONFIG_MAVEN_PROPS! %%a 114 | @endlocal & set JVM_CONFIG_MAVEN_PROPS=%JVM_CONFIG_MAVEN_PROPS% 115 | 116 | :endReadAdditionalConfig 117 | 118 | SET MAVEN_JAVA_EXE="%JAVA_HOME%\bin\java.exe" 119 | set WRAPPER_JAR="%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.jar" 120 | set WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain 121 | 122 | set WRAPPER_URL="https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar" 123 | 124 | FOR /F "usebackq tokens=1,2 delims==" %%A IN ("%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.properties") DO ( 125 | IF "%%A"=="wrapperUrl" SET WRAPPER_URL=%%B 126 | ) 127 | 128 | @REM Extension to allow automatically downloading the maven-wrapper.jar from Maven-central 129 | @REM This allows using the maven wrapper in projects that prohibit checking in binary data. 
130 | if exist %WRAPPER_JAR% ( 131 | if "%MVNW_VERBOSE%" == "true" ( 132 | echo Found %WRAPPER_JAR% 133 | ) 134 | ) else ( 135 | if not "%MVNW_REPOURL%" == "" ( 136 | SET WRAPPER_URL="%MVNW_REPOURL%/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar" 137 | ) 138 | if "%MVNW_VERBOSE%" == "true" ( 139 | echo Couldn't find %WRAPPER_JAR%, downloading it ... 140 | echo Downloading from: %WRAPPER_URL% 141 | ) 142 | 143 | powershell -Command "&{"^ 144 | "$webclient = new-object System.Net.WebClient;"^ 145 | "if (-not ([string]::IsNullOrEmpty('%MVNW_USERNAME%') -and [string]::IsNullOrEmpty('%MVNW_PASSWORD%'))) {"^ 146 | "$webclient.Credentials = new-object System.Net.NetworkCredential('%MVNW_USERNAME%', '%MVNW_PASSWORD%');"^ 147 | "}"^ 148 | "[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12; $webclient.DownloadFile('%WRAPPER_URL%', '%WRAPPER_JAR%')"^ 149 | "}" 150 | if "%MVNW_VERBOSE%" == "true" ( 151 | echo Finished downloading %WRAPPER_JAR% 152 | ) 153 | ) 154 | @REM End of extension 155 | 156 | @REM If specified, validate the SHA-256 sum of the Maven wrapper jar file 157 | SET WRAPPER_SHA_256_SUM="" 158 | FOR /F "usebackq tokens=1,2 delims==" %%A IN ("%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.properties") DO ( 159 | IF "%%A"=="wrapperSha256Sum" SET WRAPPER_SHA_256_SUM=%%B 160 | ) 161 | IF NOT %WRAPPER_SHA_256_SUM%=="" ( 162 | powershell -Command "&{"^ 163 | "$hash = (Get-FileHash \"%WRAPPER_JAR%\" -Algorithm SHA256).Hash.ToLower();"^ 164 | "If('%WRAPPER_SHA_256_SUM%' -ne $hash){"^ 165 | " Write-Output 'Error: Failed to validate Maven wrapper SHA-256, your Maven wrapper might be compromised.';"^ 166 | " Write-Output 'Investigate or delete %WRAPPER_JAR% to attempt a clean download.';"^ 167 | " Write-Output 'If you updated your Maven version, you need to update the specified wrapperSha256Sum property.';"^ 168 | " exit 1;"^ 169 | "}"^ 170 | "}" 171 | if ERRORLEVEL 1 goto error 172 | ) 173 | 174 | @REM Provide a 
"standardized" way to retrieve the CLI args that will 175 | @REM work with both Windows and non-Windows executions. 176 | set MAVEN_CMD_LINE_ARGS=%* 177 | 178 | %MAVEN_JAVA_EXE% ^ 179 | %JVM_CONFIG_MAVEN_PROPS% ^ 180 | %MAVEN_OPTS% ^ 181 | %MAVEN_DEBUG_OPTS% ^ 182 | -classpath %WRAPPER_JAR% ^ 183 | "-Dmaven.multiModuleProjectDirectory=%MAVEN_PROJECTBASEDIR%" ^ 184 | %WRAPPER_LAUNCHER% %MAVEN_CONFIG% %* 185 | if ERRORLEVEL 1 goto error 186 | goto end 187 | 188 | :error 189 | set ERROR_CODE=1 190 | 191 | :end 192 | @endlocal & set ERROR_CODE=%ERROR_CODE% 193 | 194 | if not "%MAVEN_SKIP_RC%"=="" goto skipRcPost 195 | @REM check for post script, once with legacy .bat ending and once with .cmd ending 196 | if exist "%USERPROFILE%\mavenrc_post.bat" call "%USERPROFILE%\mavenrc_post.bat" 197 | if exist "%USERPROFILE%\mavenrc_post.cmd" call "%USERPROFILE%\mavenrc_post.cmd" 198 | :skipRcPost 199 | 200 | @REM pause the script if MAVEN_BATCH_PAUSE is set to 'on' 201 | if "%MAVEN_BATCH_PAUSE%"=="on" pause 202 | 203 | if "%MAVEN_TERMINATE_CMD%"=="on" exit %ERROR_CODE% 204 | 205 | cmd /C exit /B %ERROR_CODE% 206 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 19 | 20 | 4.0.0 21 | 22 | dev.morling.demos 23 | average 24 | 1.0.0-SNAPSHOT 25 | 26 | 27 | true 28 | 21 29 | UTF-8 30 | UTF-8 31 | 32 | 33 | My OSS Project 34 | My Latest OSS Project 35 | 2021 36 | tbd. 
37 | 38 | 39 | 40 | Apache-2.0 41 | http://www.apache.org/licenses/LICENSE-2.0.txt 42 | repo 43 | 44 | 45 | 46 | 47 | 48 | 49 | org.junit 50 | junit-bom 51 | 5.8.1 52 | pom 53 | import 54 | 55 | 56 | 57 | 58 | 59 | 60 | org.junit.jupiter 61 | junit-jupiter 62 | test 63 | 64 | 65 | org.assertj 66 | assertj-core 67 | 3.21.0 68 | test 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | com.mycila 78 | license-maven-plugin 79 | 4.1 80 | 81 | 82 | net.revelc.code.formatter 83 | formatter-maven-plugin 84 | 2.16.0 85 | 86 | etc/eclipse-formatter-config.xml 87 | 88 | 89 | 90 | net.revelc.code 91 | impsort-maven-plugin 92 | 1.9.0 93 | 94 | java.,javax.,org.,com. 95 | true 96 | true 97 | 98 | 99 | 100 | org.apache.maven.plugins 101 | maven-compiler-plugin 102 | 3.12.1 103 | 104 | true 105 | 106 | 111 | --enable-preview 112 | --add-modules 113 | java.base,jdk.incubator.vector 114 | 115 | 116 | 117 | 118 | org.apache.maven.plugins 119 | maven-clean-plugin 120 | 3.1.0 121 | 122 | 123 | org.apache.maven.plugins 124 | maven-deploy-plugin 125 | 3.1.1 126 | 127 | 128 | org.apache.maven.plugins 129 | maven-enforcer-plugin 130 | 3.3.0 131 | 132 | 133 | org.apache.maven.plugins 134 | maven-install-plugin 135 | 3.1.1 136 | 137 | 138 | org.apache.maven.plugins 139 | maven-jar-plugin 140 | 3.2.0 141 | 142 | 143 | org.apache.maven.plugins 144 | maven-resources-plugin 145 | 3.2.0 146 | 147 | 148 | org.apache.maven.plugins 149 | maven-site-plugin 150 | 3.12.1 151 | 152 | 153 | org.apache.maven.plugins 154 | maven-surefire-plugin 155 | 3.2.3 156 | 157 | 158 | org.apache.maven.plugins 159 | maven-wrapper-plugin 160 | 3.2.0 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | ci 169 | 170 | 171 | 172 | net.revelc.code.formatter 173 | formatter-maven-plugin 174 | 175 | 176 | validate-format 177 | 178 | validate 179 | 180 | validate 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | qa 189 | 190 | 191 | !quick 192 | 193 | 194 | 195 | 196 | 197 | com.mycila 198 | license-maven-plugin 199 | 200 |
etc/license.txt
201 | true 202 | true 203 | 204 | LICENSE.txt 205 | **/.dontdelete 206 | **/measurements*.txt 207 | **/measurements*.out 208 | out_expected.txt 209 | 210 |
211 | 212 | 213 | 214 | check 215 | 216 | 217 | 218 |
219 | 220 | net.revelc.code.formatter 221 | formatter-maven-plugin 222 | 223 | 224 | format 225 | 226 | format 227 | 228 | process-sources 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | org.apache.maven.plugins 247 | maven-enforcer-plugin 248 | 249 | 250 | enforce-plugin-versions 251 | 252 | enforce 253 | 254 | 255 | 256 | 257 | ${maven.compiler.release} 258 | 259 | 260 | true 261 | true 262 | true 263 | clean,deploy,site 264 | 265 | 266 | 267 | 268 | 269 | 270 |
271 |
272 |
273 | 274 | quick 275 | 276 | 277 | quick 278 | 279 | 280 | 281 | true 282 | 283 | 284 |
285 |
#!/bin/sh
#
#  Copyright 2023 The original authors
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

# Checks out a contributor's branch locally: (re)creates a local branch named
# after the fork owner, fetches the given branch of their 1brc fork, resets
# the local branch to it and rebases it onto main.
#
# Usage: prepare.sh <fork name>:<branch name>

set -x

if [ -z "$1" ]
then
  echo "Usage: prepare.sh <fork name>:<branch name>"
  exit 1
fi

# Split "<fork name>:<branch name>" with POSIX parameter expansion. Arrays and
# ${var//pattern/replacement} are bash extensions and are not available when
# /bin/sh is a strict POSIX shell.
user=${1%%:*}
case "$1" in
  *:*) branch=${1#*:} ;;
  *)   branch= ;;
esac

echo " User: $user"
echo "Branch: $branch"

# Drop a stale local branch from an earlier run; failure (no such branch) is
# expected and silenced. ">/dev/null 2>&1" replaces the bash-only "&>", which
# a POSIX shell parses as "run in background" followed by a redirection.
git branch -D "$user" >/dev/null 2>&1

git checkout -b "$user"
# ${branch:+...} omits the refspec entirely when no branch was given.
git fetch "https://github.com/$user/1brc.git" ${branch:+"$branch"}
# git fetch "git@github.com:$user/1brc.git" ${branch:+"$branch"}
git reset --hard FETCH_HEAD
git rebase main
/*
 *  Copyright 2023 The original authors
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;
import java.time.Duration;

/**
 * Post-processes the captured output of an evaluation run.
 *
 * <p>Arguments: {@code args[0]} is the file holding the expected result line,
 * {@code args[1]} is the captured output file ({@code <author>.out}).
 *
 * <p>Every output line containing "Hamburg" is compared with the expected
 * result, every line starting with "real" is treated as a timing in the
 * {@code 0m1.234s} format. The fastest and slowest timings are discarded and
 * the mean of the remaining ones is printed as a Markdown table row.
 */
public class process_output {

    public static void main(String... args) throws Exception {
        String expectedFile = args[0];
        String actualFile = args[1];

        // Lines read from the output file never carry a line terminator, so a
        // trailing newline in the expected file would make every comparison fail.
        String expected = new String(Files.readAllBytes(Paths.get(expectedFile)))
                .replaceAll("[\\r\\n]+$", "");
        List<String> times = new ArrayList<>();

        // readAllLines closes the file itself; Files.lines would leave the
        // underlying handle open unless wrapped in try-with-resources.
        List<String> outputLines = Files.readAllLines(Paths.get(actualFile));

        int matched = 0;

        for (String line : outputLines) {
            if (line.contains("Hamburg")) {
                if (!line.equals(expected)) {
                    System.err.println("FAILURE Unexpected output");
                    System.err.println(line);
                }
                else {
                    matched++;
                }
            }
            else if (line.startsWith("real")) {
                times.add(line);
            }
        }

        if (matched == 5) {
            System.out.println("OK Output matched");
        }
        else {
            System.err.println("FAILURE Output didn't match");
        }

        System.out.println();
        System.out.println(actualFile);

        // Strip the "real" prefix and its separator, then show the raw timings as m:ss.SSS.
        System.out.println(times.stream()
                .map(t -> t.substring(5))
                .map(t -> t.replace("s", "").replace("m", ":"))
                .collect(Collectors.joining(System.lineSeparator())));

        // "0m1.234s" -> "PT0M1.234S", which Duration.parse understands.
        List<Duration> sortedDurations = times.stream()
                .map(t -> t.substring(5))
                .map(t -> t.replace("s", "S").replace("m", "M"))
                .map(t -> "PT" + t)
                .map(Duration::parse)
                .sorted(Comparator.naturalOrder())
                .collect(Collectors.toList());

        if (sortedDurations.size() < 3) {
            // Not enough samples to drop the fastest and slowest run and still average something.
            System.err.println("FAILURE Expected at least 3 timings ('real' lines) but found " + sortedDurations.size());
            return;
        }

        // Discard the fastest and the slowest run; for the usual five runs this
        // leaves the same three values the mean was always computed from.
        List<Duration> evaluated = sortedDurations.subList(1, sortedDurations.size() - 1);

        Duration mean = evaluated.stream()
                .reduce(Duration.ZERO, Duration::plus)
                .dividedBy(evaluated.size());

        // %03d keeps the millisecond part zero-padded (1.005s -> "00:01.005") and,
        // unlike rounding a floating-point value, can never print "1000".
        var result = String.format("%02d:%02d.%03d", mean.toMinutesPart(), mean.toSecondsPart(), mean.toMillisPart());
        var author = actualFile.replace(".out", "");

        System.out.println(String.format("\n| | %s| [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_%s.java)| 21.0.1-open | [%s](https://github.com/%s)|", result, author, author, author));
    }
}
15 | */ 16 | package dev.morling.onebrc; 17 | 18 | import static java.util.stream.Collectors.*; 19 | 20 | import java.io.IOException; 21 | import java.nio.file.Files; 22 | import java.nio.file.Paths; 23 | import java.util.Map; 24 | import java.util.TreeMap; 25 | import java.util.stream.Collector; 26 | 27 | public class CalculateAverage { 28 | 29 | private static final String FILE = "./measurements.txt"; 30 | 31 | private static record Measurement(String station, double value) { 32 | private Measurement(String[] parts) { 33 | this(parts[0], Double.parseDouble(parts[1])); 34 | } 35 | } 36 | 37 | private static record ResultRow(double min, double mean, double max) { 38 | public String toString() { 39 | return round(min) + "/" + round(mean) + "/" + round(max); 40 | } 41 | 42 | private double round(double value) { 43 | return Math.round(value * 10.0) / 10.0; 44 | } 45 | }; 46 | 47 | private static class MeasurementAggregator { 48 | private double min = Double.POSITIVE_INFINITY; 49 | private double max = Double.NEGATIVE_INFINITY; 50 | private double sum; 51 | private long count; 52 | } 53 | 54 | public static void main(String[] args) throws IOException { 55 | // Map measurements1 = Files.lines(Paths.get(FILE)) 56 | // .map(l -> l.split(";")) 57 | // .collect(groupingBy(m -> m[0], averagingDouble(m -> Double.parseDouble(m[1])))); 58 | // 59 | // measurements1 = new TreeMap<>(measurements1.entrySet() 60 | // .stream() 61 | // .collect(toMap(e -> e.getKey(), e -> Math.round(e.getValue() * 10.0) / 10.0))); 62 | // System.out.println(measurements1); 63 | 64 | Collector collector = Collector.of( 65 | MeasurementAggregator::new, 66 | (a, m) -> { 67 | a.min = Math.min(a.min, m.value); 68 | a.max = Math.max(a.max, m.value); 69 | a.sum += m.value; 70 | a.count++; 71 | }, 72 | (agg1, agg2) -> { 73 | var res = new MeasurementAggregator(); 74 | res.min = Math.min(agg1.min, agg2.min); 75 | res.max = Math.max(agg1.max, agg2.max); 76 | res.sum = agg1.sum + agg2.sum; 77 | res.count = 
agg1.count + agg2.count; 78 | 79 | return res; 80 | }, 81 | agg -> { 82 | return new ResultRow(agg.min, agg.sum / agg.count, agg.max); 83 | }); 84 | 85 | Map measurements = new TreeMap<>(Files.lines(Paths.get(FILE)) 86 | .map(l -> new Measurement(l.split(";"))) 87 | .collect(groupingBy(m -> m.station(), collector))); 88 | 89 | System.out.println(measurements); 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /src/main/java/dev/morling/onebrc/CreateMeasurements.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 The original authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package dev.morling.onebrc; 17 | 18 | import java.io.BufferedWriter; 19 | import java.nio.file.Files; 20 | import java.nio.file.Path; 21 | import java.util.List; 22 | import java.util.concurrent.ThreadLocalRandom; 23 | 24 | public class CreateMeasurements { 25 | 26 | private static final Path MEASUREMENT_FILE = Path.of("./measurements.txt"); 27 | 28 | private record WeatherStation(String id, double meanTemperature) { 29 | double measurement() { 30 | double m = ThreadLocalRandom.current().nextGaussian(meanTemperature, 10); 31 | return Math.round(m * 10.0) / 10.0; 32 | } 33 | } 34 | 35 | public static void main(String[] args) throws Exception { 36 | long start = System.currentTimeMillis(); 37 | 38 | if (args.length != 1) { 39 | System.out.println("Usage: create_measurements.sh "); 40 | System.exit(1); 41 | } 42 | 43 | int size = 0; 44 | try { 45 | size = Integer.parseInt(args[0]); 46 | } 47 | catch (NumberFormatException e) { 48 | System.out.println("Invalid value for "); 49 | System.out.println("Usage: CreateMeasurements "); 50 | System.exit(1); 51 | } 52 | 53 | // @formatter:off 54 | // data from https://en.wikipedia.org/wiki/List_of_cities_by_average_temperature; 55 | // converted using https://wikitable2csv.ggor.de/ 56 | // brought to form using DuckDB: 57 | // D copy ( 58 | // select City, regexp_extract(Year,'(.*)\n.*', 1) as AverageTemp 59 | // from ( 60 | // select City,Year 61 | // from read_csv_auto('List_of_cities_by_average_temperature_1.csv', header = true) 62 | // union 63 | // select City,Year 64 | // from read_csv_auto('List_of_cities_by_average_temperature_2.csv', header = true) 65 | // union 66 | // select City,Year 67 | // from read_csv_auto('List_of_cities_by_average_temperature_3.csv', header = true) 68 | // union 69 | // select City,Year 70 | // from read_csv_auto('List_of_cities_by_average_temperature_4.csv', header = true) 71 | // union 72 | // select City,Year 73 | // from 
read_csv_auto('List_of_cities_by_average_temperature_5.csv', header = true) 74 | // ) 75 | // ) TO 'output.csv' (HEADER, DELIMITER ','); 76 | // @formatter:on 77 | List stations = List.of( 78 | new WeatherStation("Abha", 18.0), 79 | new WeatherStation("Abidjan", 26.0), 80 | new WeatherStation("Abéché", 29.4), 81 | new WeatherStation("Accra", 26.4), 82 | new WeatherStation("Addis Ababa", 16.0), 83 | new WeatherStation("Adelaide", 17.3), 84 | new WeatherStation("Aden", 29.1), 85 | new WeatherStation("Ahvaz", 25.4), 86 | new WeatherStation("Albuquerque", 14.0), 87 | new WeatherStation("Alexandra", 11.0), 88 | new WeatherStation("Alexandria", 20.0), 89 | new WeatherStation("Algiers", 18.2), 90 | new WeatherStation("Alice Springs", 21.0), 91 | new WeatherStation("Almaty", 10.0), 92 | new WeatherStation("Amsterdam", 10.2), 93 | new WeatherStation("Anadyr", -6.9), 94 | new WeatherStation("Anchorage", 2.8), 95 | new WeatherStation("Andorra la Vella", 9.8), 96 | new WeatherStation("Ankara", 12.0), 97 | new WeatherStation("Antananarivo", 17.9), 98 | new WeatherStation("Antsiranana", 25.2), 99 | new WeatherStation("Arkhangelsk", 1.3), 100 | new WeatherStation("Ashgabat", 17.1), 101 | new WeatherStation("Asmara", 15.6), 102 | new WeatherStation("Assab", 30.5), 103 | new WeatherStation("Astana", 3.5), 104 | new WeatherStation("Athens", 19.2), 105 | new WeatherStation("Atlanta", 17.0), 106 | new WeatherStation("Auckland", 15.2), 107 | new WeatherStation("Austin", 20.7), 108 | new WeatherStation("Baghdad", 22.77), 109 | new WeatherStation("Baguio", 19.5), 110 | new WeatherStation("Baku", 15.1), 111 | new WeatherStation("Baltimore", 13.1), 112 | new WeatherStation("Bamako", 27.8), 113 | new WeatherStation("Bangkok", 28.6), 114 | new WeatherStation("Bangui", 26.0), 115 | new WeatherStation("Banjul", 26.0), 116 | new WeatherStation("Barcelona", 18.2), 117 | new WeatherStation("Bata", 25.1), 118 | new WeatherStation("Batumi", 14.0), 119 | new WeatherStation("Beijing", 12.9), 120 | 
new WeatherStation("Beirut", 20.9), 121 | new WeatherStation("Belgrade", 12.5), 122 | new WeatherStation("Belize City", 26.7), 123 | new WeatherStation("Benghazi", 19.9), 124 | new WeatherStation("Bergen", 7.7), 125 | new WeatherStation("Berlin", 10.3), 126 | new WeatherStation("Bilbao", 14.7), 127 | new WeatherStation("Birao", 26.5), 128 | new WeatherStation("Bishkek", 11.3), 129 | new WeatherStation("Bissau", 27.0), 130 | new WeatherStation("Blantyre", 22.2), 131 | new WeatherStation("Bloemfontein", 15.6), 132 | new WeatherStation("Boise", 11.4), 133 | new WeatherStation("Bordeaux", 14.2), 134 | new WeatherStation("Bosaso", 30.0), 135 | new WeatherStation("Boston", 10.9), 136 | new WeatherStation("Bouaké", 26.0), 137 | new WeatherStation("Bratislava", 10.5), 138 | new WeatherStation("Brazzaville", 25.0), 139 | new WeatherStation("Bridgetown", 27.0), 140 | new WeatherStation("Brisbane", 21.4), 141 | new WeatherStation("Brussels", 10.5), 142 | new WeatherStation("Bucharest", 10.8), 143 | new WeatherStation("Budapest", 11.3), 144 | new WeatherStation("Bujumbura", 23.8), 145 | new WeatherStation("Bulawayo", 18.9), 146 | new WeatherStation("Burnie", 13.1), 147 | new WeatherStation("Busan", 15.0), 148 | new WeatherStation("Cabo San Lucas", 23.9), 149 | new WeatherStation("Cairns", 25.0), 150 | new WeatherStation("Cairo", 21.4), 151 | new WeatherStation("Calgary", 4.4), 152 | new WeatherStation("Canberra", 13.1), 153 | new WeatherStation("Cape Town", 16.2), 154 | new WeatherStation("Changsha", 17.4), 155 | new WeatherStation("Charlotte", 16.1), 156 | new WeatherStation("Chiang Mai", 25.8), 157 | new WeatherStation("Chicago", 9.8), 158 | new WeatherStation("Chihuahua", 18.6), 159 | new WeatherStation("Chișinău", 10.2), 160 | new WeatherStation("Chittagong", 25.9), 161 | new WeatherStation("Chongqing", 18.6), 162 | new WeatherStation("Christchurch", 12.2), 163 | new WeatherStation("City of San Marino", 11.8), 164 | new WeatherStation("Colombo", 27.4), 165 | new 
WeatherStation("Columbus", 11.7), 166 | new WeatherStation("Conakry", 26.4), 167 | new WeatherStation("Copenhagen", 9.1), 168 | new WeatherStation("Cotonou", 27.2), 169 | new WeatherStation("Cracow", 9.3), 170 | new WeatherStation("Da Lat", 17.9), 171 | new WeatherStation("Da Nang", 25.8), 172 | new WeatherStation("Dakar", 24.0), 173 | new WeatherStation("Dallas", 19.0), 174 | new WeatherStation("Damascus", 17.0), 175 | new WeatherStation("Dampier", 26.4), 176 | new WeatherStation("Dar es Salaam", 25.8), 177 | new WeatherStation("Darwin", 27.6), 178 | new WeatherStation("Denpasar", 23.7), 179 | new WeatherStation("Denver", 10.4), 180 | new WeatherStation("Detroit", 10.0), 181 | new WeatherStation("Dhaka", 25.9), 182 | new WeatherStation("Dikson", -11.1), 183 | new WeatherStation("Dili", 26.6), 184 | new WeatherStation("Djibouti", 29.9), 185 | new WeatherStation("Dodoma", 22.7), 186 | new WeatherStation("Dolisie", 24.0), 187 | new WeatherStation("Douala", 26.7), 188 | new WeatherStation("Dubai", 26.9), 189 | new WeatherStation("Dublin", 9.8), 190 | new WeatherStation("Dunedin", 11.1), 191 | new WeatherStation("Durban", 20.6), 192 | new WeatherStation("Dushanbe", 14.7), 193 | new WeatherStation("Edinburgh", 9.3), 194 | new WeatherStation("Edmonton", 4.2), 195 | new WeatherStation("El Paso", 18.1), 196 | new WeatherStation("Entebbe", 21.0), 197 | new WeatherStation("Erbil", 19.5), 198 | new WeatherStation("Erzurum", 5.1), 199 | new WeatherStation("Fairbanks", -2.3), 200 | new WeatherStation("Fianarantsoa", 17.9), 201 | new WeatherStation("Flores, Petén", 26.4), 202 | new WeatherStation("Frankfurt", 10.6), 203 | new WeatherStation("Fresno", 17.9), 204 | new WeatherStation("Fukuoka", 17.0), 205 | new WeatherStation("Gabès", 19.5), 206 | new WeatherStation("Gaborone", 21.0), 207 | new WeatherStation("Gagnoa", 26.0), 208 | new WeatherStation("Gangtok", 15.2), 209 | new WeatherStation("Garissa", 29.3), 210 | new WeatherStation("Garoua", 28.3), 211 | new 
WeatherStation("George Town", 27.9), 212 | new WeatherStation("Ghanzi", 21.4), 213 | new WeatherStation("Gjoa Haven", -14.4), 214 | new WeatherStation("Guadalajara", 20.9), 215 | new WeatherStation("Guangzhou", 22.4), 216 | new WeatherStation("Guatemala City", 20.4), 217 | new WeatherStation("Halifax", 7.5), 218 | new WeatherStation("Hamburg", 9.7), 219 | new WeatherStation("Hamilton", 13.8), 220 | new WeatherStation("Hanga Roa", 20.5), 221 | new WeatherStation("Hanoi", 23.6), 222 | new WeatherStation("Harare", 18.4), 223 | new WeatherStation("Harbin", 5.0), 224 | new WeatherStation("Hargeisa", 21.7), 225 | new WeatherStation("Hat Yai", 27.0), 226 | new WeatherStation("Havana", 25.2), 227 | new WeatherStation("Helsinki", 5.9), 228 | new WeatherStation("Heraklion", 18.9), 229 | new WeatherStation("Hiroshima", 16.3), 230 | new WeatherStation("Ho Chi Minh City", 27.4), 231 | new WeatherStation("Hobart", 12.7), 232 | new WeatherStation("Hong Kong", 23.3), 233 | new WeatherStation("Honiara", 26.5), 234 | new WeatherStation("Honolulu", 25.4), 235 | new WeatherStation("Houston", 20.8), 236 | new WeatherStation("Ifrane", 11.4), 237 | new WeatherStation("Indianapolis", 11.8), 238 | new WeatherStation("Iqaluit", -9.3), 239 | new WeatherStation("Irkutsk", 1.0), 240 | new WeatherStation("Istanbul", 13.9), 241 | new WeatherStation("İzmir", 17.9), 242 | new WeatherStation("Jacksonville", 20.3), 243 | new WeatherStation("Jakarta", 26.7), 244 | new WeatherStation("Jayapura", 27.0), 245 | new WeatherStation("Jerusalem", 18.3), 246 | new WeatherStation("Johannesburg", 15.5), 247 | new WeatherStation("Jos", 22.8), 248 | new WeatherStation("Juba", 27.8), 249 | new WeatherStation("Kabul", 12.1), 250 | new WeatherStation("Kampala", 20.0), 251 | new WeatherStation("Kandi", 27.7), 252 | new WeatherStation("Kankan", 26.5), 253 | new WeatherStation("Kano", 26.4), 254 | new WeatherStation("Kansas City", 12.5), 255 | new WeatherStation("Karachi", 26.0), 256 | new WeatherStation("Karonga", 
24.4), 257 | new WeatherStation("Kathmandu", 18.3), 258 | new WeatherStation("Khartoum", 29.9), 259 | new WeatherStation("Kingston", 27.4), 260 | new WeatherStation("Kinshasa", 25.3), 261 | new WeatherStation("Kolkata", 26.7), 262 | new WeatherStation("Kuala Lumpur", 27.3), 263 | new WeatherStation("Kumasi", 26.0), 264 | new WeatherStation("Kunming", 15.7), 265 | new WeatherStation("Kuopio", 3.4), 266 | new WeatherStation("Kuwait City", 25.7), 267 | new WeatherStation("Kyiv", 8.4), 268 | new WeatherStation("Kyoto", 15.8), 269 | new WeatherStation("La Ceiba", 26.2), 270 | new WeatherStation("La Paz", 23.7), 271 | new WeatherStation("Lagos", 26.8), 272 | new WeatherStation("Lahore", 24.3), 273 | new WeatherStation("Lake Havasu City", 23.7), 274 | new WeatherStation("Lake Tekapo", 8.7), 275 | new WeatherStation("Las Palmas de Gran Canaria", 21.2), 276 | new WeatherStation("Las Vegas", 20.3), 277 | new WeatherStation("Launceston", 13.1), 278 | new WeatherStation("Lhasa", 7.6), 279 | new WeatherStation("Libreville", 25.9), 280 | new WeatherStation("Lisbon", 17.5), 281 | new WeatherStation("Livingstone", 21.8), 282 | new WeatherStation("Ljubljana", 10.9), 283 | new WeatherStation("Lodwar", 29.3), 284 | new WeatherStation("Lomé", 26.9), 285 | new WeatherStation("London", 11.3), 286 | new WeatherStation("Los Angeles", 18.6), 287 | new WeatherStation("Louisville", 13.9), 288 | new WeatherStation("Luanda", 25.8), 289 | new WeatherStation("Lubumbashi", 20.8), 290 | new WeatherStation("Lusaka", 19.9), 291 | new WeatherStation("Luxembourg City", 9.3), 292 | new WeatherStation("Lviv", 7.8), 293 | new WeatherStation("Lyon", 12.5), 294 | new WeatherStation("Madrid", 15.0), 295 | new WeatherStation("Mahajanga", 26.3), 296 | new WeatherStation("Makassar", 26.7), 297 | new WeatherStation("Makurdi", 26.0), 298 | new WeatherStation("Malabo", 26.3), 299 | new WeatherStation("Malé", 28.0), 300 | new WeatherStation("Managua", 27.3), 301 | new WeatherStation("Manama", 26.5), 302 | new 
WeatherStation("Mandalay", 28.0), 303 | new WeatherStation("Mango", 28.1), 304 | new WeatherStation("Manila", 28.4), 305 | new WeatherStation("Maputo", 22.8), 306 | new WeatherStation("Marrakesh", 19.6), 307 | new WeatherStation("Marseille", 15.8), 308 | new WeatherStation("Maun", 22.4), 309 | new WeatherStation("Medan", 26.5), 310 | new WeatherStation("Mek'ele", 22.7), 311 | new WeatherStation("Melbourne", 15.1), 312 | new WeatherStation("Memphis", 17.2), 313 | new WeatherStation("Mexicali", 23.1), 314 | new WeatherStation("Mexico City", 17.5), 315 | new WeatherStation("Miami", 24.9), 316 | new WeatherStation("Milan", 13.0), 317 | new WeatherStation("Milwaukee", 8.9), 318 | new WeatherStation("Minneapolis", 7.8), 319 | new WeatherStation("Minsk", 6.7), 320 | new WeatherStation("Mogadishu", 27.1), 321 | new WeatherStation("Mombasa", 26.3), 322 | new WeatherStation("Monaco", 16.4), 323 | new WeatherStation("Moncton", 6.1), 324 | new WeatherStation("Monterrey", 22.3), 325 | new WeatherStation("Montreal", 6.8), 326 | new WeatherStation("Moscow", 5.8), 327 | new WeatherStation("Mumbai", 27.1), 328 | new WeatherStation("Murmansk", 0.6), 329 | new WeatherStation("Muscat", 28.0), 330 | new WeatherStation("Mzuzu", 17.7), 331 | new WeatherStation("N'Djamena", 28.3), 332 | new WeatherStation("Naha", 23.1), 333 | new WeatherStation("Nairobi", 17.8), 334 | new WeatherStation("Nakhon Ratchasima", 27.3), 335 | new WeatherStation("Napier", 14.6), 336 | new WeatherStation("Napoli", 15.9), 337 | new WeatherStation("Nashville", 15.4), 338 | new WeatherStation("Nassau", 24.6), 339 | new WeatherStation("Ndola", 20.3), 340 | new WeatherStation("New Delhi", 25.0), 341 | new WeatherStation("New Orleans", 20.7), 342 | new WeatherStation("New York City", 12.9), 343 | new WeatherStation("Ngaoundéré", 22.0), 344 | new WeatherStation("Niamey", 29.3), 345 | new WeatherStation("Nicosia", 19.7), 346 | new WeatherStation("Niigata", 13.9), 347 | new WeatherStation("Nouadhibou", 21.3), 348 | new 
WeatherStation("Nouakchott", 25.7), 349 | new WeatherStation("Novosibirsk", 1.7), 350 | new WeatherStation("Nuuk", -1.4), 351 | new WeatherStation("Odesa", 10.7), 352 | new WeatherStation("Odienné", 26.0), 353 | new WeatherStation("Oklahoma City", 15.9), 354 | new WeatherStation("Omaha", 10.6), 355 | new WeatherStation("Oranjestad", 28.1), 356 | new WeatherStation("Oslo", 5.7), 357 | new WeatherStation("Ottawa", 6.6), 358 | new WeatherStation("Ouagadougou", 28.3), 359 | new WeatherStation("Ouahigouya", 28.6), 360 | new WeatherStation("Ouarzazate", 18.9), 361 | new WeatherStation("Oulu", 2.7), 362 | new WeatherStation("Palembang", 27.3), 363 | new WeatherStation("Palermo", 18.5), 364 | new WeatherStation("Palm Springs", 24.5), 365 | new WeatherStation("Palmerston North", 13.2), 366 | new WeatherStation("Panama City", 28.0), 367 | new WeatherStation("Parakou", 26.8), 368 | new WeatherStation("Paris", 12.3), 369 | new WeatherStation("Perth", 18.7), 370 | new WeatherStation("Petropavlovsk-Kamchatsky", 1.9), 371 | new WeatherStation("Philadelphia", 13.2), 372 | new WeatherStation("Phnom Penh", 28.3), 373 | new WeatherStation("Phoenix", 23.9), 374 | new WeatherStation("Pittsburgh", 10.8), 375 | new WeatherStation("Podgorica", 15.3), 376 | new WeatherStation("Pointe-Noire", 26.1), 377 | new WeatherStation("Pontianak", 27.7), 378 | new WeatherStation("Port Moresby", 26.9), 379 | new WeatherStation("Port Sudan", 28.4), 380 | new WeatherStation("Port Vila", 24.3), 381 | new WeatherStation("Port-Gentil", 26.0), 382 | new WeatherStation("Portland (OR)", 12.4), 383 | new WeatherStation("Porto", 15.7), 384 | new WeatherStation("Prague", 8.4), 385 | new WeatherStation("Praia", 24.4), 386 | new WeatherStation("Pretoria", 18.2), 387 | new WeatherStation("Pyongyang", 10.8), 388 | new WeatherStation("Rabat", 17.2), 389 | new WeatherStation("Rangpur", 24.4), 390 | new WeatherStation("Reggane", 28.3), 391 | new WeatherStation("Reykjavík", 4.3), 392 | new WeatherStation("Riga", 6.2), 
393 | new WeatherStation("Riyadh", 26.0), 394 | new WeatherStation("Rome", 15.2), 395 | new WeatherStation("Roseau", 26.2), 396 | new WeatherStation("Rostov-on-Don", 9.9), 397 | new WeatherStation("Sacramento", 16.3), 398 | new WeatherStation("Saint Petersburg", 5.8), 399 | new WeatherStation("Saint-Pierre", 5.7), 400 | new WeatherStation("Salt Lake City", 11.6), 401 | new WeatherStation("San Antonio", 20.8), 402 | new WeatherStation("San Diego", 17.8), 403 | new WeatherStation("San Francisco", 14.6), 404 | new WeatherStation("San Jose", 16.4), 405 | new WeatherStation("San José", 22.6), 406 | new WeatherStation("San Juan", 27.2), 407 | new WeatherStation("San Salvador", 23.1), 408 | new WeatherStation("Sana'a", 20.0), 409 | new WeatherStation("Santo Domingo", 25.9), 410 | new WeatherStation("Sapporo", 8.9), 411 | new WeatherStation("Sarajevo", 10.1), 412 | new WeatherStation("Saskatoon", 3.3), 413 | new WeatherStation("Seattle", 11.3), 414 | new WeatherStation("Ségou", 28.0), 415 | new WeatherStation("Seoul", 12.5), 416 | new WeatherStation("Seville", 19.2), 417 | new WeatherStation("Shanghai", 16.7), 418 | new WeatherStation("Singapore", 27.0), 419 | new WeatherStation("Skopje", 12.4), 420 | new WeatherStation("Sochi", 14.2), 421 | new WeatherStation("Sofia", 10.6), 422 | new WeatherStation("Sokoto", 28.0), 423 | new WeatherStation("Split", 16.1), 424 | new WeatherStation("St. John's", 5.0), 425 | new WeatherStation("St. 
Louis", 13.9), 426 | new WeatherStation("Stockholm", 6.6), 427 | new WeatherStation("Surabaya", 27.1), 428 | new WeatherStation("Suva", 25.6), 429 | new WeatherStation("Suwałki", 7.2), 430 | new WeatherStation("Sydney", 17.7), 431 | new WeatherStation("Tabora", 23.0), 432 | new WeatherStation("Tabriz", 12.6), 433 | new WeatherStation("Taipei", 23.0), 434 | new WeatherStation("Tallinn", 6.4), 435 | new WeatherStation("Tamale", 27.9), 436 | new WeatherStation("Tamanrasset", 21.7), 437 | new WeatherStation("Tampa", 22.9), 438 | new WeatherStation("Tashkent", 14.8), 439 | new WeatherStation("Tauranga", 14.8), 440 | new WeatherStation("Tbilisi", 12.9), 441 | new WeatherStation("Tegucigalpa", 21.7), 442 | new WeatherStation("Tehran", 17.0), 443 | new WeatherStation("Tel Aviv", 20.0), 444 | new WeatherStation("Thessaloniki", 16.0), 445 | new WeatherStation("Thiès", 24.0), 446 | new WeatherStation("Tijuana", 17.8), 447 | new WeatherStation("Timbuktu", 28.0), 448 | new WeatherStation("Tirana", 15.2), 449 | new WeatherStation("Toamasina", 23.4), 450 | new WeatherStation("Tokyo", 15.4), 451 | new WeatherStation("Toliara", 24.1), 452 | new WeatherStation("Toluca", 12.4), 453 | new WeatherStation("Toronto", 9.4), 454 | new WeatherStation("Tripoli", 20.0), 455 | new WeatherStation("Tromsø", 2.9), 456 | new WeatherStation("Tucson", 20.9), 457 | new WeatherStation("Tunis", 18.4), 458 | new WeatherStation("Ulaanbaatar", -0.4), 459 | new WeatherStation("Upington", 20.4), 460 | new WeatherStation("Ürümqi", 7.4), 461 | new WeatherStation("Vaduz", 10.1), 462 | new WeatherStation("Valencia", 18.3), 463 | new WeatherStation("Valletta", 18.8), 464 | new WeatherStation("Vancouver", 10.4), 465 | new WeatherStation("Veracruz", 25.4), 466 | new WeatherStation("Vienna", 10.4), 467 | new WeatherStation("Vientiane", 25.9), 468 | new WeatherStation("Villahermosa", 27.1), 469 | new WeatherStation("Vilnius", 6.0), 470 | new WeatherStation("Virginia Beach", 15.8), 471 | new 
WeatherStation("Vladivostok", 4.9), 472 | new WeatherStation("Warsaw", 8.5), 473 | new WeatherStation("Washington, D.C.", 14.6), 474 | new WeatherStation("Wau", 27.8), 475 | new WeatherStation("Wellington", 12.9), 476 | new WeatherStation("Whitehorse", -0.1), 477 | new WeatherStation("Wichita", 13.9), 478 | new WeatherStation("Willemstad", 28.0), 479 | new WeatherStation("Winnipeg", 3.0), 480 | new WeatherStation("Wrocław", 9.6), 481 | new WeatherStation("Xi'an", 14.1), 482 | new WeatherStation("Yakutsk", -8.8), 483 | new WeatherStation("Yangon", 27.5), 484 | new WeatherStation("Yaoundé", 23.8), 485 | new WeatherStation("Yellowknife", -4.3), 486 | new WeatherStation("Yerevan", 12.4), 487 | new WeatherStation("Yinchuan", 9.0), 488 | new WeatherStation("Zagreb", 10.7), 489 | new WeatherStation("Zanzibar City", 26.0), 490 | new WeatherStation("Zürich", 9.3)); 491 | 492 | try (BufferedWriter bw = Files.newBufferedWriter(MEASUREMENT_FILE)) { 493 | for (int i = 0; i < size; i++) { 494 | if (i > 0 && i % 50_000_000 == 0) { 495 | System.out.printf("Wrote %,d measurements in %s ms%n", i, System.currentTimeMillis() - start); 496 | } 497 | WeatherStation station = stations.get(ThreadLocalRandom.current().nextInt(stations.size())); 498 | bw.write(station.id()); 499 | bw.write(";" + station.measurement()); 500 | bw.write('\n'); 501 | } 502 | } 503 | System.out.printf("Created file with %,d measurements in %s ms%n", size, System.currentTimeMillis() - start); 504 | } 505 | } 506 | -------------------------------------------------------------------------------- /src/main/java/dev/morling/onebrc/CreateMeasurements2.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 The original authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package dev.morling.onebrc; 17 | 18 | import java.io.BufferedWriter; 19 | import java.io.File; 20 | import java.io.FileWriter; 21 | import java.io.IOException; 22 | import java.util.Arrays; 23 | import java.util.List; 24 | import java.util.concurrent.ThreadLocalRandom; 25 | 26 | import org.rschwietzke.CheaperCharBuffer; 27 | import org.rschwietzke.FastRandom; 28 | 29 | /** 30 | * Faster version with some data faking instead of a real Gaussian distribution 31 | * Good enough for our purppose I guess. 32 | */ 33 | public class CreateMeasurements2 { 34 | 35 | private static final String FILE = "./measurements2.txt"; 36 | 37 | static class WeatherStation { 38 | final static char[] NUMBERS = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }; 39 | 40 | final String id; 41 | final int meanTemperature; 42 | 43 | final char[] firstPart; 44 | final FastRandom r = new FastRandom(ThreadLocalRandom.current().nextLong()); 45 | 46 | WeatherStation(String id, double meanTemperature) { 47 | this.id = id; 48 | this.meanTemperature = (int) meanTemperature; 49 | // make it directly copyable 50 | this.firstPart = (id + ";").toCharArray(); 51 | } 52 | 53 | /** 54 | * We write out data into the buffer to avoid string conversion 55 | * We also no longer use double and gaussian, because for our 56 | * purpose, the fake numbers here will do it. 
Less 57 | * 58 | * @param buffer the buffer to append to 59 | */ 60 | void measurement(final CheaperCharBuffer buffer) { 61 | // fake -10.9 to +10.9 variance without double operations and rounding 62 | // gives us -10 to +10 63 | int m = meanTemperature + (r.nextInt(21) - 10); 64 | // gives us a decimal digit 0 to 9 as char 65 | char d = NUMBERS[r.nextInt(10)]; 66 | 67 | // just append, only one number has to be converted and we can do 68 | // better... if we watn 69 | buffer.append(firstPart, 0, firstPart.length) 70 | .append(String.valueOf(m)).append('.').append(d) 71 | .append('\n'); 72 | } 73 | } 74 | 75 | public static void main(String[] args) throws Exception { 76 | long start = System.currentTimeMillis(); 77 | 78 | if (args.length != 1) { 79 | System.out.println("Usage: create_measurements.sh "); 80 | System.exit(1); 81 | } 82 | 83 | int size = 0; 84 | try { 85 | size = Integer.parseInt(args[0]); 86 | } 87 | catch (NumberFormatException e) { 88 | System.out.println("Invalid value for "); 89 | System.out.println("Usage: CreateMeasurements "); 90 | System.exit(1); 91 | } 92 | 93 | // @formatter:off 94 | // data from https://en.wikipedia.org/wiki/List_of_cities_by_average_temperature; 95 | // converted using https://wikitable2csv.ggor.de/ 96 | // brought to form using DuckDB: 97 | // D copy ( 98 | // select City, regexp_extract(Year,'(.*)\n.*', 1) as AverageTemp 99 | // from ( 100 | // select City,Year 101 | // from read_csv_auto('List_of_cities_by_average_temperature_1.csv', header = true) 102 | // union 103 | // select City,Year 104 | // from read_csv_auto('List_of_cities_by_average_temperature_2.csv', header = true) 105 | // union 106 | // select City,Year 107 | // from read_csv_auto('List_of_cities_by_average_temperature_3.csv', header = true) 108 | // union 109 | // select City,Year 110 | // from read_csv_auto('List_of_cities_by_average_temperature_4.csv', header = true) 111 | // union 112 | // select City,Year 113 | // from 
read_csv_auto('List_of_cities_by_average_temperature_5.csv', header = true) 114 | // ) 115 | // ) TO 'output.csv' (HEADER, DELIMITER ','); 116 | // @formatter:on 117 | final List stations = Arrays.asList( 118 | new WeatherStation("Abha", 18.0), 119 | new WeatherStation("Abidjan", 26.0), 120 | new WeatherStation("Abéché", 29.4), 121 | new WeatherStation("Accra", 26.4), 122 | new WeatherStation("Addis Ababa", 16.0), 123 | new WeatherStation("Adelaide", 17.3), 124 | new WeatherStation("Aden", 29.1), 125 | new WeatherStation("Ahvaz", 25.4), 126 | new WeatherStation("Albuquerque", 14.0), 127 | new WeatherStation("Alexandra", 11.0), 128 | new WeatherStation("Alexandria", 20.0), 129 | new WeatherStation("Algiers", 18.2), 130 | new WeatherStation("Alice Springs", 21.0), 131 | new WeatherStation("Almaty", 10.0), 132 | new WeatherStation("Amsterdam", 10.2), 133 | new WeatherStation("Anadyr", -6.9), 134 | new WeatherStation("Anchorage", 2.8), 135 | new WeatherStation("Andorra la Vella", 9.8), 136 | new WeatherStation("Ankara", 12.0), 137 | new WeatherStation("Antananarivo", 17.9), 138 | new WeatherStation("Antsiranana", 25.2), 139 | new WeatherStation("Arkhangelsk", 1.3), 140 | new WeatherStation("Ashgabat", 17.1), 141 | new WeatherStation("Asmara", 15.6), 142 | new WeatherStation("Assab", 30.5), 143 | new WeatherStation("Astana", 3.5), 144 | new WeatherStation("Athens", 19.2), 145 | new WeatherStation("Atlanta", 17.0), 146 | new WeatherStation("Auckland", 15.2), 147 | new WeatherStation("Austin", 20.7), 148 | new WeatherStation("Baghdad", 22.77), 149 | new WeatherStation("Baguio", 19.5), 150 | new WeatherStation("Baku", 15.1), 151 | new WeatherStation("Baltimore", 13.1), 152 | new WeatherStation("Bamako", 27.8), 153 | new WeatherStation("Bangkok", 28.6), 154 | new WeatherStation("Bangui", 26.0), 155 | new WeatherStation("Banjul", 26.0), 156 | new WeatherStation("Barcelona", 18.2), 157 | new WeatherStation("Bata", 25.1), 158 | new WeatherStation("Batumi", 14.0), 159 | new 
WeatherStation("Beijing", 12.9), 160 | new WeatherStation("Beirut", 20.9), 161 | new WeatherStation("Belgrade", 12.5), 162 | new WeatherStation("Belize City", 26.7), 163 | new WeatherStation("Benghazi", 19.9), 164 | new WeatherStation("Bergen", 7.7), 165 | new WeatherStation("Berlin", 10.3), 166 | new WeatherStation("Bilbao", 14.7), 167 | new WeatherStation("Birao", 26.5), 168 | new WeatherStation("Bishkek", 11.3), 169 | new WeatherStation("Bissau", 27.0), 170 | new WeatherStation("Blantyre", 22.2), 171 | new WeatherStation("Bloemfontein", 15.6), 172 | new WeatherStation("Boise", 11.4), 173 | new WeatherStation("Bordeaux", 14.2), 174 | new WeatherStation("Bosaso", 30.0), 175 | new WeatherStation("Boston", 10.9), 176 | new WeatherStation("Bouaké", 26.0), 177 | new WeatherStation("Bratislava", 10.5), 178 | new WeatherStation("Brazzaville", 25.0), 179 | new WeatherStation("Bridgetown", 27.0), 180 | new WeatherStation("Brisbane", 21.4), 181 | new WeatherStation("Brussels", 10.5), 182 | new WeatherStation("Bucharest", 10.8), 183 | new WeatherStation("Budapest", 11.3), 184 | new WeatherStation("Bujumbura", 23.8), 185 | new WeatherStation("Bulawayo", 18.9), 186 | new WeatherStation("Burnie", 13.1), 187 | new WeatherStation("Busan", 15.0), 188 | new WeatherStation("Cabo San Lucas", 23.9), 189 | new WeatherStation("Cairns", 25.0), 190 | new WeatherStation("Cairo", 21.4), 191 | new WeatherStation("Calgary", 4.4), 192 | new WeatherStation("Canberra", 13.1), 193 | new WeatherStation("Cape Town", 16.2), 194 | new WeatherStation("Changsha", 17.4), 195 | new WeatherStation("Charlotte", 16.1), 196 | new WeatherStation("Chiang Mai", 25.8), 197 | new WeatherStation("Chicago", 9.8), 198 | new WeatherStation("Chihuahua", 18.6), 199 | new WeatherStation("Chișinău", 10.2), 200 | new WeatherStation("Chittagong", 25.9), 201 | new WeatherStation("Chongqing", 18.6), 202 | new WeatherStation("Christchurch", 12.2), 203 | new WeatherStation("City of San Marino", 11.8), 204 | new 
WeatherStation("Colombo", 27.4), 205 | new WeatherStation("Columbus", 11.7), 206 | new WeatherStation("Conakry", 26.4), 207 | new WeatherStation("Copenhagen", 9.1), 208 | new WeatherStation("Cotonou", 27.2), 209 | new WeatherStation("Cracow", 9.3), 210 | new WeatherStation("Da Lat", 17.9), 211 | new WeatherStation("Da Nang", 25.8), 212 | new WeatherStation("Dakar", 24.0), 213 | new WeatherStation("Dallas", 19.0), 214 | new WeatherStation("Damascus", 17.0), 215 | new WeatherStation("Dampier", 26.4), 216 | new WeatherStation("Dar es Salaam", 25.8), 217 | new WeatherStation("Darwin", 27.6), 218 | new WeatherStation("Denpasar", 23.7), 219 | new WeatherStation("Denver", 10.4), 220 | new WeatherStation("Detroit", 10.0), 221 | new WeatherStation("Dhaka", 25.9), 222 | new WeatherStation("Dikson", -11.1), 223 | new WeatherStation("Dili", 26.6), 224 | new WeatherStation("Djibouti", 29.9), 225 | new WeatherStation("Dodoma", 22.7), 226 | new WeatherStation("Dolisie", 24.0), 227 | new WeatherStation("Douala", 26.7), 228 | new WeatherStation("Dubai", 26.9), 229 | new WeatherStation("Dublin", 9.8), 230 | new WeatherStation("Dunedin", 11.1), 231 | new WeatherStation("Durban", 20.6), 232 | new WeatherStation("Dushanbe", 14.7), 233 | new WeatherStation("Edinburgh", 9.3), 234 | new WeatherStation("Edmonton", 4.2), 235 | new WeatherStation("El Paso", 18.1), 236 | new WeatherStation("Entebbe", 21.0), 237 | new WeatherStation("Erbil", 19.5), 238 | new WeatherStation("Erzurum", 5.1), 239 | new WeatherStation("Fairbanks", -2.3), 240 | new WeatherStation("Fianarantsoa", 17.9), 241 | new WeatherStation("Flores, Petén", 26.4), 242 | new WeatherStation("Frankfurt", 10.6), 243 | new WeatherStation("Fresno", 17.9), 244 | new WeatherStation("Fukuoka", 17.0), 245 | new WeatherStation("Gabès", 19.5), 246 | new WeatherStation("Gaborone", 21.0), 247 | new WeatherStation("Gagnoa", 26.0), 248 | new WeatherStation("Gangtok", 15.2), 249 | new WeatherStation("Garissa", 29.3), 250 | new 
WeatherStation("Garoua", 28.3), 251 | new WeatherStation("George Town", 27.9), 252 | new WeatherStation("Ghanzi", 21.4), 253 | new WeatherStation("Gjoa Haven", -14.4), 254 | new WeatherStation("Guadalajara", 20.9), 255 | new WeatherStation("Guangzhou", 22.4), 256 | new WeatherStation("Guatemala City", 20.4), 257 | new WeatherStation("Halifax", 7.5), 258 | new WeatherStation("Hamburg", 9.7), 259 | new WeatherStation("Hamilton", 13.8), 260 | new WeatherStation("Hanga Roa", 20.5), 261 | new WeatherStation("Hanoi", 23.6), 262 | new WeatherStation("Harare", 18.4), 263 | new WeatherStation("Harbin", 5.0), 264 | new WeatherStation("Hargeisa", 21.7), 265 | new WeatherStation("Hat Yai", 27.0), 266 | new WeatherStation("Havana", 25.2), 267 | new WeatherStation("Helsinki", 5.9), 268 | new WeatherStation("Heraklion", 18.9), 269 | new WeatherStation("Hiroshima", 16.3), 270 | new WeatherStation("Ho Chi Minh City", 27.4), 271 | new WeatherStation("Hobart", 12.7), 272 | new WeatherStation("Hong Kong", 23.3), 273 | new WeatherStation("Honiara", 26.5), 274 | new WeatherStation("Honolulu", 25.4), 275 | new WeatherStation("Houston", 20.8), 276 | new WeatherStation("Ifrane", 11.4), 277 | new WeatherStation("Indianapolis", 11.8), 278 | new WeatherStation("Iqaluit", -9.3), 279 | new WeatherStation("Irkutsk", 1.0), 280 | new WeatherStation("Istanbul", 13.9), 281 | new WeatherStation("İzmir", 17.9), 282 | new WeatherStation("Jacksonville", 20.3), 283 | new WeatherStation("Jakarta", 26.7), 284 | new WeatherStation("Jayapura", 27.0), 285 | new WeatherStation("Jerusalem", 18.3), 286 | new WeatherStation("Johannesburg", 15.5), 287 | new WeatherStation("Jos", 22.8), 288 | new WeatherStation("Juba", 27.8), 289 | new WeatherStation("Kabul", 12.1), 290 | new WeatherStation("Kampala", 20.0), 291 | new WeatherStation("Kandi", 27.7), 292 | new WeatherStation("Kankan", 26.5), 293 | new WeatherStation("Kano", 26.4), 294 | new WeatherStation("Kansas City", 12.5), 295 | new WeatherStation("Karachi", 
26.0), 296 | new WeatherStation("Karonga", 24.4), 297 | new WeatherStation("Kathmandu", 18.3), 298 | new WeatherStation("Khartoum", 29.9), 299 | new WeatherStation("Kingston", 27.4), 300 | new WeatherStation("Kinshasa", 25.3), 301 | new WeatherStation("Kolkata", 26.7), 302 | new WeatherStation("Kuala Lumpur", 27.3), 303 | new WeatherStation("Kumasi", 26.0), 304 | new WeatherStation("Kunming", 15.7), 305 | new WeatherStation("Kuopio", 3.4), 306 | new WeatherStation("Kuwait City", 25.7), 307 | new WeatherStation("Kyiv", 8.4), 308 | new WeatherStation("Kyoto", 15.8), 309 | new WeatherStation("La Ceiba", 26.2), 310 | new WeatherStation("La Paz", 23.7), 311 | new WeatherStation("Lagos", 26.8), 312 | new WeatherStation("Lahore", 24.3), 313 | new WeatherStation("Lake Havasu City", 23.7), 314 | new WeatherStation("Lake Tekapo", 8.7), 315 | new WeatherStation("Las Palmas de Gran Canaria", 21.2), 316 | new WeatherStation("Las Vegas", 20.3), 317 | new WeatherStation("Launceston", 13.1), 318 | new WeatherStation("Lhasa", 7.6), 319 | new WeatherStation("Libreville", 25.9), 320 | new WeatherStation("Lisbon", 17.5), 321 | new WeatherStation("Livingstone", 21.8), 322 | new WeatherStation("Ljubljana", 10.9), 323 | new WeatherStation("Lodwar", 29.3), 324 | new WeatherStation("Lomé", 26.9), 325 | new WeatherStation("London", 11.3), 326 | new WeatherStation("Los Angeles", 18.6), 327 | new WeatherStation("Louisville", 13.9), 328 | new WeatherStation("Luanda", 25.8), 329 | new WeatherStation("Lubumbashi", 20.8), 330 | new WeatherStation("Lusaka", 19.9), 331 | new WeatherStation("Luxembourg City", 9.3), 332 | new WeatherStation("Lviv", 7.8), 333 | new WeatherStation("Lyon", 12.5), 334 | new WeatherStation("Madrid", 15.0), 335 | new WeatherStation("Mahajanga", 26.3), 336 | new WeatherStation("Makassar", 26.7), 337 | new WeatherStation("Makurdi", 26.0), 338 | new WeatherStation("Malabo", 26.3), 339 | new WeatherStation("Malé", 28.0), 340 | new WeatherStation("Managua", 27.3), 341 | new 
WeatherStation("Manama", 26.5), 342 | new WeatherStation("Mandalay", 28.0), 343 | new WeatherStation("Mango", 28.1), 344 | new WeatherStation("Manila", 28.4), 345 | new WeatherStation("Maputo", 22.8), 346 | new WeatherStation("Marrakesh", 19.6), 347 | new WeatherStation("Marseille", 15.8), 348 | new WeatherStation("Maun", 22.4), 349 | new WeatherStation("Medan", 26.5), 350 | new WeatherStation("Mek'ele", 22.7), 351 | new WeatherStation("Melbourne", 15.1), 352 | new WeatherStation("Memphis", 17.2), 353 | new WeatherStation("Mexicali", 23.1), 354 | new WeatherStation("Mexico City", 17.5), 355 | new WeatherStation("Miami", 24.9), 356 | new WeatherStation("Milan", 13.0), 357 | new WeatherStation("Milwaukee", 8.9), 358 | new WeatherStation("Minneapolis", 7.8), 359 | new WeatherStation("Minsk", 6.7), 360 | new WeatherStation("Mogadishu", 27.1), 361 | new WeatherStation("Mombasa", 26.3), 362 | new WeatherStation("Monaco", 16.4), 363 | new WeatherStation("Moncton", 6.1), 364 | new WeatherStation("Monterrey", 22.3), 365 | new WeatherStation("Montreal", 6.8), 366 | new WeatherStation("Moscow", 5.8), 367 | new WeatherStation("Mumbai", 27.1), 368 | new WeatherStation("Murmansk", 0.6), 369 | new WeatherStation("Muscat", 28.0), 370 | new WeatherStation("Mzuzu", 17.7), 371 | new WeatherStation("N'Djamena", 28.3), 372 | new WeatherStation("Naha", 23.1), 373 | new WeatherStation("Nairobi", 17.8), 374 | new WeatherStation("Nakhon Ratchasima", 27.3), 375 | new WeatherStation("Napier", 14.6), 376 | new WeatherStation("Napoli", 15.9), 377 | new WeatherStation("Nashville", 15.4), 378 | new WeatherStation("Nassau", 24.6), 379 | new WeatherStation("Ndola", 20.3), 380 | new WeatherStation("New Delhi", 25.0), 381 | new WeatherStation("New Orleans", 20.7), 382 | new WeatherStation("New York City", 12.9), 383 | new WeatherStation("Ngaoundéré", 22.0), 384 | new WeatherStation("Niamey", 29.3), 385 | new WeatherStation("Nicosia", 19.7), 386 | new WeatherStation("Niigata", 13.9), 387 | new 
WeatherStation("Nouadhibou", 21.3), 388 | new WeatherStation("Nouakchott", 25.7), 389 | new WeatherStation("Novosibirsk", 1.7), 390 | new WeatherStation("Nuuk", -1.4), 391 | new WeatherStation("Odesa", 10.7), 392 | new WeatherStation("Odienné", 26.0), 393 | new WeatherStation("Oklahoma City", 15.9), 394 | new WeatherStation("Omaha", 10.6), 395 | new WeatherStation("Oranjestad", 28.1), 396 | new WeatherStation("Oslo", 5.7), 397 | new WeatherStation("Ottawa", 6.6), 398 | new WeatherStation("Ouagadougou", 28.3), 399 | new WeatherStation("Ouahigouya", 28.6), 400 | new WeatherStation("Ouarzazate", 18.9), 401 | new WeatherStation("Oulu", 2.7), 402 | new WeatherStation("Palembang", 27.3), 403 | new WeatherStation("Palermo", 18.5), 404 | new WeatherStation("Palm Springs", 24.5), 405 | new WeatherStation("Palmerston North", 13.2), 406 | new WeatherStation("Panama City", 28.0), 407 | new WeatherStation("Parakou", 26.8), 408 | new WeatherStation("Paris", 12.3), 409 | new WeatherStation("Perth", 18.7), 410 | new WeatherStation("Petropavlovsk-Kamchatsky", 1.9), 411 | new WeatherStation("Philadelphia", 13.2), 412 | new WeatherStation("Phnom Penh", 28.3), 413 | new WeatherStation("Phoenix", 23.9), 414 | new WeatherStation("Pittsburgh", 10.8), 415 | new WeatherStation("Podgorica", 15.3), 416 | new WeatherStation("Pointe-Noire", 26.1), 417 | new WeatherStation("Pontianak", 27.7), 418 | new WeatherStation("Port Moresby", 26.9), 419 | new WeatherStation("Port Sudan", 28.4), 420 | new WeatherStation("Port Vila", 24.3), 421 | new WeatherStation("Port-Gentil", 26.0), 422 | new WeatherStation("Portland (OR)", 12.4), 423 | new WeatherStation("Porto", 15.7), 424 | new WeatherStation("Prague", 8.4), 425 | new WeatherStation("Praia", 24.4), 426 | new WeatherStation("Pretoria", 18.2), 427 | new WeatherStation("Pyongyang", 10.8), 428 | new WeatherStation("Rabat", 17.2), 429 | new WeatherStation("Rangpur", 24.4), 430 | new WeatherStation("Reggane", 28.3), 431 | new WeatherStation("Reykjavík", 
4.3), 432 | new WeatherStation("Riga", 6.2), 433 | new WeatherStation("Riyadh", 26.0), 434 | new WeatherStation("Rome", 15.2), 435 | new WeatherStation("Roseau", 26.2), 436 | new WeatherStation("Rostov-on-Don", 9.9), 437 | new WeatherStation("Sacramento", 16.3), 438 | new WeatherStation("Saint Petersburg", 5.8), 439 | new WeatherStation("Saint-Pierre", 5.7), 440 | new WeatherStation("Salt Lake City", 11.6), 441 | new WeatherStation("San Antonio", 20.8), 442 | new WeatherStation("San Diego", 17.8), 443 | new WeatherStation("San Francisco", 14.6), 444 | new WeatherStation("San Jose", 16.4), 445 | new WeatherStation("San José", 22.6), 446 | new WeatherStation("San Juan", 27.2), 447 | new WeatherStation("San Salvador", 23.1), 448 | new WeatherStation("Sana'a", 20.0), 449 | new WeatherStation("Santo Domingo", 25.9), 450 | new WeatherStation("Sapporo", 8.9), 451 | new WeatherStation("Sarajevo", 10.1), 452 | new WeatherStation("Saskatoon", 3.3), 453 | new WeatherStation("Seattle", 11.3), 454 | new WeatherStation("Ségou", 28.0), 455 | new WeatherStation("Seoul", 12.5), 456 | new WeatherStation("Seville", 19.2), 457 | new WeatherStation("Shanghai", 16.7), 458 | new WeatherStation("Singapore", 27.0), 459 | new WeatherStation("Skopje", 12.4), 460 | new WeatherStation("Sochi", 14.2), 461 | new WeatherStation("Sofia", 10.6), 462 | new WeatherStation("Sokoto", 28.0), 463 | new WeatherStation("Split", 16.1), 464 | new WeatherStation("St. John's", 5.0), 465 | new WeatherStation("St. 
Louis", 13.9), 466 | new WeatherStation("Stockholm", 6.6), 467 | new WeatherStation("Surabaya", 27.1), 468 | new WeatherStation("Suva", 25.6), 469 | new WeatherStation("Suwałki", 7.2), 470 | new WeatherStation("Sydney", 17.7), 471 | new WeatherStation("Tabora", 23.0), 472 | new WeatherStation("Tabriz", 12.6), 473 | new WeatherStation("Taipei", 23.0), 474 | new WeatherStation("Tallinn", 6.4), 475 | new WeatherStation("Tamale", 27.9), 476 | new WeatherStation("Tamanrasset", 21.7), 477 | new WeatherStation("Tampa", 22.9), 478 | new WeatherStation("Tashkent", 14.8), 479 | new WeatherStation("Tauranga", 14.8), 480 | new WeatherStation("Tbilisi", 12.9), 481 | new WeatherStation("Tegucigalpa", 21.7), 482 | new WeatherStation("Tehran", 17.0), 483 | new WeatherStation("Tel Aviv", 20.0), 484 | new WeatherStation("Thessaloniki", 16.0), 485 | new WeatherStation("Thiès", 24.0), 486 | new WeatherStation("Tijuana", 17.8), 487 | new WeatherStation("Timbuktu", 28.0), 488 | new WeatherStation("Tirana", 15.2), 489 | new WeatherStation("Toamasina", 23.4), 490 | new WeatherStation("Tokyo", 15.4), 491 | new WeatherStation("Toliara", 24.1), 492 | new WeatherStation("Toluca", 12.4), 493 | new WeatherStation("Toronto", 9.4), 494 | new WeatherStation("Tripoli", 20.0), 495 | new WeatherStation("Tromsø", 2.9), 496 | new WeatherStation("Tucson", 20.9), 497 | new WeatherStation("Tunis", 18.4), 498 | new WeatherStation("Ulaanbaatar", -0.4), 499 | new WeatherStation("Upington", 20.4), 500 | new WeatherStation("Ürümqi", 7.4), 501 | new WeatherStation("Vaduz", 10.1), 502 | new WeatherStation("Valencia", 18.3), 503 | new WeatherStation("Valletta", 18.8), 504 | new WeatherStation("Vancouver", 10.4), 505 | new WeatherStation("Veracruz", 25.4), 506 | new WeatherStation("Vienna", 10.4), 507 | new WeatherStation("Vientiane", 25.9), 508 | new WeatherStation("Villahermosa", 27.1), 509 | new WeatherStation("Vilnius", 6.0), 510 | new WeatherStation("Virginia Beach", 15.8), 511 | new 
WeatherStation("Vladivostok", 4.9), 512 | new WeatherStation("Warsaw", 8.5), 513 | new WeatherStation("Washington, D.C.", 14.6), 514 | new WeatherStation("Wau", 27.8), 515 | new WeatherStation("Wellington", 12.9), 516 | new WeatherStation("Whitehorse", -0.1), 517 | new WeatherStation("Wichita", 13.9), 518 | new WeatherStation("Willemstad", 28.0), 519 | new WeatherStation("Winnipeg", 3.0), 520 | new WeatherStation("Wrocław", 9.6), 521 | new WeatherStation("Xi'an", 14.1), 522 | new WeatherStation("Yakutsk", -8.8), 523 | new WeatherStation("Yangon", 27.5), 524 | new WeatherStation("Yaoundé", 23.8), 525 | new WeatherStation("Yellowknife", -4.3), 526 | new WeatherStation("Yerevan", 12.4), 527 | new WeatherStation("Yinchuan", 9.0), 528 | new WeatherStation("Zagreb", 10.7), 529 | new WeatherStation("Zanzibar City", 26.0), 530 | new WeatherStation("Zürich", 9.3)); 531 | 532 | File file = new File(FILE); 533 | 534 | // break the loop and unroll it manually 535 | int strideSize = 50_000_000; 536 | int outer = size / strideSize; 537 | int remainder = size - (outer * strideSize); 538 | 539 | try (final BufferedWriter bw = new BufferedWriter(new FileWriter(file))) { 540 | for (int i = 0; i < outer; i++) { 541 | produce(bw, stations, strideSize); 542 | 543 | // we avoid a modulo if here and use the stride size to print and update 544 | System.out.println("Wrote %,d measurements in %s ms".formatted((i + 1) * strideSize, System.currentTimeMillis() - start)); 545 | } 546 | // there might be a rest 547 | produce(bw, stations, remainder); 548 | 549 | // write fully before taking measurements 550 | bw.flush(); 551 | System.out.println("Created file with %,d measurements in %s ms".formatted(size, System.currentTimeMillis() - start)); 552 | } 553 | } 554 | 555 | private static void produce(BufferedWriter bw, List stations, int count) throws IOException { 556 | final int stationCount = stations.size(); 557 | final int rest = count % 8; 558 | 559 | // use a fast ranodm impl without 
atomics to be able to utilize the cpu better 560 | // and avoid sideeffects, FastRandom is very fake random and does not have a state 561 | final FastRandom r1 = new FastRandom(ThreadLocalRandom.current().nextLong()); 562 | final FastRandom r2 = new FastRandom(ThreadLocalRandom.current().nextLong()); 563 | final FastRandom r3 = new FastRandom(ThreadLocalRandom.current().nextLong()); 564 | final FastRandom r4 = new FastRandom(ThreadLocalRandom.current().nextLong()); 565 | 566 | // write to a fix buffer first, don't create strings ever 567 | // reuse buffer 568 | final CheaperCharBuffer sb = new CheaperCharBuffer(200); 569 | 570 | // manual loop unroll for less jumps 571 | for (int i = 0; i < count; i = i + 8) { 572 | { 573 | // try to fill teh cpu pipeline as much as possible with 574 | // independent operations 575 | int s1 = r1.nextInt(stationCount); 576 | int s2 = r2.nextInt(stationCount); 577 | int s3 = r3.nextInt(stationCount); 578 | int s4 = r4.nextInt(stationCount); 579 | // get us the ojects one after the other to have the array 580 | // in our L1 cache and not push it out with other data 581 | var w1 = stations.get(s1); 582 | var w2 = stations.get(s2); 583 | var w3 = stations.get(s3); 584 | var w4 = stations.get(s4); 585 | // write our data to our buffer 586 | w1.measurement(sb); 587 | w2.measurement(sb); 588 | w3.measurement(sb); 589 | w4.measurement(sb); 590 | } 591 | { 592 | int s1 = r1.nextInt(stationCount); 593 | int s2 = r2.nextInt(stationCount); 594 | int s3 = r3.nextInt(stationCount); 595 | int s4 = r4.nextInt(stationCount); 596 | var w1 = stations.get(s1); 597 | var w2 = stations.get(s2); 598 | var w3 = stations.get(s3); 599 | var w4 = stations.get(s4); 600 | w1.measurement(sb); 601 | w2.measurement(sb); 602 | w3.measurement(sb); 603 | w4.measurement(sb); 604 | } 605 | // write the buffer directly, no intermediate string copy 606 | bw.write(sb.data_, 0, sb.length_); 607 | 608 | // reuse buffer, reset only, no cleaning 609 | sb.clear(); 610 | } 611 
| 612 | // there might be a rest to write 613 | for (int i = 0; i < rest; i++) { 614 | sb.clear(); 615 | 616 | int s = r1.nextInt(stationCount); 617 | var w = stations.get(s); 618 | w.measurement(sb); 619 | 620 | bw.write(sb.data_, 0, sb.length_); 621 | } 622 | } 623 | } 624 | -------------------------------------------------------------------------------- /src/main/java/dev/morling/onebrc/CreateMeasurements3.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 The original authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
/**
 * Generates a {@code measurements.txt} file with up to {@link #KEYSET_SIZE}
 * synthetic station names of varying length.
 *
 * <p>Station names are cut out of the concatenation of all city names found in
 * {@code data/weather_stations.csv}; each station's mean temperature is derived
 * from the latitude column of that file.
 */
public class CreateMeasurements3 {

    /** Maximum length of a station name, measured in UTF-8 bytes. */
    public static final int MAX_NAME_LEN = 100;

    /** Number of distinct station names to generate (fewer if the CSV has fewer rows). */
    public static final int KEYSET_SIZE = 10_000;

    private static final String STATIONS_FILE = "data/weather_stations.csv";

    /**
     * Entry point: generates the station set and writes the requested number of rows.
     *
     * @param args exactly one argument: the number of records to create
     * @throws Exception if the station file cannot be read, yields no stations,
     *         or the output file cannot be written
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 1) {
            System.out.println("Usage: create_measurements3.sh <number of records to create>");
            System.exit(1);
        }
        int size = 0;
        try {
            size = Integer.parseInt(args[0]);
        }
        catch (NumberFormatException e) {
            System.out.println("Invalid value for <number of records to create>");
            System.out.println("Usage: create_measurements3.sh <number of records to create>");
            System.exit(1);
        }
        final var weatherStations = generateWeatherStations();
        if (weatherStations.isEmpty()) {
            throw new Exception("No weather stations could be generated from " + STATIONS_FILE);
        }
        // The CSV may hold fewer than KEYSET_SIZE rows, so index by the real size.
        final int stationCount = weatherStations.size();
        final var start = System.currentTimeMillis();
        final var rnd = ThreadLocalRandom.current();
        // Name lengths are measured in UTF-8 bytes, so the file is written as UTF-8.
        try (var out = new BufferedWriter(new FileWriter("measurements.txt", StandardCharsets.UTF_8))) {
            for (int i = 1; i <= size; i++) {
                var station = weatherStations.get(rnd.nextInt(stationCount));
                double temp = rnd.nextGaussian(station.avgTemp(), 7.0);
                out.write(station.name());
                out.write(';');
                out.write(Double.toString(Math.round(temp * 10.0) / 10.0));
                // always '\n', independent of the platform line separator
                out.write('\n');
                if (i % 50_000_000 == 0) {
                    System.out.printf("Wrote %,d measurements in %,d ms%n", i, System.currentTimeMillis() - start);
                }
            }
        }
    }

    /** A generated station: its name and the mean of its temperature distribution. */
    record WeatherStation(String name, float avgTemp) {
    }

    /**
     * Builds the list of synthetic weather stations.
     *
     * <p>The CSV is read twice: first to concatenate all city names into one long
     * string that serves as the source of name characters, then to take one
     * latitude per generated station.
     *
     * @return the generated stations, at most {@link #KEYSET_SIZE} of them
     * @throws Exception if the file cannot be read, a row is malformed, or the
     *         name source runs out of characters
     */
    private static ArrayList<WeatherStation> generateWeatherStations() throws Exception {
        // Use a public list of city names and concatenate them all into a long string,
        // which we'll use as a "source of city name randomness"
        var bigName = new StringBuilder(1 << 20);
        try (var rows = new BufferedReader(new FileReader(STATIONS_FILE, StandardCharsets.UTF_8))) {
            for (var row = readDataRow(rows); row != null; row = readDataRow(rows)) {
                bigName.append(row, 0, row.indexOf(';'));
            }
        }
        final var weatherStations = new ArrayList<WeatherStation>();
        final var names = new HashSet<String>();
        var minLen = Integer.MAX_VALUE;
        var maxLen = Integer.MIN_VALUE;
        try (var rows = new BufferedReader(new FileReader(STATIONS_FILE, StandardCharsets.UTF_8))) {
            final var nameSource = new StringReader(bigName.toString());
            final var buf = new char[MAX_NAME_LEN];
            final var rnd = ThreadLocalRandom.current();
            final double yOffset = 4;
            final double factor = 2500;
            final double xOffset = 0.372;
            final double power = 7;
            for (int i = 0; i < KEYSET_SIZE; i++) {
                var row = readDataRow(rows);
                if (row == null) {
                    break;
                }
                // Use a 7th-order curve to simulate the name length distribution.
                // It gives us mostly short names, but with large outliers.
                var rawLen = (int) (yOffset + factor * Math.pow(rnd.nextDouble() - xOffset, power));
                // keep the request inside the buffer whatever the curve yields
                var nameLen = Math.max(1, Math.min(MAX_NAME_LEN, rawLen));
                var count = nameSource.read(buf, 0, nameLen);
                if (count == -1) {
                    throw new Exception("Name source exhausted");
                }
                // Append only what was actually read; near the end of the source
                // the read may be shorter than requested.
                var nameBuf = new StringBuilder(count);
                nameBuf.append(buf, 0, count);
                if (Character.isWhitespace(nameBuf.charAt(0))) {
                    nameBuf.setCharAt(0, readNonSpace(nameSource));
                }
                if (Character.isWhitespace(nameBuf.charAt(nameBuf.length() - 1))) {
                    nameBuf.setCharAt(nameBuf.length() - 1, readNonSpace(nameSource));
                }
                // Mutate until the name both fits into MAX_NAME_LEN bytes and is unique.
                // Both conditions are re-checked after every change, because shortening
                // a name can make it collide and replacing a char can make it longer.
                String name;
                int actualLen;
                while (true) {
                    name = nameBuf.toString();
                    actualLen = name.getBytes(StandardCharsets.UTF_8).length;
                    if (actualLen > MAX_NAME_LEN) {
                        nameBuf.deleteCharAt(nameBuf.length() - 1);
                        if (Character.isWhitespace(nameBuf.charAt(nameBuf.length() - 1))) {
                            nameBuf.setCharAt(nameBuf.length() - 1, readNonSpace(nameSource));
                        }
                    }
                    else if (names.contains(name)) {
                        nameBuf.setCharAt(rnd.nextInt(nameBuf.length()), readNonSpace(nameSource));
                    }
                    else {
                        break;
                    }
                }
                if (name.indexOf(';') != -1) {
                    throw new Exception("Station name contains a semicolon!");
                }
                names.add(name);
                minLen = Integer.min(minLen, actualLen);
                maxLen = Integer.max(maxLen, actualLen);
                var lat = Float.parseFloat(row.substring(row.indexOf(';') + 1));
                // Guesstimate mean temperature using cosine of latitude
                var avgTemp = (float) (30 * Math.cos(Math.toRadians(lat))) - 10;
                weatherStations.add(new WeatherStation(name, avgTemp));
            }
        }
        if (!weatherStations.isEmpty()) {
            System.out.format("Generated %,d station names with length from %,d to %,d%n", weatherStations.size(), minLen, maxLen);
        }
        return weatherStations;
    }

    /**
     * Returns the next data row, skipping blank lines and lines starting with {@code #}.
     *
     * @param rows the reader positioned anywhere in the station file
     * @return the next row containing a {@code ;}, or {@code null} at end of file
     * @throws IOException if reading fails or a data row has no {@code ;} separator
     */
    private static String readDataRow(BufferedReader rows) throws IOException {
        String row;
        while ((row = rows.readLine()) != null) {
            if (row.isBlank() || row.startsWith("#")) {
                continue;
            }
            if (row.indexOf(';') == -1) {
                throw new IOException("Malformed row, missing ';': " + row);
            }
            return row;
        }
        return null;
    }

    /**
     * Reads characters from the name source until a non-whitespace one is found.
     *
     * @param nameSource the source of name characters
     * @return the first non-whitespace character read
     * @throws IOException if the source is exhausted first
     */
    private static char readNonSpace(StringReader nameSource) throws IOException {
        while (true) {
            var n = nameSource.read();
            if (n == -1) {
                throw new IOException("Name source exhausted");
            }
            var ch = (char) n;
            // same predicate the callers use to decide that a replacement is needed
            if (!Character.isWhitespace(ch)) {
                return ch;
            }
        }
    }
}

This class is meant to replace the old {@link CheaperCharBuffer} in all areas 22 | * where performance and memory-efficiency is key. XMLString compatibility 23 | * remains in place in case one has used that in their own code. 24 | * 25 | *

This buffer is mutable and when you use it, make sure you work with 26 | * it responsibly. In many cases, we will reuse the buffer to avoid fresh 27 | * memory allocations, hence you have to pay attention to its usage pattern. 28 | * It is not meant to be a general String replacement. 29 | * 30 | *

This class avoids many of the standard runtime checks that will result 31 | * in a runtime or array exception anyway. Why check twice and raise the 32 | * same exception? 33 | * 34 | * @author René Schwietzke 35 | * @since 3.10.0 36 | */ 37 | public class CheaperCharBuffer implements CharSequence { 38 | // our data, can grow - that is not safe and has be altered from the original code 39 | // to allow speed 40 | public char[] data_; 41 | 42 | // the current size of the string data 43 | public int length_; 44 | 45 | // the current size of the string data 46 | private final int growBy_; 47 | 48 | // how much do we grow if needed, half a cache line 49 | public static final int CAPACITY_GROWTH = 64 / 2; 50 | 51 | // what is our start size? 52 | // a cache line is 64 byte mostly, the overhead is mostly 24 bytes 53 | // a char is two bytes, let's use one cache lines 54 | public static final int INITIAL_CAPACITY = (64 - 24) / 2; 55 | 56 | // static empty version; DON'T MODIFY IT 57 | public static final CheaperCharBuffer EMPTY = new CheaperCharBuffer(0); 58 | 59 | // the � character 60 | private static final char REPLACEMENT_CHARACTER = '\uFFFD'; 61 | 62 | /** 63 | * Constructs an XMLCharBuffer with a default size. 64 | */ 65 | public CheaperCharBuffer() { 66 | this.data_ = new char[INITIAL_CAPACITY]; 67 | this.length_ = 0; 68 | this.growBy_ = CAPACITY_GROWTH; 69 | } 70 | 71 | /** 72 | * Constructs an XMLCharBuffer with a desired size. 73 | * 74 | * @param startSize the size of the buffer to start with 75 | */ 76 | public CheaperCharBuffer(final int startSize) { 77 | this(startSize, CAPACITY_GROWTH); 78 | } 79 | 80 | /** 81 | * Constructs an XMLCharBuffer with a desired size. 
82 | * 83 | * @param startSize the size of the buffer to start with 84 | * @param growBy by how much do we want to grow when needed 85 | */ 86 | public CheaperCharBuffer(final int startSize, final int growBy) { 87 | this.data_ = new char[startSize]; 88 | this.length_ = 0; 89 | this.growBy_ = Math.max(1, growBy); 90 | } 91 | 92 | /** 93 | * Constructs an XMLCharBuffer from another buffer. Copies the data 94 | * over. The new buffer capacity matches the length of the source. 95 | * 96 | * @param src the source buffer to copy from 97 | */ 98 | public CheaperCharBuffer(final CheaperCharBuffer src) { 99 | this(src, 0); 100 | } 101 | 102 | /** 103 | * Constructs an XMLCharBuffer from another buffer. Copies the data 104 | * over. You can add more capacity on top of the source length. If 105 | * you specify 0, the capacity will match the src length. 106 | * 107 | * @param src the source buffer to copy from 108 | * @param addCapacity how much capacity to add to origin length 109 | */ 110 | public CheaperCharBuffer(final CheaperCharBuffer src, final int addCapacity) { 111 | this.data_ = Arrays.copyOf(src.data_, src.length_ + Math.max(0, addCapacity)); 112 | this.length_ = src.length(); 113 | this.growBy_ = Math.max(1, CAPACITY_GROWTH); 114 | } 115 | 116 | /** 117 | * Constructs an XMLCharBuffer from a string. To avoid 118 | * too much allocation, we just take the string array as is and 119 | * don't allocate extra space in the first place. 120 | * 121 | * @param src the string to copy from 122 | */ 123 | public CheaperCharBuffer(final String src) { 124 | this.data_ = src.toCharArray(); 125 | this.length_ = src.length(); 126 | this.growBy_ = CAPACITY_GROWTH; 127 | } 128 | 129 | /** 130 | * Constructs an XMLString structure preset with the specified values. 131 | * There will not be any room to grow, if you need that, construct an 132 | * empty one and append. 133 | * 134 | *

There are not range checks performed. Make sure your data is correct. 135 | * 136 | * @param ch The character array, must not be null 137 | * @param offset The offset into the character array. 138 | * @param length The length of characters from the offset. 139 | */ 140 | public CheaperCharBuffer(final char[] ch, final int offset, final int length) { 141 | // just as big as we need it 142 | this(length); 143 | append(ch, offset, length); 144 | } 145 | 146 | /** 147 | * Check capacity and grow if needed automatically 148 | * 149 | * @param minimumCapacity how much space do we need at least 150 | */ 151 | private void ensureCapacity(final int minimumCapacity) { 152 | if (minimumCapacity > this.data_.length) { 153 | final int newSize = Math.max(minimumCapacity + this.growBy_, (this.data_.length << 1) + 2); 154 | this.data_ = Arrays.copyOf(this.data_, newSize); 155 | } 156 | } 157 | 158 | /** 159 | * Returns the current max capacity without growth. Does not 160 | * indicate how much capacity is already in use. Use {@link #length()} 161 | * for that. 162 | * 163 | * @return the current capacity, not taken any usage into account 164 | */ 165 | public int capacity() { 166 | return this.data_.length; 167 | } 168 | 169 | /** 170 | * Appends a single character to the buffer. 171 | * 172 | * @param c the character to append 173 | * @return this instance 174 | */ 175 | public CheaperCharBuffer append(final char c) { 176 | final int oldLength = this.length_++; 177 | 178 | // ensureCapacity is not inlined by the compiler, so put that here for the most 179 | // called method of all appends. Duplicate code, but for a reason. 180 | if (oldLength == this.data_.length) { 181 | final int newSize = Math.max(oldLength + this.growBy_, (this.data_.length << 1) + 2); 182 | this.data_ = Arrays.copyOf(this.data_, newSize); 183 | } 184 | 185 | this.data_[oldLength] = c; 186 | 187 | return this; 188 | } 189 | 190 | /** 191 | * Append a string to this buffer without copying the string first. 
192 | * 193 | * @param src the string to append 194 | * @return this instance 195 | */ 196 | public CheaperCharBuffer append(final String src) { 197 | final int start = this.length_; 198 | this.length_ = this.length_ + src.length(); 199 | ensureCapacity(this.length_); 200 | 201 | // copy char by char because we don't get a copy for free 202 | // from a string yet, this might change when immutable arrays 203 | // make it into Java, but that will not be very soon 204 | for (int i = 0; i < src.length(); i++) { 205 | this.data_[start + i] = src.charAt(i); 206 | } 207 | 208 | return this; 209 | } 210 | 211 | /** 212 | * Add another buffer to this one. 213 | * 214 | * @param src the buffer to append 215 | * @return this instance 216 | */ 217 | public CheaperCharBuffer append(final CheaperCharBuffer src) { 218 | final int start = this.length_; 219 | this.length_ = this.length_ + src.length(); 220 | ensureCapacity(this.length_); 221 | 222 | System.arraycopy(src.data_, 0, this.data_, start, src.length_); 223 | 224 | return this; 225 | } 226 | 227 | /** 228 | * Add data from a char array to this buffer with the ability to specify 229 | * a range to copy from 230 | * 231 | * @param src the source char array 232 | * @param offset the pos to start to copy from 233 | * @param length the length of the data to copy 234 | * 235 | * @return this instance 236 | */ 237 | public CheaperCharBuffer append(final char[] src, final int offset, final int length) { 238 | final int start = this.length_; 239 | this.length_ = start + length; 240 | 241 | ensureCapacity(this.length_); 242 | 243 | System.arraycopy(src, offset, this.data_, start, length); 244 | 245 | return this; 246 | } 247 | 248 | /** 249 | * Returns the current length 250 | * 251 | * @return the length of the charbuffer data 252 | */ 253 | public int length() { 254 | return length_; 255 | } 256 | 257 | /** 258 | * Tell us how much the capacity grows if needed 259 | * 260 | * @return the value that determines how much we grow the 
backing 261 | * array in case we have to 262 | */ 263 | public int getGrowBy() { 264 | return this.growBy_; 265 | } 266 | 267 | /** 268 | * Resets the buffer to 0 length. It won't resize it to avoid memory 269 | * churn. 270 | * 271 | * @return this instance for fluid programming 272 | */ 273 | public CheaperCharBuffer clear() { 274 | this.length_ = 0; 275 | 276 | return this; 277 | } 278 | 279 | /** 280 | * Resets the buffer to 0 length and sets the new data. This 281 | * is a little cheaper than clear().append(c) depending on 282 | * the where and the inlining decisions. 283 | * 284 | * @param c the char to set 285 | * @return this instance for fluid programming 286 | */ 287 | public CheaperCharBuffer clearAndAppend(final char c) { 288 | this.length_ = 0; 289 | 290 | if (this.data_.length > 0) { 291 | this.data_[this.length_] = c; 292 | this.length_++; 293 | } 294 | else { 295 | // the rare case when we don't have any buffer at hand 296 | append(c); 297 | } 298 | 299 | return this; 300 | } 301 | 302 | /** 303 | * Does this buffer end with this string? If we check for 304 | * the empty string, we get true. If we would support JDK 11, we could 305 | * use Arrays.mismatch and be way faster. 
306 | * 307 | * @param s the string to check the end against 308 | * @return true of the end matches the buffer, false otherwise 309 | */ 310 | public boolean endsWith(final String s) { 311 | // length does not match, cannot be the end 312 | if (this.length_ < s.length()) { 313 | return false; 314 | } 315 | 316 | // check the string by each char, avoids a copy of the string 317 | final int start = this.length_ - s.length(); 318 | 319 | // change this to Arrays.mismatch when going JDK 11 or higher 320 | for (int i = 0; i < s.length(); i++) { 321 | if (this.data_[i + start] != s.charAt(i)) { 322 | return false; 323 | } 324 | } 325 | 326 | return true; 327 | } 328 | 329 | /** 330 | * Reduces the buffer to the content between start and end marker when 331 | * only whitespaces are found before the startMarker as well as after the end marker. 332 | * If both strings overlap due to identical characters such as "foo" and "oof" 333 | * and the buffer is " foof ", we don't do anything. 334 | * 335 | *

If a marker is empty, it behaves like {@link java.lang.String#trim()} on that side. 336 | * 337 | * @param startMarker the start string to find, must not be null 338 | * @param endMarker the end string to find, must not be null 339 | * @return this instance 340 | * 341 | * @deprecated Use the new method {@link #trimToContent(String, String)} instead. 342 | */ 343 | public CheaperCharBuffer reduceToContent(final String startMarker, final String endMarker) { 344 | return trimToContent(startMarker, endMarker); 345 | } 346 | 347 | /** 348 | * Reduces the buffer to the content between start and end marker when 349 | * only whitespaces are found before the startMarker as well as after the end marker. 350 | * If both strings overlap due to identical characters such as "foo" and "oof" 351 | * and the buffer is " foof ", we don't do anything. 352 | * 353 | *

If a marker is empty, it behaves like {@link java.lang.String#trim()} on that side. 354 | * 355 | * @param startMarker the start string to find, must not be null 356 | * @param endMarker the end string to find, must not be null 357 | * @return this instance 358 | */ 359 | public CheaperCharBuffer trimToContent(final String startMarker, final String endMarker) { 360 | // if both are longer or same length than content, don't do anything 361 | final int markerLength = startMarker.length() + endMarker.length(); 362 | if (markerLength >= this.length_) { 363 | return this; 364 | } 365 | 366 | // run over starting whitespaces 367 | int sPos = 0; 368 | for (; sPos < this.length_ - markerLength; sPos++) { 369 | if (!Character.isWhitespace(this.data_[sPos])) { 370 | break; 371 | } 372 | } 373 | 374 | // run over ending whitespaces 375 | int ePos = this.length_ - 1; 376 | for (; ePos > sPos - markerLength; ePos--) { 377 | if (!Character.isWhitespace(this.data_[ePos])) { 378 | break; 379 | } 380 | } 381 | 382 | // if we have less content than marker length, give up 383 | // this also helps when markers overlap such as 384 | // and the string is " " 385 | if (ePos - sPos + 1 < markerLength) { 386 | return this; 387 | } 388 | 389 | // check the start 390 | for (int i = 0; i < startMarker.length(); i++) { 391 | if (startMarker.charAt(i) != this.data_[i + sPos]) { 392 | // no start match, stop and don't do anything 393 | return this; 394 | } 395 | } 396 | 397 | // check the end, ePos is when the first good char 398 | // occurred 399 | final int endStartCheckPos = ePos - endMarker.length() + 1; 400 | for (int i = 0; i < endMarker.length(); i++) { 401 | if (endMarker.charAt(i) != this.data_[endStartCheckPos + i]) { 402 | // no start match, stop and don't do anything 403 | return this; 404 | } 405 | } 406 | 407 | // shift left and cut length 408 | final int newLength = ePos - sPos + 1 - markerLength; 409 | System.arraycopy(this.data_, 410 | sPos + startMarker.length(), 411 | 
this.data_, 412 | 0, newLength); 413 | this.length_ = newLength; 414 | 415 | return this; 416 | } 417 | 418 | /** 419 | * Check if we have only whitespaces 420 | * 421 | * @return true if we have only whitespace, false otherwise 422 | */ 423 | public boolean isWhitespace() { 424 | for (int i = 0; i < this.length_; i++) { 425 | if (!Character.isWhitespace(this.data_[i])) { 426 | return false; 427 | } 428 | } 429 | return true; 430 | } 431 | 432 | /** 433 | * Trims the string similar to {@link java.lang.String#trim()} 434 | * 435 | * @return a string with removed whitespace at the beginning and the end 436 | */ 437 | public CheaperCharBuffer trim() { 438 | // clean the end first, because it is cheap 439 | return trimTrailing().trimLeading(); 440 | } 441 | 442 | /** 443 | * Removes all whitespace before the first non-whitespace char. 444 | * If all are whitespaces, we get an empty buffer 445 | * 446 | * @return this instance 447 | */ 448 | public CheaperCharBuffer trimLeading() { 449 | // run over starting whitespace 450 | int sPos = 0; 451 | for (; sPos < this.length_; sPos++) { 452 | if (!Character.isWhitespace(this.data_[sPos])) { 453 | break; 454 | } 455 | } 456 | 457 | if (sPos == 0) { 458 | // nothing to do 459 | return this; 460 | } 461 | else if (sPos == this.length_) { 462 | // only whitespace 463 | this.length_ = 0; 464 | return this; 465 | } 466 | 467 | // shift left 468 | final int newLength = this.length_ - sPos; 469 | System.arraycopy(this.data_, 470 | sPos, 471 | this.data_, 472 | 0, newLength); 473 | this.length_ = newLength; 474 | 475 | return this; 476 | } 477 | 478 | /** 479 | * Removes all whitespace at the end. 480 | * If all are whitespace, we get an empty buffer 481 | * 482 | * @return this instance 483 | * 484 | * @deprecated Use {@link #trimTrailing()} instead. 485 | */ 486 | public CheaperCharBuffer trimWhitespaceAtEnd() { 487 | return trimTrailing(); 488 | } 489 | 490 | /** 491 | * Removes all whitespace at the end. 
492 | * If all are whitespace, we get an empty buffer 493 | * 494 | * @return this instance 495 | */ 496 | public CheaperCharBuffer trimTrailing() { 497 | // run over ending whitespaces 498 | int ePos = this.length_ - 1; 499 | for (; ePos >= 0; ePos--) { 500 | if (!Character.isWhitespace(this.data_[ePos])) { 501 | break; 502 | } 503 | } 504 | 505 | this.length_ = ePos + 1; 506 | 507 | return this; 508 | } 509 | 510 | /** 511 | * Shortens the buffer by that many positions. If the count is 512 | * larger than the length, we get just an empty buffer. If you pass in negative 513 | * values, we are failing, likely often silently. It is all about performance and 514 | * not a general all-purpose API. 515 | * 516 | * @param count a positive number, no runtime checks, if count is larger than 517 | * length, we get length = 0 518 | * @return this instance 519 | */ 520 | public CheaperCharBuffer shortenBy(final int count) { 521 | final int newLength = this.length_ - count; 522 | this.length_ = newLength < 0 ? 0 : newLength; 523 | 524 | return this; 525 | } 526 | 527 | /** 528 | * Get the characters as char array, this will be a copy! 529 | * 530 | * @return a copy of the underlying char darta 531 | */ 532 | public char[] getChars() { 533 | return Arrays.copyOf(this.data_, this.length_); 534 | } 535 | 536 | /** 537 | * Returns a string representation of this buffer. This will be a copy 538 | * operation. If the buffer is emoty, we get a constant empty String back 539 | * to avoid any overhead. 540 | * 541 | * @return a string of the content of this buffer 542 | */ 543 | @Override 544 | public String toString() { 545 | if (this.length_ > 0) { 546 | return new String(this.data_, 0, this.length_); 547 | } 548 | else { 549 | return ""; 550 | } 551 | } 552 | 553 | /** 554 | * Returns the char a the given position. Will complain if 555 | * we try to read outside the range. 
We do a range check here 556 | * because we might not notice when we are within the buffer 557 | * but outside the current length. 558 | * 559 | * @param index the position to read from 560 | * @return the char at the position 561 | * @throws IndexOutOfBoundsException 562 | * in case one tries to read outside of valid buffer range 563 | */ 564 | @Override 565 | public char charAt(final int index) { 566 | if (index > this.length_ - 1 || index < 0) { 567 | throw new IndexOutOfBoundsException( 568 | "Tried to read outside of the valid buffer data"); 569 | } 570 | 571 | return this.data_[index]; 572 | } 573 | 574 | /** 575 | * Returns the char at the given position. No checks are 576 | * performed. It is up to the caller to make sure we 577 | * read correctly. Reading outside of the array will 578 | * cause an {@link IndexOutOfBoundsException} but using an 579 | * incorrect position in the array (such as beyond length) 580 | * might stay unnoticed! This is a performance method, 581 | * use at your own risk. 582 | * 583 | * @param index the position to read from 584 | * @return the char at the position 585 | */ 586 | public char unsafeCharAt(final int index) { 587 | return this.data_[index]; 588 | } 589 | 590 | /** 591 | * Returns a content copy of this buffer 592 | * 593 | * @return a copy of this buffer, the capacity might differ 594 | */ 595 | @Override 596 | public CheaperCharBuffer clone() { 597 | return new CheaperCharBuffer(this); 598 | } 599 | 600 | /** 601 | * Returns a CharSequence that is a subsequence of this sequence. 602 | * The subsequence starts with the char value at the specified index and 603 | * ends with the char value at index end - 1. The length 604 | * (in chars) of the 605 | * returned sequence is end - start, so if start == end 606 | * then an empty sequence is returned. 
607 | * 608 | * @param start the start index, inclusive 609 | * @param end the end index, exclusive 610 | * 611 | * @return the specified subsequence 612 | * 613 | * @throws IndexOutOfBoundsException 614 | * if start or end are negative, 615 | * if end is greater than length(), 616 | * or if start is greater than end 617 | * 618 | * @return a charsequence of this buffer 619 | */ 620 | @Override 621 | public CharSequence subSequence(final int start, final int end) { 622 | if (start < 0) { 623 | throw new StringIndexOutOfBoundsException(start); 624 | } 625 | if (end > this.length_) { 626 | throw new StringIndexOutOfBoundsException(end); 627 | } 628 | 629 | final int l = end - start; 630 | if (l < 0) { 631 | throw new StringIndexOutOfBoundsException(l); 632 | } 633 | 634 | return new String(this.data_, start, l); 635 | } 636 | 637 | /** 638 | * Two buffers are identical when the length and 639 | * the content of the backing array (only for the 640 | * data in view) are identical. 641 | * 642 | * @param o the object to compare with 643 | * @return true if length and array content match, false otherwise 644 | */ 645 | @Override 646 | public boolean equals(final Object o) { 647 | if (o instanceof CharSequence) { 648 | final CharSequence ob = (CharSequence) o; 649 | 650 | if (ob.length() != this.length_) { 651 | return false; 652 | } 653 | 654 | // ok, in JDK 11 or up, we could use an 655 | // Arrays.mismatch, but we cannot do that 656 | // due to JDK 8 compatibility 657 | for (int i = 0; i < this.length_; i++) { 658 | if (ob.charAt(i) != this.data_[i]) { 659 | return false; 660 | } 661 | } 662 | 663 | // length and content match, be happy 664 | return true; 665 | } 666 | 667 | return false; 668 | } 669 | 670 | /** 671 | * We don't cache the hashcode because we mutate often. Don't use this in 672 | * hashmaps as key. But you can use that to look up in a hashmap against 673 | * a string using the CharSequence interface. 
674 | * 675 | * @return the hashcode, similar to what a normal string would deliver 676 | */ 677 | @Override 678 | public int hashCode() { 679 | int h = 0; 680 | 681 | for (int i = 0; i < this.length_; i++) { 682 | h = ((h << 5) - h) + this.data_[i]; 683 | } 684 | 685 | return h; 686 | } 687 | 688 | /** 689 | * Append a character to an XMLCharBuffer. The character is an int value, and 690 | * can either be a single UTF-16 character or a supplementary character 691 | * represented by two UTF-16 code points. 692 | * 693 | * @param value The character value. 694 | * @return this instance for fluid programming 695 | * 696 | * @throws IllegalArgumentException if the specified 697 | * {@code codePoint} is not a valid Unicode code point. 698 | */ 699 | public CheaperCharBuffer appendCodePoint(final int value) { 700 | if (value <= Character.MAX_VALUE) { 701 | return this.append((char) value); 702 | } 703 | else { 704 | try { 705 | final char[] chars = Character.toChars(value); 706 | return this.append(chars, 0, chars.length); 707 | } 708 | catch (final IllegalArgumentException e) { 709 | // when value is not valid as UTF-16 710 | this.append(REPLACEMENT_CHARACTER); 711 | throw e; 712 | } 713 | } 714 | } 715 | } 716 | -------------------------------------------------------------------------------- /src/main/java/org/rschwietzke/FastRandom.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 The original authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package org.rschwietzke; 17 | 18 | /** 19 | * Ultra-fast pseudo random generator that is not synchronized! 20 | * Don't use anything from Random by inheritance, this will inherit 21 | * a volatile! Not my idea, copyied in parts some demo random 22 | * generator lessons. 23 | * 24 | * @author rschwietzke 25 | * 26 | */ 27 | public class FastRandom { 28 | private long seed; 29 | 30 | public FastRandom() { 31 | this.seed = System.currentTimeMillis(); 32 | } 33 | 34 | public FastRandom(long seed) { 35 | this.seed = seed; 36 | } 37 | 38 | protected int next(int nbits) { 39 | // N.B. Not thread-safe! 40 | long x = this.seed; 41 | x ^= (x << 21); 42 | x ^= (x >>> 35); 43 | x ^= (x << 4); 44 | this.seed = x; 45 | 46 | x &= ((1L << nbits) - 1); 47 | 48 | return (int) x; 49 | } 50 | 51 | /** 52 | * Borrowed from the JDK 53 | * 54 | * @param bound 55 | * @return 56 | */ 57 | public int nextInt(int bound) { 58 | int r = next(31); 59 | int m = bound - 1; 60 | if ((bound & m) == 0) // i.e., bound is a power of 2 61 | r = (int) ((bound * (long) r) >> 31); 62 | else { 63 | for (int u = r; u - (r = u % bound) + m < 0; u = next(31)) 64 | ; 65 | } 66 | return r; 67 | } 68 | 69 | /** 70 | * Borrowed from the JDK 71 | * @return 72 | */ 73 | public int nextInt() { 74 | return next(32); 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/main/nodejs/Edgar-P-yan/.gitignore: -------------------------------------------------------------------------------- 1 | # Based on https://raw.githubusercontent.com/github/gitignore/main/Node.gitignore 2 | 3 | # Logs 4 | 5 | logs 6 | _.log 7 | npm-debug.log_ 8 | yarn-debug.log* 9 | yarn-error.log* 10 | lerna-debug.log* 11 | .pnpm-debug.log* 12 | 13 | # Caches 14 | 15 | .cache 16 | 17 | # Diagnostic reports (https://nodejs.org/api/report.html) 18 | 19 | 
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json

# Runtime data

pids
*.pid
*.seed
*.pid.lock

# Directory for instrumented libs generated by jscoverage/JSCover

lib-cov

# Coverage directory used by tools like istanbul

coverage
*.lcov

# nyc test coverage

.nyc_output

# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)

.grunt

# Bower dependency directory (https://bower.io/)

bower_components

# node-waf configuration

.lock-wscript

# Compiled binary addons (https://nodejs.org/api/addons.html)

build/Release

# Dependency directories

node_modules/
jspm_packages/

# Snowpack dependency directory (https://snowpack.dev/)

web_modules/

# TypeScript cache

*.tsbuildinfo

# Optional npm cache directory

.npm

# Optional eslint cache

.eslintcache

# Optional stylelint cache

.stylelintcache

# Microbundle cache

.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/

# Optional REPL history

.node_repl_history

# Output of 'npm pack'

*.tgz

# Yarn Integrity file

.yarn-integrity

# dotenv environment variable files

.env
.env.development.local
.env.test.local
.env.production.local
.env.local

# parcel-bundler cache (https://parceljs.org/)

.parcel-cache

# Next.js build output

.next
out

# Nuxt.js build / generate output

.nuxt
dist

# Gatsby files

# Comment in the public line in if your project uses Gatsby and not Next.js

# https://nextjs.org/blog/next-9-1#public-directory-support

# public

# vuepress build output 132 | 133 | .vuepress/dist 134 | 135 | # vuepress v2.x temp and cache directory 136 | 137 | .temp 138 | 139 | # Docusaurus cache and generated files 140 | 141 | .docusaurus 142 | 143 | # Serverless directories 144 | 145 | .serverless/ 146 | 147 | # FuseBox cache 148 | 149 | .fusebox/ 150 | 151 | # DynamoDB Local files 152 | 153 | .dynamodb/ 154 | 155 | # TernJS port file 156 | 157 | .tern-port 158 | 159 | # Stores VSCode versions used for testing VSCode extensions 160 | 161 | .vscode-test 162 | 163 | # yarn v2 164 | 165 | .yarn/cache 166 | .yarn/unplugged 167 | .yarn/build-state.yml 168 | .yarn/install-state.gz 169 | .pnp.* 170 | 171 | # IntelliJ based IDEs 172 | .idea 173 | 174 | # Finder (MacOS) folder config 175 | .DS_Store 176 | -------------------------------------------------------------------------------- /src/main/nodejs/Edgar-P-yan/README.md: -------------------------------------------------------------------------------- 1 | # 1brc Node.js by [@Edgar-P-yan](https://github.com/Edgar-P-yan) 2 | 3 | ## Details: 4 | 5 | - Machine: MacBook Pro M1 Max 32GB; 6 | - Best results I could get with **Node.js is 23s**; 7 | - Utilizes all cores of the system (10 on mine) via worker threads; 8 | - A custom input-specific and quite fast float point to integer parser; 9 | - Byte-by-byte processing of the whole file; 10 | - Statically typed code that gets JITed very efficiently; 11 | 12 | ## What should be improved: 13 | 14 | - Custom hashmap with 2-byte hashes. Right now I just use the builtin `Map`, which is too general purpose for this task, hence quite slow, around 30% of spent time is on the Map. 15 | - Set higher watermarks for the GC, it might save some milliseconds too. 16 | - Do fewer allocations. Right now for each station name in each row a new string gets allocated. 
17 | -------------------------------------------------------------------------------- /src/main/nodejs/Edgar-P-yan/index.js: -------------------------------------------------------------------------------- 1 | import * as os from 'node:os'; 2 | import * as fsp from 'node:fs/promises'; 3 | import * as fs from 'node:fs'; 4 | import * as workerThreads from 'worker_threads'; 5 | 6 | const MAX_LINE_LENGTH = 100 + 1 + 4 + 1; 7 | const CHAR_SEMICOLON = ';'.charCodeAt(0); 8 | const CHAR_NEWLINE = '\n'.charCodeAt(0); 9 | const TOKEN_STATION_NAME = 0; 10 | const TOKEN_TEMPERATURE = 1; 11 | 12 | /** @type {(...args: any[]) => void} */ 13 | const debug = process.env.DEBUG 14 | ? (...args) => console.error(`Thread ${workerThreads.threadId}:`, args) 15 | : () => {}; 16 | 17 | /** 18 | * @typedef {Map} CalcResultsCont 19 | */ 20 | 21 | if (workerThreads.isMainThread) { 22 | const fileName = process.argv[2]; 23 | 24 | const file = await fsp.open(fileName); 25 | 26 | const size = (await file.stat()).size; 27 | 28 | const threadsCount = os.cpus().length; 29 | 30 | const chunkSize = Math.floor(size / threadsCount); 31 | 32 | /** @type {number[]} */ 33 | const chunkOffsets = []; 34 | 35 | let offset = 0; 36 | const bufFindNl = Buffer.alloc(MAX_LINE_LENGTH); 37 | 38 | while (true) { 39 | offset += chunkSize; 40 | 41 | if (offset >= size) { 42 | chunkOffsets.push(size); 43 | break; 44 | } 45 | 46 | await file.read(bufFindNl, 0, MAX_LINE_LENGTH, offset); 47 | 48 | const nlPos = bufFindNl.indexOf(10); 49 | bufFindNl.fill(0); 50 | 51 | if (nlPos === -1) { 52 | chunkOffsets.push(size); 53 | break; 54 | } else { 55 | offset += nlPos + 1; 56 | chunkOffsets.push(offset); 57 | } 58 | } 59 | 60 | await file.close(); 61 | 62 | /** 63 | * @type {CalcResultsCont} 64 | */ 65 | const compiledResults = new Map(); 66 | 67 | let stoppedWorkers = 0; 68 | 69 | for (let i = 0; i < chunkOffsets.length; i++) { 70 | const worker = new workerThreads.Worker( 71 | new URL(import.meta.resolve('./index.js')), 72 | 
{ 73 | workerData: { 74 | fileName, 75 | start: i === 0 ? 0 : chunkOffsets[i - 1], 76 | end: chunkOffsets[i], 77 | }, 78 | } 79 | ); 80 | 81 | worker.on( 82 | 'message', 83 | ( 84 | /** @type {CalcResultsCont} */ 85 | message 86 | ) => { 87 | for (let [key, value] of message.entries()) { 88 | const existing = compiledResults.get(key); 89 | if (existing) { 90 | existing.min = Math.min(existing.min, value.min); 91 | existing.max = Math.max(existing.max, value.max); 92 | existing.sum += value.sum; 93 | existing.count += value.count; 94 | } else { 95 | compiledResults.set(key, value); 96 | } 97 | } 98 | } 99 | ); 100 | 101 | worker.on('error', (err) => { 102 | console.error(err); 103 | }); 104 | 105 | worker.on('exit', (code) => { 106 | if (code !== 0) { 107 | new Error(`Worker stopped with exit code ${code}`); 108 | } else { 109 | debug('Worker stopped'); 110 | } 111 | 112 | stoppedWorkers++; 113 | 114 | if (stoppedWorkers === chunkOffsets.length) { 115 | printCompiledResults(compiledResults); 116 | } 117 | }); 118 | } 119 | } else { 120 | const { fileName, start, end } = workerThreads.workerData; 121 | if (start > end - 1) { 122 | workerThreads.parentPort.postMessage(new Map()); 123 | } else { 124 | const readStream = fs.createReadStream(fileName, { 125 | start: start, 126 | end: end - 1, 127 | }); 128 | 129 | parseStream(readStream); 130 | } 131 | } 132 | 133 | /** 134 | * @param {CalcResultsCont} compiledResults 135 | */ 136 | function printCompiledResults(compiledResults) { 137 | const sortedStations = Array.from(compiledResults.keys()).sort(); 138 | 139 | process.stdout.write('{'); 140 | for (let i = 0; i < sortedStations.length; i++) { 141 | if (i > 0) { 142 | process.stdout.write(', '); 143 | } 144 | const data = compiledResults.get(sortedStations[i]); 145 | process.stdout.write(sortedStations[i]); 146 | process.stdout.write('='); 147 | process.stdout.write( 148 | round(data.min / 10) + 149 | '/' + 150 | round(data.sum / 10 / data.count) + 151 | '/' + 152 | 
round(data.max / 10) 153 | ); 154 | } 155 | process.stdout.write('}\n'); 156 | } 157 | 158 | /** 159 | * @example 160 | * round(1.2345) // "1.2" 161 | * round(1.55) // "1.6" 162 | * round(1) // "1.0" 163 | * 164 | * @param {number} num 165 | * @returns {string} 166 | */ 167 | function round(num) { 168 | const fixed = Math.round(10 * num) / 10; 169 | 170 | return fixed.toFixed(1); 171 | } 172 | 173 | /** 174 | * @param {import('node:fs').ReadStream} readStream 175 | */ 176 | function parseStream(readStream) { 177 | let readingToken = TOKEN_STATION_NAME; 178 | 179 | let stationName = Buffer.allocUnsafe(100); 180 | let stationNameLen = 0; 181 | 182 | let temperature = Buffer.allocUnsafe(5); 183 | let temperatureLen = 0; 184 | 185 | /** 186 | * @type {CalcResultsCont} 187 | */ 188 | const map = new Map(); 189 | 190 | /** 191 | * @param {Buffer} chunk 192 | * @returns {void} 193 | */ 194 | function parseChunk(chunk) { 195 | for (let i = 0; i < chunk.length; i++) { 196 | if (chunk[i] === CHAR_SEMICOLON) { 197 | readingToken = TOKEN_TEMPERATURE; 198 | } else if (chunk[i] === CHAR_NEWLINE) { 199 | const stationNameStr = stationName.toString('utf8', 0, stationNameLen); 200 | 201 | let temperatureFloat = 0 | 0; 202 | try { 203 | temperatureFloat = parseFloatBufferIntoInt( 204 | temperature, 205 | temperatureLen 206 | ); 207 | } catch (err) { 208 | console.log({ temperature, temperatureLen }, err.message); 209 | throw err; 210 | } 211 | 212 | const existing = map.get(stationNameStr); 213 | 214 | if (existing) { 215 | existing.min = 216 | existing.min < temperatureFloat ? existing.min : temperatureFloat; 217 | existing.max = 218 | existing.max > temperatureFloat ? 
existing.max : temperatureFloat; 219 | existing.sum += temperatureFloat; 220 | existing.count++; 221 | } else { 222 | map.set(stationNameStr, { 223 | min: temperatureFloat, 224 | max: temperatureFloat, 225 | sum: temperatureFloat, 226 | count: 1, 227 | }); 228 | } 229 | 230 | readingToken = TOKEN_STATION_NAME; 231 | stationNameLen = 0; 232 | temperatureLen = 0; 233 | } else if (readingToken === TOKEN_STATION_NAME) { 234 | stationName[stationNameLen] = chunk[i]; 235 | stationNameLen++; 236 | } else { 237 | temperature[temperatureLen] = chunk[i]; 238 | temperatureLen++; 239 | } 240 | } 241 | } 242 | 243 | readStream.on('data', (/** @type {Buffer} */ chunk) => { 244 | parseChunk(chunk); 245 | }); 246 | 247 | readStream.on('end', () => { 248 | debug('Sending result to the main thread'); 249 | workerThreads.parentPort.postMessage(map); 250 | }); 251 | } 252 | 253 | const CHAR_MINUS = '-'.charCodeAt(0); 254 | 255 | /** 256 | * @param {Buffer} b 257 | * @param {number} length 1-5 258 | * 259 | * @returns {number} 260 | */ 261 | function parseFloatBufferIntoInt(b, length) { 262 | if (b[0] === CHAR_MINUS) { 263 | // b can be -1.1 or -11.1 264 | switch (length) { 265 | case 4: 266 | return -(parseOneDigit(b[1]) * 10 + parseOneDigit(b[3])); 267 | case 5: 268 | return -( 269 | parseOneDigit(b[1]) * 100 + 270 | parseOneDigit(b[2]) * 10 + 271 | parseOneDigit(b[4]) 272 | ); 273 | } 274 | } else { 275 | // b can be 1.1 or 11.1 276 | switch (length) { 277 | case 3: // b is 1.1 278 | return parseOneDigit(b[0]) * 10 + parseOneDigit(b[2]); 279 | case 4: 280 | return ( 281 | parseOneDigit(b[0]) * 100 + 282 | parseOneDigit(b[1]) * 10 + 283 | parseOneDigit(b[3]) 284 | ); 285 | } 286 | } 287 | } 288 | 289 | /** 290 | * @param {number} char byte number of a digit char 291 | * 292 | * @returns {number} 293 | */ 294 | function parseOneDigit(char) { 295 | return char - 0x30; 296 | } 297 | -------------------------------------------------------------------------------- 
/src/main/nodejs/Edgar-P-yan/package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "nodejs", 3 | "lockfileVersion": 3, 4 | "requires": true, 5 | "packages": { 6 | "": { 7 | "devDependencies": { 8 | "@types/node": "^20.10.6" 9 | } 10 | }, 11 | "node_modules/@types/node": { 12 | "version": "20.10.6", 13 | "resolved": "https://registry.npmjs.org/@types/node/-/node-20.10.6.tgz", 14 | "integrity": "sha512-Vac8H+NlRNNlAmDfGUP7b5h/KA+AtWIzuXy0E6OyP8f1tCLYAtPvKRRDJjAPqhpCb0t6U2j7/xqAuLEebW2kiw==", 15 | "dev": true, 16 | "dependencies": { 17 | "undici-types": "~5.26.4" 18 | } 19 | }, 20 | "node_modules/undici-types": { 21 | "version": "5.26.5", 22 | "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", 23 | "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", 24 | "dev": true 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/main/nodejs/Edgar-P-yan/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "module", 3 | "module": "es2022", 4 | "devDependencies": { 5 | "@types/node": "^20.10.6" 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /src/main/nodejs/Edgar-P-yan/result.txt: -------------------------------------------------------------------------------- 1 | {Abha=-32.2/18.0/67.2, 2 | Abidjan=-23.6/26.0/79.6, 3 | Abéché=-22.9/29.4/82.2, 4 | Accra=-23.4/26.4/75.5, 5 | Addis Ababa=-37.9/16.0/64.6, 6 | Adelaide=-32.0/17.3/67.3, 7 | Aden=-19.9/29.1/78.7, 8 | Ahvaz=-27.1/25.4/77.5, 9 | Albuquerque=-35.2/14.0/62.1, 10 | Alexandra=-37.9/11.0/57.2, 11 | Alexandria=-29.4/20.0/74.8, 12 | Algiers=-27.2/18.2/69.4, 13 | Alice Springs=-26.7/21.0/67.9, 14 | Almaty=-38.9/10.0/63.0, 15 | Amsterdam=-39.4/10.2/56.5, 16 | Anadyr=-57.3/-6.9/41.9, 17 | Anchorage=-48.8/2.8/55.0, 18 
| Andorra la Vella=-39.4/9.8/60.7, 19 | Ankara=-39.0/12.0/68.1, 20 | Antananarivo=-31.3/17.9/72.0, 21 | Antsiranana=-23.5/25.2/74.0, 22 | Arkhangelsk=-49.8/1.3/50.9, 23 | Ashgabat=-33.4/17.1/66.0, 24 | Asmara=-34.1/15.6/63.6, 25 | Assab=-23.3/30.5/85.1, 26 | Astana=-50.6/3.5/50.0, 27 | Athens=-33.8/19.2/69.8, 28 | Atlanta=-33.0/17.0/63.4, 29 | Auckland=-37.7/15.2/65.1, 30 | Austin=-29.8/20.7/69.2, 31 | Baghdad=-28.3/22.8/74.3, 32 | Baguio=-32.7/19.5/65.1, 33 | Baku=-34.5/15.1/71.7, 34 | Baltimore=-39.0/13.1/63.8, 35 | Bamako=-26.6/27.8/76.5, 36 | Bangkok=-19.9/28.6/76.3, 37 | Bangui=-22.6/26.0/75.3, 38 | Banjul=-23.6/26.0/76.7, 39 | Barcelona=-34.9/18.2/66.8, 40 | Bata=-23.0/25.1/73.3, 41 | Batumi=-33.2/14.0/65.3, 42 | Beijing=-36.0/12.9/66.7, 43 | Beirut=-29.0/20.9/72.3, 44 | Belgrade=-42.2/12.5/66.3, 45 | Belize City=-22.6/26.7/76.2, 46 | Benghazi=-29.7/19.9/67.9, 47 | Bergen=-41.1/7.7/58.8, 48 | Berlin=-36.0/10.3/60.5, 49 | Bilbao=-35.8/14.7/62.6, 50 | Birao=-26.4/26.5/75.2, 51 | Bishkek=-36.5/11.3/58.1, 52 | Bissau=-22.3/27.0/82.6, 53 | Blantyre=-25.3/22.2/70.2, 54 | Bloemfontein=-34.0/15.6/66.5, 55 | Boise=-36.9/11.4/60.8, 56 | Bordeaux=-37.9/14.2/66.0, 57 | Bosaso=-18.7/30.0/79.5, 58 | Boston=-37.5/10.9/62.2, 59 | Bouaké=-27.5/26.0/86.0, 60 | Bratislava=-38.9/10.5/58.6, 61 | Brazzaville=-25.9/25.0/73.9, 62 | Bridgetown=-21.8/27.0/74.9, 63 | Brisbane=-31.1/21.4/68.7, 64 | Brussels=-38.3/10.5/58.8, 65 | Bucharest=-37.3/10.8/61.2, 66 | Budapest=-40.0/11.3/60.7, 67 | Bujumbura=-23.4/23.8/71.7, 68 | Bulawayo=-28.8/18.9/69.6, 69 | Burnie=-35.2/13.1/65.9, 70 | Busan=-37.6/15.0/65.7, 71 | Cabo San Lucas=-30.6/23.9/80.5, 72 | Cairns=-27.0/25.0/81.1, 73 | Cairo=-25.5/21.4/74.6, 74 | Calgary=-42.7/4.4/57.4, 75 | Canberra=-38.4/13.1/67.4, 76 | Cape Town=-31.2/16.2/68.7, 77 | Changsha=-30.9/17.4/68.5, 78 | Charlotte=-35.1/16.1/67.3, 79 | Chiang Mai=-23.8/25.8/75.0, 80 | Chicago=-41.8/9.8/60.5, 81 | Chihuahua=-33.7/18.6/69.5, 82 | Chittagong=-24.2/25.9/76.1, 83 | 
Chișinău=-42.4/10.2/63.1, 84 | Chongqing=-31.8/18.6/70.8, 85 | Christchurch=-39.8/12.2/60.4, 86 | City of San Marino=-38.1/11.8/60.8, 87 | Colombo=-22.9/27.4/78.0, 88 | Columbus=-40.2/11.7/61.0, 89 | Conakry=-23.6/26.4/76.8, 90 | Copenhagen=-41.4/9.1/57.6, 91 | Cotonou=-20.6/27.2/75.9, 92 | Cracow=-40.4/9.3/57.1, 93 | Da Lat=-34.8/17.9/70.3, 94 | Da Nang=-25.6/25.8/81.3, 95 | Dakar=-24.6/24.0/74.9, 96 | Dallas=-33.1/19.0/68.6, 97 | Damascus=-32.9/17.0/67.4, 98 | Dampier=-25.4/26.4/77.7, 99 | Dar es Salaam=-23.1/25.8/75.4, 100 | Darwin=-28.0/27.6/76.6, 101 | Denpasar=-26.6/23.7/73.6, 102 | Denver=-38.7/10.4/56.8, 103 | Detroit=-39.3/10.0/60.2, 104 | Dhaka=-24.8/25.9/76.9, 105 | Dikson=-60.9/-11.1/38.2, 106 | Dili=-21.4/26.6/80.8, 107 | Djibouti=-16.8/29.9/78.8, 108 | Dodoma=-30.3/22.7/71.5, 109 | Dolisie=-25.1/24.0/74.8, 110 | Douala=-22.9/26.7/76.8, 111 | Dubai=-21.9/26.9/77.4, 112 | Dublin=-41.0/9.8/59.2, 113 | Dunedin=-40.5/11.1/61.3, 114 | Durban=-30.3/20.6/71.9, 115 | Dushanbe=-31.7/14.7/64.3, 116 | Edinburgh=-38.7/9.3/59.6, 117 | Edmonton=-43.6/4.2/51.7, 118 | El Paso=-33.9/18.1/65.2, 119 | Entebbe=-29.5/21.0/67.4, 120 | Erbil=-29.7/19.5/70.3, 121 | Erzurum=-45.0/5.1/58.3, 122 | Fairbanks=-49.7/-2.3/51.0, 123 | Fianarantsoa=-30.8/17.9/66.9, 124 | Flores, 125 | Petén=-21.0/26.4/79.6, 126 | Frankfurt=-37.1/10.6/60.3, 127 | Fresno=-33.3/17.9/67.3, 128 | Fukuoka=-32.2/17.0/66.6, 129 | Gaborone=-28.3/21.0/69.8, 130 | Gabès=-29.7/19.5/68.9, 131 | Gagnoa=-21.4/26.0/76.1, 132 | Gangtok=-33.4/15.2/67.2, 133 | Garissa=-20.0/29.3/79.9, 134 | Garoua=-20.7/28.3/80.3, 135 | George Town=-27.2/27.9/83.3, 136 | Ghanzi=-34.3/21.4/67.2, 137 | Gjoa Haven=-72.4/-14.4/35.4, 138 | Guadalajara=-29.1/20.9/69.0, 139 | Guangzhou=-26.5/22.4/72.3, 140 | Guatemala City=-29.3/20.4/70.7, 141 | Halifax=-42.1/7.5/58.3, 142 | Hamburg=-41.4/9.7/57.1, 143 | Hamilton=-35.1/13.8/64.7, 144 | Hanga Roa=-31.7/20.5/70.3, 145 | Hanoi=-28.1/23.6/71.4, 146 | Harare=-28.7/18.4/66.4, 147 | 
Harbin=-46.0/5.0/53.1, 148 | Hargeisa=-28.8/21.7/72.9, 149 | Hat Yai=-18.5/27.0/75.4, 150 | Havana=-29.2/25.2/74.9, 151 | Helsinki=-39.7/5.9/61.5, 152 | Heraklion=-30.0/18.9/66.9, 153 | Hiroshima=-37.7/16.3/65.3, 154 | Ho Chi Minh City=-28.5/27.4/77.3, 155 | Hobart=-37.8/12.7/60.7, 156 | Hong Kong=-24.6/23.3/74.4, 157 | Honiara=-30.8/26.5/77.2, 158 | Honolulu=-23.2/25.4/73.4, 159 | Houston=-26.6/20.8/66.8, 160 | Ifrane=-36.0/11.4/61.2, 161 | Indianapolis=-39.5/11.8/61.8, 162 | Iqaluit=-58.8/-9.3/42.0, 163 | Irkutsk=-48.6/1.0/53.8, 164 | Istanbul=-36.7/13.9/64.6, 165 | Jacksonville=-27.4/20.3/69.8, 166 | Jakarta=-24.5/26.7/76.7, 167 | Jayapura=-26.2/27.0/76.1, 168 | Jerusalem=-33.1/18.3/67.4, 169 | Johannesburg=-31.8/15.5/66.4, 170 | Jos=-25.8/22.8/75.0, 171 | Juba=-18.4/27.8/78.2, 172 | Kabul=-38.7/12.1/62.4, 173 | Kampala=-28.2/20.0/68.0, 174 | Kandi=-19.7/27.7/74.8, 175 | Kankan=-28.5/26.5/79.6, 176 | Kano=-26.8/26.4/75.6, 177 | Kansas City=-37.0/12.5/61.7, 178 | Karachi=-26.0/26.0/74.2, 179 | Karonga=-27.4/24.4/79.6, 180 | Kathmandu=-30.2/18.3/66.6, 181 | Khartoum=-18.8/29.9/89.3, 182 | Kingston=-22.7/27.4/76.7, 183 | Kinshasa=-27.2/25.3/73.9, 184 | Kolkata=-28.7/26.7/76.4, 185 | Kuala Lumpur=-25.8/27.3/76.1, 186 | Kumasi=-22.2/26.0/75.2, 187 | Kunming=-34.9/15.7/65.3, 188 | Kuopio=-46.1/3.4/55.1, 189 | Kuwait City=-25.6/25.7/77.9, 190 | Kyiv=-49.4/8.4/61.4, 191 | Kyoto=-31.6/15.8/63.2, 192 | La Ceiba=-22.3/26.2/77.8, 193 | La Paz=-25.4/23.7/72.5, 194 | Lagos=-22.3/26.8/74.4, 195 | Lahore=-23.8/24.3/72.6, 196 | Lake Havasu City=-34.1/23.7/70.9, 197 | Lake Tekapo=-38.9/8.7/59.4, 198 | Las Palmas de Gran Canaria=-31.1/21.2/68.3, 199 | Las Vegas=-27.9/20.3/74.3, 200 | Launceston=-39.2/13.1/60.7, 201 | Lhasa=-42.5/7.6/55.2, 202 | Libreville=-22.6/25.9/79.2, 203 | Lisbon=-33.0/17.5/68.8, 204 | Livingstone=-27.5/21.8/69.2, 205 | Ljubljana=-41.4/10.9/58.5, 206 | Lodwar=-19.0/29.3/78.9, 207 | Lomé=-23.4/26.9/77.7, 208 | London=-36.9/11.3/60.8, 209 | Los 
Angeles=-28.5/18.6/67.2, 210 | Louisville=-36.3/13.9/64.6, 211 | Luanda=-25.3/25.8/72.2, 212 | Lubumbashi=-30.8/20.8/70.5, 213 | Lusaka=-30.4/19.9/68.3, 214 | Luxembourg City=-44.1/9.3/60.5, 215 | Lviv=-43.5/7.8/67.9, 216 | Lyon=-36.1/12.5/59.5, 217 | Madrid=-35.0/15.0/66.2, 218 | Mahajanga=-23.7/26.3/74.5, 219 | Makassar=-21.4/26.7/78.2, 220 | Makurdi=-22.3/26.0/77.4, 221 | Malabo=-23.4/26.3/76.6, 222 | Malé=-20.3/28.0/77.9, 223 | Managua=-21.3/27.3/76.8, 224 | Manama=-26.8/26.5/75.2, 225 | Mandalay=-19.5/28.0/78.3, 226 | Mango=-19.5/28.1/77.2, 227 | Manila=-20.5/28.4/79.8, 228 | Maputo=-27.6/22.8/68.8, 229 | Marrakesh=-35.1/19.6/69.3, 230 | Marseille=-33.8/15.8/68.7, 231 | Maun=-29.5/22.4/74.4, 232 | Medan=-25.1/26.5/77.4, 233 | Mek'ele=-26.8/22.7/73.9, 234 | Melbourne=-36.4/15.1/63.3, 235 | Memphis=-33.2/17.2/64.6, 236 | Mexicali=-30.3/23.1/74.2, 237 | Mexico City=-33.8/17.5/67.5, 238 | Miami=-22.7/24.9/71.8, 239 | Milan=-35.7/13.0/61.3, 240 | Milwaukee=-41.7/8.9/59.8, 241 | Minneapolis=-41.2/7.8/58.1, 242 | Minsk=-42.4/6.7/64.3, 243 | Mogadishu=-27.5/27.1/76.1, 244 | Mombasa=-21.2/26.3/73.1, 245 | Monaco=-35.4/16.4/72.4, 246 | Moncton=-44.8/6.1/58.6, 247 | Monterrey=-30.2/22.3/72.6, 248 | Montreal=-46.4/6.8/61.0, 249 | Moscow=-47.7/5.8/55.1, 250 | Mumbai=-21.7/27.1/76.2, 251 | Murmansk=-47.5/0.6/49.2, 252 | Muscat=-20.1/28.0/77.9, 253 | Mzuzu=-34.0/17.7/69.1, 254 | N'Djamena=-20.1/28.3/80.0, 255 | Naha=-24.2/23.1/72.6, 256 | Nairobi=-29.7/17.8/67.7, 257 | Nakhon Ratchasima=-23.6/27.3/78.2, 258 | Napier=-35.7/14.6/63.8, 259 | Napoli=-36.5/15.9/67.6, 260 | Nashville=-33.1/15.4/64.3, 261 | Nassau=-23.4/24.6/77.8, 262 | Ndola=-31.0/20.3/69.9, 263 | New Delhi=-27.2/25.0/77.8, 264 | New Orleans=-30.8/20.7/69.8, 265 | New York City=-36.0/12.9/63.3, 266 | Ngaoundéré=-28.8/22.0/71.1, 267 | Niamey=-22.0/29.3/78.5, 268 | Nicosia=-35.0/19.7/69.0, 269 | Niigata=-37.8/13.9/65.0, 270 | Nouadhibou=-36.2/21.3/68.7, 271 | Nouakchott=-24.2/25.7/75.2, 272 | 
Novosibirsk=-47.4/1.7/54.6, 273 | Nuuk=-51.1/-1.4/46.6, 274 | Odesa=-38.2/10.7/59.6, 275 | Odienné=-24.9/26.0/76.9, 276 | Oklahoma City=-33.1/15.9/64.8, 277 | Omaha=-42.7/10.6/62.9, 278 | Oranjestad=-21.3/28.1/78.8, 279 | Oslo=-45.4/5.7/60.2, 280 | Ottawa=-47.7/6.6/55.8, 281 | Ouagadougou=-23.1/28.3/80.2, 282 | Ouahigouya=-21.1/28.6/77.8, 283 | Ouarzazate=-30.3/18.9/72.3, 284 | Oulu=-46.8/2.7/52.7, 285 | Palembang=-20.4/27.3/76.2, 286 | Palermo=-34.7/18.5/70.1, 287 | Palm Springs=-26.6/24.5/72.4, 288 | Palmerston North=-39.9/13.2/61.2, 289 | Panama City=-20.0/28.0/78.1, 290 | Parakou=-24.6/26.8/78.2, 291 | Paris=-38.6/12.3/61.5, 292 | Perth=-41.6/18.7/65.5, 293 | Petropavlovsk-Kamchatsky=-48.9/1.9/49.1, 294 | Philadelphia=-38.0/13.2/65.2, 295 | Phnom Penh=-21.8/28.3/77.0, 296 | Phoenix=-26.2/23.9/77.9, 297 | Pittsburgh=-39.3/10.8/59.4, 298 | Podgorica=-34.6/15.3/65.2, 299 | Pointe-Noire=-25.3/26.1/78.5, 300 | Pontianak=-20.7/27.7/79.6, 301 | Port Moresby=-22.7/26.9/74.6, 302 | Port Sudan=-24.1/28.4/76.2, 303 | Port Vila=-25.8/24.3/75.2, 304 | Port-Gentil=-24.1/26.0/77.3, 305 | Portland (OR)=-39.5/12.4/60.7, 306 | Porto=-35.6/15.7/71.0, 307 | Prague=-41.6/8.4/57.7, 308 | Praia=-25.7/24.4/71.8, 309 | Pretoria=-33.9/18.2/67.6, 310 | Pyongyang=-39.9/10.8/59.0, 311 | Rabat=-29.7/17.2/67.5, 312 | Rangpur=-29.5/24.4/72.5, 313 | Reggane=-22.1/28.3/80.0, 314 | Reykjavík=-46.2/4.3/51.4, 315 | Riga=-41.7/6.2/53.3, 316 | Riyadh=-34.8/26.0/74.1, 317 | Rome=-37.0/15.2/69.5, 318 | Roseau=-28.8/26.2/77.0, 319 | Rostov-on-Don=-42.8/9.9/62.3, 320 | Sacramento=-31.9/16.3/64.0, 321 | Saint Petersburg=-42.7/5.8/55.8, 322 | Saint-Pierre=-42.7/5.7/56.4, 323 | Salt Lake City=-37.9/11.6/61.9, 324 | San Antonio=-26.3/20.8/72.0, 325 | San Diego=-30.0/17.8/68.1, 326 | San Francisco=-33.9/14.6/66.8, 327 | San Jose=-36.2/16.4/67.2, 328 | San José=-30.0/22.6/69.1, 329 | San Juan=-20.6/27.2/76.2, 330 | San Salvador=-25.3/23.1/72.4, 331 | Sana'a=-32.6/20.0/69.1, 332 | Santo 
Domingo=-30.4/25.9/75.1, 333 | Sapporo=-40.1/8.9/59.2, 334 | Sarajevo=-44.2/10.1/60.5, 335 | Saskatoon=-47.4/3.3/53.9, 336 | Seattle=-35.7/11.3/66.6, 337 | Seoul=-34.7/12.5/66.5, 338 | Seville=-29.9/19.2/68.7, 339 | Shanghai=-32.1/16.7/65.8, 340 | Singapore=-23.6/27.0/77.4, 341 | Skopje=-35.6/12.4/60.5, 342 | Sochi=-35.4/14.2/65.3, 343 | Sofia=-38.0/10.6/58.2, 344 | Sokoto=-20.4/28.0/74.8, 345 | Split=-32.6/16.1/63.5, 346 | St. John's=-49.0/5.0/53.5, 347 | St. Louis=-36.1/13.9/63.5, 348 | Stockholm=-41.9/6.6/54.6, 349 | Surabaya=-23.9/27.1/76.4, 350 | Suva=-27.8/25.6/77.7, 351 | Suwałki=-44.2/7.2/66.7, 352 | Sydney=-32.6/17.7/69.8, 353 | Ségou=-26.1/28.0/78.1, 354 | Tabora=-25.1/23.0/70.4, 355 | Tabriz=-35.6/12.6/62.1, 356 | Taipei=-28.9/23.0/78.6, 357 | Tallinn=-42.7/6.4/61.5, 358 | Tamale=-20.3/27.9/78.0, 359 | Tamanrasset=-30.1/21.7/71.4, 360 | Tampa=-28.1/22.9/75.4, 361 | Tashkent=-34.2/14.8/64.8, 362 | Tauranga=-33.6/14.8/65.2, 363 | Tbilisi=-41.0/12.9/63.3, 364 | Tegucigalpa=-30.0/21.7/77.2, 365 | Tehran=-32.2/17.0/66.5, 366 | Tel Aviv=-29.9/20.0/74.3, 367 | Thessaloniki=-33.1/16.0/68.8, 368 | Thiès=-24.9/24.0/76.4, 369 | Tijuana=-29.2/17.8/65.2, 370 | Timbuktu=-22.0/28.0/76.4, 371 | Tirana=-37.5/15.2/67.2, 372 | Toamasina=-28.0/23.4/71.6, 373 | Tokyo=-32.1/15.4/65.4, 374 | Toliara=-27.0/24.1/77.2, 375 | Toluca=-35.3/12.4/63.1, 376 | Toronto=-39.8/9.4/58.8, 377 | Tripoli=-34.6/20.0/69.9, 378 | Tromsø=-45.1/2.9/52.6, 379 | Tucson=-29.5/20.9/69.2, 380 | Tunis=-30.3/18.4/66.9, 381 | Ulaanbaatar=-49.9/-0.4/49.8, 382 | Upington=-29.6/20.4/71.5, 383 | Vaduz=-37.3/10.1/61.2, 384 | Valencia=-32.4/18.3/66.9, 385 | Valletta=-32.4/18.8/69.0, 386 | Vancouver=-40.3/10.4/59.7, 387 | Veracruz=-24.0/25.4/75.9, 388 | Vienna=-37.4/10.4/67.2, 389 | Vientiane=-26.0/25.9/74.0, 390 | Villahermosa=-22.7/27.1/79.2, 391 | Vilnius=-43.1/6.0/55.1, 392 | Virginia Beach=-32.5/15.8/66.7, 393 | Vladivostok=-41.5/4.9/56.2, 394 | Warsaw=-43.9/8.5/56.4, 395 | Washington, 396 | 
D.C.=-41.8/14.6/63.5, 397 | Wau=-18.5/27.8/79.3, 398 | Wellington=-38.9/12.9/67.2, 399 | Whitehorse=-48.9/-0.1/50.5, 400 | Wichita=-36.2/13.9/66.9, 401 | Willemstad=-20.6/28.0/78.9, 402 | Winnipeg=-48.6/3.0/52.3, 403 | Wrocław=-36.2/9.6/62.0, 404 | Xi'an=-36.6/14.1/61.8, 405 | Yakutsk=-56.8/-8.8/46.8, 406 | Yangon=-22.1/27.5/80.4, 407 | Yaoundé=-25.1/23.8/80.3, 408 | Yellowknife=-52.5/-4.3/41.5, 409 | Yerevan=-36.9/12.4/63.5, 410 | Yinchuan=-42.5/9.0/60.4, 411 | Zagreb=-38.0/10.7/69.2, 412 | Zanzibar City=-26.6/26.0/74.5, 413 | Zürich=-39.7/9.3/57.2, 414 | Ürümqi=-40.3/7.4/54.8, 415 | İzmir=-29.2/17.9/68.8} -------------------------------------------------------------------------------- /src/main/nodejs/Edgar-P-yan/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "module": "es2022", 4 | "target": "es2022", 5 | "allowJs": true, 6 | "checkJs": true, 7 | "outDir": "dist", 8 | "types": ["node"] 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /src/main/nodejs/baseline/.gitignore: -------------------------------------------------------------------------------- 1 | # Based on https://raw.githubusercontent.com/github/gitignore/main/Node.gitignore 2 | 3 | # Logs 4 | 5 | logs 6 | *.log 7 | npm-debug.log* 8 | yarn-debug.log* 9 | yarn-error.log* 10 | lerna-debug.log* 11 | .pnpm-debug.log* 12 | 13 | # Caches 14 | 15 | .cache 16 | 17 | # Diagnostic reports (https://nodejs.org/api/report.html) 18 | 19 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 20 | 21 | # Runtime data 22 | 23 | pids 24 | *.pid 25 | *.seed 26 | *.pid.lock 27 | 28 | # Directory for instrumented libs generated by jscoverage/JSCover 29 | 30 | lib-cov 31 | 32 | # Coverage directory used by tools like istanbul 33 | 34 | coverage 35 | *.lcov 36 | 37 | # nyc test coverage 38 | 39 | .nyc_output 40 | 41 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 42 
| 43 | .grunt 44 | 45 | # Bower dependency directory (https://bower.io/) 46 | 47 | bower_components 48 | 49 | # node-waf configuration 50 | 51 | .lock-wscript 52 | 53 | # Compiled binary addons (https://nodejs.org/api/addons.html) 54 | 55 | build/Release 56 | 57 | # Dependency directories 58 | 59 | node_modules/ 60 | jspm_packages/ 61 | 62 | # Snowpack dependency directory (https://snowpack.dev/) 63 | 64 | web_modules/ 65 | 66 | # TypeScript cache 67 | 68 | *.tsbuildinfo 69 | 70 | # Optional npm cache directory 71 | 72 | .npm 73 | 74 | # Optional eslint cache 75 | 76 | .eslintcache 77 | 78 | # Optional stylelint cache 79 | 80 | .stylelintcache 81 | 82 | # Microbundle cache 83 | 84 | .rpt2_cache/ 85 | .rts2_cache_cjs/ 86 | .rts2_cache_es/ 87 | .rts2_cache_umd/ 88 | 89 | # Optional REPL history 90 | 91 | .node_repl_history 92 | 93 | # Output of 'npm pack' 94 | 95 | *.tgz 96 | 97 | # Yarn Integrity file 98 | 99 | .yarn-integrity 100 | 101 | # dotenv environment variable files 102 | 103 | .env 104 | .env.development.local 105 | .env.test.local 106 | .env.production.local 107 | .env.local 108 | 109 | # parcel-bundler cache (https://parceljs.org/) 110 | 111 | .parcel-cache 112 | 113 | # Next.js build output 114 | 115 | .next 116 | out 117 | 118 | # Nuxt.js build / generate output 119 | 120 | .nuxt 121 | dist 122 | 123 | # Gatsby files 124 | 125 | # Comment in the public line in if your project uses Gatsby and not Next.js 126 | 127 | # https://nextjs.org/blog/next-9-1#public-directory-support 128 | 129 | # public 130 | 131 | # vuepress build output 132 | 133 | .vuepress/dist 134 | 135 | # vuepress v2.x temp and cache directory 136 | 137 | .temp 138 | 139 | # Docusaurus cache and generated files 140 | 141 | .docusaurus 142 | 143 | # Serverless directories 144 | 145 | .serverless/ 146 | 147 | # FuseBox cache 148 | 149 | .fusebox/ 150 | 151 | # DynamoDB Local files 152 | 153 | .dynamodb/ 154 | 155 | # TernJS port file 156 | 157 | .tern-port 158 | 159 | # Stores VSCode 
versions used for testing VSCode extensions 160 | 161 | .vscode-test 162 | 163 | # yarn v2 164 | 165 | .yarn/cache 166 | .yarn/unplugged 167 | .yarn/build-state.yml 168 | .yarn/install-state.gz 169 | .pnp.* 170 | 171 | # IntelliJ based IDEs 172 | .idea 173 | 174 | # Finder (MacOS) folder config 175 | .DS_Store 176 | -------------------------------------------------------------------------------- /src/main/nodejs/baseline/index.js: -------------------------------------------------------------------------------- 1 | import * as readline from 'node:readline'; 2 | import * as fs from 'node:fs'; 3 | 4 | const fileName = process.argv[2]; 5 | const stream = fs.createReadStream(fileName); 6 | const lineStream = readline.createInterface(stream); 7 | 8 | const aggregations = new Map(); 9 | 10 | for await (const line of lineStream) { 11 | const [stationName, temperatureStr] = line.split(';'); 12 | 13 | // use integers for computation to avoid loosing precision 14 | const temperature = Math.floor(parseFloat(temperatureStr) * 10); 15 | 16 | const existing = aggregations.get(stationName); 17 | 18 | if (existing) { 19 | existing.min = Math.min(existing.min, temperature); 20 | existing.max = Math.max(existing.max, temperature); 21 | existing.sum += temperature; 22 | existing.count++; 23 | } else { 24 | aggregations.set(stationName, { 25 | min: temperature, 26 | max: temperature, 27 | sum: temperature, 28 | count: 1, 29 | }); 30 | } 31 | } 32 | 33 | printCompiledResults(aggregations); 34 | 35 | /** 36 | * @param {Map} aggregations 37 | * 38 | * @returns {void} 39 | */ 40 | function printCompiledResults(aggregations) { 41 | const sortedStations = Array.from(aggregations.keys()).sort(); 42 | 43 | let result = 44 | '{' + 45 | sortedStations 46 | .map((station) => { 47 | const data = aggregations.get(station); 48 | return `${station}=${round(data.min / 10)}/${round( 49 | data.sum / 10 / data.count 50 | )}/${round(data.max / 10)}`; 51 | }) 52 | .join(', ') + 53 | '}'; 54 | 55 | 
console.log(result); 56 | } 57 | 58 | /** 59 | * @example 60 | * round(1.2345) // "1.2" 61 | * round(1.55) // "1.6" 62 | * round(1) // "1.0" 63 | * 64 | * @param {number} num 65 | * 66 | * @returns {string} 67 | */ 68 | function round(num) { 69 | const fixed = Math.round(10 * num) / 10; 70 | 71 | return fixed.toFixed(1); 72 | } 73 | -------------------------------------------------------------------------------- /src/main/nodejs/baseline/package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "nodejs", 3 | "lockfileVersion": 3, 4 | "requires": true, 5 | "packages": { 6 | "": { 7 | "devDependencies": { 8 | "@types/node": "^20.10.6" 9 | } 10 | }, 11 | "node_modules/@types/node": { 12 | "version": "20.10.6", 13 | "resolved": "https://registry.npmjs.org/@types/node/-/node-20.10.6.tgz", 14 | "integrity": "sha512-Vac8H+NlRNNlAmDfGUP7b5h/KA+AtWIzuXy0E6OyP8f1tCLYAtPvKRRDJjAPqhpCb0t6U2j7/xqAuLEebW2kiw==", 15 | "dev": true, 16 | "dependencies": { 17 | "undici-types": "~5.26.4" 18 | } 19 | }, 20 | "node_modules/undici-types": { 21 | "version": "5.26.5", 22 | "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", 23 | "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", 24 | "dev": true 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/main/nodejs/baseline/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "module", 3 | "module": "es2022", 4 | "devDependencies": { 5 | "@types/node": "^20.10.6" 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /src/main/nodejs/baseline/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "module": "es2022", 4 | "target": "es2022", 5 | "allowJs": true, 6 | 
"checkJs": true, 7 | "outDir": "dist", 8 | "types": ["node"] 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /src/main/resources/.dontdelete: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/1brc/nodejs/9975dfbfa32143fb7502e63374a48f9fde1c381d/src/main/resources/.dontdelete -------------------------------------------------------------------------------- /src/test/resources/.dontdelete: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/1brc/nodejs/9975dfbfa32143fb7502e63374a48f9fde1c381d/src/test/resources/.dontdelete -------------------------------------------------------------------------------- /src/test/resources/samples/measurements-1.out: -------------------------------------------------------------------------------- 1 | {Kunming=19.8/19.8/19.8} 2 | -------------------------------------------------------------------------------- /src/test/resources/samples/measurements-1.txt: -------------------------------------------------------------------------------- 1 | Kunming;19.8 2 | -------------------------------------------------------------------------------- /src/test/resources/samples/measurements-10.out: -------------------------------------------------------------------------------- 1 | {Adelaide=15.0/15.0/15.0, Cabo San Lucas=14.9/14.9/14.9, Dodoma=22.2/22.2/22.2, Halifax=12.9/12.9/12.9, Karachi=15.4/15.4/15.4, Pittsburgh=9.7/9.7/9.7, Ségou=25.7/25.7/25.7, Tauranga=38.2/38.2/38.2, Xi'an=24.2/24.2/24.2, Zagreb=12.2/12.2/12.2} 2 | -------------------------------------------------------------------------------- /src/test/resources/samples/measurements-10.txt: -------------------------------------------------------------------------------- 1 | Halifax;12.9 2 | Zagreb;12.2 3 | Cabo San Lucas;14.9 4 | Adelaide;15.0 5 | Ségou;25.7 6 | Pittsburgh;9.7 7 | Karachi;15.4 8 | Xi'an;24.2 9 | 
Dodoma;22.2 10 | Tauranga;38.2 11 | -------------------------------------------------------------------------------- /src/test/resources/samples/measurements-2.out: -------------------------------------------------------------------------------- 1 | {Bosaso=19.2/19.2/19.2, Petropavlovsk-Kamchatsky=9.5/9.5/9.5} 2 | -------------------------------------------------------------------------------- /src/test/resources/samples/measurements-2.txt: -------------------------------------------------------------------------------- 1 | Bosaso;19.2 2 | Petropavlovsk-Kamchatsky;9.5 3 | -------------------------------------------------------------------------------- /src/test/resources/samples/measurements-20.out: -------------------------------------------------------------------------------- 1 | {Abéché1️⃣🐝🏎️=27.3/27.3/27.3, Almaty1️⃣🐝🏎️=15.3/15.3/15.3, Baghdad1️⃣🐝🏎️=26.0/26.0/26.0, Bangkok1️⃣🐝🏎️=25.6/25.6/25.6, Berlin1️⃣🐝🏎️=-0.3/-0.3/-0.3, Birao1️⃣🐝🏎️=33.5/33.5/33.5, Canberra1️⃣🐝🏎️=5.2/5.2/5.2, Chittagong1️⃣🐝🏎️=12.6/12.6/12.6, Da Nang1️⃣🐝🏎️=33.7/33.7/33.7, Edinburgh1️⃣🐝🏎️=19.8/19.8/19.8, Irkutsk1️⃣🐝🏎️=9.9/9.9/9.9, Lhasa1️⃣🐝🏎️=13.4/13.4/13.4, Lyon1️⃣🐝🏎️=1.8/1.8/1.8, Mogadishu1️⃣🐝🏎️=11.5/11.5/11.5, Nashville1️⃣🐝🏎️=-4.9/-4.9/-4.9, Odesa1️⃣🐝🏎️=6.5/6.5/6.5, Parakou1️⃣🐝🏎️=36.3/36.3/36.3, Tamanrasset1️⃣🐝🏎️=17.9/17.9/17.9, Tirana1️⃣🐝🏎️=27.7/27.7/27.7, Xi'an1️⃣🐝🏎️=17.5/17.5/17.5} 2 | -------------------------------------------------------------------------------- /src/test/resources/samples/measurements-20.txt: -------------------------------------------------------------------------------- 1 | Odesa1️⃣🐝🏎️;6.5 2 | Canberra1️⃣🐝🏎️;5.2 3 | Lhasa1️⃣🐝🏎️;13.4 4 | Edinburgh1️⃣🐝🏎️;19.8 5 | Da Nang1️⃣🐝🏎️;33.7 6 | Xi'an1️⃣🐝🏎️;17.5 7 | Berlin1️⃣🐝🏎️;-0.3 8 | Tamanrasset1️⃣🐝🏎️;17.9 9 | Abéché1️⃣🐝🏎️;27.3 10 | Baghdad1️⃣🐝🏎️;26.0 11 | Lyon1️⃣🐝🏎️;1.8 12 | Mogadishu1️⃣🐝🏎️;11.5 13 | Bangkok1️⃣🐝🏎️;25.6 14 | Irkutsk1️⃣🐝🏎️;9.9 15 | Parakou1️⃣🐝🏎️;36.3 16 | Almaty1️⃣🐝🏎️;15.3 17 | Birao1️⃣🐝🏎️;33.5 18 | 
Chittagong1️⃣🐝🏎️;12.6 19 | Tirana1️⃣🐝🏎️;27.7 20 | Nashville1️⃣🐝🏎️;-4.9 21 | -------------------------------------------------------------------------------- /src/test/resources/samples/measurements-3.out: -------------------------------------------------------------------------------- 1 | {Bosaso=-15.0/1.3/20.0, Petropavlovsk-Kamchatsky=-9.5/0.0/9.5} 2 | -------------------------------------------------------------------------------- /src/test/resources/samples/measurements-3.txt: -------------------------------------------------------------------------------- 1 | Bosaso;5.0 2 | Bosaso;20.0 3 | Bosaso;-5.0 4 | Bosaso;-15.0 5 | Petropavlovsk-Kamchatsky;9.5 6 | Petropavlovsk-Kamchatsky;-9.5 7 | -------------------------------------------------------------------------------- /src/test/resources/samples/measurements-boundaries.out: -------------------------------------------------------------------------------- 1 | {Bosaso=-99.9/-99.9/-99.9, Petropavlovsk-Kamchatsky=99.9/99.9/99.9} 2 | -------------------------------------------------------------------------------- /src/test/resources/samples/measurements-boundaries.txt: -------------------------------------------------------------------------------- 1 | Bosaso;-99.9 2 | Petropavlovsk-Kamchatsky;99.9 3 | -------------------------------------------------------------------------------- /src/test/resources/samples/measurements-complex-utf8.out: -------------------------------------------------------------------------------- 1 | {B=8.9/8.9/8.9, C=38.9/38.9/38.9, CabindaKermānZunhuaRochesterValenzuelaOrūmīyehWugangShuangqiaoTshikapa=3.0/3.0/3.0, ChesterLobnyaSan LeandroHemeiSolweziGrand BourgKaliboS=23.4/23.4/23.4, MirnaPehčevoRopažiGus=16.7/16.7/16.7, PototanSahuayo de MorelosBambergMosigkauFrancisco BeltrãoJelenia GóraTelêmaco Borb=17.5/17.5/17.5, TanjungpinangKasselHaldiaLuxorLạng SơnAt TājīTaraka=10.6/10.6/10.6, aniCartagoEṭ ṬīraTemerinCormeilles-en-ParisisZawyat ech CheïkhS=25.4/25.4/25.4, burgazAl 
ḨawīyahSalamancaMbanza KongoNchelengeZhangaözenTurbatMatiMangghystaūMalak=21.5/21.5/21.5, cotánSan Ramón de la Nueva OránWausauGbaweTailaiRochester HillsVilla ElisaToba TekS=11.2/11.2/11.2, eLafayetteAsh Shaţ=14.2/14.2/14.2, en IslandKota BharuCiudad López MateosCelayaVinhDuyunLos Mochis‘AjmānNyalaLarkanaWichitaNishi=11.9/11.9/11.9, epé=28.2/28.2/28.2, hanVarkkallaiPort LokoD=10.9/10.9/10.9, iCoahuitlánRabatJahāngīrpur SālkhaniCamUniversity of California-Santa BarbaraSerravalleTelkathuM=13.4/13.4/13.4, igButeboJuršinciKoaniImdinaNova VasDestrnikVarvarinSkomunGornji PetrovciRibnicaKon TumŠavnikPoul=22.5/22.5/22.5, igButeboJuršinciKoaniImdinaNova VasDestrnikVarvarinSkopunGornji PetrovciRibnicaKon TumŠavnikPodl=11.5/11.5/11.5, igButeboJuršinciKoaniImdinaNova VasDestrnikVarvarinSkopunGornji PetrovciRibnicaKon TumŠavnikPoul=18.5/18.5/18.5, inhoSökeDordrechtPoáLaloG=13.1/13.1/13.1, iudad Melchor MúzquizQuinhámelDa=40.5/40.5/40.5, ixButeboJuršinciKoaniImdinaNova VasDestrnikVarvarinSkomunGornji PetrovciRibnicaKon TumŠavnikPoul=0.1/0.1/0.1, l ‘=14.6/14.6/14.6, lhuleuTacurongNavapolatskPiscoDera Ismail KhanLabéAltamiraCavite CityYevpatoriiaTait=22.8/22.8/22.8, liLoretoPlacentiaAliso ViejoChomaPen-y-Bont ar OgwrCojutepeque=12.4/12.4/12.4, lioúpoliBarahonaHoPhuketLe BardoBuena ParkKayesChampigny-sur-MarneHaskovoChathamBatleyEsteioRe=22.5/22.5/22.5, m el Bo=14.6/14.6/14.6, mazunchaleZrenjaninFouchanaSurtPanč=6.7/6.7/6.7, ngoDübendorfC=11.7/11.7/11.7, nt-A=9.2/9.2/9.2, ntington StationKampong SpeuKakataMoschátoBressoVentspilsSaint-CloudTamboSidi Smai’ilDandenon=14.6/14.6/14.6, oCanagatanHelsinkiJabalpurProvidenceRuchengNizhniy NovgorodAhvāzJeparaShaoyangComayagüe=17.3/17.3/17.3, oGumlāSamā’=14.9/14.9/14.9, os Reyes de SalgadoCinisello BalsamoKashibaH=20.0/20.0/20.0, picuíbaJhang CityTepicJayapuraRio BrancoToyamaFangtingSanandajDelhi CantonmentLinghaiShorāpurToy=13.0/13.0/13.0, raKielSibuYatoParanáSanta ClaraYamagataKatihārBeykozImperat=13.5/13.5/13.5, rhamDera Ghazi 
KhanMiyazakiBhātpār=21.3/21.3/21.3, rugarhVerāvalAlagoinhasEdremitBandırmaSalavatGandajikaLucapaLeesburgTamaRas Tan=10.9/10.9/10.9, skişeh=12.9/12.9/12.9, venGaopingDunhuaAz Zarqā’SylhetKaihuaCaerdyddJāmnagarFuyuanGayaFlorianópolisC=1.9/1.9/1.9, y-le-MoutierSant’ArpinoPljevljaRo=0.8/0.8/0.8, ça PaulistaDarmstadtZhengdingPindamonhangabaEnschedeGirónUttarpāraHeidelbergK=6.0/6.0/6.0, üSosnowiecTanauanMya=18.4/18.4/18.4, ālSongnimSanto TomasKoiduHoshangābādOpoleNovocheboksarskArarasKhannaPunoKoforiduaAhmadpur E=19.4/19.4/19.4, āng=15.7/15.7/15.7, ġFis=9.6/9.6/9.6, ‘AqabahPembaNowgongQu=12.9/12.9/12.9} 2 | -------------------------------------------------------------------------------- /src/test/resources/samples/measurements-complex-utf8.txt: -------------------------------------------------------------------------------- 1 | aniCartagoEṭ ṬīraTemerinCormeilles-en-ParisisZawyat ech CheïkhS;25.4 2 | picuíbaJhang CityTepicJayapuraRio BrancoToyamaFangtingSanandajDelhi CantonmentLinghaiShorāpurToy;13.0 3 | lhuleuTacurongNavapolatskPiscoDera Ismail KhanLabéAltamiraCavite CityYevpatoriiaTait;22.8 4 | āng;15.7 5 | hanVarkkallaiPort LokoD;10.9 6 | eLafayetteAsh Shaţ;14.2 7 | ‘AqabahPembaNowgongQu;12.9 8 | inhoSökeDordrechtPoáLaloG;13.1 9 | skişeh;12.9 10 | rhamDera Ghazi KhanMiyazakiBhātpār;21.3 11 | igButeboJuršinciKoaniImdinaNova VasDestrnikVarvarinSkopunGornji PetrovciRibnicaKon TumŠavnikPodl;11.5 12 | igButeboJuršinciKoaniImdinaNova VasDestrnikVarvarinSkopunGornji PetrovciRibnicaKon TumŠavnikPoul;18.5 13 | igButeboJuršinciKoaniImdinaNova VasDestrnikVarvarinSkomunGornji PetrovciRibnicaKon TumŠavnikPoul;22.5 14 | ixButeboJuršinciKoaniImdinaNova VasDestrnikVarvarinSkomunGornji PetrovciRibnicaKon TumŠavnikPoul;0.1 15 | B;8.9 16 | C;38.9 17 | nt-A;9.2 18 | y-le-MoutierSant’ArpinoPljevljaRo;0.8 19 | oGumlāSamā’;14.9 20 | os Reyes de SalgadoCinisello BalsamoKashibaH;20.0 21 | m el Bo;14.6 22 | mazunchaleZrenjaninFouchanaSurtPanč;6.7 23 | ġFis;9.6 24 | epé;28.2 25 | ālSongnimSanto 
TomasKoiduHoshangābādOpoleNovocheboksarskArarasKhannaPunoKoforiduaAhmadpur E;19.4 26 | iudad Melchor MúzquizQuinhámelDa;40.5 27 | ChesterLobnyaSan LeandroHemeiSolweziGrand BourgKaliboS;23.4 28 | cotánSan Ramón de la Nueva OránWausauGbaweTailaiRochester HillsVilla ElisaToba TekS;11.2 29 | raKielSibuYatoParanáSanta ClaraYamagataKatihārBeykozImperat;13.5 30 | l ‘;14.6 31 | TanjungpinangKasselHaldiaLuxorLạng SơnAt TājīTaraka;10.6 32 | MirnaPehčevoRopažiGus;16.7 33 | üSosnowiecTanauanMya;18.4 34 | ngoDübendorfC;11.7 35 | liLoretoPlacentiaAliso ViejoChomaPen-y-Bont ar OgwrCojutepeque;12.4 36 | burgazAl ḨawīyahSalamancaMbanza KongoNchelengeZhangaözenTurbatMatiMangghystaūMalak;21.5 37 | iCoahuitlánRabatJahāngīrpur SālkhaniCamUniversity of California-Santa BarbaraSerravalleTelkathuM;13.4 38 | lioúpoliBarahonaHoPhuketLe BardoBuena ParkKayesChampigny-sur-MarneHaskovoChathamBatleyEsteioRe;22.5 39 | PototanSahuayo de MorelosBambergMosigkauFrancisco BeltrãoJelenia GóraTelêmaco Borb;17.5 40 | CabindaKermānZunhuaRochesterValenzuelaOrūmīyehWugangShuangqiaoTshikapa;3.0 41 | venGaopingDunhuaAz Zarqā’SylhetKaihuaCaerdyddJāmnagarFuyuanGayaFlorianópolisC;1.9 42 | ntington StationKampong SpeuKakataMoschátoBressoVentspilsSaint-CloudTamboSidi Smai’ilDandenon;14.6 43 | rugarhVerāvalAlagoinhasEdremitBandırmaSalavatGandajikaLucapaLeesburgTamaRas Tan;10.9 44 | oCanagatanHelsinkiJabalpurProvidenceRuchengNizhniy NovgorodAhvāzJeparaShaoyangComayagüe;17.3 45 | ça PaulistaDarmstadtZhengdingPindamonhangabaEnschedeGirónUttarpāraHeidelbergK;6.0 46 | en IslandKota BharuCiudad López MateosCelayaVinhDuyunLos Mochis‘AjmānNyalaLarkanaWichitaNishi;11.9 47 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2023 The original authors 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in 
set -euo pipefail

# Runs ./calculate_average_<fork name>.sh against every sample input under
# src/test/resources/samples and diffs its output with the matching .out file.

# ${1:-} keeps `set -u` from aborting with "unbound variable" before the usage
# message can be printed when no argument is given.
if [ -z "${1:-}" ]; then
  echo "Usage: test.sh <fork name>"
  exit 1
fi

# Iterate with a glob instead of parsing `ls` output so that paths are never
# word-split or re-globbed.
for sample in src/test/resources/samples/*.txt; do
  # An unmatched glob is left as the literal pattern; skip it.
  [ -e "$sample" ] || continue

  echo "Validating calculate_average_$1.sh -- $sample"

  # The implementation under test reads ./measurements.txt, so point it at
  # the current sample through a symlink.
  rm -f measurements.txt
  ln -s "$sample" measurements.txt

  # With `set -e`, the first differing sample stops the run with diff's status.
  diff <("./calculate_average_$1.sh") "${sample%.txt}.out"
done
rm measurements.txt
set -euo pipefail

# Runs test.sh for every calculate_average_<name>.sh in the current directory
# and prints one PASS/FAIL line per implementation; the captured output of a
# failing run is forwarded to stderr.

# A glob expands in sorted order already and, unlike `$(ls ... | sort)`, is not
# subject to word splitting of the file names.
for impl in calculate_average_*.sh; do
  # An unmatched glob is left as the literal pattern; skip it.
  [ -e "$impl" ] || continue

  # calculate_average_<name>.sh -> <name>
  noext="${impl%%.sh}"
  name=${noext##calculate_average_}

  if output=$(./test.sh "$name" 2>&1); then
    echo "PASS $name"
  else
    echo "FAIL $name"
    echo "$output" 1>&2
  fi
done